#!/usr/bin/env python3
"""
Fix malformed file entries in the database.
Corrects files that were added with malformed paths.
"""

import sys
import re
from db_utils import MigrationDB

# A primary_folder that starts with '<?php' and is shorter than this many
# characters is treated as truncated: the row is deleted rather than repaired.
_TRUNCATED_PHP_MAX_LEN = 30

# Prefixes removed from a reassembled "primary/sub/name" path.
_BASEURL_PREFIX_RE = re.compile(r'^\$baseurl/')
_PHP_TAG_PREFIX_RE = re.compile(r'^<\?php[^>]*\?>/')


def _extract_full_path(primary, sub, name):
    """Return the slash-separated path recovered from one malformed record.

    When *primary* starts with ``<?php`` the path is taken from *primary*
    alone (*sub* and *name* are not consulted); otherwise the three columns
    are joined and a leading ``$baseurl/`` or ``<?php ... ?>/`` is removed.
    """
    if primary.startswith('<?php'):
        # Extract path from PHP print statement.
        # Example: <?php print "/intranet/databases/crm/file.php
        # NOTE(review): a closing quote / "; ?>" following the path is not
        # stripped here; the example above suggests the stored values are cut
        # off before it -- confirm against real data.
        for opener in ('print "', "print '"):
            if opener in primary:
                return primary.split(opener)[1].strip('/')
        return primary.replace('<?php', '').strip('/')

    # Reconstruct the full path, then drop the templating prefix.
    full_path = f"{primary}/{sub}/{name}".strip('/')
    full_path = _BASEURL_PREFIX_RE.sub('', full_path)
    return _PHP_TAG_PREFIX_RE.sub('', full_path)


def _split_path(full_path):
    """Split *full_path* into ``(primary_folder, sub_folder, file_name)``.

    Empty segments are discarded and a leading ``intranet`` segment is
    skipped. Everything after the second folder is kept, slash-joined, as the
    file name. Returns ``None`` when no segments remain.
    """
    parts = [p for p in full_path.split('/') if p]

    # Skip 'intranet' if it's the first part
    if parts and parts[0] == 'intranet':
        parts = parts[1:]

    if not parts:
        return None
    if len(parts) == 1:
        return '', '', parts[0]
    if len(parts) == 2:
        return parts[0], '', parts[1]
    return parts[0], parts[1], '/'.join(parts[2:])


def fix_malformed_files():
    """Fix malformed file paths in the database.

    Selects every row of ``files`` whose ``primary_folder`` or ``sub_folder``
    contains ``$baseurl`` or ``<?php``. Rows whose ``primary_folder`` is a
    short ``<?php`` fragment are deleted; all other rows have their
    ``primary_folder`` / ``sub_folder`` / ``file_name`` columns rewritten from
    the recovered path. Changes are committed once at the end, progress is
    printed along the way, and database statistics are printed afterwards.

    NULL column values are treated as empty strings so they neither crash the
    string handling nor end up in the rewritten path as the text ``None``.
    """
    db = MigrationDB()

    print("🔧 Fixing malformed file entries in database...")
    print("=" * 50)

    total_fixed = 0

    with db.get_connection() as conn:
        # Find files with malformed primary_folder or sub_folder.
        # fetchall() materialises the rows up front so the DELETE/UPDATE
        # statements below do not run against a live cursor.
        malformed_files = conn.execute("""
            SELECT id, primary_folder, sub_folder, file_name
            FROM files
            WHERE primary_folder LIKE '%$baseurl%'
                OR sub_folder LIKE '%$baseurl%'
                OR primary_folder LIKE '%<?php%'
                OR sub_folder LIKE '%<?php%'
        """).fetchall()

        for file_record in malformed_files:
            file_id = file_record['id']
            # A row can match on one column while another is NULL; normalise
            # to '' before any string operation.
            primary = file_record['primary_folder'] or ''
            sub = file_record['sub_folder'] or ''
            name = file_record['file_name'] or ''

            print(f"Fixing file ID {file_id}: {primary}/{sub}/{name}")

            is_php_fragment = primary.startswith('<?php')

            # Special handling for truncated entries: nothing to recover.
            if is_php_fragment and len(primary) < _TRUNCATED_PHP_MAX_LEN:
                print(f"  Truncated malformed entry, deleting file ID {file_id}")
                conn.execute("DELETE FROM files WHERE id = ?", (file_id,))
                total_fixed += 1
                continue

            if is_php_fragment:
                print(f"  Debug: primary = '{primary}'")
            full_path = _extract_full_path(primary, sub, name)
            if is_php_fragment:
                print(f"  Debug: extracted full_path = '{full_path}'")

            # Now parse the corrected path
            corrected = _split_path(full_path)
            if corrected is None:
                print(f"  Warning: No parts found for {full_path}, skipping")
                continue
            new_primary, new_sub, new_name = corrected

            print(f"  Corrected to: {new_primary}/{new_sub}/{new_name}")

            # Update the database
            conn.execute("""
                UPDATE files
                SET primary_folder = ?, sub_folder = ?, file_name = ?
                WHERE id = ?
            """, (new_primary, new_sub, new_name, file_id))

            total_fixed += 1

        conn.commit()

    print(f"\n✅ Fixed {total_fixed} malformed file entries")

    # Show updated statistics
    stats = db.get_statistics()
    print("\n📊 Database Statistics:")
    print(f"   Total files: {stats['total_files']}")
    print(f"   Links scanned: {stats['links_scanned']}")
    print(f"   Total links: {stats['total_links']}")

# Run the repair only when this file is executed as a script, not on import.
if __name__ == "__main__":
    fix_malformed_files()