#!/usr/bin/env python3
"""
Data migration script for PHP migration tracking system.
Imports existing data from progress.md and TO_DO.md into the database.
"""

import os
import re
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from db_utils import MigrationDB

def parse_progress_md(file_path: str) -> List[Dict]:
    """
    Parse progress.md file and extract file records.

    The table is located by its header row (a line starting with
    ``| Primary Folder``). Every following pipe-delimited row with at least
    six cells is read as: primary folder, sub folder, file name, PHP8 status,
    tested status, links-scanned status. Rows without a file name and
    markdown separator rows (``|---|``, ``| --- | :---: |`` ...) are skipped.

    Args:
        file_path: Path to progress.md file

    Returns:
        List of file record dictionaries. The list is empty when the file is
        missing, cannot be read, or contains no table header.
    """
    records: List[Dict] = []

    if not os.path.exists(file_path):
        print(f"Warning: {file_path} not found")
        return records

    print(f"📖 Reading {file_path}...")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        # Best-effort import: report the problem and return nothing.
        print(f"Error reading {file_path}: {e}")
        return records

    # Find table start
    table_start = None
    for i, line in enumerate(lines):
        if line.strip().startswith('| Primary Folder'):
            table_start = i + 1  # Skip header line
            break

    if table_start is None:
        print("Warning: Table header not found in progress.md")
        return records

    truthy = ('yes', 'y', '1', 'true')
    # A separator cell is dashes with optional alignment colons: ---, :--, :-:
    separator_cell = re.compile(r':?-+:?')

    # Parse data rows
    for i, line in enumerate(lines[table_start:], start=table_start):
        line = line.strip()
        if not line.startswith('|') or line.startswith('|--'):
            continue

        # Drop the leading pipe and the trailing one only if present, so a
        # row written without a closing pipe keeps its last cell.
        body = line[1:]
        if body.endswith('|'):
            body = body[:-1]
        parts = [p.strip() for p in body.split('|')]

        if len(parts) < 6:
            continue

        # Separator rows written with spaces or alignment colons do not start
        # with '|--'; without this check they would be imported as a file
        # named '---'.
        if all(separator_cell.fullmatch(p) for p in parts):
            continue

        primary_folder, sub_folder, file_name = parts[0], parts[1], parts[2]
        php8_status, tested_status, links_status = parts[3], parts[4], parts[5]

        if not file_name:
            continue

        # Convert status values
        php8_rewritten = 1 if php8_status.lower() in truthy else 0
        tested = 1 if tested_status.lower() in truthy else 0
        links_scanned = 1 if links_status.lower() in truthy else 0

        # A file that is both rewritten and tested is marked 'keep';
        # everything else starts as 'new'.
        status = 'keep' if php8_rewritten and tested else 'new'

        records.append({
            'primary_folder': primary_folder,
            'sub_folder': sub_folder,
            'file_name': file_name,
            'status': status,
            'php8_rewritten': php8_rewritten,
            'tested': tested,
            'links_scanned': links_scanned,
            # i is the 0-based index into the file, so i + 1 is the line number
            'notes': f'Migrated from progress.md line {i+1}'
        })

    print(f"✅ Parsed {len(records)} records from progress.md")
    return records

def parse_todo_md(file_path: str) -> List[str]:
    """
    Parse TO_DO.md file and extract file paths.

    Only lines that (after stripping whitespace) start with ``/intranet/``
    are kept; every other line is ignored.

    Args:
        file_path: Path to TO_DO.md file

    Returns:
        List of normalized, forward-slash file paths. The list is empty when
        the file is missing or cannot be read.
    """
    # The entries are '/'-separated paths that are later matched against
    # '/intranet/' and split on '/'. os.path.normpath would rewrite them with
    # backslashes on Windows, so normalize with POSIX rules on every platform.
    import posixpath

    file_paths: List[str] = []

    if not os.path.exists(file_path):
        print(f"Warning: {file_path} not found")
        return file_paths

    print(f"📖 Reading {file_path}...")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        # Best-effort import: report the problem and return nothing.
        print(f"Error reading {file_path}: {e}")
        return file_paths

    for line in lines:
        line = line.strip()
        if line.startswith('/intranet/'):
            # Collapse '//' and '.' / '..' segments
            file_paths.append(posixpath.normpath(line))

    print(f"✅ Found {len(file_paths)} file paths in TO_DO.md")
    return file_paths

def convert_todo_paths_to_records(file_paths: List[str]) -> List[Dict]:
    """
    Turn TO_DO.md file paths into database record dictionaries.

    A leading ``/intranet/`` is stripped, then the remainder is split on
    ``/``: the first segment becomes the primary folder, the second the sub
    folder, and whatever is left (re-joined with ``/``) the file name.
    Paths whose file name does not end in ``.php`` are dropped.

    Args:
        file_paths: List of file paths from TO_DO.md

    Returns:
        List of file record dictionaries, all with status 'new' and every
        completion flag set to 0
    """
    prefix = '/intranet/'
    records = []

    for raw_path in file_paths:
        # Work relative to the intranet root when the prefix is present
        relative = raw_path[len(prefix):] if raw_path.startswith(prefix) else raw_path

        segments = relative.split('/')
        depth = len(segments)  # str.split always yields at least one segment

        if depth == 1:
            # File in root
            folder, sub, name = '', '', segments[0]
        elif depth == 2:
            # File directly inside a primary folder
            folder, sub, name = segments[0], '', segments[1]
        else:
            # File in a sub-folder or deeper; deeper levels stay in the name
            folder, sub = segments[0], segments[1]
            name = '/'.join(segments[2:])

        # Only PHP files are tracked
        if name.endswith('.php'):
            records.append({
                'primary_folder': folder,
                'sub_folder': sub,
                'file_name': name,
                'status': 'new',  # Files in TODO are typically new/unprocessed
                'php8_rewritten': 0,
                'tested': 0,
                'links_scanned': 0,
                'notes': 'Migrated from TO_DO.md'
            })

    print(f"✅ Converted {len(records)} TODO paths to records")
    return records

def merge_records(progress_records: List[Dict], todo_records: List[Dict]) -> List[Dict]:
    """
    Merge progress and TODO records, giving priority to progress records.

    Records are identified by the (primary_folder, sub_folder, file_name)
    triple. All progress records are kept as given, in order. A TODO record
    is appended only if its key has not been seen yet, so a path listed more
    than once in TO_DO.md is added a single time.

    Args:
        progress_records: Records from progress.md
        todo_records: Records from TO_DO.md

    Returns:
        Merged list of records (a new list; the inputs are not modified)
    """
    def _key(record: Dict) -> Tuple[str, str, str]:
        return (record['primary_folder'], record['sub_folder'], record['file_name'])

    # Only membership matters, so track keys in a set
    seen = {_key(record) for record in progress_records}

    # Add TODO records that aren't in progress
    merged = list(progress_records)
    added_from_todo = 0

    for todo_record in todo_records:
        key = _key(todo_record)
        if key in seen:
            continue
        # Remember the key so a repeated TODO entry is not added twice
        seen.add(key)
        merged.append(todo_record)
        added_from_todo += 1

    print(f"✅ Merged records: {len(progress_records)} from progress.md + {added_from_todo} from TO_DO.md = {len(merged)} total")
    return merged

def import_records_to_database(records: List[Dict], overwrite: bool = False) -> None:
    """
    Import records into the database.

    For each record the database is queried by (primary_folder, sub_folder,
    file_name). If a row exists and ``overwrite`` is False, only its notes
    are touched: the record's migration note is appended unless the existing
    notes already contain 'Migrated from'. Otherwise the record is passed to
    ``db.add_file``. A summary of the counts is printed at the end.

    Args:
        records: List of file records to import
        overwrite: If True, existing rows are not preserved; the record is
            sent to ``add_file`` as if it were new
    """
    db = MigrationDB()

    print(f"📥 Importing {len(records)} records to database...")

    imported = 0
    updated = 0
    skipped = 0

    for record in records:
        # Check if record already exists
        existing = db.get_file_by_path(
            record['primary_folder'],
            record['sub_folder'],
            record['file_name']
        )

        if existing and not overwrite:
            # 'notes' may be present but None (e.g. a NULL column); fall back
            # to '' so the substring test below cannot raise TypeError.
            current_notes = existing.get('notes') or ''
            if 'Migrated from' not in current_notes:
                # Update notes to indicate migration source
                new_notes = f"{current_notes}\n{record['notes']}" if current_notes else record['notes']
                db.update_file_notes(existing['id'], new_notes)
                updated += 1
            else:
                skipped += 1
        else:
            # Add new record or overwrite existing.
            # NOTE(review): whether add_file replaces or duplicates an
            # existing row depends on MigrationDB — confirm for overwrite=True.
            db.add_file(
                record['primary_folder'],
                record['sub_folder'],
                record['file_name'],
                record['status'],
                record['php8_rewritten'],
                record['tested'],
                record['links_scanned'],
                1,  # file_exists
                0,  # checked
                0,  # key_file
                0,  # isLive
                record['notes']
            )
            imported += 1

    print(f"✅ Import complete:")
    print(f"   - New records imported: {imported}")
    print(f"   - Existing records updated: {updated}")
    print(f"   - Records skipped: {skipped}")

def backup_existing_files(backup_dir: str = '../Backups',
                          files_to_backup: Optional[List[str]] = None) -> str:
    """
    Create backup of existing markdown files before migration.

    Writes a timestamped zip archive into ``backup_dir`` (created if needed).
    Each file that exists is stored under its base name; missing files are
    silently skipped, so the archive may be empty.

    Args:
        backup_dir: Directory to store backups
        files_to_backup: Paths of the files to archive. Defaults to
            '../Progress/progress.md' and '../Progress/TO_DO.md'.

    Returns:
        Path to backup file
    """
    import zipfile
    from datetime import datetime

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(backup_dir, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_file = os.path.join(backup_dir, f'markdown_backup_{timestamp}.zip')

    if files_to_backup is None:
        files_to_backup = [
            '../Progress/progress.md',
            '../Progress/TO_DO.md'
        ]

    with zipfile.ZipFile(backup_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in files_to_backup:
            if os.path.exists(file_path):
                # Store flat: only the file name, not its directory
                zipf.write(file_path, os.path.basename(file_path))

    print(f"📦 Created backup: {backup_file}")
    return backup_file

def main():
    """
    Main migration function.

    Parses the command line, optionally backs up the markdown files, reads
    progress.md and TO_DO.md, merges the resulting records, prints a short
    preview, and then either imports the records into the database or, with
    --dry-run, only reports what would be imported.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Migrate data from markdown files to database')
    parser.add_argument('--progress-file', default='../Progress/progress.md',
                       help='Path to progress.md file')
    parser.add_argument('--todo-file', default='../Progress/TO_DO.md',
                       help='Path to TO_DO.md file')
    parser.add_argument('--overwrite', action='store_true',
                       help='Overwrite existing database records')
    parser.add_argument('--backup', dest='backup', action='store_true', default=True,
                       help='Create backup of markdown files (default: True)')
    # --backup alone is always True (store_true with default=True), so an
    # explicit opt-out is needed to make the backup skippable.
    parser.add_argument('--no-backup', dest='backup', action='store_false',
                       help='Do not create a backup of the markdown files')
    parser.add_argument('--dry-run', action='store_true',
                       help='Show what would be imported without updating database')

    args = parser.parse_args()

    print("📊 PHP Migration Data Migration")
    print("=" * 40)

    # Create backup if requested; a dry run changes nothing, so skip it there
    if args.backup and not args.dry_run:
        backup_existing_files()

    # Parse markdown files
    progress_records = parse_progress_md(args.progress_file)
    todo_paths = parse_todo_md(args.todo_file)
    todo_records = convert_todo_paths_to_records(todo_paths)

    # Merge records
    merged_records = merge_records(progress_records, todo_records)

    if not merged_records:
        print("No records to import!")
        return

    # Show sample records
    print(f"\n📋 Sample records to import:")
    for number, record in enumerate(merged_records[:5], start=1):
        path = f"{record['primary_folder']}/{record['sub_folder']}/{record['file_name']}"
        # Empty folder parts leave doubled or leading slashes; tidy them up
        path = path.replace('//', '/').strip('/')
        flags = []
        if record['php8_rewritten']:
            flags.append('PHP8')
        if record['tested']:
            flags.append('Tested')
        if record['links_scanned']:
            flags.append('Links')
        flag_str = f" [{', '.join(flags)}]" if flags else ""
        print(f"   {number}. {path} ({record['status']}){flag_str}")

    if len(merged_records) > 5:
        print(f"   ... and {len(merged_records) - 5} more")

    # Import to database
    if args.dry_run:
        print(f"\n🔍 DRY RUN: Would import {len(merged_records)} records")
        print(f"   Overwrite existing: {args.overwrite}")
    else:
        import_records_to_database(merged_records, args.overwrite)

        # Show final statistics
        db = MigrationDB()
        stats = db.get_statistics()
        print(f"\n📊 Final Database Statistics:")
        print(f"   - Total files: {stats['total_files']}")
        print(f"   - By status: {stats.get('by_status', {})}")
        print(f"   - PHP8 completed: {stats['php8_completed']}")
        print(f"   - Tested: {stats['tested_completed']}")
        print(f"   - Links scanned: {stats['links_scanned']}")

    print("\n✅ Data migration complete!")

# Run the migration only when executed as a script, not when imported.
if __name__ == '__main__':
    main()