#!/usr/bin/env python3
"""
Inventory script for PHP migration tracking system.
Scans vmserver10/intranet directory and populates the database with file records.
"""

import os
import sys
from pathlib import Path
from typing import List, Tuple
from db_utils import MigrationDB

def scan_directory(base_path: str) -> List[Tuple[str, str, str]]:
    """
    Scan a directory tree for PHP files and split each relative path into components.

    Hidden directories (names starting with '.') and directories named
    'backups' or '__pycache__' (compared case-insensitively) are not descended
    into. Directories and files are visited in sorted order so the result is
    deterministic regardless of the order the filesystem lists entries in.

    Args:
        base_path: Base directory to scan (e.g., '../vmserver10/intranet')

    Returns:
        List of (primary_folder, sub_folder, file_name) tuples. Files directly
        in base_path have both folders set to ''; files one level down have
        sub_folder set to ''; for deeper files, file_name holds the remaining
        path components joined with '/'. An empty list is returned (after
        printing an error) when base_path cannot be resolved, does not exist,
        or is not a directory.
    """
    skipped_dirs = {'backups', '__pycache__'}
    php_files = []

    try:
        root_dir = Path(base_path).resolve()
    except (OSError, RuntimeError, TypeError, ValueError) as e:
        print(f"Error: Failed to resolve base path '{base_path}': {e}")
        return []

    if not root_dir.exists():
        print(f"Error: Base path does not exist: {root_dir}")
        return []

    if not root_dir.is_dir():
        # os.walk() on a regular file yields nothing, which would otherwise
        # look like an empty (but valid) scan.
        print(f"Error: Base path is not a directory: {root_dir}")
        return []

    print(f"Scanning directory: {root_dir}")

    for root, dirs, files in os.walk(root_dir):
        # Prune in place so os.walk() does not descend into skipped
        # directories; sorting fixes the traversal order.
        dirs[:] = sorted(
            d for d in dirs
            if not d.startswith('.') and d.lower() not in skipped_dirs
        )

        # Directory components relative to the scan root, computed once per
        # directory instead of once per file.
        rel_dir = Path(root).relative_to(root_dir).parts

        for file in sorted(files):
            # NOTE(review): the match is case-sensitive, so names ending in
            # '.PHP' are skipped - confirm this is intended.
            if not file.endswith('.php'):
                continue

            parts = rel_dir + (file,)

            if len(parts) >= 3:
                # File in a sub-folder or deeper: everything below the
                # sub-folder is folded into file_name.
                entry = (parts[0], parts[1], '/'.join(parts[2:]))
            elif len(parts) == 2:
                # File directly inside a primary folder
                entry = (parts[0], '', parts[1])
            else:
                # File in the scan root
                entry = ('', '', parts[0])

            php_files.append(entry)

    return php_files

def update_database(php_files: List[Tuple[str, str, str]], preserve_existing: bool = True) -> None:
    """
    Record the discovered PHP files in the migration database and print a summary.

    Each entry is looked up with MigrationDB.get_file_by_path(). Known entries
    are either flagged via update_file_flags(..., file_exists=1) or, when
    preserve_existing is False, re-added with status 'new'. Unknown entries are
    always added with status 'new'.

    Args:
        php_files: List of (primary_folder, sub_folder, file_name) tuples
        preserve_existing: If True, keep the stored record of known files and
            only set file_exists=1 on it; if False, re-add them as 'new'
    """
    # Guard clauses: reject anything that is not an actual list before
    # opening the database.
    if php_files is None:
        print("Error: php_files is None - cannot update database")
        return
    if not isinstance(php_files, list):
        print(f"Error: php_files is not a list, got {type(php_files)}")
        return

    db = MigrationDB()

    print(f"Updating database with {len(php_files)} PHP files...")

    added_count = 0
    touched_count = 0

    for entry in php_files:
        primary_folder, sub_folder, file_name = entry
        record = db.get_file_by_path(primary_folder, sub_folder, file_name)
        already_known = bool(record)

        if already_known and preserve_existing:
            # Known file: only mark it as present
            db.update_file_flags(record['id'], file_exists=1)
            touched_count += 1
            continue

        # New file, or known file being reset: add with default values.
        # Per the original comments the positional 1 is file_exists; the
        # meaning of the other positional values is presumably defined by
        # MigrationDB.add_file - confirm there.
        db.add_file(primary_folder, sub_folder, file_name, 'new', 0, 0, 0, 1, 0, 0, 0, '')
        if already_known:
            touched_count += 1
        else:
            added_count += 1

    print("✅ Database updated:")
    print(f"   - New files added: {added_count}")
    print(f"   - Existing files updated: {touched_count}")

    # Summary of the database contents after the update
    stats = db.get_statistics()
    print("\n📊 Database Statistics:")
    print(f"   - Total files: {stats['total_files']}")
    print(f"   - By status: {stats.get('by_status', {})}")
    print(f"   - PHP8 completed: {stats['php8_completed']}")
    print(f"   - Tested: {stats['tested_completed']}")
    print(f"   - Links scanned: {stats['links_scanned']}")

def verify_files(php_files: List[Tuple[str, str, str]], base_path: str = '../vmserver10/intranet') -> None:
    """
    Verify that discovered files actually exist on filesystem.

    Each tuple is turned back into a path below base_path (empty folder
    components are skipped) and checked to be an existing regular file.
    A summary is printed: either a success line or a warning listing at most
    the first 10 missing paths.

    Args:
        php_files: List of (primary_folder, sub_folder, file_name) tuples to verify
        base_path: Base directory path the tuples are relative to
    """
    print(f"\n🔍 Verifying {len(php_files)} files...")

    root_dir = Path(base_path).resolve()
    missing_paths = []

    for primary_folder, sub_folder, file_name in php_files:
        # Drop empty folder components so root-level and single-folder files
        # map to the right path and are reported without stray slashes.
        components = [part for part in (primary_folder, sub_folder) if part]
        components.append(file_name)

        # is_file() rather than exists(): a directory sitting where a PHP
        # file is expected still counts as a missing file.
        if not (root_dir / Path(*components)).is_file():
            missing_paths.append('/'.join(components))

    if not missing_paths:
        print("✅ All files verified on filesystem")
        return

    print(f"⚠️  Warning: {len(missing_paths)} files not found on filesystem:")
    for rel_path in missing_paths[:10]:  # Show first 10
        print(f"   - {rel_path}")
    if len(missing_paths) > 10:
        print(f"   ... and {len(missing_paths) - 10} more")

def main():
    """
    Command-line entry point: scan for PHP files, optionally verify them,
    then update the migration database (or only report, with --dry-run).
    """
    import argparse

    cli = argparse.ArgumentParser(description='Scan PHP files and update migration database')
    cli.add_argument(
        '--base-path',
        default='../vmserver10/intranet',
        help='Base directory to scan (default: ../vmserver10/intranet)',
    )
    cli.add_argument(
        '--reset',
        action='store_true',
        help='Reset existing files to new status (default: preserve existing)',
    )
    cli.add_argument(
        '--verify',
        action='store_true',
        help='Verify files exist on filesystem',
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without updating database',
    )
    options = cli.parse_args()

    print("🔍 PHP Migration Inventory Scanner")
    print("=" * 40)

    # Discover PHP files below the base path; nothing else to do if none exist
    discovered = scan_directory(options.base_path)
    if not discovered:
        print("No PHP files found!")
        return

    total = len(discovered)
    print(f"Found {total} PHP files")

    # Preview the first few entries, omitting empty folder components
    print("\n📁 Sample files found:")
    for position, (primary, sub, name) in enumerate(discovered[:5], start=1):
        if sub:
            shown = f"{primary}/{sub}/{name}"
        elif primary:
            shown = f"{primary}/{name}"
        else:
            shown = name
        print(f"   {position}. {shown}")
    if total > 5:
        print(f"   ... and {total - 5} more")

    # Optional filesystem check of the discovered entries
    if options.verify:
        verify_files(discovered, options.base_path)

    # --reset inverts the default of preserving existing records
    keep_existing = not options.reset
    if not options.dry_run:
        update_database(discovered, preserve_existing=keep_existing)
    else:
        print(f"\n🔍 DRY RUN: Would update database with {total} files")
        print(f"   Preserve existing: {keep_existing}")

    print("\n✅ Inventory scan complete!")

# Run the scanner only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()