#!/usr/bin/env python3
"""
IsLive Verification Tool for PHP Migration System

This standalone script verifies which files in the migration database
are present in the live system by comparing MD5 hashes.

Usage: python3 verify_islive.py [--db PATH] [--live-dir PATH] [--dry-run]
"""

import os
import hashlib
import sqlite3
from datetime import datetime
from typing import List, Dict, Optional
from pathlib import Path


class IsLiveVerifier:
    """Verify the IsLive flag of migration records against the live system.

    A record counts as live when the file in the vmserver10 tree and the file
    at the same relative path in the live tree have identical MD5 hashes.
    """

    # Record keys that contribute optional folder components to a file path.
    _FOLDER_KEYS = ('primary_folder', 'sub_folder')

    # Number of bytes read per iteration while hashing a file.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, db_path: Optional[str] = None,
                 live_base_path: Optional[str] = None,
                 vmserver_base_path: Optional[str] = None):
        """Initialize the verifier.

        Args:
            db_path: Path to migration database (default: migration.db in
                script dir)
            live_base_path: Path to live files directory
                (default: ../live_files_readonly)
            vmserver_base_path: Path to the vmserver10 files directory
                (default: ../vmserver10/intranet)

        Raises:
            FileNotFoundError: If the database or the live files directory
                does not exist.
        """
        script_dir = Path(__file__).parent

        if db_path is None:
            db_path = script_dir / 'migration.db'
        if live_base_path is None:
            live_base_path = script_dir.parent / 'live_files_readonly'
        if vmserver_base_path is None:
            vmserver_base_path = script_dir.parent / 'vmserver10' / 'intranet'

        self.db_path = Path(db_path)
        self.live_base_path = Path(live_base_path)
        # Not validated here: a missing vmserver tree is reported per file
        # by verify_file_is_live instead of aborting construction.
        self.vmserver_base_path = Path(vmserver_base_path)

        if not self.db_path.exists():
            raise FileNotFoundError(f"Database not found: {self.db_path}")

        if not self.live_base_path.exists():
            raise FileNotFoundError(f"Live files directory not found: {self.live_base_path}")

    def get_connection(self) -> sqlite3.Connection:
        """Open a new database connection whose rows support access by name.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def get_qualifying_files(self) -> List[Dict]:
        """Get files that qualify for IsLive verification.

        Criteria:
        - Status is NOT 'archive' or 'remove'
        - File exists (file_exists = 1)
        - IsLive is NOT ticked (isLive = 0)

        Returns:
            Matching rows as plain dicts, ordered by primary_folder,
            sub_folder, file_name.
        """
        conn = self.get_connection()
        try:
            cursor = conn.execute('''
                SELECT * FROM files
                WHERE status NOT IN ('archive', 'remove')
                AND file_exists = 1
                AND isLive = 0
                ORDER BY primary_folder, sub_folder, file_name
            ''')
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # "with conn:" only manages the transaction; close explicitly so
            # the connection is not leaked.
            conn.close()

    def calculate_md5(self, file_path: Path) -> Optional[str]:
        """Calculate MD5 hash of a file.

        Args:
            file_path: Path to the file

        Returns:
            MD5 hash as hex string, or None if the file does not exist or
            cannot be read (read errors are printed).
        """
        file_path = Path(file_path)
        try:
            if not file_path.exists():
                return None

            digest = hashlib.md5()
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(self._CHUNK_SIZE), b""):
                    digest.update(chunk)
            return digest.hexdigest()

        except OSError as e:
            print(f"Error calculating MD5 for {file_path}: {e}")
            return None

    def get_live_file_path(self, file_record: Dict) -> Path:
        """Get the corresponding path in the live files directory.

        Args:
            file_record: File record from database

        Returns:
            Path to the file below the live files base directory
        """
        return self.live_base_path / self.get_file_path(file_record)

    def verify_file_is_live(self, file_record: Dict) -> bool:
        """Check if a file exists in the live system with matching MD5.

        Args:
            file_record: File record from database

        Returns:
            True if both the vmserver10 file and the live file could be
            hashed and their MD5 hashes are equal
        """
        vmserver_path = self.get_vmserver_path(file_record)
        vmserver_md5 = self.calculate_md5(vmserver_path)
        if vmserver_md5 is None:
            # Nothing to compare against, so skip hashing the live file.
            print(f"Warning: vmserver10 file not found: {vmserver_path}")
            return False

        live_md5 = self.calculate_md5(self.get_live_file_path(file_record))
        if live_md5 is None:
            # Live file doesn't exist (or could not be read)
            return False

        return vmserver_md5 == live_md5

    def get_vmserver_path(self, file_record: Dict) -> Path:
        """Get the path to the file in the vmserver10 directory.

        Args:
            file_record: File record from database

        Returns:
            Path to the file below the vmserver10 base directory
        """
        return self.vmserver_base_path / self.get_file_path(file_record)

    def update_is_live_status(self, file_id: int, is_live: bool) -> bool:
        """Update the IsLive status for a file.

        Also sets last_updated to the current local time in ISO format.

        Args:
            file_id: Database ID of the file
            is_live: New IsLive status

        Returns:
            True if a row was updated, False if no row has that id
        """
        conn = self.get_connection()
        try:
            # "with conn" commits on success and rolls back on an exception.
            with conn:
                cursor = conn.execute('''
                    UPDATE files SET isLive = ?, last_updated = ? WHERE id = ?
                ''', (int(is_live), datetime.now().isoformat(), file_id))
            return cursor.rowcount > 0
        finally:
            conn.close()

    def run_verification(self, dry_run: bool = False) -> Dict[str, int]:
        """Run the IsLive verification process.

        Progress and a final summary are printed to stdout.

        Args:
            dry_run: If True, don't update database, just report what would be done

        Returns:
            Statistics dictionary with the keys 'total_checked',
            'marked_live' (files marked live, or that would be marked in a
            dry run) and 'errors'
        """
        print("Starting IsLive verification...")
        print(f"Database: {self.db_path}")
        print(f"Live files: {self.live_base_path}")
        print(f"Dry run: {dry_run}")
        print()

        qualifying_files = self.get_qualifying_files()
        total = len(qualifying_files)
        print(f"Found {total} qualifying files to check")

        stats = {'total_checked': total, 'marked_live': 0, 'errors': 0}
        if not qualifying_files:
            return stats

        for i, file_record in enumerate(qualifying_files, 1):
            file_path = self.get_file_path(file_record)

            print(f"[{i}/{total}] Checking: {file_path}")

            try:
                if not self.verify_file_is_live(file_record):
                    print("  ✗ NO MATCH: File not live or MD5 mismatch")
                    continue

                print("  ✓ MATCH: File is live")

                if dry_run:
                    print("  (Dry run: would mark as live)")
                    stats['marked_live'] += 1
                elif self.update_is_live_status(file_record['id'], True):
                    # Count only after the database write has succeeded.
                    stats['marked_live'] += 1
                else:
                    print(f"  ⚠ Warning: Failed to update database for {file_path}")
                    stats['errors'] += 1

            except Exception as e:
                # Keep going: one bad record must not abort the whole run.
                print(f"  ⚠ Error checking {file_path}: {e}")
                stats['errors'] += 1

        print()
        print("Verification complete!")
        print(f"Files checked: {stats['total_checked']}")
        print(f"Marked as live: {stats['marked_live']}")
        if stats['errors'] > 0:
            print(f"Errors: {stats['errors']}")

        return stats

    def get_file_path(self, file_record: Dict) -> str:
        """Get the '/'-joined relative display path for a file record.

        Empty or None folder components are skipped; file_name is always
        included.
        """
        parts = [file_record[key] for key in self._FOLDER_KEYS if file_record[key]]
        parts.append(file_record['file_name'])
        return '/'.join(parts)


def main(argv: Optional[List[str]] = None):
    """Main function for command-line usage.

    Parses the command line, runs the verification and terminates the
    process with exit status 1 when the verifier could not be created, the
    run raised, or the run reported errors.

    Args:
        argv: Argument list to parse instead of sys.argv[1:] (default: None,
            which makes argparse read sys.argv)
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Verify IsLive status by comparing MD5 hashes with live system')
    parser.add_argument('--db', help='Path to migration database')
    parser.add_argument('--live-dir', help='Path to live files directory')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without updating database')

    args = parser.parse_args(argv)

    try:
        verifier = IsLiveVerifier(db_path=args.db, live_base_path=args.live_dir)
        stats = verifier.run_verification(dry_run=args.dry_run)
    except Exception as e:
        print(f"Error: {e}")
        # sys.exit rather than the exit() builtin, which is injected by the
        # site module and is not available in every interpreter setup.
        sys.exit(1)

    # Exit with error code if there were errors
    if stats.get('errors', 0) > 0:
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()