#!/usr/bin/env python3
"""
Report generator for PHP migration tracking system.
Generates markdown and CSV reports from the database.
"""

import os
import csv
from datetime import datetime
from typing import List, Dict, Any, Optional
from db_utils import MigrationDB

class ReportGenerator:
    """Generate various reports from the migration database."""
    
    def __init__(self, output_dir: str = '../Progress'):
        """
        Create the database handle and prepare the report directory.

        Args:
            output_dir: Directory that generated reports are written to;
                it is created (including parents) when it does not exist.
        """
        self.db = MigrationDB()
        self.output_dir = output_dir

        # An already existing directory is fine, hence exist_ok.
        os.makedirs(self.output_dir, exist_ok=True)
    
    def generate_progress_markdown(self, filename: str = None) -> str:
        """
        Write the per-file progress table in the existing markdown layout.

        Every file record becomes one table row holding its folder,
        sub-folder, name and the three yes/no progress flags.

        Args:
            filename: Output filename (default: progress_YYYYMMDD_HHMMSS.md)

        Returns:
            Path to generated file
        """
        if filename is None:
            filename = f"progress_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

        output_path = os.path.join(self.output_dir, filename)

        # Ask the database for every file, ordered by folder, sub-folder, name.
        records = self.db.get_all_files('primary_folder, sub_folder, file_name')

        # Title and table head are kept exactly as the existing format has them
        # (rows are intentionally not closed with a trailing pipe).
        preamble = (
            "Confirmed Working in PHP8\n\n\n",
            "| Primary Folder | Folder | File | php8 rewritten | Confirmed Working | php links from page added to todo list\n",
            "|  |  |  |  |  | \n",
        )

        with open(output_path, 'w', encoding='utf-8') as report:
            report.writelines(preamble)

            for record in records:
                cells = (
                    record['primary_folder'] or '',
                    record['sub_folder'] or '',
                    record['file_name'],
                    'yes' if record['php8_rewritten'] else 'no',
                    'Yes' if record['tested'] else 'No',
                    'Yes' if record['links_scanned'] else 'No',
                )
                report.write("| " + " | ".join(f"{cell}" for cell in cells) + "\n")

            report.write("\n\n")

        return output_path
    
    def generate_summary_markdown(self, filename: Optional[str] = None) -> str:
        """
        Generate summary report with statistics and progress overview.

        The report contains the overall totals, a per-status breakdown, a
        per-folder progress table, the files that still need work and the
        most recently updated files. When the database holds no files all
        percentages are reported as 0.0% instead of raising
        ZeroDivisionError, so an empty database still yields a valid report.

        Args:
            filename: Output filename (default: summary_YYYYMMDD_HHMMSS.md)

        Returns:
            Path to generated file
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f'summary_{timestamp}.md'

        output_path = os.path.join(self.output_dir, filename)
        stats = self.db.get_statistics()
        total_files = stats['total_files']

        def percent_of_total(count) -> float:
            # Guard against an empty database: no files means 0%, not a crash.
            return count / total_files * 100 if total_files else 0.0

        # How many entries each "needs attention" list shows before truncating.
        max_work_items = 20
        max_link_items = 10

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("# PHP Migration Progress Summary\n\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Overall statistics
            f.write("## 📊 Overall Statistics\n\n")
            f.write(f"- **Total Files**: {total_files}\n")
            for label, key in (
                ('PHP8 Completed', 'php8_completed'),
                ('Tested', 'tested_completed'),
                ('Links Scanned', 'links_scanned'),
            ):
                f.write(f"- **{label}**: {stats[key]} ({percent_of_total(stats[key]):.1f}%)\n")
            f.write(f"- **Total Dependencies**: {stats['total_links']}\n\n")

            # Status breakdown
            f.write("## 📋 Files by Status\n\n")
            by_status = stats.get('by_status', {})
            for status, count in by_status.items():
                f.write(f"- **{status.title()}**: {count} ({percent_of_total(count):.1f}%)\n")
            f.write("\n")

            # Progress by folder
            f.write("## 📁 Progress by Primary Folder\n\n")
            folder_stats = self._get_folder_statistics()

            f.write("| Folder | Total | PHP8 Done | Tested | Links Scanned | Progress |\n")
            f.write("|--------|-------|-----------|--------|---------------|----------|\n")

            for folder, data in folder_stats.items():
                folder_name = folder if folder else '(root)'
                total = data['total']
                php8_done = data['php8_completed']
                tested = data['tested_completed']
                links = data['links_scanned']

                # Overall progress is the average of the three per-file metrics.
                progress = (php8_done + tested + links) / (total * 3) * 100 if total else 0.0
                progress_bar = self._create_progress_bar(progress)

                f.write(f"| {folder_name} | {total} | {php8_done} | {tested} | {links} | {progress_bar} {progress:.1f}% |\n")

            f.write("\n")

            # Files needing attention
            f.write("## ⚠️ Files Needing Attention\n\n")

            # Files needing PHP8 work
            needing_work = self.db.get_files_needing_work()
            if needing_work:
                f.write("### PHP8 Rewrite or Testing Needed\n\n")
                for file_record in needing_work[:max_work_items]:
                    path = self.db.get_file_path(file_record)
                    issues = []
                    if not file_record['php8_rewritten']:
                        issues.append('PHP8 rewrite')
                    if not file_record['tested']:
                        issues.append('testing')
                    f.write(f"- `{path}` - needs {', '.join(issues)}\n")

                if len(needing_work) > max_work_items:
                    f.write(f"\n... and {len(needing_work) - max_work_items} more files\n")
                f.write("\n")

            # Files needing link scanning
            needing_links = self.db.get_files_needing_links_scan()
            if needing_links:
                f.write("### Link Scanning Needed\n\n")
                for file_record in needing_links[:max_link_items]:
                    path = self.db.get_file_path(file_record)
                    f.write(f"- `{path}`\n")

                if len(needing_links) > max_link_items:
                    f.write(f"\n... and {len(needing_links) - max_link_items} more files\n")
                f.write("\n")

            # Recent updates
            f.write("## 🕒 Recent Updates\n\n")
            recent_files = self._get_recently_updated_files(10)
            if recent_files:
                for file_record in recent_files:
                    path = self.db.get_file_path(file_record)
                    updated = file_record['last_updated']
                    status_icons = []
                    if file_record['php8_rewritten']:
                        status_icons.append('✅ PHP8')
                    if file_record['tested']:
                        status_icons.append('✅ Tested')
                    if file_record['links_scanned']:
                        status_icons.append('🔗 Links')

                    status_str = ' '.join(status_icons) if status_icons else '⏳ Pending'
                    f.write(f"- `{path}` - {status_str} (updated: {updated})\n")
            else:
                f.write("No recent updates found.\n")

            f.write("\n")

        return output_path
    
    def generate_csv_report(self, filename: str = None, include_links: bool = False) -> str:
        """
        Dump every file record into a CSV file, one row per file.

        Args:
            filename: Output filename (default: migration_data_YYYYMMDD_HHMMSS.csv)
            include_links: Also add the number of dependencies and the
                '; '-joined dependency target paths of each file

        Returns:
            Path to generated file
        """
        if filename is None:
            filename = f"migration_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        output_path = os.path.join(self.output_dir, filename)
        records = self.db.get_all_files('primary_folder, sub_folder, file_name')

        columns = [
            'primary_folder', 'sub_folder', 'file_name', 'full_path',
            'status', 'php8_rewritten', 'tested', 'links_scanned',
            'notes', 'last_updated',
        ]
        if include_links:
            columns += ['dependencies_count', 'dependencies']

        def yes_no(flag) -> str:
            # Progress flags are spelled out in the CSV.
            return 'Yes' if flag else 'No'

        # newline='' lets the csv module control the line endings itself.
        with open(output_path, 'w', newline='', encoding='utf-8') as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()

            for record in records:
                row = {
                    'primary_folder': record['primary_folder'] or '',
                    'sub_folder': record['sub_folder'] or '',
                    'file_name': record['file_name'],
                    'full_path': self.db.get_file_path(record),
                    'status': record['status'],
                    'php8_rewritten': yes_no(record['php8_rewritten']),
                    'tested': yes_no(record['tested']),
                    'links_scanned': yes_no(record['links_scanned']),
                    'notes': record['notes'] or '',
                    'last_updated': record['last_updated'],
                }

                if include_links:
                    dependencies = self.db.get_links_for_file(record['id'])
                    row.update(
                        dependencies_count=len(dependencies),
                        dependencies='; '.join(dep['target_path'] for dep in dependencies),
                    )

                writer.writerow(row)

        return output_path
    
    def generate_dependency_report(self, filename: str = None) -> str:
        """
        Write a markdown report about the dependencies between files.

        The report lists how many files have dependencies, the 20 files with
        the most dependencies and every referenced target that
        `_find_missing_dependencies` reports as absent from the database.

        Args:
            filename: Output filename (default: dependencies_YYYYMMDD_HHMMSS.md)

        Returns:
            Path to generated file
        """
        if filename is None:
            filename = f"dependencies_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

        output_path = os.path.join(self.output_dir, filename)

        # Record key -> icon shown in the status column when the flag is set.
        badges = (
            ('php8_rewritten', '✅'),
            ('tested', '🧪'),
            ('links_scanned', '🔗'),
        )

        with open(output_path, 'w', encoding='utf-8') as report:
            report.write("# PHP Migration Dependency Report\n\n")
            report.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Pair every file with its links and keep those that have any.
            all_files = self.db.get_all_files()
            pairs = [
                (record, self.db.get_links_for_file(record['id']))
                for record in all_files
            ]
            linked = [pair for pair in pairs if pair[1]]

            report.write("## 📊 Dependency Overview\n\n")
            report.write(f"- Files with dependencies: {len(linked)}\n")
            report.write(f"- Files without dependencies: {len(all_files) - len(linked)}\n\n")

            # Most connected files first; ties keep their original order.
            report.write("## 🔗 Most Connected Files\n\n")
            ranked = sorted(linked, key=lambda pair: len(pair[1]), reverse=True)

            report.write("| File | Dependencies | Status |\n")
            report.write("|------|--------------|--------|\n")

            for record, links in ranked[:20]:
                path = self.db.get_file_path(record)
                icons = [icon for key, icon in badges if record[key]]
                status = ' '.join(icons) if icons else '⏳'
                report.write(f"| `{path}` | {len(links)} | {status} |\n")

            report.write("\n")

            # Referenced targets that have no record of their own.
            report.write("## ❌ Missing Dependencies\n\n")
            missing = self._find_missing_dependencies()

            if not missing:
                report.write("✅ No missing dependencies found!\n\n")
            else:
                report.write("Files that are referenced but not found in the database:\n\n")
                for target, referrers in missing.items():
                    report.write(f"### `{target}`\n")
                    report.write("Referenced by:\n")
                    report.writelines(f"- `{referrer}`\n" for referrer in referrers)
                    report.write("\n")

        return output_path
    
    def _get_folder_statistics(self) -> Dict[str, Dict[str, int]]:
        """
        Count files and completed steps per primary folder.

        Returns:
            Mapping of primary folder ('' when a file has none) to its
            'total', 'php8_completed', 'tested_completed' and
            'links_scanned' counters, in order of first appearance.
        """
        # Record flag -> counter that is bumped when the flag is set.
        flag_counters = (
            ('php8_rewritten', 'php8_completed'),
            ('tested', 'tested_completed'),
            ('links_scanned', 'links_scanned'),
        )
        per_folder: Dict[str, Dict[str, int]] = {}

        for record in self.db.get_all_files():
            key = record['primary_folder'] or ''
            bucket = per_folder.setdefault(key, {
                'total': 0,
                'php8_completed': 0,
                'tested_completed': 0,
                'links_scanned': 0,
            })

            bucket['total'] += 1
            for flag, counter in flag_counters:
                if record[flag]:
                    bucket[counter] += 1

        return per_folder
    
    def _get_recently_updated_files(self, limit: int = 10) -> List[Dict]:
        """
        Fetch the rows of the files table with the latest last_updated values.

        Args:
            limit: Maximum number of rows to return

        Returns:
            The rows as plain dicts, newest last_updated first
        """
        query = '''
                SELECT * FROM files 
                ORDER BY last_updated DESC 
                LIMIT ?
            '''
        with self.db.get_connection() as connection:
            rows = connection.execute(query, (limit,)).fetchall()
            return list(map(dict, rows))
    
    def _find_missing_dependencies(self) -> Dict[str, List[str]]:
        """
        Collect link targets under /intranet/ that have no file record.

        Returns:
            Mapping of each unresolved target path to the paths of the
            files that reference it. Targets that do not start with
            '/intranet/' are not checked.
        """
        prefix = '/intranet/'
        source_keys = ('primary_folder', 'sub_folder', 'file_name')
        query = '''
                SELECT l.target_path, f.primary_folder, f.sub_folder, f.file_name
                FROM links l
                JOIN files f ON l.source_file_id = f.id
            '''
        unresolved: Dict[str, List[str]] = {}

        with self.db.get_connection() as connection:
            for row in connection.execute(query).fetchall():
                target = row['target_path']
                referrer = self.db.get_file_path({key: row[key] for key in source_keys})

                if not target.startswith(prefix):
                    continue

                # Split into at most folder / sub-folder / rest: the last
                # piece is the file name (it keeps any deeper path
                # segments), missing leading pieces default to ''.
                pieces = target[len(prefix):].split('/', 2)
                file_name = pieces.pop()
                folder, sub_folder = (pieces + ['', ''])[:2]

                if self.db.get_file_by_path(folder, sub_folder, file_name):
                    continue

                unresolved.setdefault(target, []).append(referrer)

        return unresolved
    
    def _create_progress_bar(self, percentage: float, width: int = 20) -> str:
        """
        Create a text-based progress bar.

        Args:
            percentage: Completion in percent. Values outside 0-100 are
                clamped, so the bar never grows beyond `width` cells.
            width: Number of cells between the brackets

        Returns:
            The bar in brackets, filled cells first (width 4 at 50% gives
            two filled and two empty cells)
        """
        # Without clamping, e.g. 150% would produce more filled cells than
        # `width` and a negative value more empty cells than `width`.
        clamped = min(max(percentage, 0.0), 100.0)
        filled = int(width * clamped / 100)
        return f"[{'█' * filled}{'░' * (width - filled)}]"

def main():
    """
    Main reporting function.

    Parses the command line, generates the requested report(s) into the
    output directory and prints a short overview of the database statistics.

    With ``--format all`` a custom ``--filename`` is used as a base name:
    each report is written to ``<stem>_<format><default extension>`` so the
    four reports do not overwrite each other. For a single format the
    filename is used unchanged. Percentages are shown as 0.0% when the
    database contains no files.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Generate migration reports')
    parser.add_argument('--output-dir', default='../Progress', help='Output directory for reports')
    parser.add_argument('--format', choices=['markdown', 'csv', 'summary', 'dependencies', 'all'],
                        default='summary', help='Report format to generate')
    parser.add_argument('--filename', help='Custom output filename')
    parser.add_argument('--include-links', action='store_true', help='Include dependency data in CSV')

    args = parser.parse_args()

    print("📊 PHP Migration Report Generator")
    print("=" * 40)

    generator = ReportGenerator(args.output_dir)

    # (format name, label for the log line, default extension, generator call)
    reports = [
        ('markdown', 'progress markdown', '.md',
         lambda name: generator.generate_progress_markdown(name)),
        ('csv', 'CSV report', '.csv',
         lambda name: generator.generate_csv_report(name, args.include_links)),
        ('summary', 'summary report', '.md',
         lambda name: generator.generate_summary_markdown(name)),
        ('dependencies', 'dependency report', '.md',
         lambda name: generator.generate_dependency_report(name)),
    ]
    selected = [report for report in reports if args.format in (report[0], 'all')]

    generated_files = []
    for kind, label, extension, build in selected:
        filename = args.filename
        if filename is not None and len(selected) > 1:
            # One custom name shared by several reports would make each
            # report overwrite the previous one; derive a name per report.
            stem, _ = os.path.splitext(filename)
            filename = f"{stem}_{kind}{extension}"

        output_file = build(filename)
        generated_files.append(output_file)
        print(f"✅ Generated {label}: {output_file}")

    # Show database statistics
    stats = generator.db.get_statistics()
    total_files = stats['total_files']

    def percent_of_total(count) -> float:
        # An empty database must not crash the final overview.
        return count / total_files * 100 if total_files else 0.0

    print("\n📊 Current Database Statistics:")
    print(f"   Total files: {total_files}")
    print(f"   PHP8 completed: {stats['php8_completed']} ({percent_of_total(stats['php8_completed']):.1f}%)")
    print(f"   Tested: {stats['tested_completed']} ({percent_of_total(stats['tested_completed']):.1f}%)")
    print(f"   Links scanned: {stats['links_scanned']} ({percent_of_total(stats['links_scanned']):.1f}%)")

    print(f"\n✅ Report generation complete! Generated {len(generated_files)} files.")

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()