import { pool } from '../db.js';
import { readFileSync, createReadStream } from 'fs';
import { createGunzip } from 'zlib';
import { pipeline } from 'stream/promises';
import { Writable } from 'stream';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// Directory of this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Directory holding the SQL dump files: two levels above this module, in `sql`.
const SQL_DIR = join(__dirname, '..', '..', 'sql');

/**
 * Table name mapping from SQL dumps to our schema.
 *
 * Keys are table names as they appear after `INSERT INTO` in a dump; values are
 * the names substituted for them by executeSqlFile. Each key is interpolated
 * into a regular expression there, and entries are applied in declaration order.
 */
const TABLE_MAPPING = {
    'public.regions': 'public.continents',
    'regions': 'public.continents',
    // Identity entry: the table keeps its name.
    'public.subregions': 'public.subregions',
};

/**
 * Rewrite a subregions INSERT statement from the dump's positional column
 * order to the migration's column order, adding an explicit column list.
 *
 * Dump order:      id, name, translations, region_id, created_at, updated_at, flag, wikiDataId
 * Migration order: id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id
 *
 * @param {string} stmt - A single `INSERT ... VALUES (...)` statement.
 * @returns {string} The rewritten statement (always targeting `subregions`), or
 *   `stmt` unchanged when it has no VALUES clause or fewer than 8 values.
 */
function transformSubregionsInsert(stmt) {
    const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
    if (!valuesMatch) return stmt;

    // Tokenize with the shared helper used by the other transforms instead of
    // keeping a private copy of the same quote/bracket-aware scanner here.
    const values = parseValues(valuesMatch[1]);
    if (values.length < 8) return stmt;

    // Only the first eight values are used; region_id moves ahead of translations.
    const [id, name, translations, regionId, createdAt, updatedAt, flag, wikiDataId] = values;
    const reordered = [id, name, regionId, translations, createdAt, updatedAt, flag, wikiDataId];

    return `INSERT INTO subregions (id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${reordered.join(', ')});`;
}

/**
 * Rewrite a countries INSERT statement so it matches the migration schema.
 *
 * Expected dump layout (32 positional values):
 *   0:id, 1:name, 2:iso3, 3:numeric_code, 4:iso2, 5:phonecode, 6:capital, 7:currency,
 *   8:currency_name, 9:currency_symbol, 10:tld, 11:native, 12:population, 13:gdp,
 *   14:region, 15:region_id, 16:subregion, 17:subregion_id, 18:nationality,
 *   19:area_sq_km, 20:postal_code_format, 21:postal_code_regex, 22:timezones,
 *   23:translations, 24:latitude, 25:longitude, 26:emoji, 27:emojiU,
 *   28:created_at, 29:updated_at, 30:flag, 31:wikiDataId
 *
 * Twenty-five of those are kept and emitted with an explicit column list;
 * region_id is written to continent_id.
 *
 * @param {string} stmt - A single `INSERT ... VALUES (...)` statement.
 * @returns {string} The rewritten statement, or `stmt` unchanged when it has
 *   no VALUES clause or fewer than 32 values.
 */
function transformCountriesInsert(stmt) {
    const [, rawValues] = stmt.match(/VALUES\s*\((.+)\);?$/is) ?? [];
    if (rawValues === undefined) {
        return stmt;
    }

    const dumpValues = parseValues(rawValues);
    if (dumpValues.length < 32) {
        return stmt;
    }

    // Dump position of each target column, listed in target-column order.
    const sourceIndexes = [
        0,  // id
        1,  // name
        4,  // iso2
        2,  // iso3
        3,  // numeric_code
        5,  // phonecode
        6,  // capital
        7,  // currency
        8,  // currency_name
        9,  // currency_symbol
        10, // tld
        11, // native
        15, // region_id -> continent_id
        17, // subregion_id
        18, // nationality
        24, // latitude
        25, // longitude
        26, // emoji
        27, // emojiU -> emoji_u
        22, // timezones
        23, // translations
        28, // created_at
        29, // updated_at
        30, // flag
        31, // wikiDataId -> wiki_data_id
    ];
    const row = sourceIndexes.map((idx) => dumpValues[idx]).join(', ');

    return `INSERT INTO countries (id, name, iso2, iso3, numeric_code, phonecode, capital, currency, currency_name, currency_symbol, tld, native, continent_id, subregion_id, nationality, latitude, longitude, emoji, emoji_u, timezones, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${row});`;
}

/**
 * Rewrite a states INSERT statement so it matches the migration schema.
 *
 * The VALUES list is tokenized with parseValues and must contain exactly 20
 * values, expected in this order:
 *   0:id, 1:name, 2:country_id, 3:country_code, 4:fips_code, 5:iso2, 6:iso3166_2,
 *   7:type, 8:level, 9:parent_id, 10:native, 11:latitude, 12:longitude, 13:timezone,
 *   14:translations, 15:created_at, 16:updated_at, 17:flag, 18:wikiDataId, 19:population
 *
 * Fifteen of them are kept and emitted with an explicit column list.
 *
 * @param {string} stmt - A single `INSERT ... VALUES (...)` statement.
 * @returns {string} The rewritten statement, or `stmt` unchanged when it has
 *   no VALUES clause or the value count is not 20 (the latter is logged).
 */
function transformStatesInsert(stmt) {
    const [, rawValues] = stmt.match(/VALUES\s*\((.+)\);?$/is) ?? [];
    if (rawValues === undefined) {
        return stmt;
    }

    const parsed = parseValues(rawValues);
    if (parsed.length !== 20) {
        console.log(`   ⚠️ Expected 20 columns for states, got ${parsed.length}`);
        return stmt;
    }

    // Dump position of each target column, in target-column order.
    // Note that iso2 and fips_code swap places relative to the dump.
    const sourceIndexes = [
        0,  // id
        1,  // name
        2,  // country_id
        3,  // country_code
        5,  // iso2
        4,  // fips_code
        7,  // type
        11, // latitude
        12, // longitude
        13, // timezone
        14, // translations
        15, // created_at
        16, // updated_at
        17, // flag
        18, // wikiDataId -> wiki_data_id
    ];
    const row = sourceIndexes.map((idx) => parsed[idx]).join(', ');

    return `INSERT INTO states (id, name, country_id, country_code, iso2, fips_code, type, latitude, longitude, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${row});`;
}

/**
 * Rewrite a cities INSERT statement so it matches the migration schema.
 *
 * The VALUES list must contain exactly 19 values, expected in this order:
 *   0:id, 1:name, 2:state_id, 3:state_code, 4:country_id, 5:country_code, 6:type,
 *   7:level, 8:parent_id, 9:latitude, 10:longitude, 11:native, 12:population,
 *   13:timezone, 14:translations, 15:created_at, 16:updated_at, 17:flag, 18:wikiDataId
 *
 * The migration keeps the remaining 15 values in their dump order, so the
 * mapping is expressed as "drop type, level, parent_id and native".
 *
 * @param {string} stmt - A single `INSERT ... VALUES (...)` statement.
 * @returns {string} The rewritten statement, or `stmt` unchanged when it has
 *   no VALUES clause or the value count is not 19.
 */
function transformCitiesInsert(stmt) {
    const [, rawValues] = stmt.match(/VALUES\s*\((.+)\);?$/is) ?? [];
    if (rawValues === undefined) {
        return stmt;
    }

    const parsed = parseValues(rawValues);
    if (parsed.length !== 19) {
        return stmt;
    }

    // Dump positions with no counterpart in the migration:
    // 6:type, 7:level, 8:parent_id, 11:native.
    const dropped = new Set([6, 7, 8, 11]);
    const row = parsed.filter((_, idx) => !dropped.has(idx)).join(', ');

    return `INSERT INTO cities (id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${row});`;
}

/**
 * Split the inside of a SQL VALUES list into its individual value tokens.
 *
 * Example input: 1, 'name', '{"br": "value"}', NULL
 *
 * Rules applied while scanning:
 *  - single-quoted strings are opaque; a doubled quote ('') inside one is kept
 *    as part of the string, and a quote directly preceded by a backslash does
 *    not close the string;
 *  - outside strings, (), {} and [] are depth-tracked and only a comma at
 *    depth 0 separates values;
 *  - each token is trimmed; an empty token at the very end is dropped.
 *
 * @param {string} valuesStr - Text between the outer parentheses of VALUES (...).
 * @returns {string[]} The value tokens, quotes included, in order.
 */
function parseValues(valuesStr) {
    const QUOTE = "'";
    const fields = [];
    let token = '';
    let nesting = 0;
    let quoted = false;
    let pos = 0;

    while (pos < valuesStr.length) {
        const ch = valuesStr[pos];

        if (ch === QUOTE) {
            if (!quoted) {
                quoted = true;
            } else if (valuesStr[pos - 1] !== '\\') {
                if (valuesStr[pos + 1] === QUOTE) {
                    // Doubled quote: record the first half here and step over
                    // it; the second half is appended by the common path below.
                    token += ch;
                    pos += 1;
                } else {
                    quoted = false;
                }
            }
        }

        if (!quoted) {
            if (ch === '(' || ch === '{' || ch === '[') {
                nesting += 1;
            } else if (ch === ')' || ch === '}' || ch === ']') {
                nesting -= 1;
            } else if (ch === ',' && nesting === 0) {
                // Top-level separator: close the current token.
                fields.push(token.trim());
                token = '';
                pos += 1;
                continue;
            }
        }

        token += ch;
        pos += 1;
    }

    const tail = token.trim();
    if (tail) {
        fields.push(tail);
    }

    return fields;
}

/**
 * Load a plain-text SQL dump from SQL_DIR and replay its INSERT statements
 * one at a time against the pool.
 *
 * Per statement: backticks become double quotes, the quoted identifiers
 * "emojiU" / "wikiDataId" are renamed, TABLE_MAPPING is applied, the
 * table-specific column transform runs (subregions, countries, states), and
 * `ON CONFLICT DO NOTHING` is appended.
 *
 * Limitation: statements are cut at the first `;` after `INSERT INTO`, so a
 * string literal containing a semicolon would truncate its statement.
 *
 * @param {string} filename - File name relative to SQL_DIR.
 * @param {string} tableName - Used only in the completion log message.
 * @returns {Promise<number>} Number of INSERT statements executed (0 when the
 *   file contains none). Because of ON CONFLICT DO NOTHING this can exceed the
 *   number of rows actually written.
 * @throws Rethrows any read or query error after logging it.
 */
async function executeSqlFile(filename, tableName) {
    const filePath = join(SQL_DIR, filename);
    console.log(`   📄 Loading ${filename}...`);

    try {
        let sql = readFileSync(filePath, 'utf8');

        // Clean up postgres-specific commands that might cause issues
        // These need to match ONLY standalone commands, not content inside VALUES
        sql = sql
            .replace(/\\restrict[^\n]*/g, '')
            .replace(/\\unrestrict[^\n]*/g, '')
            .replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
            .replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
            .replace(/^COMMENT ON[^;]*;/gm, '')
            .replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match) // Comment out SET statements
            .replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match)   // Comment out simple SET statements
            .replace(/^SELECT[^;]*set_config[^;]*;/gm, '');

        // Extract only INSERT statements
        const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];

        if (insertStatements.length === 0) {
            console.log(`   ⚠️  No INSERT statements found in ${filename}`);
            return 0;
        }

        // Build the table-rename patterns once rather than once per statement.
        const tableRenames = Object.entries(TABLE_MAPPING).map(([oldName, newName]) => ({
            pattern: new RegExp(`INSERT INTO ${oldName}`, 'gi'),
            replacement: `INSERT INTO ${newName}`,
        }));

        // A statement targets a table whether or not the dump schema-qualifies it.
        const targetsTable = (stmt, table) =>
            stmt.includes(`INSERT INTO ${table}`) || stmt.includes(`INSERT INTO public.${table}`);

        for (const stmt of insertStatements) {
            // Convert MySQL column names to PostgreSQL (camelCase -> snake_case for some)
            let pgStmt = stmt
                .replace(/`/g, '"')
                .replace(/"emojiU"/g, 'emoji_u')
                .replace(/"wikiDataId"/g, 'wiki_data_id');

            for (const { pattern, replacement } of tableRenames) {
                pgStmt = pgStmt.replace(pattern, replacement);
            }

            // Subregions: column reordering. The schema-qualified form is
            // accepted too, matching the countries/states checks below;
            // otherwise `INSERT INTO public.subregions` would skip the reorder.
            if (targetsTable(pgStmt, 'subregions')) {
                pgStmt = transformSubregionsInsert(pgStmt);
            }

            // Countries: 32 cols -> 25 cols
            if (targetsTable(pgStmt, 'countries')) {
                pgStmt = transformCountriesInsert(pgStmt);
            }

            // States: 20 cols -> 15 cols
            if (targetsTable(pgStmt, 'states')) {
                pgStmt = transformStatesInsert(pgStmt);
            }

            // Prevent duplicate key errors: place ON CONFLICT before the
            // terminating semicolon when there is one.
            const trimmed = pgStmt.trim();
            pgStmt = trimmed.endsWith(';')
                ? `${trimmed.slice(0, -1)} ON CONFLICT DO NOTHING;`
                : `${pgStmt} ON CONFLICT DO NOTHING;`;

            await pool.query(pgStmt);
        }

        console.log(`   ✓ ${insertStatements.length} records inserted into ${tableName}`);
        return insertStatements.length;
    } catch (error) {
        console.error(`   ❌ Error loading ${filename}:`, error.message);
        throw error;
    }
}

/**
 * Load a gzipped SQL dump from SQL_DIR and insert its rows with bulk INSERTs.
 *
 * The file is decompressed fully into memory, its INSERT statements are
 * extracted, each statement's VALUES tuple is pulled out (for `cities`, after
 * running transformCitiesInsert), and the tuples are sent in batches of 2000
 * as `INSERT INTO <tableName> ... ON CONFLICT DO NOTHING`.
 *
 * Statements whose VALUES clause cannot be extracted are skipped and not
 * counted. Statements are cut at the first `;` after `INSERT INTO`, so a
 * string literal containing a semicolon would truncate its statement.
 *
 * @param {string} filename - File name relative to SQL_DIR.
 * @param {string} tableName - Target table; interpolated into the query text.
 *   `cities` additionally gets an explicit column list and column remapping.
 * @returns {Promise<number>} Number of value tuples sent (0 when the file
 *   contains no INSERT statements).
 * @throws Rethrows any read, decompression or query error after logging it.
 */
async function executeGzippedSqlFile(filename, tableName) {
    const filePath = join(SQL_DIR, filename);
    console.log(`   📄 Loading ${filename} (gzipped)...`);

    try {
        // Collect raw chunks and decode once at the end. Decoding each chunk
        // separately corrupts any multi-byte UTF-8 character that happens to
        // straddle a chunk boundary.
        const chunks = [];
        await pipeline(
            createReadStream(filePath),
            createGunzip(),
            new Writable({
                write(chunk, _encoding, callback) {
                    chunks.push(chunk);
                    callback();
                },
            }),
        );
        let sql = Buffer.concat(chunks).toString('utf8');

        // Clean up postgres-specific commands
        sql = sql
            .replace(/\\restrict[^\n]*/g, '')
            .replace(/\\unrestrict[^\n]*/g, '')
            .replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
            .replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
            .replace(/^COMMENT ON[^;]*;/gm, '')
            .replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match)
            .replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match)
            .replace(/^SELECT[^;]*set_config[^;]*;/gm, '');

        // Extract only INSERT statements
        const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];

        if (insertStatements.length === 0) {
            console.log(`   ⚠️  No INSERT statements found in ${filename}`);
            return 0;
        }

        console.log(`   📊 Found ${insertStatements.length} records to process...`);
        console.log(`   🚀 Optimizing: Grouping into bulk INSERTs...`);

        // Returns the text inside VALUES (...), or null when there is none.
        const extractValues = (stmt) => {
            const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
            return match ? match[1] : null;
        };

        const BATCH_SIZE = 2000; // Insert 2000 rows per query

        // Column list and per-statement transform depend on the target table.
        // Other tables fall back to the dump's positional column order.
        let columns = '';
        let transformFunc = extractValues;
        if (tableName === 'cities') {
            columns = '(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)';
            // Reuse the single-statement transform, then strip its INSERT wrapper.
            transformFunc = (stmt) => extractValues(transformCitiesInsert(stmt));
        }

        const valueBatches = [];
        let currentBatch = [];
        let processedCount = 0;

        for (const stmt of insertStatements) {
            const values = transformFunc(stmt);
            if (values) {
                currentBatch.push(`(${values})`);
                if (currentBatch.length >= BATCH_SIZE) {
                    valueBatches.push(currentBatch);
                    currentBatch = [];
                }
                processedCount++;
            }
        }
        if (currentBatch.length > 0) valueBatches.push(currentBatch);

        // Execute batches
        console.log(`   ⚡ Executing ${valueBatches.length} bulk queries...`);

        for (let i = 0; i < valueBatches.length; i++) {
            const batch = valueBatches[i];
            const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')} ON CONFLICT DO NOTHING`;
            await pool.query(query);

            // Progress line is rewritten in place every 10 batches and on the last one.
            if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
                process.stdout.write(`\r   ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
            }
        }
        console.log("");

        console.log(`   ✓ ${processedCount} records inserted into ${tableName}`);
        return processedCount;
    } catch (error) {
        console.error(`   ❌ Error loading ${filename}:`, error.message);
        throw error;
    }
}

/**
 * Seed base location data (Continents, Subregions, Countries).
 * This is fast and required for Company seeding.
 *
 * Each step logs a banner, then loads one SQL file via executeSqlFile inside
 * a console.time / console.timeEnd pair. Steps run strictly in order.
 *
 * @returns {Promise<void>}
 * @throws Rethrows the first step failure after logging it; later steps are not run.
 */
export async function seedBaseLocations() {
    console.log('🌍 Seeding base location data (Continents -> Countries)...');

    const steps = [
        // Continents come from regions.sql (6 records)
        { banner: '1️⃣  Seeding Continents...', timer: '   ⏱️  Continents', file: 'regions.sql', table: 'continents' },
        // Subregions (22 records)
        { banner: '2️⃣  Seeding Subregions...', timer: '   ⏱️  Subregions', file: 'subregions.sql', table: 'subregions' },
        // Countries (~250 records)
        { banner: '3️⃣  Seeding Countries...', timer: '   ⏱️  Countries', file: 'countries.sql', table: 'countries' },
    ];

    try {
        for (const { banner, timer, file, table } of steps) {
            console.log(banner);
            console.time(timer);
            await executeSqlFile(file, table);
            console.timeEnd(timer);
        }
    } catch (error) {
        console.error('❌ Base location seeding failed:', error.message);
        throw error;
    }
}

/**
 * Seed detailed location data (States, Cities).
 * This is slower/heavy and should be run after Users/Companies are ready.
 *
 * States are loaded statement-by-statement from states.sql; cities are loaded
 * from the gzipped dump with bulk inserts. Each step is wrapped in a
 * console.time / console.timeEnd pair.
 *
 * @returns {Promise<void>}
 * @throws Rethrows the first step failure after logging it.
 */
export async function seedDetailedLocations() {
    console.log('🌍 Seeding detailed location data (States -> Cities)...');

    const steps = [
        // States (~5400 records)
        {
            banner: '4️⃣  Seeding States...',
            timer: '   ⏱️  States',
            load: () => executeSqlFile('states.sql', 'states'),
        },
        // Cities (~160k records) - This is the big one
        {
            banner: '5️⃣  Seeding Cities (this may take a while)...',
            timer: '   ⏱️  Cities (Bulk Insert)',
            load: () => executeGzippedSqlFile('cities.sql.gz', 'cities'),
        },
    ];

    try {
        for (const { banner, timer, load } of steps) {
            console.log(banner);
            console.time(timer);
            await load();
            console.timeEnd(timer);
        }
    } catch (error) {
        console.error('❌ Detailed location seeding failed:', error.message);
        throw error;
    }
}

/**
 * Seed all location data from SQL dumps (legacy wrapper).
 *
 * Runs seedBaseLocations followed by seedDetailedLocations, then queries and
 * logs the row count of each of the five location tables.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any seeding or query failure after logging it.
 */
export async function seedLocationData() {
    console.log('🌍 Seeding comprehensive location data...');
    console.log('   Source: GeoDB Cities (https://github.com/dr5hn/countries-states-cities-database)\n');

    try {
        await seedBaseLocations();
        await seedDetailedLocations();

        console.log('\n   ✅ Location data seeding complete!');

        // One round trip returning a single row with all five counts.
        const result = await pool.query(`
            SELECT 
                (SELECT COUNT(*) FROM continents) as continents,
                (SELECT COUNT(*) FROM subregions) as subregions,
                (SELECT COUNT(*) FROM countries) as countries,
                (SELECT COUNT(*) FROM states) as states,
                (SELECT COUNT(*) FROM cities) as cities
        `);

        const totals = result.rows[0];
        const summary = [
            `${totals.continents} continents`,
            `${totals.subregions} subregions`,
            `${totals.countries} countries`,
            `${totals.states} states`,
            `${totals.cities} cities`,
        ].join(', ');
        console.log(`   📊 Totals: ${summary}`);
    } catch (error) {
        console.error('❌ Location seeding failed:', error.message);
        throw error;
    }
}

/**
 * Seed location data WITHOUT cities (fast mode for development).
 *
 * Loads continents, subregions, countries and states via executeSqlFile, in
 * that order and without timers, skips the cities import, then queries and
 * logs the row count of each of the five location tables.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any seeding or query failure after logging it.
 */
export async function seedLocationDataLite() {
    console.log('🌍 Seeding location data (LITE - no cities)...');
    console.log('   Source: GeoDB Cities (https://github.com/dr5hn/countries-states-cities-database)');
    console.log('   ⚡ Skipping cities for faster seeding\n');

    // [banner, dump file, table label] in load order.
    const steps = [
        ['1️⃣  Seeding Continents...', 'regions.sql', 'continents'],   // 6 records
        ['2️⃣  Seeding Subregions...', 'subregions.sql', 'subregions'], // 22 records
        ['3️⃣  Seeding Countries...', 'countries.sql', 'countries'],    // ~250 records
        ['4️⃣  Seeding States...', 'states.sql', 'states'],             // ~5400 records
    ];

    try {
        for (const [banner, file, table] of steps) {
            console.log(banner);
            await executeSqlFile(file, table);
        }

        console.log('5️⃣  ⏭️  Skipping Cities (use full seed for cities)\n');

        console.log('   ✅ Location data LITE seeding complete!');

        // One round trip returning a single row with all five counts.
        const result = await pool.query(`
            SELECT 
                (SELECT COUNT(*) FROM continents) as continents,
                (SELECT COUNT(*) FROM subregions) as subregions,
                (SELECT COUNT(*) FROM countries) as countries,
                (SELECT COUNT(*) FROM states) as states,
                (SELECT COUNT(*) FROM cities) as cities
        `);

        const totals = result.rows[0];
        const summary = [
            `${totals.continents} continents`,
            `${totals.subregions} subregions`,
            `${totals.countries} countries`,
            `${totals.states} states`,
            `${totals.cities} cities`,
        ].join(', ');
        console.log(`   📊 Totals: ${summary}`);
    } catch (error) {
        console.error('❌ Location seeding failed:', error.message);
        throw error;
    }
}

// For direct execution: run the full seed when this file is the script passed
// to node, rather than imported as a module.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
    import('../db.js')
        .then(async ({ testConnection, closePool }) => {
            const connected = await testConnection();
            if (!connected) {
                console.error('Could not connect to database');
                process.exit(1);
            }
            try {
                await seedLocationData();
            } finally {
                // Release the pool even when seeding fails.
                await closePool();
            }
        })
        .catch((error) => {
            // Explicit handler instead of an unhandled rejection; the exit
            // code still signals failure to the caller.
            console.error(error);
            process.exitCode = 1;
        });
}