feat(seeder): optimize city loading with bulk inserts and fix migration order

2025-12-24 18:19:03 -03:00 · 2025-12-24 18:19:03 -03:00 · 858df02a1d
commit 858df02a1d
parent 617bb5ab39
3 changed files with 62 additions and 56 deletions
--- a/backend/migrations/009_create_core_tables.sql.disabled
+++ b/backend/migrations/009_create_core_tables.sql.disabled
@ -1,33 +0,0 @@
 -- Migration: Create Core Architecture Tables
 -- Description: Agnostic tables for Multi-Tenant Architecture (UUID based)
 -- Companies (Tenants)
 -- core_companies: one row per company (tenant).
 -- IF NOT EXISTS means the statement does nothing when the table is already present.
 CREATE TABLE IF NOT EXISTS core_companies (
    -- 36-character string key; the migration header describes these tables as UUID based.
    id VARCHAR(36) PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    -- Optional (nullable) fields.
    document VARCHAR(50),
    contact VARCHAR(255),
    -- Defaults to 'ACTIVE'; no CHECK constraint restricts the allowed values here.
    status VARCHAR(20) DEFAULT 'ACTIVE',
    -- Both timestamps default to the insert time. Nothing in this statement
    -- refreshes updated_at on UPDATE, so writers must set it themselves.
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
 -- Users (Multi-Tenant)
 -- core_users: user accounts, each belonging to exactly one company via tenant_id.
 -- IF NOT EXISTS means the statement does nothing when the table is already present.
 CREATE TABLE IF NOT EXISTS core_users (
    -- 36-character string key; the migration header describes these tables as UUID based.
    id VARCHAR(36) PRIMARY KEY,
    -- Owning company; deleting the core_companies row deletes its users (ON DELETE CASCADE).
    tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE,
    name VARCHAR(255) NOT NULL,
    email VARCHAR(255) NOT NULL,
    -- NOTE(review): presumably stores a hashed password produced by the application; confirm against the writer.
    password_hash VARCHAR(255) NOT NULL,
    -- Defaults to 'ACTIVE'; no CHECK constraint restricts the allowed values here.
    status VARCHAR(20) DEFAULT 'ACTIVE',
    -- Both timestamps default to the insert time. Nothing in this statement
    -- refreshes updated_at on UPDATE, so writers must set it themselves.
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- An email is unique within a tenant; the same email may exist under different tenants.
    CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email)
 );
 -- Permissions / Roles: kept as a simple relational table rather than a JSON store.
 -- core_user_roles: role assignments, one row per (user, role) pair.
 -- IF NOT EXISTS means the statement does nothing when the table is already present.
 CREATE TABLE IF NOT EXISTS core_user_roles (
    -- Owning user; deleting the core_users row deletes its role rows (ON DELETE CASCADE).
    user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE,
    -- Free-form role name; there is no lookup table or CHECK constraint in this statement.
    role VARCHAR(50) NOT NULL,
    -- Composite key: a user may hold several roles, but each role at most once.
    PRIMARY KEY (user_id, role)
 );
--- a/backend/migrations/009_unify_schema.sql
+++ b/backend/migrations/009_unify_schema.sql
--- a/seeder-api/src/seeders/location-loader.js
+++ b/seeder-api/src/seeders/location-loader.js
@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) {
 /**
 * Execute a gzipped SQL file
 */
 /**
 * Execute a gzipped SQL file using optimized bulk inserts
 */
 async function executeGzippedSqlFile(filename, tableName) {
    const filePath = join(SQL_DIR, filename);
    console.log(`   📄 Loading ${filename} (gzipped)...`);
@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) {
            })
        );
-        // Clean up postgres-specific commands that might cause issues
+        // Clean up postgres-specific commands
        // These need to match ONLY standalone commands, not content inside VALUES
        sql = sql
            .replace(/\\restrict[^\n]*/g, '')
            .replace(/\\unrestrict[^\n]*/g, '')
@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) {
            return 0;
        }
-        console.log(`   📊 Found ${insertStatements.length} records to insert...`);
+        console.log(`   📊 Found ${insertStatements.length} records to process...`);
        console.log(`   🚀 Optimizing: Grouping into bulk INSERTs...`);
-        // Batch insert for performance
+        // Helper to extract values part only
-        const BATCH_SIZE = 1000;
+        const extractValues = (stmt) => {
-        for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) {
+            const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
-            const batch = insertStatements.slice(i, i + BATCH_SIZE);
+            if (!match) return null;
-            for (const stmt of batch) {
+            return match[1];
-                let pgStmt = stmt
+        };
                    .replace(/`/g, '"')
                    .replace(/"emojiU"/g, 'emoji_u')
                    .replace(/"wikiDataId"/g, 'wiki_data_id');
-                // Apply special transformations for cities (19 cols -> 15 cols)
+        const BATCH_SIZE = 2000; // Insert 2000 rows per query
-                if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
+        let processedCount = 0;
                    pgStmt = transformCitiesInsert(pgStmt);
                }
-                await pool.query(pgStmt);
+        // We need to determine the columns for the bulk insert
-            }
+        // We'll peek at the first valid statement for each table type
-            if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
+        let columns = "";
-                console.log(`   ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`);
+        let transformFunc = null;
-            }
+
        if (tableName === 'cities') {
            columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)";
            transformFunc = (stmt) => {
                // Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper
                // This is a bit inefficient (re-parsing) but safe given existing logic
                const fullStmt = transformCitiesInsert(stmt);
                return extractValues(fullStmt);
            };
        } else {
            // Fallback for other tables if we use this function for them
            transformFunc = (stmt) => extractValues(stmt);
        }
-        console.log(`   ✓ ${insertStatements.length} records inserted into ${tableName}`);
+        const valueBatches = [];
-        return insertStatements.length;
+        let currentBatch = [];
        for (const stmt of insertStatements) {
            const values = transformFunc(stmt);
            if (values) {
                currentBatch.push(`(${values})`);
                if (currentBatch.length >= BATCH_SIZE) {
                    valueBatches.push(currentBatch);
                    currentBatch = [];
                }
                processedCount++;
            }
        }
        if (currentBatch.length > 0) valueBatches.push(currentBatch);
        // Execute batches
        console.log(`   ⚡ Executing ${valueBatches.length} bulk queries...`);
        for (let i = 0; i < valueBatches.length; i++) {
            const batch = valueBatches[i];
            const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`;
            await pool.query(query);
            if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
                process.stdout.write(`\r   ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
            }
        }
        console.log("");
        console.log(`   ✓ ${processedCount} records inserted into ${tableName}`);
        return processedCount;
    } catch (error) {
        console.error(`   ❌ Error loading ${filename}:`, error.message);
        throw error;