feat(seeder): optimize city loading with bulk inserts and fix migration order

This commit is contained in:
Tiago Yamamoto 2025-12-24 18:19:03 -03:00
parent 617bb5ab39
commit 858df02a1d
3 changed files with 62 additions and 56 deletions

View file

@@ -1,33 +0,0 @@
-- Migration: Create Core Architecture Tables
-- Description: Agnostic tables for Multi-Tenant Architecture (UUID based)
-- Companies (Tenants)
-- One row per tenant; id is a 36-char UUID stored as VARCHAR (portable across engines).
CREATE TABLE IF NOT EXISTS core_companies (
id VARCHAR(36) PRIMARY KEY,
name VARCHAR(255) NOT NULL,
document VARCHAR(50), -- company registration/tax document (free-form; format not enforced here)
contact VARCHAR(255), -- free-form contact info (phone/email; schema does not constrain which)
status VARCHAR(20) DEFAULT 'ACTIVE', -- lifecycle flag; values beyond 'ACTIVE' are defined by the application
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -- NOTE: not auto-updated on UPDATE; application must maintain it
);
-- Users (Multi-Tenant)
-- Each user belongs to exactly one tenant; deleting a company cascades to its users.
CREATE TABLE IF NOT EXISTS core_users (
id VARCHAR(36) PRIMARY KEY,
tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL, -- unique per tenant only (see constraint below), so the same address may exist under different tenants
password_hash VARCHAR(255) NOT NULL, -- stores a hash only; hashing scheme is decided by the application layer
status VARCHAR(20) DEFAULT 'ACTIVE', -- lifecycle flag; values beyond 'ACTIVE' are defined by the application
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- NOTE: not auto-updated on UPDATE; application must maintain it
CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email)
);
-- Permissions / Roles: simple relational join table mapping users to role names.
-- (Kept relational rather than a JSON store so membership can be queried and constrained directly.)
CREATE TABLE IF NOT EXISTS core_user_roles (
user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL, -- role name string; the set of valid values is defined by the application
PRIMARY KEY (user_id, role) -- composite key also prevents assigning the same role to a user twice
);

View file

@@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) {
/**
* Execute a gzipped SQL file
*/
/**
* Execute a gzipped SQL file using optimized bulk inserts
*/
async function executeGzippedSqlFile(filename, tableName) {
const filePath = join(SQL_DIR, filename);
console.log(` 📄 Loading ${filename} (gzipped)...`);
@@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) {
})
);
// Clean up postgres-specific commands that might cause issues
// These need to match ONLY standalone commands, not content inside VALUES
// Clean up postgres-specific commands
sql = sql
.replace(/\\restrict[^\n]*/g, '')
.replace(/\\unrestrict[^\n]*/g, '')
@@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) {
return 0;
}
console.log(` 📊 Found ${insertStatements.length} records to insert...`);
console.log(` 📊 Found ${insertStatements.length} records to process...`);
console.log(` 🚀 Optimizing: Grouping into bulk INSERTs...`);
// Batch insert for performance
const BATCH_SIZE = 1000;
for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) {
const batch = insertStatements.slice(i, i + BATCH_SIZE);
for (const stmt of batch) {
let pgStmt = stmt
.replace(/`/g, '"')
.replace(/"emojiU"/g, 'emoji_u')
.replace(/"wikiDataId"/g, 'wiki_data_id');
// Helper to extract values part only
const extractValues = (stmt) => {
const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
if (!match) return null;
return match[1];
};
// Apply special transformations for cities (19 cols -> 15 cols)
if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
pgStmt = transformCitiesInsert(pgStmt);
}
const BATCH_SIZE = 2000; // Insert 2000 rows per query
let processedCount = 0;
await pool.query(pgStmt);
}
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
console.log(` ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`);
}
// We need to determine the columns for the bulk insert
// We'll peek at the first valid statement for each table type
let columns = "";
let transformFunc = null;
if (tableName === 'cities') {
columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)";
transformFunc = (stmt) => {
// Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper
// This is a bit inefficient (re-parsing) but safe given existing logic
const fullStmt = transformCitiesInsert(stmt);
return extractValues(fullStmt);
};
} else {
// Fallback for other tables if we use this function for them
transformFunc = (stmt) => extractValues(stmt);
}
console.log(`${insertStatements.length} records inserted into ${tableName}`);
return insertStatements.length;
const valueBatches = [];
let currentBatch = [];
for (const stmt of insertStatements) {
const values = transformFunc(stmt);
if (values) {
currentBatch.push(`(${values})`);
if (currentBatch.length >= BATCH_SIZE) {
valueBatches.push(currentBatch);
currentBatch = [];
}
processedCount++;
}
}
if (currentBatch.length > 0) valueBatches.push(currentBatch);
// Execute batches
console.log(` ⚡ Executing ${valueBatches.length} bulk queries...`);
for (let i = 0; i < valueBatches.length; i++) {
const batch = valueBatches[i];
const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`;
await pool.query(query);
if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
process.stdout.write(`\r ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
}
}
console.log("");
console.log(`${processedCount} records inserted into ${tableName}`);
return processedCount;
} catch (error) {
console.error(` ❌ Error loading ${filename}:`, error.message);
throw error;