From 858df02a1db86acf692a66b6bac2ba5a0db7f6dd Mon Sep 17 00:00:00 2001
From: Tiago Yamamoto <tiago.ribeiro@inventcloud.com.br>
Date: Wed, 24 Dec 2025 18:19:03 -0300
Subject: [PATCH] feat(seeder): optimize city loading with bulk inserts and fix
 migration order

---
 .../009_create_core_tables.sql.disabled       | 33 -------
 ..._unify_schema.sql => 009_unify_schema.sql} |  0
 seeder-api/src/seeders/location-loader.js     | 85 ++++++++++++++-----
 3 files changed, 62 insertions(+), 56 deletions(-)
 delete mode 100644 backend/migrations/009_create_core_tables.sql.disabled
 rename backend/migrations/{020_unify_schema.sql => 009_unify_schema.sql} (100%)

diff --git a/backend/migrations/009_create_core_tables.sql.disabled b/backend/migrations/009_create_core_tables.sql.disabled
deleted file mode 100644
index ade141a..0000000
--- a/backend/migrations/009_create_core_tables.sql.disabled
+++ /dev/null
@@ -1,33 +0,0 @@
--- Migration: Create Core Architecture Tables
--- Description: Agnostic tables for Multi-Tenant Architecture (UUID based)
-
--- Companies (Tenants)
-CREATE TABLE IF NOT EXISTS core_companies (
-    id VARCHAR(36) PRIMARY KEY,
-    name VARCHAR(255) NOT NULL,
-    document VARCHAR(50),
-    contact VARCHAR(255),
-    status VARCHAR(20) DEFAULT 'ACTIVE',
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-);
-
--- Users (Multi-Tenant)
-CREATE TABLE IF NOT EXISTS core_users (
-    id VARCHAR(36) PRIMARY KEY,
-    tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE,
-    name VARCHAR(255) NOT NULL,
-    email VARCHAR(255) NOT NULL,
-    password_hash VARCHAR(255) NOT NULL,
-    status VARCHAR(20) DEFAULT 'ACTIVE',
-    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email)
-);
-
--- Permissions / Roles (Simplified JSON store or Relational? keeping it simple Relational)
-CREATE TABLE IF NOT EXISTS core_user_roles (
-    user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE,
-    role VARCHAR(50) NOT NULL,
-    PRIMARY KEY (user_id, role)
-);
diff --git a/backend/migrations/020_unify_schema.sql b/backend/migrations/009_unify_schema.sql
similarity index 100%
rename from backend/migrations/020_unify_schema.sql
rename to backend/migrations/009_unify_schema.sql
diff --git a/seeder-api/src/seeders/location-loader.js b/seeder-api/src/seeders/location-loader.js
index cc9c316..8269200 100644
--- a/seeder-api/src/seeders/location-loader.js
+++ b/seeder-api/src/seeders/location-loader.js
@@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) {
 /**
  * Execute a gzipped SQL file
  */
+/**
+ * Execute a gzipped SQL file using optimized bulk inserts
+ */
 async function executeGzippedSqlFile(filename, tableName) {
     const filePath = join(SQL_DIR, filename);
     console.log(`   📄 Loading ${filename} (gzipped)...`);
@@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) {
             })
         );
 
-        // Clean up postgres-specific commands that might cause issues
-        // These need to match ONLY standalone commands, not content inside VALUES
+        // Clean up postgres-specific commands
         sql = sql
             .replace(/\\restrict[^\n]*/g, '')
             .replace(/\\unrestrict[^\n]*/g, '')
@@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) {
             return 0;
         }
 
-        console.log(`   📊 Found ${insertStatements.length} records to insert...`);
+        console.log(`   📊 Found ${insertStatements.length} records to process...`);
+        console.log(`   🚀 Optimizing: Grouping into bulk INSERTs...`);
 
-        // Batch insert for performance
-        const BATCH_SIZE = 1000;
-        for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) {
-            const batch = insertStatements.slice(i, i + BATCH_SIZE);
-            for (const stmt of batch) {
-                let pgStmt = stmt
-                    .replace(/`/g, '"')
-                    .replace(/"emojiU"/g, 'emoji_u')
-                    .replace(/"wikiDataId"/g, 'wiki_data_id');
+        // Helper to extract values part only
+        const extractValues = (stmt) => {
+            const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
+            if (!match) return null;
+            return match[1];
+        };
 
-                // Apply special transformations for cities (19 cols -> 15 cols)
-                if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
-                    pgStmt = transformCitiesInsert(pgStmt);
-                }
+        const BATCH_SIZE = 2000; // Insert 2000 rows per query
+        let processedCount = 0;
 
-                await pool.query(pgStmt);
-            }
-            if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
-                console.log(`   ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`);
-            }
+        // Pick the column list and per-statement transform for the bulk insert.
+        // Only `cities` gets an explicit column list; other tables leave it empty (positional VALUES).
+        let columns = "";
+        let transformFunc = null;
+
+        if (tableName === 'cities') {
+            columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)";
+            transformFunc = (stmt) => {
+                // Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper
+                // This is a bit inefficient (re-parsing) but safe given existing logic
+                const fullStmt = transformCitiesInsert(stmt);
+                return extractValues(fullStmt);
+            };
+        } else {
+            // Fallback for other tables if we use this function for them
+            transformFunc = (stmt) => extractValues(stmt);
         }
 
-        console.log(`   ✓ ${insertStatements.length} records inserted into ${tableName}`);
-        return insertStatements.length;
+        const valueBatches = [];
+        let currentBatch = [];
+
+        for (const stmt of insertStatements) {
+            const values = transformFunc(stmt);
+            if (values) {
+                currentBatch.push(`(${values})`);
+                if (currentBatch.length >= BATCH_SIZE) {
+                    valueBatches.push(currentBatch);
+                    currentBatch = [];
+                }
+                processedCount++;
+            }
+        }
+        if (currentBatch.length > 0) valueBatches.push(currentBatch);
+
+        // Execute batches
+        console.log(`   ⚡ Executing ${valueBatches.length} bulk queries...`);
+
+        for (let i = 0; i < valueBatches.length; i++) {
+            const batch = valueBatches[i];
+            const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`;
+            await pool.query(query);
+
+            if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
+                process.stdout.write(`\r   ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
+            }
+        }
+        console.log("");
+
+        console.log(`   ✓ ${processedCount} records inserted into ${tableName}`);
+        return processedCount;
     } catch (error) {
         console.error(`   ❌ Error loading ${filename}:`, error.message);
         throw error;