From 858df02a1db86acf692a66b6bac2ba5a0db7f6dd Mon Sep 17 00:00:00 2001 From: Tiago Yamamoto Date: Wed, 24 Dec 2025 18:19:03 -0300 Subject: [PATCH] feat(seeder): optimize city loading with bulk inserts and fix migration order --- .../009_create_core_tables.sql.disabled | 33 ------- ..._unify_schema.sql => 009_unify_schema.sql} | 0 seeder-api/src/seeders/location-loader.js | 85 ++++++++++++++----- 3 files changed, 62 insertions(+), 56 deletions(-) delete mode 100644 backend/migrations/009_create_core_tables.sql.disabled rename backend/migrations/{020_unify_schema.sql => 009_unify_schema.sql} (100%) diff --git a/backend/migrations/009_create_core_tables.sql.disabled b/backend/migrations/009_create_core_tables.sql.disabled deleted file mode 100644 index ade141a..0000000 --- a/backend/migrations/009_create_core_tables.sql.disabled +++ /dev/null @@ -1,33 +0,0 @@ --- Migration: Create Core Architecture Tables --- Description: Agnostic tables for Multi-Tenant Architecture (UUID based) - --- Companies (Tenants) -CREATE TABLE IF NOT EXISTS core_companies ( - id VARCHAR(36) PRIMARY KEY, - name VARCHAR(255) NOT NULL, - document VARCHAR(50), - contact VARCHAR(255), - status VARCHAR(20) DEFAULT 'ACTIVE', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - --- Users (Multi-Tenant) -CREATE TABLE IF NOT EXISTS core_users ( - id VARCHAR(36) PRIMARY KEY, - tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE, - name VARCHAR(255) NOT NULL, - email VARCHAR(255) NOT NULL, - password_hash VARCHAR(255) NOT NULL, - status VARCHAR(20) DEFAULT 'ACTIVE', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email) -); - --- Permissions / Roles (Simplified JSON store or Relational? keeping it simple Relational) -CREATE TABLE IF NOT EXISTS core_user_roles ( - user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE, - role VARCHAR(50) NOT NULL, - PRIMARY KEY (user_id, role) -); diff --git a/backend/migrations/020_unify_schema.sql b/backend/migrations/009_unify_schema.sql similarity index 100% rename from backend/migrations/020_unify_schema.sql rename to backend/migrations/009_unify_schema.sql diff --git a/seeder-api/src/seeders/location-loader.js b/seeder-api/src/seeders/location-loader.js index cc9c316..8269200 100644 --- a/seeder-api/src/seeders/location-loader.js +++ b/seeder-api/src/seeders/location-loader.js @@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) { /** * Execute a gzipped SQL file */ +/** + * Execute a gzipped SQL file using optimized bulk inserts + */ async function executeGzippedSqlFile(filename, tableName) { const filePath = join(SQL_DIR, filename); console.log(` 📄 Loading ${filename} (gzipped)...`); @@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) { }) ); - // Clean up postgres-specific commands that might cause issues - // These need to match ONLY standalone commands, not content inside VALUES + // Clean up postgres-specific commands sql = sql .replace(/\\restrict[^\n]*/g, '') .replace(/\\unrestrict[^\n]*/g, '') @@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) { return 0; } - console.log(` 📊 Found ${insertStatements.length} records to insert...`); + console.log(` 📊 Found ${insertStatements.length} records to process...`); + console.log(` 🚀 Optimizing: Grouping into bulk INSERTs...`); - // Batch insert for performance - const BATCH_SIZE = 1000; - for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) { - const batch = insertStatements.slice(i, i + BATCH_SIZE); - for (const stmt of batch) { - let pgStmt = stmt - .replace(/`/g, '"') - .replace(/"emojiU"/g, 'emoji_u') - .replace(/"wikiDataId"/g, 'wiki_data_id'); + // Helper to extract values part only + const extractValues = (stmt) => { + const match = stmt.match(/VALUES\s*\((.+)\);?$/is); + if (!match) return null; + return match[1]; + }; - // Apply special transformations for cities (19 cols -> 15 cols) - if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) { - pgStmt = transformCitiesInsert(pgStmt); - } + const BATCH_SIZE = 2000; // Insert 2000 rows per query + let processedCount = 0; - await pool.query(pgStmt); - } - if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) { - console.log(` ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`); - } + // We need to determine the columns for the bulk insert + // We'll peek at the first valid statement for each table type + let columns = ""; + let transformFunc = null; + + if (tableName === 'cities') { + columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)"; + transformFunc = (stmt) => { + // Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper + // This is a bit inefficient (re-parsing) but safe given existing logic + const fullStmt = transformCitiesInsert(stmt); + return extractValues(fullStmt); + }; + } else { + // Fallback for other tables if we use this function for them + transformFunc = (stmt) => extractValues(stmt); } - console.log(` ✓ ${insertStatements.length} records inserted into ${tableName}`); - return insertStatements.length; + const valueBatches = []; + let currentBatch = []; + + for (const stmt of insertStatements) { + const values = transformFunc(stmt); + if (values) { + currentBatch.push(`(${values})`); + if (currentBatch.length >= BATCH_SIZE) { + valueBatches.push(currentBatch); + currentBatch = []; + } + processedCount++; + } + } + if (currentBatch.length > 0) valueBatches.push(currentBatch); + + // Execute batches + console.log(` ⚡ Executing ${valueBatches.length} bulk queries...`); + + for (let i = 0; i < valueBatches.length; i++) { + const batch = valueBatches[i]; + const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`; + await pool.query(query); + + if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) { + process.stdout.write(`\r ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`); + } + } + console.log(""); + + console.log(` ✓ ${processedCount} records inserted into ${tableName}`); + return processedCount; } catch (error) { console.error(` ❌ Error loading ${filename}:`, error.message); throw error;