feat(seeder): optimize city loading with bulk inserts and fix migration order

This commit is contained in:
Tiago Yamamoto 2025-12-24 18:19:03 -03:00
parent 617bb5ab39
commit 858df02a1d
3 changed files with 62 additions and 56 deletions

View file

@@ -1,33 +0,0 @@
-- Migration: Create Core Architecture Tables
-- Description: Agnostic tables for Multi-Tenant Architecture (UUID based)
-- Companies (Tenants)
-- One row per tenant; id is a 36-char UUID stored as VARCHAR (portable across engines).
CREATE TABLE IF NOT EXISTS core_companies (
id VARCHAR(36) PRIMARY KEY,
name VARCHAR(255) NOT NULL,
document VARCHAR(50), -- company registration/tax document (free-form; format not enforced here)
contact VARCHAR(255), -- free-form contact info (phone/email; schema does not constrain which)
status VARCHAR(20) DEFAULT 'ACTIVE', -- lifecycle flag; values beyond 'ACTIVE' are defined by the application
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -- NOTE: not auto-updated on UPDATE; application must maintain it
);
-- Users (Multi-Tenant)
-- Each user belongs to exactly one tenant; deleting a company cascades to its users.
CREATE TABLE IF NOT EXISTS core_users (
id VARCHAR(36) PRIMARY KEY,
tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
email VARCHAR(255) NOT NULL, -- unique per tenant only (see constraint below), so the same address may exist under different tenants
password_hash VARCHAR(255) NOT NULL, -- stores a hash only; hashing scheme is decided by the application layer
status VARCHAR(20) DEFAULT 'ACTIVE', -- lifecycle flag; values beyond 'ACTIVE' are defined by the application
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- NOTE: not auto-updated on UPDATE; application must maintain it
CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email)
);
-- Permissions / Roles: simple relational join table mapping users to role names.
-- (Kept relational rather than a JSON store so membership can be queried and constrained directly.)
CREATE TABLE IF NOT EXISTS core_user_roles (
user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL, -- role name string; the set of valid values is defined by the application
PRIMARY KEY (user_id, role) -- composite key also prevents assigning the same role to a user twice
);

View file

@@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) {
/**
* Execute a gzipped SQL file
*/
/**
* Execute a gzipped SQL file using optimized bulk inserts
*/
async function executeGzippedSqlFile(filename, tableName) {
const filePath = join(SQL_DIR, filename);
console.log(` 📄 Loading ${filename} (gzipped)...`);
@@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) {
})
);
// Clean up postgres-specific commands that might cause issues
// These need to match ONLY standalone commands, not content inside VALUES
// Clean up postgres-specific commands
sql = sql
.replace(/\\restrict[^\n]*/g, '')
.replace(/\\unrestrict[^\n]*/g, '')
@@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) {
return 0;
}
console.log(` 📊 Found ${insertStatements.length} records to insert...`);
console.log(` 📊 Found ${insertStatements.length} records to process...`);
console.log(` 🚀 Optimizing: Grouping into bulk INSERTs...`);
// Batch insert for performance
const BATCH_SIZE = 1000;
for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) {
const batch = insertStatements.slice(i, i + BATCH_SIZE);
for (const stmt of batch) {
let pgStmt = stmt
.replace(/`/g, '"')
.replace(/"emojiU"/g, 'emoji_u')
.replace(/"wikiDataId"/g, 'wiki_data_id');
// Helper to extract values part only
const extractValues = (stmt) => {
const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
if (!match) return null;
return match[1];
};
// Apply special transformations for cities (19 cols -> 15 cols)
if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
pgStmt = transformCitiesInsert(pgStmt);
}
const BATCH_SIZE = 2000; // Insert 2000 rows per query
let processedCount = 0;
await pool.query(pgStmt);
}
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
console.log(` ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`);
}
// We need to determine the columns for the bulk insert
// We'll peek at the first valid statement for each table type
let columns = "";
let transformFunc = null;
if (tableName === 'cities') {
columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)";
transformFunc = (stmt) => {
// Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper
// This is a bit inefficient (re-parsing) but safe given existing logic
const fullStmt = transformCitiesInsert(stmt);
return extractValues(fullStmt);
};
} else {
// Fallback for other tables if we use this function for them
transformFunc = (stmt) => extractValues(stmt);
}
console.log(`${insertStatements.length} records inserted into ${tableName}`);
return insertStatements.length;
const valueBatches = [];
let currentBatch = [];
for (const stmt of insertStatements) {
const values = transformFunc(stmt);
if (values) {
currentBatch.push(`(${values})`);
if (currentBatch.length >= BATCH_SIZE) {
valueBatches.push(currentBatch);
currentBatch = [];
}
processedCount++;
}
}
if (currentBatch.length > 0) valueBatches.push(currentBatch);
// Execute batches
console.log(` ⚡ Executing ${valueBatches.length} bulk queries...`);
for (let i = 0; i < valueBatches.length; i++) {
const batch = valueBatches[i];
const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`;
await pool.query(query);
if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
process.stdout.write(`\r ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
}
}
console.log("");
console.log(`${processedCount} records inserted into ${tableName}`);
return processedCount;
} catch (error) {
console.error(` ❌ Error loading ${filename}:`, error.message);
throw error;