feat(seeder): optimize city loading with bulk inserts and fix migration order
This commit is contained in:
parent
617bb5ab39
commit
858df02a1d
3 changed files with 62 additions and 56 deletions
|
|
@ -1,33 +0,0 @@
|
||||||
-- Migration: Create Core Architecture Tables
|
|
||||||
-- Description: Agnostic tables for Multi-Tenant Architecture (UUID based)
|
|
||||||
|
|
||||||
-- Companies (Tenants)
|
|
||||||
CREATE TABLE IF NOT EXISTS core_companies (
|
|
||||||
id VARCHAR(36) PRIMARY KEY,
|
|
||||||
name VARCHAR(255) NOT NULL,
|
|
||||||
document VARCHAR(50),
|
|
||||||
contact VARCHAR(255),
|
|
||||||
status VARCHAR(20) DEFAULT 'ACTIVE',
|
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Users (Multi-Tenant)
|
|
||||||
CREATE TABLE IF NOT EXISTS core_users (
|
|
||||||
id VARCHAR(36) PRIMARY KEY,
|
|
||||||
tenant_id VARCHAR(36) NOT NULL REFERENCES core_companies(id) ON DELETE CASCADE,
|
|
||||||
name VARCHAR(255) NOT NULL,
|
|
||||||
email VARCHAR(255) NOT NULL,
|
|
||||||
password_hash VARCHAR(255) NOT NULL,
|
|
||||||
status VARCHAR(20) DEFAULT 'ACTIVE',
|
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
||||||
CONSTRAINT unique_email_per_tenant UNIQUE (tenant_id, email)
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Permissions / Roles (kept as a simple relational table rather than a JSON store)
|
|
||||||
CREATE TABLE IF NOT EXISTS core_user_roles (
|
|
||||||
user_id VARCHAR(36) NOT NULL REFERENCES core_users(id) ON DELETE CASCADE,
|
|
||||||
role VARCHAR(50) NOT NULL,
|
|
||||||
PRIMARY KEY (user_id, role)
|
|
||||||
);
|
|
||||||
|
|
@ -341,6 +341,9 @@ async function executeSqlFile(filename, tableName) {
|
||||||
/**
|
/**
|
||||||
* Execute a gzipped SQL file
|
* Execute a gzipped SQL file
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Execute a gzipped SQL file using optimized bulk inserts
|
||||||
|
*/
|
||||||
async function executeGzippedSqlFile(filename, tableName) {
|
async function executeGzippedSqlFile(filename, tableName) {
|
||||||
const filePath = join(SQL_DIR, filename);
|
const filePath = join(SQL_DIR, filename);
|
||||||
console.log(` 📄 Loading ${filename} (gzipped)...`);
|
console.log(` 📄 Loading ${filename} (gzipped)...`);
|
||||||
|
|
@ -362,8 +365,7 @@ async function executeGzippedSqlFile(filename, tableName) {
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
// Clean up postgres-specific commands that might cause issues
|
// Clean up postgres-specific commands
|
||||||
// These need to match ONLY standalone commands, not content inside VALUES
|
|
||||||
sql = sql
|
sql = sql
|
||||||
.replace(/\\restrict[^\n]*/g, '')
|
.replace(/\\restrict[^\n]*/g, '')
|
||||||
.replace(/\\unrestrict[^\n]*/g, '')
|
.replace(/\\unrestrict[^\n]*/g, '')
|
||||||
|
|
@ -382,32 +384,69 @@ async function executeGzippedSqlFile(filename, tableName) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(` 📊 Found ${insertStatements.length} records to insert...`);
|
console.log(` 📊 Found ${insertStatements.length} records to process...`);
|
||||||
|
console.log(` 🚀 Optimizing: Grouping into bulk INSERTs...`);
|
||||||
|
|
||||||
// Batch insert for performance
|
// Helper to extract values part only
|
||||||
const BATCH_SIZE = 1000;
|
const extractValues = (stmt) => {
|
||||||
for (let i = 0; i < insertStatements.length; i += BATCH_SIZE) {
|
const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
|
||||||
const batch = insertStatements.slice(i, i + BATCH_SIZE);
|
if (!match) return null;
|
||||||
for (const stmt of batch) {
|
return match[1];
|
||||||
let pgStmt = stmt
|
};
|
||||||
.replace(/`/g, '"')
|
|
||||||
.replace(/"emojiU"/g, 'emoji_u')
|
|
||||||
.replace(/"wikiDataId"/g, 'wiki_data_id');
|
|
||||||
|
|
||||||
// Apply special transformations for cities (19 cols -> 15 cols)
|
const BATCH_SIZE = 2000; // Insert 2000 rows per query
|
||||||
if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
|
let processedCount = 0;
|
||||||
pgStmt = transformCitiesInsert(pgStmt);
|
|
||||||
}
|
|
||||||
|
|
||||||
await pool.query(pgStmt);
|
// We need to determine the columns for the bulk insert
|
||||||
}
|
// The column list is hard-coded for 'cities'; any other table gets no explicit column list
|
||||||
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
|
let columns = "";
|
||||||
console.log(` ... ${Math.min(i + BATCH_SIZE, insertStatements.length)} / ${insertStatements.length}`);
|
let transformFunc = null;
|
||||||
}
|
|
||||||
|
if (tableName === 'cities') {
|
||||||
|
columns = "(id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id)";
|
||||||
|
transformFunc = (stmt) => {
|
||||||
|
// Reuse existing logic to parse and map, but strip the "INSERT INTO..." wrapper
|
||||||
|
// This is a bit inefficient (re-parsing) but safe given existing logic
|
||||||
|
const fullStmt = transformCitiesInsert(stmt);
|
||||||
|
return extractValues(fullStmt);
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// Fallback for other tables if we use this function for them
|
||||||
|
transformFunc = (stmt) => extractValues(stmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(` ✓ ${insertStatements.length} records inserted into ${tableName}`);
|
const valueBatches = [];
|
||||||
return insertStatements.length;
|
let currentBatch = [];
|
||||||
|
|
||||||
|
for (const stmt of insertStatements) {
|
||||||
|
const values = transformFunc(stmt);
|
||||||
|
if (values) {
|
||||||
|
currentBatch.push(`(${values})`);
|
||||||
|
if (currentBatch.length >= BATCH_SIZE) {
|
||||||
|
valueBatches.push(currentBatch);
|
||||||
|
currentBatch = [];
|
||||||
|
}
|
||||||
|
processedCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (currentBatch.length > 0) valueBatches.push(currentBatch);
|
||||||
|
|
||||||
|
// Execute batches
|
||||||
|
console.log(` ⚡ Executing ${valueBatches.length} bulk queries...`);
|
||||||
|
|
||||||
|
for (let i = 0; i < valueBatches.length; i++) {
|
||||||
|
const batch = valueBatches[i];
|
||||||
|
const query = `INSERT INTO ${tableName} ${columns} VALUES ${batch.join(', ')}`;
|
||||||
|
await pool.query(query);
|
||||||
|
|
||||||
|
if ((i + 1) % 10 === 0 || i === valueBatches.length - 1) {
|
||||||
|
process.stdout.write(`\r ... ${Math.min((i + 1) * BATCH_SIZE, processedCount)} / ${processedCount} rows`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
console.log("");
|
||||||
|
|
||||||
|
console.log(` ✓ ${processedCount} records inserted into ${tableName}`);
|
||||||
|
return processedCount;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(` ❌ Error loading ${filename}:`, error.message);
|
console.error(` ❌ Error loading ${filename}:`, error.message);
|
||||||
throw error;
|
throw error;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue