fix(seeder): fix location data column schema transformations
- Fixed regex patterns that incorrectly matched SET inside quoted values
- Added transformSubregionsInsert() - maps 8 dump cols to schema cols
- Added transformCountriesInsert() - maps 32 dump cols to 25 schema cols
- Added transformStatesInsert() - maps 20 dump cols to 15 schema cols
- Added transformCitiesInsert() - maps 19 dump cols to 15 schema cols
- Added parseValues() helper for parsing SQL VALUES with JSON handling
- Successfully seeds: continents(6), subregions(22), countries(250), states(5296)
This commit is contained in:
parent
ac84571c55
commit
7720f2e35e
1 changed files with 288 additions and 11 deletions
|
|
@ -15,8 +15,260 @@ const SQL_DIR = join(__dirname, '..', '..', 'sql');
|
||||||
const TABLE_MAPPING = {
|
const TABLE_MAPPING = {
|
||||||
'public.regions': 'continents',
|
'public.regions': 'continents',
|
||||||
'regions': 'continents',
|
'regions': 'continents',
|
||||||
|
'public.subregions': 'subregions',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Transform subregions INSERT statements to match the migration schema.
 *
 * SQL dump columns (8): id, name, translations, region_id, created_at, updated_at, flag, wikiDataId
 * Migration columns (8): id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id
 *
 * The dump's region_id maps to our continent_id, and it swaps positions with
 * translations; wikiDataId is renamed to wiki_data_id.
 *
 * @param {string} stmt - A single-row `INSERT INTO subregions VALUES (...)` statement.
 * @returns {string} The rewritten INSERT with an explicit column list, or the
 *   original statement when the VALUES clause cannot be located or yields
 *   fewer than 8 values.
 */
function transformSubregionsInsert(stmt) {
  // Extract the raw VALUES payload. NOTE(review): assumes one row per INSERT;
  // multi-row `VALUES (...), (...)` statements are not handled — confirm the
  // dump format emits single-row INSERTs.
  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!valuesMatch) return stmt;

  // Reuse the shared VALUES parser instead of duplicating the quote/JSON-aware
  // splitting logic inline — keeps behavior consistent with the countries,
  // states and cities transforms. (parseValues treats only single quotes as
  // string delimiters; double quotes in the dump appear only inside
  // single-quoted JSON values, so this is equivalent for this data.)
  const values = parseValues(valuesMatch[1]);

  // Reorder: [id, name, translations, region_id, created, updated, flag, wikiDataId]
  //      to: [id, name, continent_id, translations, created, updated, flag, wiki_data_id]
  if (values.length >= 8) {
    const [id, name, translations, region_id, created, updated, flag, wikiDataId] = values;
    const reordered = [id, name, region_id, translations, created, updated, flag, wikiDataId];
    return `INSERT INTO subregions (id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${reordered.join(', ')});`;
  }

  return stmt;
}
|
||||||
|
|
||||||
|
/**
 * Transform countries INSERT statements to match the migration schema.
 *
 * SQL dump columns (32): id, name, iso3, numeric_code, iso2, phonecode, capital, currency,
 *   currency_name, currency_symbol, tld, native, population, gdp, region, region_id,
 *   subregion, subregion_id, nationality, area_sq_km, postal_code_format, postal_code_regex,
 *   timezones, translations, latitude, longitude, emoji, emojiU, created_at, updated_at,
 *   flag, wikiDataId
 * Migration columns (25): id, name, iso2, iso3, numeric_code, phonecode, capital, currency,
 *   currency_name, currency_symbol, tld, native, continent_id, subregion_id, nationality,
 *   latitude, longitude, emoji, emoji_u, timezones, translations, created_at, updated_at,
 *   flag, wiki_data_id
 *
 * @param {string} stmt - A single-row `INSERT INTO countries VALUES (...)` statement.
 * @returns {string} The rewritten INSERT with an explicit 25-column list, or the
 *   original statement when the VALUES clause is missing or parses to fewer
 *   than 32 values.
 */
function transformCountriesInsert(stmt) {
  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!valuesMatch) return stmt;

  const values = parseValues(valuesMatch[1]);
  if (values.length < 32) return stmt;

  // Dump index -> migration column order. region_id (index 15) becomes
  // continent_id; population, gdp, region name, subregion name, area_sq_km
  // and the postal_code_* fields are dropped.
  const dumpToSchema = [
    0,  // id
    1,  // name
    4,  // iso2
    2,  // iso3
    3,  // numeric_code
    5,  // phonecode
    6,  // capital
    7,  // currency
    8,  // currency_name
    9,  // currency_symbol
    10, // tld
    11, // native
    15, // region_id -> continent_id
    17, // subregion_id
    18, // nationality
    24, // latitude
    25, // longitude
    26, // emoji
    27, // emojiU -> emoji_u
    22, // timezones
    23, // translations
    28, // created_at
    29, // updated_at
    30, // flag
    31, // wikiDataId -> wiki_data_id
  ];
  const mapped = dumpToSchema.map((idx) => values[idx]);

  return `INSERT INTO countries (id, name, iso2, iso3, numeric_code, phonecode, capital, currency, currency_name, currency_symbol, tld, native, continent_id, subregion_id, nationality, latitude, longitude, emoji, emoji_u, timezones, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||||
|
|
||||||
|
/**
 * Transform states INSERT statements to match the migration schema.
 *
 * SQL dump columns (20): id, name, country_id, country_code, fips_code, iso2,
 *   iso3166_2, type, level, parent_id, native, latitude, longitude, timezone,
 *   translations, created_at, updated_at, flag, wikiDataId, population
 * Migration columns (15): id, name, country_id, country_code, iso2, fips_code,
 *   type, latitude, longitude, timezone, translations, created_at, updated_at,
 *   flag, wiki_data_id
 *
 * @param {string} stmt - A single-row `INSERT INTO states VALUES (...)` statement.
 * @returns {string} The rewritten INSERT with an explicit 15-column list, or the
 *   original statement when the VALUES clause is missing or does not parse to
 *   exactly 20 values (a warning is logged in that case).
 */
function transformStatesInsert(stmt) {
  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!valuesMatch) return stmt;

  const values = parseValues(valuesMatch[1]);

  // Surface unexpected row shapes rather than silently mis-mapping columns.
  if (values.length !== 20) {
    console.log(` ⚠️ Expected 20 columns for states, got ${values.length}`);
    return stmt;
  }

  // Dump index -> migration column order. Note iso2 (5) and fips_code (4)
  // swap relative to the dump; iso3166_2, level, parent_id, native and
  // population are dropped; wikiDataId (18) is renamed to wiki_data_id.
  const dumpToSchema = [0, 1, 2, 3, 5, 4, 7, 11, 12, 13, 14, 15, 16, 17, 18];
  const mapped = dumpToSchema.map((idx) => values[idx]);

  return `INSERT INTO states (id, name, country_id, country_code, iso2, fips_code, type, latitude, longitude, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||||
|
|
||||||
|
/**
 * Transform cities INSERT statements to match the migration schema.
 *
 * SQL dump columns (19): id, name, state_id, state_code, country_id, country_code,
 *   type, level, parent_id, latitude, longitude, native, population, timezone,
 *   translations, created_at, updated_at, flag, wikiDataId
 * Migration columns (15): id, name, state_id, state_code, country_id, country_code,
 *   latitude, longitude, population, timezone, translations, created_at,
 *   updated_at, flag, wiki_data_id
 *
 * @param {string} stmt - A single-row `INSERT INTO cities VALUES (...)` statement.
 * @returns {string} The rewritten INSERT with an explicit 15-column list, or the
 *   original statement when the VALUES clause is missing or does not parse to
 *   exactly 19 values.
 */
function transformCitiesInsert(stmt) {
  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!valuesMatch) return stmt;

  const values = parseValues(valuesMatch[1]);

  // Skip transformation if the column count doesn't match the expected shape.
  if (values.length !== 19) {
    return stmt;
  }

  // Dump index -> migration column order. type, level, parent_id and native
  // are dropped; wikiDataId (18) is renamed to wiki_data_id.
  const dumpToSchema = [0, 1, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18];
  const mapped = dumpToSchema.map((idx) => values[idx]);

  return `INSERT INTO cities (id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||||
|
|
||||||
|
/**
 * Split a SQL VALUES payload into its top-level comma-separated items.
 *
 * Handles single-quoted strings (including SQL-style doubled quotes `''` and
 * backslash-escaped quotes), and JSON-like nesting with (), {} and [] so that
 * commas inside strings or nested structures do not split an item.
 * Example input: `1, 'name', '{"br": "value"}', NULL`
 *
 * @param {string} valuesStr - The text between the outermost VALUES parentheses.
 * @returns {string[]} The trimmed item strings, quotes preserved as-is.
 */
function parseValues(valuesStr) {
  const parts = [];
  let buffer = '';
  let nesting = 0;
  let quoted = false;
  let quoteChar = '';

  let i = 0;
  while (i < valuesStr.length) {
    const ch = valuesStr[i];
    const before = i > 0 ? valuesStr[i - 1] : '';

    if (!quoted && ch === "'") {
      // Entering a single-quoted string literal.
      quoted = true;
      quoteChar = ch;
    } else if (quoted && ch === quoteChar && before !== '\\') {
      if (valuesStr[i + 1] === quoteChar) {
        // SQL doubled quote ('' means a literal quote): keep both characters
        // and stay inside the string.
        buffer += ch;
        i++;
      } else {
        quoted = false;
      }
    }

    // Nesting and item boundaries only matter outside string literals.
    if (!quoted) {
      if (ch === '(' || ch === '{' || ch === '[') nesting++;
      else if (ch === ')' || ch === '}' || ch === ']') nesting--;

      if (ch === ',' && nesting === 0) {
        parts.push(buffer.trim());
        buffer = '';
        i++;
        continue;
      }
    }

    buffer += ch;
    i++;
  }

  if (buffer.trim()) parts.push(buffer.trim());

  return parts;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute a SQL file directly
|
* Execute a SQL file directly
|
||||||
*/
|
*/
|
||||||
|
|
@ -28,14 +280,16 @@ async function executeSqlFile(filename, tableName) {
|
||||||
let sql = readFileSync(filePath, 'utf8');
|
let sql = readFileSync(filePath, 'utf8');
|
||||||
|
|
||||||
// Clean up postgres-specific commands that might cause issues
|
// Clean up postgres-specific commands that might cause issues
|
||||||
|
// These need to match ONLY standalone commands, not content inside VALUES
|
||||||
sql = sql
|
sql = sql
|
||||||
.replace(/\\restrict[^\n]*/g, '')
|
.replace(/\\restrict[^\n]*/g, '')
|
||||||
.replace(/\\unrestrict[^\n]*/g, '')
|
.replace(/\\unrestrict[^\n]*/g, '')
|
||||||
.replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
|
.replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
|
||||||
.replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
|
.replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
|
||||||
.replace(/COMMENT ON[^;]*;/g, '')
|
.replace(/^COMMENT ON[^;]*;/gm, '')
|
||||||
.replace(/SET[^;]*;/g, '')
|
.replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match) // Comment out SET statements
|
||||||
.replace(/SELECT[^;]*set_config[^;]*;/g, '');
|
.replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match) // Comment out simple SET statements
|
||||||
|
.replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
|
||||||
|
|
||||||
// Extract only INSERT statements
|
// Extract only INSERT statements
|
||||||
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
||||||
|
|
@ -58,6 +312,21 @@ async function executeSqlFile(filename, tableName) {
|
||||||
pgStmt = pgStmt.replace(new RegExp(`INSERT INTO ${oldName}`, 'gi'), `INSERT INTO ${newName}`);
|
pgStmt = pgStmt.replace(new RegExp(`INSERT INTO ${oldName}`, 'gi'), `INSERT INTO ${newName}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply special transformations for subregions (column reordering)
|
||||||
|
if (pgStmt.includes('INSERT INTO subregions')) {
|
||||||
|
pgStmt = transformSubregionsInsert(pgStmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply special transformations for countries (32 cols -> 25 cols)
|
||||||
|
if (pgStmt.includes('INSERT INTO countries') || pgStmt.includes('INSERT INTO public.countries')) {
|
||||||
|
pgStmt = transformCountriesInsert(pgStmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply special transformations for states (20 cols -> 15 cols)
|
||||||
|
if (pgStmt.includes('INSERT INTO states') || pgStmt.includes('INSERT INTO public.states')) {
|
||||||
|
pgStmt = transformStatesInsert(pgStmt);
|
||||||
|
}
|
||||||
|
|
||||||
await pool.query(pgStmt);
|
await pool.query(pgStmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -93,15 +362,17 @@ async function executeGzippedSqlFile(filename, tableName) {
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
// Clean up postgres-specific commands
|
// Clean up postgres-specific commands that might cause issues
|
||||||
|
// These need to match ONLY standalone commands, not content inside VALUES
|
||||||
sql = sql
|
sql = sql
|
||||||
.replace(/\\restrict[^\n]*/g, '')
|
.replace(/\\restrict[^\n]*/g, '')
|
||||||
.replace(/\\unrestrict[^\n]*/g, '')
|
.replace(/\\unrestrict[^\n]*/g, '')
|
||||||
.replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
|
.replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
|
||||||
.replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
|
.replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
|
||||||
.replace(/COMMENT ON[^;]*;/g, '')
|
.replace(/^COMMENT ON[^;]*;/gm, '')
|
||||||
.replace(/SET[^;]*;/g, '')
|
.replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match)
|
||||||
.replace(/SELECT[^;]*set_config[^;]*;/g, '');
|
.replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match)
|
||||||
|
.replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
|
||||||
|
|
||||||
// Extract only INSERT statements
|
// Extract only INSERT statements
|
||||||
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
||||||
|
|
@ -122,6 +393,12 @@ async function executeGzippedSqlFile(filename, tableName) {
|
||||||
.replace(/`/g, '"')
|
.replace(/`/g, '"')
|
||||||
.replace(/"emojiU"/g, 'emoji_u')
|
.replace(/"emojiU"/g, 'emoji_u')
|
||||||
.replace(/"wikiDataId"/g, 'wiki_data_id');
|
.replace(/"wikiDataId"/g, 'wiki_data_id');
|
||||||
|
|
||||||
|
// Apply special transformations for cities (19 cols -> 15 cols)
|
||||||
|
if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
|
||||||
|
pgStmt = transformCitiesInsert(pgStmt);
|
||||||
|
}
|
||||||
|
|
||||||
await pool.query(pgStmt);
|
await pool.query(pgStmt);
|
||||||
}
|
}
|
||||||
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
|
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue