fix(seeder): fix location data column schema transformations
- Fixed regex patterns that incorrectly matched SET inside quoted values - Added transformSubregionsInsert() - maps 8 dump cols to schema cols - Added transformCountriesInsert() - maps 32 dump cols to 25 schema cols - Added transformStatesInsert() - maps 20 dump cols to 15 schema cols - Added transformCitiesInsert() - maps 19 dump cols to 15 schema cols - Added parseValues() helper for parsing SQL VALUES with JSON handling - Successfully seeds: continents(6), subregions(22), countries(250), states(5296)
This commit is contained in:
parent
ac84571c55
commit
7720f2e35e
1 changed files with 288 additions and 11 deletions
|
|
@ -15,8 +15,260 @@ const SQL_DIR = join(__dirname, '..', '..', 'sql');
|
|||
/**
 * Maps table names as they appear in the SQL dumps to the table names used
 * by our migration schema. Both the schema-qualified ('public.x') and bare
 * forms are listed so either spelling found in an INSERT statement gets
 * rewritten. Frozen so the shared lookup table cannot be mutated at runtime.
 */
const TABLE_MAPPING = Object.freeze({
  'public.regions': 'continents',
  'regions': 'continents',
  'public.subregions': 'subregions',
});
|
||||
|
||||
/**
 * Transform a subregions INSERT statement to match the migration schema.
 *
 * SQL dump column order:  (id, name, translations, region_id, created_at,
 *                          updated_at, flag, wikiDataId)
 * Migration column order: (id, name, continent_id, translations, created_at,
 *                          updated_at, flag, wiki_data_id)
 *
 * The dump's region_id value maps onto our continent_id column and moves
 * ahead of translations. Statements without a parsable VALUES clause, or
 * with fewer than 8 values, are returned unchanged.
 *
 * @param {string} stmt - A single INSERT statement from the SQL dump.
 * @returns {string} The rewritten INSERT statement, or `stmt` unchanged.
 */
function transformSubregionsInsert(stmt) {
  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!valuesMatch) return stmt;

  // Reuse the shared VALUES parser instead of duplicating ~45 lines of
  // quote/JSON-aware splitting inline. (The previous inline copy had also
  // diverged from parseValues by treating double quotes as string
  // delimiters, which the shared helper does not.)
  const values = parseValues(valuesMatch[1]);
  if (values.length < 8) return stmt;

  // Only the first 8 values are used; any trailing extras are dropped,
  // matching the original destructuring behavior.
  const [id, name, translations, regionId, createdAt, updatedAt, flag, wikiDataId] = values;
  const reordered = [id, name, regionId, translations, createdAt, updatedAt, flag, wikiDataId];
  return `INSERT INTO subregions (id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${reordered.join(', ')});`;
}
|
||||
|
||||
/**
 * Transform a countries INSERT statement to match the migration schema.
 *
 * The SQL dump provides 32 columns; the migration keeps 25 of them (the
 * population, gdp, region/subregion name, area_sq_km and postal-code fields
 * are dropped) and renames region_id -> continent_id, emojiU -> emoji_u and
 * wikiDataId -> wiki_data_id. Statements without a parsable VALUES clause,
 * or with fewer than 32 values, are returned unchanged.
 *
 * @param {string} stmt - A single INSERT statement from the SQL dump.
 * @returns {string} The rewritten INSERT statement, or `stmt` unchanged.
 */
function transformCountriesInsert(stmt) {
  const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!match) return stmt;

  const dumpValues = parseValues(match[1]);
  if (dumpValues.length < 32) return stmt;

  // Dump indices: 0:id 1:name 2:iso3 3:numeric_code 4:iso2 5:phonecode
  // 6:capital 7:currency 8:currency_name 9:currency_symbol 10:tld 11:native
  // 12:population 13:gdp 14:region 15:region_id 16:subregion 17:subregion_id
  // 18:nationality 19:area_sq_km 20:postal_code_format 21:postal_code_regex
  // 22:timezones 23:translations 24:latitude 25:longitude 26:emoji 27:emojiU
  // 28:created_at 29:updated_at 30:flag 31:wikiDataId
  const order = [0, 1, 4, 2, 3, 5, 6, 7, 8, 9, 10, 11, 15, 17, 18, 24, 25, 26, 27, 22, 23, 28, 29, 30, 31];
  const mapped = order.map((idx) => dumpValues[idx]);

  return `INSERT INTO countries (id, name, iso2, iso3, numeric_code, phonecode, capital, currency, currency_name, currency_symbol, tld, native, continent_id, subregion_id, nationality, latitude, longitude, emoji, emoji_u, timezones, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||
|
||||
/**
 * Transform a states INSERT statement to match the migration schema.
 *
 * The SQL dump provides 20 columns; the migration keeps 15 of them (the
 * iso3166_2, level, parent_id, native and population columns are dropped)
 * and renames wikiDataId -> wiki_data_id. The iso2 and fips_code values also
 * swap positions relative to the dump order.
 *
 * Statements without a parsable VALUES clause are returned unchanged; an
 * unexpected value count logs a warning and also returns `stmt` unchanged.
 *
 * @param {string} stmt - A single INSERT statement from the SQL dump.
 * @returns {string} The rewritten INSERT statement, or `stmt` unchanged.
 */
function transformStatesInsert(stmt) {
  const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!match) return stmt;

  const dumpValues = parseValues(match[1]);
  if (dumpValues.length !== 20) {
    console.log(` ⚠️ Expected 20 columns for states, got ${dumpValues.length}`);
    return stmt;
  }

  // Dump indices: 0:id 1:name 2:country_id 3:country_code 4:fips_code 5:iso2
  // 6:iso3166_2 7:type 8:level 9:parent_id 10:native 11:latitude
  // 12:longitude 13:timezone 14:translations 15:created_at 16:updated_at
  // 17:flag 18:wikiDataId 19:population
  const order = [0, 1, 2, 3, 5, 4, 7, 11, 12, 13, 14, 15, 16, 17, 18];
  const mapped = order.map((idx) => dumpValues[idx]);

  return `INSERT INTO states (id, name, country_id, country_code, iso2, fips_code, type, latitude, longitude, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||
|
||||
/**
 * Transform a cities INSERT statement to match the migration schema.
 *
 * The SQL dump provides 19 columns; the migration keeps 15 of them (the
 * type, level, parent_id and native columns are dropped) and renames
 * wikiDataId -> wiki_data_id. Statements without a parsable VALUES clause,
 * or with an unexpected value count, are returned unchanged.
 *
 * @param {string} stmt - A single INSERT statement from the SQL dump.
 * @returns {string} The rewritten INSERT statement, or `stmt` unchanged.
 */
function transformCitiesInsert(stmt) {
  const match = stmt.match(/VALUES\s*\((.+)\);?$/is);
  if (!match) return stmt;

  const dumpValues = parseValues(match[1]);
  if (dumpValues.length !== 19) return stmt;

  // Dump indices: 0:id 1:name 2:state_id 3:state_code 4:country_id
  // 5:country_code 6:type 7:level 8:parent_id 9:latitude 10:longitude
  // 11:native 12:population 13:timezone 14:translations 15:created_at
  // 16:updated_at 17:flag 18:wikiDataId
  const order = [0, 1, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18];
  const mapped = order.map((idx) => dumpValues[idx]);

  return `INSERT INTO cities (id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
}
|
||||
|
||||
/**
 * Split the interior of a SQL VALUES(...) clause into individual value
 * tokens, e.g. "1, 'name', '{"br": "value"}', NULL" ->
 * ["1", "'name'", "'{\"br\": \"value\"}'", "NULL"].
 *
 * Tokens are returned verbatim — quotes and SQL escape sequences intact — so
 * the callers can re-join them into a new INSERT statement without producing
 * invalid SQL. Commas act as separators only when outside single-quoted
 * strings and outside nested (), {} or [] groups.
 *
 * Fixes over the previous version:
 *  - A doubled quote ('') inside a string was collapsed to a single quote,
 *    corrupting the re-emitted SQL (e.g. 'O''Brien' -> 'O'Brien'). Both
 *    quote characters are now preserved.
 *  - A string ending in an escaped backslash ('...\\') kept the parser
 *    "inside" the string because only the single previous character was
 *    checked. Escape state is now tracked per character, so \\ no longer
 *    falsely escapes the closing quote while \' is still honored.
 *
 * @param {string} valuesStr - The text between the outer VALUES parentheses.
 * @returns {string[]} The trimmed, raw SQL value tokens.
 */
function parseValues(valuesStr) {
  const values = [];
  let current = '';
  let depth = 0;
  let inString = false;
  // True when the previous in-string character was an unescaped backslash.
  let escaped = false;

  for (let i = 0; i < valuesStr.length; i++) {
    const char = valuesStr[i];

    if (inString) {
      if (escaped) {
        // This character is backslash-escaped (\' or \\) — take it literally.
        escaped = false;
      } else if (char === '\\') {
        escaped = true;
      } else if (char === "'") {
        if (valuesStr[i + 1] === "'") {
          // SQL-style doubled quote: keep BOTH quotes so the token remains
          // valid SQL when re-emitted.
          current += "''";
          i++; // Skip the second quote of the pair.
          continue;
        }
        inString = false; // Genuine closing quote.
      }
      current += char;
      continue;
    }

    if (char === "'") {
      inString = true;
    } else {
      // Track nesting so commas inside (), {} or [] don't split a token.
      if (char === '(' || char === '{' || char === '[') depth++;
      if (char === ')' || char === '}' || char === ']') depth--;

      if (char === ',' && depth === 0) {
        values.push(current.trim());
        current = '';
        continue;
      }
    }
    current += char;
  }
  if (current.trim()) values.push(current.trim());

  return values;
}
|
||||
|
||||
/**
|
||||
* Execute a SQL file directly
|
||||
*/
|
||||
|
|
@ -28,14 +280,16 @@ async function executeSqlFile(filename, tableName) {
|
|||
let sql = readFileSync(filePath, 'utf8');
|
||||
|
||||
// Clean up postgres-specific commands that might cause issues
|
||||
// These need to match ONLY standalone commands, not content inside VALUES
|
||||
sql = sql
|
||||
.replace(/\\restrict[^\n]*/g, '')
|
||||
.replace(/\\unrestrict[^\n]*/g, '')
|
||||
.replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
|
||||
.replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
|
||||
.replace(/COMMENT ON[^;]*;/g, '')
|
||||
.replace(/SET[^;]*;/g, '')
|
||||
.replace(/SELECT[^;]*set_config[^;]*;/g, '');
|
||||
.replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
|
||||
.replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
|
||||
.replace(/^COMMENT ON[^;]*;/gm, '')
|
||||
.replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match) // Comment out SET statements
|
||||
.replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match) // Comment out simple SET statements
|
||||
.replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
|
||||
|
||||
// Extract only INSERT statements
|
||||
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
||||
|
|
@ -58,6 +312,21 @@ async function executeSqlFile(filename, tableName) {
|
|||
pgStmt = pgStmt.replace(new RegExp(`INSERT INTO ${oldName}`, 'gi'), `INSERT INTO ${newName}`);
|
||||
}
|
||||
|
||||
// Apply special transformations for subregions (column reordering)
|
||||
if (pgStmt.includes('INSERT INTO subregions')) {
|
||||
pgStmt = transformSubregionsInsert(pgStmt);
|
||||
}
|
||||
|
||||
// Apply special transformations for countries (32 cols -> 25 cols)
|
||||
if (pgStmt.includes('INSERT INTO countries') || pgStmt.includes('INSERT INTO public.countries')) {
|
||||
pgStmt = transformCountriesInsert(pgStmt);
|
||||
}
|
||||
|
||||
// Apply special transformations for states (20 cols -> 15 cols)
|
||||
if (pgStmt.includes('INSERT INTO states') || pgStmt.includes('INSERT INTO public.states')) {
|
||||
pgStmt = transformStatesInsert(pgStmt);
|
||||
}
|
||||
|
||||
await pool.query(pgStmt);
|
||||
}
|
||||
|
||||
|
|
@ -93,15 +362,17 @@ async function executeGzippedSqlFile(filename, tableName) {
|
|||
})
|
||||
);
|
||||
|
||||
// Clean up postgres-specific commands
|
||||
// Clean up postgres-specific commands that might cause issues
|
||||
// These need to match ONLY standalone commands, not content inside VALUES
|
||||
sql = sql
|
||||
.replace(/\\restrict[^\n]*/g, '')
|
||||
.replace(/\\unrestrict[^\n]*/g, '')
|
||||
.replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
|
||||
.replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
|
||||
.replace(/COMMENT ON[^;]*;/g, '')
|
||||
.replace(/SET[^;]*;/g, '')
|
||||
.replace(/SELECT[^;]*set_config[^;]*;/g, '');
|
||||
.replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
|
||||
.replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
|
||||
.replace(/^COMMENT ON[^;]*;/gm, '')
|
||||
.replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match)
|
||||
.replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match)
|
||||
.replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
|
||||
|
||||
// Extract only INSERT statements
|
||||
const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
|
||||
|
|
@ -122,6 +393,12 @@ async function executeGzippedSqlFile(filename, tableName) {
|
|||
.replace(/`/g, '"')
|
||||
.replace(/"emojiU"/g, 'emoji_u')
|
||||
.replace(/"wikiDataId"/g, 'wiki_data_id');
|
||||
|
||||
// Apply special transformations for cities (19 cols -> 15 cols)
|
||||
if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
|
||||
pgStmt = transformCitiesInsert(pgStmt);
|
||||
}
|
||||
|
||||
await pool.query(pgStmt);
|
||||
}
|
||||
if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue