diff --git a/seeder-api/src/seeders/location-loader.js b/seeder-api/src/seeders/location-loader.js
index 7346d38..e38b16e 100644
--- a/seeder-api/src/seeders/location-loader.js
+++ b/seeder-api/src/seeders/location-loader.js
@@ -15,8 +15,260 @@ const SQL_DIR = join(__dirname, '..', '..', 'sql');
 const TABLE_MAPPING = {
   'public.regions': 'continents',
   'regions': 'continents',
+  'public.subregions': 'subregions',
 };
 
+/**
+ * Transform subregions INSERT statements to match migration schema.
+ * SQL dump: INSERT INTO subregions VALUES (id, name, translations, region_id, created_at, updated_at, flag, wikiDataId)
+ * Migration: id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id
+ */
+function transformSubregionsInsert(stmt) {
+  // Extract the raw VALUES payload from the INSERT statement; pass through untouched if absent
+  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
+  if (!valuesMatch) return stmt;
+
+  const valuesStr = valuesMatch[1];
+
+  // Parse values carefully (JSON payloads contain commas). NOTE(review): this inline
+  // parser duplicates parseValues() below (and additionally treats " as a string
+  // delimiter) — consider reusing parseValues to keep one parsing code path.
+  const values = [];
+  let current = '';
+  let depth = 0;
+  let inString = false;
+  let stringChar = '';
+
+  for (let i = 0; i < valuesStr.length; i++) {
+    const char = valuesStr[i];
+    const prevChar = i > 0 ? valuesStr[i - 1] : '';
+
+    if (!inString && (char === "'" || char === '"')) {
+      inString = true;
+      stringChar = char;
+    } else if (inString && char === stringChar && prevChar !== '\\') {
+      // Doubled quote ('') is SQL escaping for a literal quote, not a string end
+      if (valuesStr[i + 1] === stringChar) {
+        current += char;
+        i++; // Skip next quote
+      } else {
+        inString = false;
+      }
+    }
+
+    if (!inString) {
+      if (char === '(' || char === '{' || char === '[') depth++;
+      if (char === ')' || char === '}' || char === ']') depth--;
+
+      // Top-level comma (outside strings and brackets) separates column values
+      if (char === ',' && depth === 0) {
+        values.push(current.trim());
+        current = '';
+        continue;
+      }
+    }
+
+    current += char;
+  }
+  if (current.trim()) values.push(current.trim());
+
+  // Reorder: [id, name, translations, region_id, created, updated, flag, wikiDataId]
+  // To:      [id, name, continent_id (= region_id), translations, created, updated, flag, wiki_data_id]
+  if (values.length >= 8) {
+    const [id, name, translations, region_id, created, updated, flag, wikiDataId] = values;
+    const reordered = [id, name, region_id, translations, created, updated, flag, wikiDataId];
+    return `INSERT INTO subregions (id, name, continent_id, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${reordered.join(', ')});`;
+  }
+
+  return stmt;
+}
+
+/**
+ * Transform countries INSERT statements to match migration schema.
+ * SQL dump columns (32): id, name, iso3, numeric_code, iso2, phonecode, capital, currency, currency_name,
+ * currency_symbol, tld, native, population, gdp, region, region_id, subregion, subregion_id, nationality,
+ * area_sq_km, postal_code_format, postal_code_regex, timezones, translations, latitude, longitude,
+ * emoji, emojiU, created_at, updated_at, flag, wikiDataId
+ * Migration columns (25): id, name, iso2, iso3, numeric_code, phonecode, capital, currency, currency_name,
+ * currency_symbol, tld, native, continent_id, subregion_id, nationality, latitude, longitude,
+ * emoji, emoji_u, timezones, translations, created_at, updated_at, flag, wiki_data_id
+ */
+function transformCountriesInsert(stmt) {
+  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
+  if (!valuesMatch) return stmt;
+
+  const valuesStr = valuesMatch[1];
+  const values = parseValues(valuesStr);
+
+  if (values.length >= 32) {
+    // Extract the 32 columns from dump (positional — order must match the dump exactly)
+    const [id, name, iso3, numeric_code, iso2, phonecode, capital, currency, currency_name,
+      currency_symbol, tld, native, population, gdp, region, region_id, subregion,
+      subregion_id, nationality, area_sq_km, postal_code_format, postal_code_regex,
+      timezones, translations, latitude, longitude, emoji, emojiU, created_at,
+      updated_at, flag, wikiDataId] = values;
+
+    // Map to our 25-column schema (region_id becomes continent_id; population, gdp,
+    // region/subregion names, area and postal-code columns are dropped)
+    const mapped = [id, name, iso2, iso3, numeric_code, phonecode, capital, currency,
+      currency_name, currency_symbol, tld, native, region_id, subregion_id,
+      nationality, latitude, longitude, emoji, emojiU, timezones, translations,
+      created_at, updated_at, flag, wikiDataId];
+
+    return `INSERT INTO countries (id, name, iso2, iso3, numeric_code, phonecode, capital, currency, currency_name, currency_symbol, tld, native, continent_id, subregion_id, nationality, latitude, longitude, emoji, emoji_u, timezones, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
+  }
+
+  return stmt;
+}
+
+/**
+ * Transform states INSERT statements to match migration schema.
+ * The SQL dump has 20 columns, migration has 15 columns.
+ * Instead of parsing VALUES, we add explicit column list and use a subselect to remap.
+ * This avoids issues with complex JSON/Unicode parsing.
+ */
+function transformStatesInsert(stmt) {
+  // Just return the original statement - we need to handle this at the table level
+  // by adding a view or adjusting the schema
+  // For now, let's try a different approach: extract each value position individually
+
+  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
+  if (!valuesMatch) return stmt;
+
+  const valuesStr = valuesMatch[1];
+
+  // Use regex to find value positions based on comma counting outside quotes
+  const values = parseValues(valuesStr);
+
+  // Guard: pass the row through untransformed (and log) if the column count is unexpected
+  if (values.length !== 20) {
+    console.log(`  ⚠️  Expected 20 columns for states, got ${values.length}`);
+    return stmt;
+  }
+
+  // Dump indices: 0:id, 1:name, 2:country_id, 3:country_code, 4:fips_code, 5:iso2,
+  // 6:iso3166_2, 7:type, 8:level, 9:parent_id, 10:native, 11:latitude, 12:longitude,
+  // 13:timezone, 14:translations, 15:created_at, 16:updated_at, 17:flag, 18:wikiDataId, 19:population
+  //
+  // Migration needs: id, name, country_id, country_code, iso2, fips_code, type, latitude,
+  // longitude, timezone, translations, created_at, updated_at, flag, wiki_data_id
+  //
+  // Mapping: 0, 1, 2, 3, 5, 4, 7, 11, 12, 13, 14, 15, 16, 17, 18
+
+  const mapped = [
+    values[0],  // id
+    values[1],  // name
+    values[2],  // country_id
+    values[3],  // country_code
+    values[5],  // iso2
+    values[4],  // fips_code
+    values[7],  // type
+    values[11], // latitude
+    values[12], // longitude
+    values[13], // timezone
+    values[14], // translations
+    values[15], // created_at
+    values[16], // updated_at
+    values[17], // flag
+    values[18]  // wikiDataId -> wiki_data_id
+  ];
+
+  return `INSERT INTO states (id, name, country_id, country_code, iso2, fips_code, type, latitude, longitude, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
+}
+
+/**
+ * Transform cities INSERT statements to match migration schema.
+ * SQL dump columns (19): id, name, state_id, state_code, country_id, country_code, type, level, parent_id,
+ * latitude, longitude, native, population, timezone, translations, created_at, updated_at, flag, wikiDataId
+ * Migration columns (15): id, name, state_id, state_code, country_id, country_code, latitude, longitude,
+ * population, timezone, translations, created_at, updated_at, flag, wiki_data_id
+ */
+function transformCitiesInsert(stmt) {
+  const valuesMatch = stmt.match(/VALUES\s*\((.+)\);?$/is);
+  if (!valuesMatch) return stmt;
+
+  const valuesStr = valuesMatch[1];
+  const values = parseValues(valuesStr);
+
+  if (values.length !== 19) {
+    // Skip transformation if column count doesn't match expected
+    return stmt;
+  }
+
+  // Dump indices: 0:id, 1:name, 2:state_id, 3:state_code, 4:country_id, 5:country_code,
+  // 6:type, 7:level, 8:parent_id, 9:latitude, 10:longitude, 11:native, 12:population,
+  // 13:timezone, 14:translations, 15:created_at, 16:updated_at, 17:flag, 18:wikiDataId
+  //
+  // Migration needs: id, name, state_id, state_code, country_id, country_code, latitude,
+  // longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id
+  //
+  // Mapping: 0, 1, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18
+
+  const mapped = [
+    values[0],  // id
+    values[1],  // name
+    values[2],  // state_id
+    values[3],  // state_code
+    values[4],  // country_id
+    values[5],  // country_code
+    values[9],  // latitude
+    values[10], // longitude
+    values[12], // population
+    values[13], // timezone
+    values[14], // translations
+    values[15], // created_at
+    values[16], // updated_at
+    values[17], // flag
+    values[18]  // wikiDataId -> wiki_data_id
+  ];
+
+  return `INSERT INTO cities (id, name, state_id, state_code, country_id, country_code, latitude, longitude, population, timezone, translations, created_at, updated_at, flag, wiki_data_id) VALUES (${mapped.join(', ')});`;
+}
+
+/**
+ * Helper to parse a VALUES string with proper handling of nested JSON and quoted strings.
+ * Handles SQL values like: 1, 'name', '{"br": "value"}', NULL
+ */
+function parseValues(valuesStr) {
+  const values = [];
+  let current = '';
+  let depth = 0;
+  let inString = false;
+  let stringChar = '';
+
+  for (let i = 0; i < valuesStr.length; i++) {
+    const char = valuesStr[i];
+    const prevChar = i > 0 ? valuesStr[i - 1] : '';
+
+    // Only start a new string if we're not already in one
+    if (!inString && (char === "'")) {
+      inString = true;
+      stringChar = char;
+    } else if (inString && char === stringChar && prevChar !== '\\') {
+      // Doubled quote ('') is the SQL escape for a literal quote, not a string end.
+      // NOTE(review): the prevChar backslash check assumes backslash-escaped quotes;
+      // a string ending in an escaped backslash (\\') would be misparsed — confirm
+      // the dump uses standard_conforming_strings (doubled quotes only).
+      if (valuesStr[i + 1] === stringChar) {
+        current += char;
+        i++; // Skip next quote
+      } else {
+        inString = false;
+      }
+    }
+
+    // Track depth for parentheses/brackets (only outside strings)
+    if (!inString) {
+      if (char === '(' || char === '{' || char === '[') depth++;
+      if (char === ')' || char === '}' || char === ']') depth--;
+
+      if (char === ',' && depth === 0) {
+        values.push(current.trim());
+        current = '';
+        continue;
+      }
+    }
+
+    current += char;
+  }
+  if (current.trim()) values.push(current.trim());
+
+  return values;
+}
+
 /**
  * Execute a SQL file directly
  */
@@ -28,14 +280,16 @@ async function executeSqlFile(filename, tableName) {
   let sql = readFileSync(filePath, 'utf8');
 
   // Clean up postgres-specific commands that might cause issues
+  // These need to match ONLY standalone commands, not content inside VALUES
   sql = sql
     .replace(/\\restrict[^\n]*/g, '')
     .replace(/\\unrestrict[^\n]*/g, '')
-    .replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
-    .replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
-    .replace(/COMMENT ON[^;]*;/g, '')
-    .replace(/SET[^;]*;/g, '')
-    .replace(/SELECT[^;]*set_config[^;]*;/g, '')
+    .replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
+    .replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
+    .replace(/^COMMENT ON[^;]*;/gm, '')
+    .replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match) // Comment out SET statements
+    .replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match) // Comment out simple SET statements
+    .replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
 
   // Extract only INSERT statements
   const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
@@ -58,6 +312,21 @@ async function executeSqlFile(filename, tableName) {
       pgStmt = pgStmt.replace(new RegExp(`INSERT INTO ${oldName}`, 'gi'), `INSERT INTO ${newName}`);
     }
 
+    // Apply special transformations for subregions (column reordering)
+    if (pgStmt.includes('INSERT INTO subregions')) {
+      pgStmt = transformSubregionsInsert(pgStmt);
+    }
+
+    // Apply special transformations for countries (32 cols -> 25 cols)
+    if (pgStmt.includes('INSERT INTO countries') || pgStmt.includes('INSERT INTO public.countries')) {
+      pgStmt = transformCountriesInsert(pgStmt);
+    }
+
+    // Apply special transformations for states (20 cols -> 15 cols)
+    if (pgStmt.includes('INSERT INTO states') || pgStmt.includes('INSERT INTO public.states')) {
+      pgStmt = transformStatesInsert(pgStmt);
+    }
+
     await pool.query(pgStmt);
   }
 
@@ -93,15 +362,17 @@ async function executeGzippedSqlFile(filename, tableName) {
     })
   );
 
-  // Clean up postgres-specific commands
+  // Clean up postgres-specific commands that might cause issues
+  // These need to match ONLY standalone commands, not content inside VALUES
   sql = sql
     .replace(/\\restrict[^\n]*/g, '')
     .replace(/\\unrestrict[^\n]*/g, '')
-    .replace(/SELECT pg_catalog\.setval[^;]*;/g, '')
-    .replace(/ALTER TABLE[^;]*OWNER TO[^;]*;/g, '')
-    .replace(/COMMENT ON[^;]*;/g, '')
-    .replace(/SET[^;]*;/g, '')
-    .replace(/SELECT[^;]*set_config[^;]*;/g, '')
+    .replace(/^SELECT pg_catalog\.setval[^;]*;/gm, '')
+    .replace(/^ALTER TABLE[^;]*OWNER TO[^;]*;/gm, '')
+    .replace(/^COMMENT ON[^;]*;/gm, '')
+    .replace(/^SET [a-z_]+\s*=/gmi, (match) => '-- ' + match)
+    .replace(/^SET [a-z_]+;$/gmi, (match) => '-- ' + match)
+    .replace(/^SELECT[^;]*set_config[^;]*;/gm, '');
 
   // Extract only INSERT statements
   const insertStatements = sql.match(/INSERT INTO[^;]+;/g) || [];
@@ -122,6 +393,12 @@ async function executeGzippedSqlFile(filename, tableName) {
       .replace(/`/g, '"')
       .replace(/"emojiU"/g, 'emoji_u')
      .replace(/"wikiDataId"/g, 'wiki_data_id');
+
+    // Apply special transformations for cities (19 cols -> 15 cols)
+    if (pgStmt.includes('INSERT INTO cities') || pgStmt.includes('INSERT INTO public.cities')) {
+      pgStmt = transformCitiesInsert(pgStmt);
+    }
+
     await pool.query(pgStmt);
   }
   if ((i + BATCH_SIZE) % 10000 === 0 || i + BATCH_SIZE >= insertStatements.length) {