const fs = require("fs");
const path = require("path");
const https = require("https");
const { parse } = require("csv-parse/sync");

// Year range scraped from the per-year "top 12" pages (see loadTopList).
const START_TOP_YEAR = 1970;
const END_TOP_YEAR = 1991;
// Year range downloaded as per-year CSV files (see loadFullList).
const START_FULL_YEAR = 1992;
const END_YEAR = 2026;
const OUTPUT_PATH = path.join(__dirname, "..", "components", "data", "vehicleCatalog.json");
// Upper bound on HTTP redirects followed by fetchUrl for a single request.
const MAX_REDIRECTS = 5;

// Makes added to the matcher list on top of those found in the CSV data.
// NOTE(review): presumably these cover makes that appear in the 1970-1991
// pages but not in the 1992+ CSVs (e.g. AMC, Datsun) - confirm.
const EXTRA_MAKES = [
  "AMC",
  "Alfa Romeo",
  "Audi",
  "Buick",
  "Cadillac",
  "Chevrolet",
  "Chrysler",
  "Datsun",
  "Dodge",
  "Fiat",
  "Ford",
  "GMC",
  "Honda",
  "Jeep",
  "Land Rover",
  "Lincoln",
  "Mazda",
  "Mercedes-Benz",
  "Mercury",
  "Nissan",
  "Oldsmobile",
  "Peugeot",
  "Plymouth",
  "Pontiac",
  "Renault",
  "Saab",
  "Subaru",
  "Toyota",
  "Volkswagen",
  "Volvo",
];

/**
 * Downloads a URL over HTTPS and resolves with the response body as text.
 *
 * @param {string} url - Absolute https URL to fetch.
 * @param {number} [redirectsLeft=MAX_REDIRECTS] - How many more redirects may
 *   be followed before giving up.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 * Rejects with an Error on a status >= 400, on too many redirects, on an
 * unusable Location header, or on a request/response stream error.
 */
function fetchUrl(url, redirectsLeft = MAX_REDIRECTS) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode, headers } = res;

        if (statusCode && statusCode >= 400) {
          reject(new Error(`HTTP ${statusCode} for ${url}`));
          // Drain the body so the socket is released.
          res.resume();
          return;
        }

        // Follow redirects instead of returning the redirect stub as data.
        if (statusCode && statusCode >= 300 && headers.location) {
          res.resume();
          if (redirectsLeft <= 0) {
            reject(new Error(`Too many redirects for ${url}`));
            return;
          }
          let target;
          try {
            // Location may be relative; resolve it against the current URL.
            target = new URL(headers.location, url).toString();
          } catch (err) {
            reject(new Error(`Invalid redirect from ${url}: ${err.message}`));
            return;
          }
          // A non-https target makes https.get throw inside the nested
          // Promise executor, which surfaces here as a rejection.
          resolve(fetchUrl(target, redirectsLeft - 1));
          return;
        }

        // Decode in the stream so a multi-byte character split across two
        // chunks is not corrupted by per-chunk string conversion.
        res.setEncoding("utf8");
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => resolve(data));
        res.on("error", reject);
      })
      .on("error", reject);
  });
}

/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeSpaces(value) {
  return value.replace(/\s+/g, " ").trim();
}

/**
 * Splits text into tokens, treating whitespace and the characters
 * `. , ; : ( )` as separators.
 *
 * @param {string} value
 * @returns {string[]} Non-empty tokens in their original case.
 */
function tokenize(value) {
  return (
    normalizeSpaces(value)
      // Decode the ampersand entity BEFORE punctuation is stripped; once the
      // ";" is gone the entity can no longer be recognised.
      .replace(/&amp;/gi, "&")
      .replace(/[.,;:()]/g, " ")
      .split(/\s+/)
      .filter(Boolean)
  );
}

/**
 * Builds the lookup entries used to recognise make names in token streams.
 *
 * @param {string[]} makes - Make names; duplicates are allowed.
 * @returns {{make: string, tokens: string[]}[]} One entry per distinct make,
 *   ordered longest name first so that longer names are tried before shorter
 *   ones. `tokens` is the lower-cased name split on whitespace.
 */
function buildMakeMatchers(makes) {
  const uniqueMakes = Array.from(new Set(makes));
  uniqueMakes.sort((a, b) => b.length - a.length);
  return uniqueMakes.map((make) => ({
    make,
    tokens: make.toLowerCase().split(/\s+/),
  }));
}

/**
 * Extracts make/model pairs from a description containing a "sold:" list.
 *
 * Only the text after the first "sold:" (case-insensitive) and before the
 * first "..." is considered. Each recognised make starts a new entry; the
 * tokens up to the next recognised make (or the end) form its model.
 *
 * @param {string} description - Text to parse; falsy input yields [].
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Entries from
 *   buildMakeMatchers.
 * @returns {{make: string, model: string}[]} Pairs in order of appearance;
 *   makes followed by no model tokens are omitted.
 */
function parseTopList(description, makeMatchers) {
  if (!description) return [];

  // Search the original string directly: an index taken from a lower-cased
  // copy can be misaligned when lower-casing changes the string length.
  const markerMatch = /sold:/i.exec(description);
  if (!markerMatch) return [];

  let list = description.slice(markerMatch.index + markerMatch[0].length).trim();
  list = list.split("...")[0];
  list = list.replace(/\.$/, "");

  const rawTokens = tokenize(list);
  const lowerTokens = rawTokens.map((t) => t.toLowerCase());

  // Returns the first matcher whose tokens all appear starting at `start`.
  const matchMakeAt = (start) => {
    for (const entry of makeMatchers) {
      const { tokens } = entry;
      if (start + tokens.length > lowerTokens.length) continue;
      const matched = tokens.every((token, j) => lowerTokens[start + j] === token);
      if (matched) return entry;
    }
    return null;
  };

  const results = [];
  let i = 0;
  while (i < lowerTokens.length) {
    const match = matchMakeAt(i);
    if (!match) {
      i += 1;
      continue;
    }

    const startModel = i + match.tokens.length;
    let endModel = startModel;
    // The model runs until the next token position that starts a make.
    while (endModel < lowerTokens.length && !matchMakeAt(endModel)) {
      endModel += 1;
    }

    const model = normalizeSpaces(rawTokens.slice(startModel, endModel).join(" "));
    if (model) {
      results.push({ make: match.make, model });
    }
    i = endModel;
  }
  return results;
}

/**
 * Downloads one CSV per year from START_FULL_YEAR through END_YEAR and
 * converts each record with a non-empty `make` and `model` column into a row.
 *
 * A failed download is not caught here, so it rejects the returned promise.
 *
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 */
async function loadFullList() {
  const rows = [];
  for (let year = START_FULL_YEAR; year <= END_YEAR; year += 1) {
    const url = `https://raw.githubusercontent.com/abhionlyone/us-car-models-data/master/${year}.csv`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);
    const csv = await fetchUrl(url);
    const records = parse(csv, { columns: true, skip_empty_lines: true });
    for (const record of records) {
      const make = String(record.make || "").trim();
      const model = String(record.model || "").trim();
      if (!make || !model) continue;
      rows.push({ year, make, model, trim: "Base" });
    }
  }
  return rows;
}

/**
 * Scrapes one page per year from START_TOP_YEAR through END_TOP_YEAR and
 * parses the vehicles listed in its og:description meta tag.
 *
 * Years whose page cannot be downloaded, or whose page has no og:description
 * tag, are logged with a warning and skipped.
 *
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Entries from
 *   buildMakeMatchers.
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 */
async function loadTopList(makeMatchers) {
  const rows = [];
  for (let year = START_TOP_YEAR; year <= END_TOP_YEAR; year += 1) {
    const url = `https://carorigins.com/top-12-cars-of-${year}/`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);

    let html = "";
    try {
      html = await fetchUrl(url);
    } catch (err) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: ${err.message}`);
      continue;
    }

    // Expects the `property` attribute to appear before `content` in the tag.
    const match = html.match(/<meta[^>]+property="og:description"[^>]+content="([^"]+)"/i);
    if (!match) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: og:description not found`);
      continue;
    }

    const vehicles = parseTopList(match[1], makeMatchers);
    for (const vehicle of vehicles) {
      rows.push({ year, make: vehicle.make, model: vehicle.model, trim: "Base" });
    }
  }
  return rows;
}

/**
 * Builds the vehicle catalog: loads both sources, removes duplicate
 * year/make/model/trim rows, sorts them, and writes JSON to OUTPUT_PATH.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const fullRows = await loadFullList();
  const makesFromFull = fullRows.map((row) => row.make);
  const makeMatchers = buildMakeMatchers([...makesFromFull, ...EXTRA_MAKES]);
  const topRows = await loadTopList(makeMatchers);

  const allRows = [...topRows, ...fullRows];
  const seen = new Set();
  const deduped = [];
  for (const row of allRows) {
    const key = `${row.year}|${row.make}|${row.model}|${row.trim}`;
    if (seen.has(key)) continue;
    seen.add(key);
    deduped.push(row);
  }

  // Order by year, then make, model and trim.
  deduped.sort((a, b) => {
    if (a.year !== b.year) return a.year - b.year;
    if (a.make !== b.make) return a.make.localeCompare(b.make);
    if (a.model !== b.model) return a.model.localeCompare(b.model);
    return a.trim.localeCompare(b.trim);
  });

  // Make sure the target directory exists before writing.
  fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(deduped, null, 2));
  // eslint-disable-next-line no-console
  console.log(`Wrote ${deduped.length} rows to ${OUTPUT_PATH}`);
}

main().catch((err) => {
  // eslint-disable-next-line no-console
  console.error(err);
  process.exit(1);
});