Files
Shifted/scripts/build-vehicle-catalog.js
2026-02-10 01:14:19 +00:00

238 lines
5.8 KiB
JavaScript

const fs = require("fs");
const path = require("path");
const https = require("https");
const { parse } = require("csv-parse/sync");
// Inclusive year range for which loadTopList() fetches one carorigins.com
// "top-12-cars-of-<year>" page per year.
const START_TOP_YEAR = 1970;
const END_TOP_YEAR = 1991;
// Inclusive year range for which loadFullList() downloads one <year>.csv per
// year from the abhionlyone/us-car-models-data repository.
const START_FULL_YEAR = 1992;
const END_YEAR = 2026;
// Destination of the generated catalog written by main(), resolved relative to
// this script's directory: ../components/data/vehicleCatalog.json
const OUTPUT_PATH = path.join(__dirname, "..", "components", "data", "vehicleCatalog.json");
// Make names that main() appends to the makes found in the downloaded CSV rows
// before building the matchers used to parse the top-seller descriptions.
// NOTE(review): presumably these cover makes sold in the 1970-1991 range that
// may be missing from the 1992+ CSV data — confirm.
const EXTRA_MAKES = [
"AMC",
"Alfa Romeo",
"Audi",
"Buick",
"Cadillac",
"Chevrolet",
"Chrysler",
"Datsun",
"Dodge",
"Fiat",
"Ford",
"GMC",
"Honda",
"Jeep",
"Land Rover",
"Lincoln",
"Mazda",
"Mercedes-Benz",
"Mercury",
"Nissan",
"Oldsmobile",
"Peugeot",
"Plymouth",
"Pontiac",
"Renault",
"Saab",
"Subaru",
"Toyota",
"Volkswagen",
"Volvo",
];
/**
 * Downloads `url` over HTTPS and resolves with the response body as text.
 *
 * - Responses with status >= 400 reject with `HTTP <status> for <url>`.
 * - 3xx responses carrying a `Location` header are followed, at most
 *   `redirectsLeft` times, instead of resolving with the redirect stub body.
 * - The body is decoded as UTF-8 by the stream itself, so a multi-byte
 *   character split across two chunks is not corrupted.
 *
 * @param {string} url - Absolute https:// URL to download.
 * @param {number} [redirectsLeft=5] - Remaining redirect hops allowed.
 * @returns {Promise<string>} The response body.
 */
function fetchUrl(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode, headers } = res;
        if (statusCode && statusCode >= 400) {
          // Drain the body so the socket is released.
          res.resume();
          reject(new Error(`HTTP ${statusCode} for ${url}`));
          return;
        }
        if (statusCode && statusCode >= 300 && headers.location) {
          res.resume();
          if (redirectsLeft <= 0) {
            reject(new Error(`Too many redirects for ${url}`));
            return;
          }
          let target;
          try {
            // Location may be relative; resolve it against the current URL.
            target = new URL(headers.location, url).toString();
          } catch (err) {
            reject(err);
            return;
          }
          // Resolving with a promise adopts its eventual outcome.
          resolve(fetchUrl(target, redirectsLeft - 1));
          return;
        }
        // Let the stream decode UTF-8 so chunk boundaries cannot split a character.
        res.setEncoding("utf8");
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => resolve(data));
        // A failure while the body is streaming must reject instead of going unhandled.
        res.on("error", reject);
      })
      .on("error", reject);
  });
}
/**
 * Collapses every run of whitespace in `value` to a single space and removes
 * leading and trailing whitespace.
 *
 * @param {string} value - Text to normalise.
 * @returns {string} The normalised text.
 */
function normalizeSpaces(value) {
  const words = value.split(/\s+/).filter(Boolean);
  return words.join(" ");
}
/**
 * Splits free text into word tokens.
 *
 * The characters `. , ; : ( )` act as separators and are dropped, as is any
 * whitespace. HTML ampersand entities (`&amp;`, `&#38;`, `&#038;`, `&#x26;`)
 * are decoded to a literal `&`.
 *
 * @param {string} value - Text to split.
 * @returns {string[]} Non-empty tokens in their original order and casing.
 */
function tokenize(value) {
  return (
    value
      // Decode ampersand entities first: once ";" is stripped below, the
      // entity can no longer be recognised and would leak out as "&amp".
      .replace(/&(?:amp|#0*38|#x0*26);/gi, "&")
      .replace(/[.,;:()]/g, " ")
      // Splitting on whitespace runs and dropping empties already covers
      // leading, trailing and repeated whitespace.
      .split(/\s+/)
      .filter(Boolean)
  );
}
/**
 * Builds the matcher list used to recognise make names in tokenised text.
 *
 * Duplicate names are removed and the remainder is ordered by name length,
 * longest first, so that a longer make is tried before a shorter one.
 *
 * @param {string[]} makes - Make names; duplicates are allowed.
 * @returns {{make: string, tokens: string[]}[]} One entry per distinct make,
 *   where `tokens` is the lower-cased name split on whitespace.
 */
function buildMakeMatchers(makes) {
  const longestFirst = (left, right) => right.length - left.length;
  return [...new Set(makes)].sort(longestFirst).map((name) => ({
    make: name,
    tokens: name.toLowerCase().split(/\s+/),
  }));
}
/**
 * Extracts `{ make, model }` pairs from a description of the form
 * "... sold: <make> <model> <make> <model> ...".
 *
 * Only the text after the first "sold:" (case-insensitive) and before the
 * first "..." is considered. Tokens are scanned left to right: wherever a
 * known make starts, every following token up to the next known make (or the
 * end of the list) becomes the model. Makes followed by no model tokens and
 * tokens preceding the first make are ignored.
 *
 * @param {string} description - Text to parse; falsy values yield `[]`.
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Matchers as
 *   produced by buildMakeMatchers(); the first entry that matches wins.
 * @returns {{make: string, model: string}[]} Vehicles in order of appearance.
 */
function parseTopList(description, makeMatchers) {
  if (!description) {
    return [];
  }
  const marker = "sold:";
  const markerPos = description.toLowerCase().indexOf(marker);
  if (markerPos < 0) {
    return [];
  }

  const listText = description
    .slice(markerPos + marker.length)
    .trim()
    .split("...")[0]
    .replace(/\.$/, "");
  const words = tokenize(listText);
  const loweredWords = words.map((word) => word.toLowerCase());

  // First matcher whose tokens all line up with the words starting at `pos`.
  const findMakeAt = (pos) =>
    makeMatchers.find(
      ({ tokens }) =>
        pos + tokens.length <= loweredWords.length &&
        tokens.every((token, offset) => loweredWords[pos + offset] === token),
    );

  const vehicles = [];
  let cursor = 0;
  while (cursor < loweredWords.length) {
    const hit = findMakeAt(cursor);
    if (!hit) {
      cursor += 1;
      continue;
    }
    const modelStart = cursor + hit.tokens.length;
    let modelEnd = modelStart;
    // The model runs until the next recognised make or the end of the list.
    while (modelEnd < loweredWords.length && !findMakeAt(modelEnd)) {
      modelEnd += 1;
    }
    const model = normalizeSpaces(words.slice(modelStart, modelEnd).join(" "));
    if (model) {
      vehicles.push({ make: hit.make, model });
    }
    cursor = modelEnd;
  }
  return vehicles;
}
/**
 * Downloads one CSV per year from START_FULL_YEAR through END_YEAR
 * (inclusive), one request at a time, and turns each record into a catalog
 * row. Records whose trimmed `make` or `model` column is empty are skipped.
 * A failed download or parse rejects the returned promise.
 *
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 *   Rows in download order; `trim` is always "Base".
 */
async function loadFullList() {
  const cleanField = (raw) => String(raw || "").trim();
  const collected = [];
  for (let year = START_FULL_YEAR; year <= END_YEAR; year += 1) {
    const url = `https://raw.githubusercontent.com/abhionlyone/us-car-models-data/master/${year}.csv`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);
    const csvText = await fetchUrl(url);
    parse(csvText, { columns: true, skip_empty_lines: true }).forEach((record) => {
      const make = cleanField(record.make);
      const model = cleanField(record.model);
      if (make && model) {
        collected.push({ year, make, model, trim: "Base" });
      }
    });
  }
  return collected;
}
/**
 * Fetches the carorigins.com "top 12 cars" page for each year from
 * START_TOP_YEAR through END_TOP_YEAR (inclusive), one request at a time,
 * and parses the vehicles listed in the page's og:description meta tag.
 *
 * A year is skipped with a console warning when its page cannot be downloaded
 * or has no og:description tag; neither case rejects.
 *
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Matchers passed
 *   through to parseTopList().
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 *   Rows in year order; `trim` is always "Base".
 */
async function loadTopList(makeMatchers) {
  // Captures the content attribute of the og:description meta tag.
  const ogDescriptionPattern = /<meta[^>]+property=\"og:description\"[^>]+content=\"([^\"]+)\"/i;
  const collected = [];
  for (let year = START_TOP_YEAR; year <= END_TOP_YEAR; year += 1) {
    const url = `https://carorigins.com/top-12-cars-of-${year}/`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);

    let html = "";
    try {
      html = await fetchUrl(url);
    } catch (err) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: ${err.message}`);
      continue;
    }

    const found = html.match(ogDescriptionPattern);
    if (!found) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: og:description not found`);
      continue;
    }

    const [, description] = found;
    for (const { make, model } of parseTopList(description, makeMatchers)) {
      collected.push({ year, make, model, trim: "Base" });
    }
  }
  return collected;
}
/**
 * Builds the vehicle catalog and writes it to OUTPUT_PATH as pretty-printed
 * JSON.
 *
 * Steps: load the per-year CSV rows; build make matchers from their makes
 * plus EXTRA_MAKES; load the top-list rows; merge both sets (top-list rows
 * first) keeping the first row for each year|make|model|trim key; sort by
 * year, then make, model and trim.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const fullRows = await loadFullList();
  const knownMakes = [...fullRows.map(({ make }) => make), ...EXTRA_MAKES];
  const topRows = await loadTopList(buildMakeMatchers(knownMakes));

  // A Map keeps insertion order, so the first row seen for a key is the one kept.
  const firstByKey = new Map();
  for (const row of [...topRows, ...fullRows]) {
    const key = `${row.year}|${row.make}|${row.model}|${row.trim}`;
    if (!firstByKey.has(key)) {
      firstByKey.set(key, row);
    }
  }
  const deduped = [...firstByKey.values()];

  const compareRows = (left, right) => {
    if (left.year !== right.year) {
      return left.year - right.year;
    }
    for (const field of ["make", "model"]) {
      if (left[field] !== right[field]) {
        return left[field].localeCompare(right[field]);
      }
    }
    return left.trim.localeCompare(right.trim);
  };
  deduped.sort(compareRows);

  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(deduped, null, 2));
  // eslint-disable-next-line no-console
  console.log(`Wrote ${deduped.length} rows to ${OUTPUT_PATH}`);
}
/**
 * Logs the failure that aborted the build and exits with status 1.
 *
 * @param {unknown} failure - The rejection reason from main().
 */
function reportAndExit(failure) {
  // eslint-disable-next-line no-console
  console.error(failure);
  process.exit(1);
}

// Script entry point: run the build and fail the process on any error.
main().catch(reportAndExit);