// Generates the vehicle catalog JSON written to OUTPUT_PATH from downloaded per-year data.
const fs = require("fs");
const path = require("path");
const https = require("https");
const { parse } = require("csv-parse/sync");
// Inclusive year range fetched from the "top 12 cars" pages (see loadTopList).
const START_TOP_YEAR = 1970;
const END_TOP_YEAR = 1991;
// Inclusive year range fetched from the per-year CSV files (see loadFullList).
const START_FULL_YEAR = 1992;
const END_YEAR = 2026;
// Destination of the generated catalog, resolved relative to this script's own directory.
const OUTPUT_PATH = path.join(__dirname, "..", "components", "data", "vehicleCatalog.json");
// Makes that main() adds to the matcher list on top of the makes found in the
// CSV data. Presumably these cover brands that appear in the 1970-1991 pages
// but may be missing from the 1992+ CSV files -- confirm before pruning.
const EXTRA_MAKES = [
  "AMC",
  "Alfa Romeo",
  "Audi",
  "Buick",
  "Cadillac",
  "Chevrolet",
  "Chrysler",
  "Datsun",
  "Dodge",
  "Fiat",
  "Ford",
  "GMC",
  "Honda",
  "Jeep",
  "Land Rover",
  "Lincoln",
  "Mazda",
  "Mercedes-Benz",
  "Mercury",
  "Nissan",
  "Oldsmobile",
  "Peugeot",
  "Plymouth",
  "Pontiac",
  "Renault",
  "Saab",
  "Subaru",
  "Toyota",
  "Volkswagen",
  "Volvo",
];
/**
 * Downloads `url` over HTTPS and resolves with the response body as a string.
 *
 * @param {string} url - Absolute https:// URL to fetch.
 * @returns {Promise<string>} Resolves with the complete body decoded as UTF-8.
 *   Rejects on a request error, a response-stream error, or an HTTP status of
 *   400 or above. Redirects (3xx) are not followed; their body is returned as-is.
 */
function fetchUrl(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        if (res.statusCode && res.statusCode >= 400) {
          reject(new Error(`HTTP ${res.statusCode} for ${url}`));
          // Drain the body so the socket is released.
          res.resume();
          return;
        }

        // Decode at the stream level: concatenating raw Buffer chunks would
        // corrupt any multi-byte character that straddles a chunk boundary.
        res.setEncoding("utf8");

        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("error", reject);
        res.on("end", () => resolve(data));
      })
      .on("error", reject);
  });
}
/**
 * Collapses every run of whitespace in `value` into a single space and trims
 * both ends.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeSpaces(value) {
  return value.replace(/\s+/g, " ").trim();
}
/**
 * Splits free text into whitespace-separated tokens, discarding the
 * punctuation characters `. , ; :` and parentheses.
 *
 * HTML ampersand entities (`&amp;`, `&#38;`, `&#038;`, `&#x26;`) are decoded to
 * a literal `&` BEFORE punctuation is stripped; removing the `;` first would
 * leave an undecodable `&amp` fragment in the token stream.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Non-empty tokens in their original order and casing.
 */
function tokenize(value) {
  return value
    .replace(/&(?:amp|#0*38|#x0*26);/gi, "&")
    .replace(/[.,;:()]/g, " ")
    .split(/\s+/)
    .filter(Boolean);
}
/**
 * Builds the matcher list used to locate make names inside tokenized text.
 *
 * Makes are trimmed and de-duplicated; blank entries are dropped because they
 * would produce an empty token that can never match. The result is ordered
 * longest make first (by character length) so that a longer make is tried
 * before any shorter one.
 *
 * @param {string[]} makes - Make names; may contain duplicates.
 * @returns {{make: string, tokens: string[]}[]} One entry per unique make,
 *   with `tokens` holding the lower-cased words of the make.
 */
function buildMakeMatchers(makes) {
  const matchers = [];

  for (const make of new Set(makes.map((name) => name.trim()))) {
    const tokens = make.toLowerCase().split(/\s+/).filter(Boolean);
    if (tokens.length > 0) {
      matchers.push({ make, tokens });
    }
  }

  matchers.sort((a, b) => b.make.length - a.make.length);
  return matchers;
}
/**
 * Extracts `{ make, model }` pairs from a description containing a list that
 * follows the text "sold:" (matched case-insensitively).
 *
 * The text after the marker is cut at the first ellipsis ("...", "…" or
 * "&hellip;"), tokenized, and scanned left to right: each position where a
 * known make matches starts a vehicle, and every token up to the next make
 * (or the end of the list) becomes its model. Tokens before the first make
 * are ignored, and a make with no model tokens is skipped.
 *
 * @param {string} description - Text to parse; falsy values yield `[]`.
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Matchers as
 *   produced by buildMakeMatchers; earlier entries take precedence.
 * @returns {{make: string, model: string}[]} Vehicles in order of appearance.
 */
function parseTopList(description, makeMatchers) {
  if (!description) return [];

  // Search the original string directly: an index taken from a lower-cased
  // copy can be off when lower-casing changes the string's length.
  const marker = "sold:";
  const markerIndex = description.search(/sold:/i);
  if (markerIndex === -1) return [];

  const list = description
    .slice(markerIndex + marker.length)
    .trim()
    .split(/\.{3}|\u2026|&hellip;/i)[0]
    .replace(/\.$/, "");

  const rawTokens = tokenize(list);
  const lowerTokens = rawTokens.map((token) => token.toLowerCase());

  // Returns the first matcher whose tokens all match starting at `start`.
  // Matchers without tokens are ignored: they would match everywhere without
  // consuming input and stall the scan below.
  const matchMakeAt = (start) =>
    makeMatchers.find(
      ({ tokens }) =>
        tokens.length > 0 &&
        start + tokens.length <= lowerTokens.length &&
        tokens.every((token, offset) => lowerTokens[start + offset] === token)
    ) || null;

  const results = [];
  let i = 0;

  while (i < lowerTokens.length) {
    const match = matchMakeAt(i);
    if (!match) {
      i += 1;
      continue;
    }

    // The model runs from just after the make up to the next make (or the end).
    const startModel = i + match.tokens.length;
    let endModel = startModel;
    while (endModel < lowerTokens.length && !matchMakeAt(endModel)) {
      endModel += 1;
    }

    const model = normalizeSpaces(rawTokens.slice(startModel, endModel).join(" "));
    if (model) {
      results.push({ make: match.make, model });
    }

    i = endModel;
  }

  return results;
}
/**
 * Downloads one CSV file per year from START_FULL_YEAR through END_YEAR
 * (inclusive) and flattens them into catalog rows.
 *
 * Years are fetched sequentially. Records lacking a non-blank `make` or
 * `model` column are skipped. Every row gets the trim "Base".
 *
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 *   Rows in download order. The promise rejects if any download or CSV parse
 *   fails; there is no per-year recovery here.
 */
async function loadFullList() {
  const rows = [];

  for (let year = START_FULL_YEAR; year <= END_YEAR; year += 1) {
    const url = `https://raw.githubusercontent.com/abhionlyone/us-car-models-data/master/${year}.csv`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);
    const csv = await fetchUrl(url);
    // `columns: true` keys each record by the CSV header row.
    const records = parse(csv, { columns: true, skip_empty_lines: true });

    for (const record of records) {
      const make = String(record.make || "").trim();
      const model = String(record.model || "").trim();
      if (!make || !model) continue;
      rows.push({ year, make, model, trim: "Base" });
    }
  }

  return rows;
}
/**
 * Downloads the "top 12 cars" page for each year from START_TOP_YEAR through
 * END_TOP_YEAR (inclusive) and extracts vehicles from the page's
 * `og:description` meta tag via parseTopList.
 *
 * This loader is best-effort: a year whose page cannot be downloaded, or
 * whose page has no matching meta tag, is logged with a warning and skipped.
 *
 * @param {{make: string, tokens: string[]}[]} makeMatchers - Matchers passed
 *   through to parseTopList.
 * @returns {Promise<{year: number, make: string, model: string, trim: string}[]>}
 *   Rows in download order; every row gets the trim "Base".
 */
async function loadTopList(makeMatchers) {
  const rows = [];

  for (let year = START_TOP_YEAR; year <= END_TOP_YEAR; year += 1) {
    const url = `https://carorigins.com/top-12-cars-of-${year}/`;
    // eslint-disable-next-line no-console
    console.log(`Downloading ${url}`);

    let html = "";
    try {
      html = await fetchUrl(url);
    } catch (err) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: ${err.message}`);
      continue;
    }

    // Only matches when `property` appears before `content` inside the tag.
    const match = html.match(/<meta[^>]+property=\"og:description\"[^>]+content=\"([^\"]+)\"/i);
    if (!match) {
      // eslint-disable-next-line no-console
      console.warn(`Skipping ${year}: og:description not found`);
      continue;
    }

    const description = match[1];
    const vehicles = parseTopList(description, makeMatchers);

    for (const vehicle of vehicles) {
      rows.push({ year, make: vehicle.make, model: vehicle.model, trim: "Base" });
    }
  }

  return rows;
}
/**
 * Builds the vehicle catalog and writes it to OUTPUT_PATH as pretty-printed JSON.
 *
 * Steps:
 *   1. Load the per-year CSV rows.
 *   2. Build make matchers from the makes seen in those rows plus EXTRA_MAKES.
 *   3. Load the top-list rows using those matchers.
 *   4. Drop rows with a duplicate year/make/model/trim, keeping the first.
 *   5. Sort by year, then make, model and trim.
 *   6. Write the result, creating the output directory if it is missing.
 *
 * @returns {Promise<void>} Rejects if the CSV download/parse or the file
 *   write fails.
 */
async function main() {
  const fullRows = await loadFullList();
  const makesFromFull = fullRows.map((row) => row.make);
  const makeMatchers = buildMakeMatchers([...makesFromFull, ...EXTRA_MAKES]);
  const topRows = await loadTopList(makeMatchers);

  const allRows = [...topRows, ...fullRows];
  const seen = new Set();
  const deduped = [];

  for (const row of allRows) {
    const key = `${row.year}|${row.make}|${row.model}|${row.trim}`;
    if (seen.has(key)) continue;
    seen.add(key);
    deduped.push(row);
  }

  deduped.sort((a, b) => {
    if (a.year !== b.year) return a.year - b.year;
    if (a.make !== b.make) return a.make.localeCompare(b.make);
    if (a.model !== b.model) return a.model.localeCompare(b.model);
    return a.trim.localeCompare(b.trim);
  });

  // writeFileSync does not create parent directories, so a fresh checkout
  // without the data directory would otherwise fail after all the downloads.
  fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(deduped, null, 2));

  // eslint-disable-next-line no-console
  console.log(`Wrote ${deduped.length} rows to ${OUTPUT_PATH}`);
}
// Script entry point: run the build, and on any failure log the error and
// exit with a non-zero status.
main().catch((err) => {
  // eslint-disable-next-line no-console
  console.error(err);
  process.exit(1);
});