feat(catalog): supplier catalog cleanup, fuzzy matching, and navigation fixes
- Cleaned 137+ fake engine-displacement models from supplier imports (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. prefixes, year-suffix, torque specs, empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog, supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal, compact matching, prefix/substring fallback, model aliases, and ±3 year proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500, LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*, nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
This commit is contained in:
57
scripts/clean_year_suffix_models.py
Executable file
57
scripts/clean_year_suffix_models.py
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Clean models with year suffixes like 'Model 17-18' or 'Model 2010-2015'."""
|
||||
|
||||
import os
import re
import sys

import psycopg2
|
||||
|
||||
# Connection string for the master database. It can be overridden through the
# NEXUS_MASTER_DSN environment variable so credentials do not have to live in
# source control; the literal is kept only as a backward-compatible default.
MASTER_DSN = os.environ.get(
    "NEXUS_MASTER_DSN",
    "host=localhost dbname=nexus_autoparts user=postgres password=1123517",
)
|
||||
|
||||
def main():
    """Remove models whose name ends in a year-range suffix, with their dependents.

    A model matches when its name ends in ' NN-NN' or ' NNNN-NNNN'. For each
    matching model the script reports the model and, when it has
    model_year_engine rows, how many supplier_catalog_compat rows reference
    them.

    By default this is a dry run that only reads and prints. Passing
    ``--execute`` on the command line additionally, per model:

    * sets supplier_catalog_compat.model_year_engine_id to NULL for the
      affected rows,
    * deletes the matching vin_cache and model_year_engine rows,
    * deletes the model itself and commits.

    Raises:
        Any database error is re-raised after the open transaction has been
        rolled back; the cursor and connection are always closed.
    """
    dry_run = '--execute' not in sys.argv
    conn = psycopg2.connect(MASTER_DSN)
    cur = conn.cursor()

    try:
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
                   (SELECT COUNT(*) FROM model_year_engine mye WHERE mye.model_id = m.id_model) as mye_count
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model ~ ' [0-9]{2}-[0-9]{2}$' OR m.name_model ~ ' [0-9]{4}-[0-9]{4}$'
            ORDER BY mye_count DESC
        """)
        rows = cur.fetchall()
        print(f"Found {len(rows)} models with year suffix")

        total_myes = 0
        total_models = 0
        total_scc = 0

        # brand_id is selected by the query but not needed here.
        for model_id, name_model, _brand_id, brand_name, mye_count in rows:
            total_models += 1
            print(f"[{brand_name}] \"{name_model}\" id={model_id}, MYEs={mye_count}")

            if mye_count > 0:
                cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
                mye_ids = [r[0] for r in cur.fetchall()]
                total_myes += len(mye_ids)

                # Count supplier_catalog_compat affected
                cur.execute("SELECT COUNT(*) FROM supplier_catalog_compat WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                scc_count = cur.fetchone()[0]
                total_scc += scc_count
                print(f"  -> {scc_count} supplier_catalog_compat rows will be nulled")

                if not dry_run:
                    # Detach or remove everything that references the MYE rows
                    # before deleting the MYE rows themselves.
                    cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                    cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                    cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", (mye_ids,))

            if not dry_run:
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                # Commit per model so each model's cleanup is applied as a unit.
                # NOTE(review): confirm this is the intended commit granularity.
                conn.commit()

        print(f"\n{'DRY RUN' if dry_run else 'EXECUTED'}: {total_models} models, {total_myes} MYEs, {total_scc} SCC rows affected")
    except Exception:
        # Discard the partially applied cleanup of the model that failed.
        conn.rollback()
        raise
    finally:
        cur.close()
        conn.close()
|
||||
|
||||
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user