Files
Autoparts-DB/scripts/clean_year_suffix_models.py
consultoria-as ea29cc31c0 feat(catalog): supplier catalog cleanup, fuzzy matching, and navigation fixes
- Cleaned 137+ fake engine-displacement models from supplier imports
  (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. prefixes, year-suffix, torque specs,
  empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog,
  supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from
  master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal,
  compact matching, prefix/substring fallback, model aliases, and ±3 year
  proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500,
  LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year
  filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*,
  nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
2026-06-09 07:47:42 +00:00

58 lines
2.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""Clean models with year suffixes like 'Model 17-18' or 'Model 2010-2015'."""
import os
import re
import sys

import psycopg2
# libpq connection string for the master database.
# Set the NEXUS_MASTER_DSN environment variable to target another server or to
# supply credentials without editing this file; the literal below is kept only
# as the backward-compatible default.
# NOTE(review): the default embeds a plaintext password in version control --
# move it to the environment / a secrets store and rotate it.
MASTER_DSN = os.environ.get(
    "NEXUS_MASTER_DSN",
    "host=localhost dbname=nexus_autoparts user=postgres password=1123517",
)
def main():
    """Report, and optionally delete, models whose name ends in a year range.

    A model matches when ``name_model`` ends in `` NN-NN`` or `` NNNN-NNNN``
    (for example ``'Model 17-18'`` or ``'Model 2010-2015'``).  Every match is
    printed together with the number of ``model_year_engine`` (MYE) rows and
    ``supplier_catalog_compat`` rows that reference it.

    Nothing is modified unless ``--execute`` is present in ``sys.argv``.  With
    ``--execute``, for each matching model:

    1. ``supplier_catalog_compat.model_year_engine_id`` is set to NULL for
       rows pointing at the model's MYE rows,
    2. ``vin_cache`` rows pointing at those MYE rows are deleted,
    3. the MYE rows themselves are deleted,
    4. the ``models`` row is deleted and the transaction is committed.

    The cursor and connection are released even when a statement fails; the
    exception is propagated to the caller unchanged.
    """
    dry_run = '--execute' not in sys.argv

    conn = psycopg2.connect(MASTER_DSN)
    try:
        with conn.cursor() as cur:
            # Models with the most MYE rows come first so the biggest
            # clean-ups are at the top of the report.
            cur.execute("""
                SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
                       (SELECT COUNT(*) FROM model_year_engine mye
                         WHERE mye.model_id = m.id_model) AS mye_count
                  FROM models m
                  JOIN brands b ON b.id_brand = m.brand_id
                 WHERE m.name_model ~ ' [0-9]{2}-[0-9]{2}$'
                    OR m.name_model ~ ' [0-9]{4}-[0-9]{4}$'
                 ORDER BY mye_count DESC
            """)
            rows = cur.fetchall()
            print(f"Found {len(rows)} models with year suffix")

            total_myes = 0
            total_scc = 0
            for model_id, name_model, _brand_id, brand_name, mye_count in rows:
                print(f"[{brand_name}] \"{name_model}\" id={model_id}, MYEs={mye_count}")

                if mye_count > 0:
                    cur.execute(
                        "SELECT id_mye FROM model_year_engine WHERE model_id = %s",
                        (model_id,),
                    )
                    mye_ids = [row[0] for row in cur.fetchall()]
                    total_myes += len(mye_ids)

                    # Count supplier_catalog_compat rows that will lose their
                    # MYE link.
                    cur.execute(
                        "SELECT COUNT(*) FROM supplier_catalog_compat"
                        " WHERE model_year_engine_id = ANY(%s)",
                        (mye_ids,),
                    )
                    scc_count = cur.fetchone()[0]
                    total_scc += scc_count
                    print(f" -> {scc_count} supplier_catalog_compat rows will be nulled")

                    if not dry_run:
                        # Detach / remove dependants before the MYE rows
                        # themselves.
                        cur.execute(
                            "UPDATE supplier_catalog_compat"
                            " SET model_year_engine_id = NULL"
                            " WHERE model_year_engine_id = ANY(%s)",
                            (mye_ids,),
                        )
                        cur.execute(
                            "DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)",
                            (mye_ids,),
                        )
                        cur.execute(
                            "DELETE FROM model_year_engine WHERE id_mye = ANY(%s)",
                            (mye_ids,),
                        )

                if not dry_run:
                    cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                    # One commit per model: each model's clean-up is applied
                    # as a unit.
                    conn.commit()

            print(
                f"\n{'DRY RUN' if dry_run else 'EXECUTED'}: {len(rows)} models, "
                f"{total_myes} MYEs, {total_scc} SCC rows affected"
            )
    finally:
        # Always release the connection, including when a query raised.
        conn.close()
# Run the clean-up only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()