#!/usr/bin/env python3
"""
Clean fake/corrupted models from master DB caused by supplier catalog imports.

Handles:
- Models ending in ' INT.' -> map to base model
- Empty-name models -> delete or merge
- Year-range models (09-15, etc.) -> delete
- Torque-spec models ((60 Nm+90°), etc.) -> delete

The script runs in dry-run mode (read-only, report only) unless it is invoked
with ``--execute``.
"""

import os
import re
import sys
from collections import defaultdict

import psycopg2

# NOTE(review): the fallback DSN embeds a plaintext password. It is kept only
# for backward compatibility; prefer supplying MASTER_DSN via the environment.
MASTER_DSN = os.environ.get(
    "MASTER_DSN",
    "host=localhost dbname=nexus_autoparts user=postgres password=1123517",
)

# Trailing " INT." marker (whitespace + literal "INT.") on a model name.
_INT_SUFFIX_RE = re.compile(r"\s+INT\.$")

# Names that consist solely of a year range such as "09-15" or "2009-2015".
_YEAR_RANGE_RE = re.compile(r'^(\d{2}-\d{2}|\d{4}-\d{4})$')

# "Nm" as a standalone token (not embedded in a word such as "Stanmore").
_TORQUE_RE = re.compile(r"(?<![^\W\d_])nm(?![^\W\d_])", re.IGNORECASE)


def get_connection():
    """Open a new psycopg2 connection to the master database."""
    return psycopg2.connect(MASTER_DSN)


def _strip_int_suffix(name):
    """Return *name* without its trailing ' INT.' marker.

    Returns None when the name does not end in whitespace + 'INT.' or when
    nothing would be left after removing the marker, so callers can skip the
    row instead of deriving a bogus base name.
    """
    match = _INT_SUFFIX_RE.search(name)
    if match is None:
        return None
    base = name[:match.start()].strip()
    return base or None


def _lookup_brand_name(cur, brand_id):
    """Return the brand name for *brand_id*, or '?' when no brand row exists."""
    cur.execute("SELECT name_brand FROM brands WHERE id_brand = %s", (brand_id,))
    row = cur.fetchone()
    return row[0] if row else "?"


def _report_and_delete(conn, models, dry_run):
    """Report each (model_id, name, brand_id) in *models* and delete it.

    Deletion (with one commit per model) only happens when *dry_run* is false.
    Returns the number of models processed, which in dry-run mode is the
    number that would be deleted.
    """
    processed = 0
    with conn.cursor() as cur:
        for mid, name, bid in models:
            cur.execute(
                "SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s",
                (mid,),
            )
            count = cur.fetchone()[0]
            bname = _lookup_brand_name(cur, bid)
            print(f"[{bname}] '{name}' id={mid}, MYEs={count}")
            if not dry_run:
                delete_model_and_myes(conn, mid, dry_run=False)
                conn.commit()
            processed += 1
    return processed


def delete_model_and_myes(conn, model_id, dry_run=True):
    """Delete all MYEs for a model, then the model itself.

    With ``dry_run=True`` nothing is written; the function only reports what
    it would delete. The caller is responsible for committing.
    """
    verb = "Would delete" if dry_run else "Deleting"
    with conn.cursor() as cur:
        cur.execute(
            "SELECT id_mye FROM model_year_engine WHERE model_id = %s",
            (model_id,),
        )
        mye_ids = [row[0] for row in cur.fetchall()]
        if mye_ids:
            print(f" {verb} {len(mye_ids)} MYEs for model {model_id}")
            if not dry_run:
                # supplier_catalog_compat has no FK, just update to null
                cur.execute(
                    "UPDATE supplier_catalog_compat SET model_year_engine_id = NULL "
                    "WHERE model_year_engine_id = ANY(%s)",
                    (mye_ids,),
                )
                cur.execute(
                    "DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)",
                    (mye_ids,),
                )
                cur.execute(
                    "DELETE FROM model_year_engine WHERE id_mye = ANY(%s)",
                    (mye_ids,),
                )
        else:
            print(f" No MYEs for model {model_id}")
        print(f" {verb} model {model_id}")
        if not dry_run:
            cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))


def merge_int_models(conn, dry_run=True):
    """Merge 'X INT.' models into their base equivalents.

    For each model whose name ends in 'INT.':
    - if the name is not a genuine '<base> INT.' form, it is skipped;
    - if no base model exists in the same brand, the model is renamed;
    - otherwise its MYEs are moved to (or folded into) the base model and the
      INT model is deleted.

    No statement that modifies data is executed when *dry_run* is true.
    Returns a ``(merged, renamed, skipped)`` tuple of counts.
    """
    merged = 0
    renamed = 0
    skipped = 0
    with conn.cursor() as cur:
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model LIKE '%INT.'
            ORDER BY m.brand_id, m.name_model
        """)
        int_models = cur.fetchall()
        print(f"Found {len(int_models)} INT. models to process")

        for model_id, name_model, brand_id, brand_name in int_models:
            base_name = _strip_int_suffix(name_model)
            if base_name is None:
                # LIKE '%INT.' also matches e.g. 'SPRINT.' or a bare 'INT.';
                # slicing those would yield a wrong or empty base name.
                print(f"[{brand_name}] {name_model} -> SKIP (no ' INT.' suffix / empty base)")
                skipped += 1
                continue

            # Find base model (case-insensitive)
            cur.execute("""
                SELECT id_model, name_model FROM models
                WHERE brand_id = %s AND LOWER(name_model) = LOWER(%s)
                LIMIT 1
            """, (brand_id, base_name))
            base = cur.fetchone()

            if base is None:
                # No base exists: rename this model to base name
                print(f"[{brand_name}] {name_model} -> RENAME to '{base_name}' (no base found)")
                if not dry_run:
                    cur.execute(
                        "UPDATE models SET name_model = %s WHERE id_model = %s",
                        (base_name, model_id),
                    )
                    conn.commit()
                renamed += 1
                continue

            base_id, base_name_exact = base
            print(f"[{brand_name}] {name_model} -> {base_name_exact} (id={base_id})")

            # Migrate MYEs from INT model to base model
            cur.execute("""
                SELECT id_mye, year_id, engine_id
                FROM model_year_engine
                WHERE model_id = %s
            """, (model_id,))
            myes = cur.fetchall()

            mye_migrated = 0
            for mye_id, year_id, engine_id in myes:
                mye_migrated += 1
                if dry_run:
                    # Dry run must stay read-only: count, but do not touch rows.
                    continue

                # Find equivalent MYE in base model
                cur.execute("""
                    SELECT id_mye FROM model_year_engine
                    WHERE model_id = %s AND year_id = %s
                      AND (engine_id = %s OR (engine_id IS NULL AND %s IS NULL))
                """, (base_id, year_id, engine_id, engine_id))
                base_mye = cur.fetchone()

                if base_mye:
                    # Base already has this year/engine: repoint compat rows
                    # to it and drop the duplicate MYE.
                    cur.execute("""
                        UPDATE supplier_catalog_compat
                        SET model_year_engine_id = %s
                        WHERE model_year_engine_id = %s
                    """, (base_mye[0], mye_id))
                    cur.execute(
                        "DELETE FROM vin_cache WHERE model_year_engine_id = %s",
                        (mye_id,),
                    )
                    cur.execute(
                        "DELETE FROM model_year_engine WHERE id_mye = %s",
                        (mye_id,),
                    )
                else:
                    # Move MYE to base model
                    cur.execute("""
                        UPDATE model_year_engine
                        SET model_id = %s
                        WHERE id_mye = %s
                    """, (base_id, mye_id))

            if dry_run:
                print(f" Would migrate {mye_migrated} MYEs and delete model")
            else:
                # Now delete the INT model (should have no MYEs left)
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                conn.commit()
                print(f" Migrated {mye_migrated} MYEs, deleted model")
            merged += 1

    print(f"\nINT. summary: merged={merged}, renamed={renamed}, skipped={skipped}")
    return merged, renamed, skipped


def clean_empty_models(conn, dry_run=True):
    """Delete models with empty names, clearing their MYEs first.

    For every MYE of an empty-name model the function lists candidate real
    models (same brand, year and engine, non-blank name) to help a later
    manual remap. In execute mode the MYE's supplier_catalog_compat rows are
    set to NULL, its vin_cache rows and the MYE itself are deleted, and the
    model is then removed; each model is committed as one unit.

    Returns the number of models processed (would-be deletions in dry run).
    """
    deleted = 0
    with conn.cursor() as cur:
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
                   (SELECT COUNT(*) FROM model_year_engine mye
                    WHERE mye.model_id = m.id_model) as mye_count
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model IS NULL OR TRIM(m.name_model) = ''
            ORDER BY mye_count DESC
        """)
        empty_models = cur.fetchall()
        print(f"\nFound {len(empty_models)} empty-name models")

        for model_id, _name_model, brand_id, brand_name, mye_count in empty_models:
            print(f"[{brand_name}] empty model id={model_id}, MYEs={mye_count}")
            if mye_count == 0:
                print(" -> Safe to delete (no MYEs)")
            else:
                cur.execute("""
                    SELECT mye.id_mye, mye.year_id, mye.engine_id,
                           y.year_car, e.name_engine
                    FROM model_year_engine mye
                    LEFT JOIN years y ON y.id_year = mye.year_id
                    LEFT JOIN engines e ON e.id_engine = mye.engine_id
                    WHERE mye.model_id = %s
                """, (model_id,))
                myes = cur.fetchall()
                print(f" -> Has {len(myes)} MYEs. Details:")
                for mye_id, yid, eid, yname, ename in myes:
                    print(f" MYE {mye_id}: year={yname}, engine={ename}")
                    # Check if there's a real model in same brand with this
                    # year+engine combo. Blank-named models (including this
                    # one) are excluded so the MYE cannot match itself.
                    cur.execute("""
                        SELECT m2.id_model, m2.name_model
                        FROM model_year_engine mye2
                        JOIN models m2 ON m2.id_model = mye2.model_id
                        WHERE m2.brand_id = %s AND mye2.year_id = %s
                          AND (mye2.engine_id = %s OR (mye2.engine_id IS NULL AND %s IS NULL))
                          AND m2.id_model <> %s
                          AND m2.name_model IS NOT NULL AND TRIM(m2.name_model) <> ''
                        LIMIT 3
                    """, (brand_id, yid, eid, eid, model_id))
                    candidates = cur.fetchall()
                    print(f" Candidates: {candidates}")
                    if not dry_run:
                        # The model is deleted below regardless, so its MYEs
                        # must always be cleared to avoid orphaned rows.
                        cur.execute("""
                            UPDATE supplier_catalog_compat
                            SET model_year_engine_id = NULL
                            WHERE model_year_engine_id = %s
                        """, (mye_id,))
                        cur.execute(
                            "DELETE FROM vin_cache WHERE model_year_engine_id = %s",
                            (mye_id,),
                        )
                        cur.execute(
                            "DELETE FROM model_year_engine WHERE id_mye = %s",
                            (mye_id,),
                        )
                        print(f" -> Cleared MYE {mye_id} (moved to NULL, manual remap needed)")

            if not dry_run:
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                conn.commit()
            deleted += 1

    print(f"Empty models processed: {deleted}")
    return deleted


def clean_year_range_models(conn, dry_run=True):
    """Delete models that are year ranges like '09-15'.

    Returns the number of matching models processed (would-be deletions in
    dry run).
    """
    with conn.cursor() as cur:
        cur.execute("SELECT id_model, name_model, brand_id FROM models")
        year_ranges = [
            (mid, name.strip(), bid)
            for mid, name, bid in cur.fetchall()
            if name and _YEAR_RANGE_RE.match(name.strip())
        ]

    print(f"\nFound {len(year_ranges)} year-range models")
    deleted = _report_and_delete(conn, year_ranges, dry_run)
    print(f"Year-range models deleted: {deleted}")
    return deleted


def clean_torque_models(conn, dry_run=True):
    """Delete models that contain torque specs like 'Nm'.

    A name matches only when 'Nm' (any letter case) appears as a standalone
    token, e.g. '(60 Nm+90°)'; names that merely contain the letters inside a
    word are left alone.

    Returns the number of matching models processed (would-be deletions in
    dry run).
    """
    with conn.cursor() as cur:
        cur.execute("SELECT id_model, name_model, brand_id FROM models")
        torque_models = [
            (mid, name, bid)
            for mid, name, bid in cur.fetchall()
            if name and _TORQUE_RE.search(name)
        ]

    print(f"\nFound {len(torque_models)} torque-spec models")
    deleted = _report_and_delete(conn, torque_models, dry_run)
    print(f"Torque-spec models deleted: {deleted}")
    return deleted


def main():
    """Run every cleanup pass; apply changes only when '--execute' is given."""
    dry_run = '--execute' not in sys.argv
    if dry_run:
        print("=" * 60)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print("=" * 60)

    conn = get_connection()
    try:
        merge_int_models(conn, dry_run=dry_run)
        clean_empty_models(conn, dry_run=dry_run)
        clean_year_range_models(conn, dry_run=dry_run)
        clean_torque_models(conn, dry_run=dry_run)
    finally:
        # Closing without commit discards any uncommitted work.
        conn.close()

    if dry_run:
        print("\n" + "=" * 60)
        print("DRY RUN complete. Run with --execute to apply.")
        print("=" * 60)


if __name__ == '__main__':
    main()