- Cleaned 137+ fake engine-displacement models from supplier imports (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. suffixes, year-suffix, torque specs, empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog, supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal, compact matching, prefix/substring fallback, model aliases, and ±3 year proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500, LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*, nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
275 lines
11 KiB
Python
Executable File
275 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Clean fake/corrupted models from master DB caused by supplier catalog imports.
|
|
Handles:
|
|
- Models ending in ' INT.' -> map to base model
|
|
- Empty-name models -> delete or merge
|
|
- Year-range models (09-15, etc.) -> delete
|
|
- Torque-spec models ((60 Nm+90°), etc.) -> delete
|
|
"""
|
|
|
|
import os
import re
import sys
from collections import defaultdict

import psycopg2
|
|
|
|
# Connection string for the master database.
# NEXUS_MASTER_DSN, when set, overrides the built-in value so credentials do
# not have to live in source control.
# NOTE(review): the fallback literal still embeds a password; it is kept only
# so existing invocations keep working - rotate it and drop the fallback.
MASTER_DSN = os.environ.get(
    "NEXUS_MASTER_DSN",
    "host=localhost dbname=nexus_autoparts user=postgres password=1123517",
)
|
|
|
|
|
|
def get_connection():
    """Open and return a new psycopg2 connection built from ``MASTER_DSN``.

    The caller is responsible for closing the returned connection.
    """
    connection = psycopg2.connect(MASTER_DSN)
    return connection
|
|
|
|
|
|
def delete_model_and_myes(conn, model_id, dry_run=True):
    """Delete every model_year_engine (MYE) row of a model, then the model.

    Rows in ``supplier_catalog_compat`` that point at the removed MYEs are
    detached (set to NULL) and matching ``vin_cache`` rows are deleted first.

    Args:
        conn: Open DB-API connection. This function never commits; the
            caller decides when to commit.
        model_id: ``models.id_model`` of the model to remove.
        dry_run: When true (the default) only the SELECT runs and the
            planned actions are printed; no row is modified.
    """
    # "Would delete" is only truthful for a dry run.
    verb = "Would delete" if dry_run else "Deleting"
    cur = conn.cursor()
    try:
        cur.execute(
            "SELECT id_mye FROM model_year_engine WHERE model_id = %s",
            (model_id,),
        )
        mye_ids = [row[0] for row in cur.fetchall()]
        if mye_ids:
            print(f" {verb} {len(mye_ids)} MYEs for model {model_id}")
            if not dry_run:
                # supplier_catalog_compat has no FK, just update to null
                cur.execute(
                    "UPDATE supplier_catalog_compat SET model_year_engine_id = NULL "
                    "WHERE model_year_engine_id = ANY(%s)",
                    (mye_ids,),
                )
                cur.execute(
                    "DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)",
                    (mye_ids,),
                )
                cur.execute(
                    "DELETE FROM model_year_engine WHERE id_mye = ANY(%s)",
                    (mye_ids,),
                )
        else:
            print(f" No MYEs for model {model_id}")
        print(f" {verb} model {model_id}")
        if not dry_run:
            cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    finally:
        # Release the cursor even when a statement raises.
        cur.close()
|
|
|
|
|
|
def merge_int_models(conn, dry_run=True):
    """Merge models named '<base> INT.' into their base model.

    For every model whose name ends in ' INT.':

    * If the same brand has a model whose name equals the base name
      (case-insensitive), each MYE of the INT. model is either folded into
      the base model's MYE with the same year and engine (compat rows are
      re-pointed, the duplicate MYE and its vin_cache rows are deleted) or
      re-parented to the base model; the INT. model is then deleted.
    * Otherwise the INT. model is renamed to the base name.
    * A model whose name is nothing but the suffix is skipped.

    Args:
        conn: Open DB-API connection. Committed once per changed model when
            ``dry_run`` is false.
        dry_run: When true (the default) only SELECT statements run and the
            plan is printed; no row is modified.

    Returns:
        Tuple ``(merged, renamed, skipped)`` of model counts.
    """
    suffix = " INT."
    merged = renamed = skipped = 0
    cur = conn.cursor()
    try:
        # The space before INT. keeps names such as 'SPRINT.' out of the
        # result set; stripping the 5-character suffix would mangle them.
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model LIKE '% INT.'
            ORDER BY m.brand_id, m.name_model
        """)
        int_models = cur.fetchall()
        print(f"Found {len(int_models)} INT. models to process")

        for model_id, name_model, brand_id, brand_name in int_models:
            base_name = name_model[:-len(suffix)].rstrip()
            if not base_name:
                # Nothing sensible to merge into or rename to.
                print(f"[{brand_name}] {name_model!r} (id={model_id}) -> SKIP (empty base name)")
                skipped += 1
                continue

            # Find base model (case-insensitive)
            cur.execute("""
                SELECT id_model, name_model FROM models
                WHERE brand_id = %s AND LOWER(name_model) = LOWER(%s)
                LIMIT 1
            """, (brand_id, base_name))
            base = cur.fetchone()

            if not base:
                # No base exists: rename this model to base name
                print(f"[{brand_name}] {name_model} -> RENAME to '{base_name}' (no base found)")
                if not dry_run:
                    cur.execute(
                        "UPDATE models SET name_model = %s WHERE id_model = %s",
                        (base_name, model_id),
                    )
                    conn.commit()
                renamed += 1
                continue

            base_id, base_name_exact = base
            print(f"[{brand_name}] {name_model} -> {base_name_exact} (id={base_id})")

            # Migrate MYEs from INT model to base model
            cur.execute("""
                SELECT id_mye, year_id, engine_id FROM model_year_engine
                WHERE model_id = %s
            """, (model_id,))
            myes = cur.fetchall()

            mye_migrated = 0
            for mye_id, year_id, engine_id in myes:
                # Find equivalent MYE in base model
                cur.execute("""
                    SELECT id_mye FROM model_year_engine
                    WHERE model_id = %s AND year_id = %s
                    AND (engine_id = %s OR (engine_id IS NULL AND %s IS NULL))
                """, (base_id, year_id, engine_id, engine_id))
                base_mye = cur.fetchone()

                # All writes are skipped in a dry run.
                if not dry_run:
                    if base_mye:
                        # Base already has this year/engine: re-point compat
                        # rows to it and drop the duplicate MYE.
                        cur.execute("""
                            UPDATE supplier_catalog_compat
                            SET model_year_engine_id = %s
                            WHERE model_year_engine_id = %s
                        """, (base_mye[0], mye_id))
                        cur.execute(
                            "DELETE FROM vin_cache WHERE model_year_engine_id = %s",
                            (mye_id,),
                        )
                        cur.execute(
                            "DELETE FROM model_year_engine WHERE id_mye = %s",
                            (mye_id,),
                        )
                    else:
                        # Move MYE to base model
                        cur.execute("""
                            UPDATE model_year_engine SET model_id = %s WHERE id_mye = %s
                        """, (base_id, mye_id))
                mye_migrated += 1

            # Now delete the INT model (should have no MYEs left)
            if dry_run:
                print(f" Would migrate {mye_migrated} MYEs and delete model")
            else:
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                conn.commit()
                print(f" Migrated {mye_migrated} MYEs, deleted model")
            merged += 1

        print(f"\nINT. summary: merged={merged}, renamed={renamed}, skipped={skipped}")
    finally:
        # Release the cursor even when a statement raises.
        cur.close()
    return merged, renamed, skipped
|
|
|
|
|
|
def clean_empty_models(conn, dry_run=True):
    """Delete models whose name is NULL or blank, together with their MYEs.

    For a model that still has MYEs, each MYE is listed along with up to
    three other models of the same brand that have the same year/engine
    combination; these candidates are printed only as hints for a later
    manual remap. The MYE itself is always cleared before the model is
    deleted: compat rows are detached (set to NULL) and vin_cache rows are
    removed, so no MYE is left pointing at a deleted model.

    Args:
        conn: Open DB-API connection. Committed once per model when
            ``dry_run`` is false.
        dry_run: When true (the default) only SELECT statements run and the
            plan is printed; no row is modified.

    Returns:
        Number of empty-name models processed (deleted, or in a dry run,
        that would be deleted).
    """
    deleted = 0
    cur = conn.cursor()
    try:
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
                   (SELECT COUNT(*) FROM model_year_engine mye WHERE mye.model_id = m.id_model) as mye_count
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model IS NULL OR TRIM(m.name_model) = ''
            ORDER BY mye_count DESC
        """)
        empty_models = cur.fetchall()
        print(f"\nFound {len(empty_models)} empty-name models")

        for model_id, _name_model, brand_id, brand_name, mye_count in empty_models:
            print(f"[{brand_name}] empty model id={model_id}, MYEs={mye_count}")
            if mye_count == 0:
                print(" -> Safe to delete (no MYEs)")
            else:
                cur.execute("""
                    SELECT mye.id_mye, mye.year_id, mye.engine_id, y.year_car, e.name_engine
                    FROM model_year_engine mye
                    LEFT JOIN years y ON y.id_year = mye.year_id
                    LEFT JOIN engines e ON e.id_engine = mye.engine_id
                    WHERE mye.model_id = %s
                """, (model_id,))
                myes = cur.fetchall()
                print(f" -> Has {len(myes)} MYEs. Details:")
                for mye_id, yid, eid, yname, ename in myes:
                    print(f" MYE {mye_id}: year={yname}, engine={ename}")
                    # Look for a real model in the same brand with this
                    # year+engine combo. The empty model itself is excluded,
                    # otherwise its own MYE would always be reported.
                    cur.execute("""
                        SELECT m2.id_model, m2.name_model FROM model_year_engine mye2
                        JOIN models m2 ON m2.id_model = mye2.model_id
                        WHERE m2.brand_id = %s AND m2.id_model <> %s AND mye2.year_id = %s
                        AND (mye2.engine_id = %s OR (mye2.engine_id IS NULL AND %s IS NULL))
                        LIMIT 3
                    """, (brand_id, model_id, yid, eid, eid))
                    candidates = cur.fetchall()
                    print(f" Candidates: {candidates}")
                    if not dry_run:
                        # Clear the MYE whether or not a candidate exists:
                        # the model is deleted below, so the MYE cannot stay.
                        cur.execute("""
                            UPDATE supplier_catalog_compat SET model_year_engine_id = NULL
                            WHERE model_year_engine_id = %s
                        """, (mye_id,))
                        cur.execute(
                            "DELETE FROM vin_cache WHERE model_year_engine_id = %s",
                            (mye_id,),
                        )
                        cur.execute(
                            "DELETE FROM model_year_engine WHERE id_mye = %s",
                            (mye_id,),
                        )
                        print(f" -> Cleared MYE {mye_id} (moved to NULL, manual remap needed)")

            if not dry_run:
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
                # One commit per model keeps MYE and model removal atomic.
                conn.commit()
            deleted += 1

        print(f"Empty models processed: {deleted}")
    finally:
        # Release the cursor even when a statement raises.
        cur.close()
    return deleted
|
|
|
|
|
|
def clean_year_range_models(conn, dry_run=True):
    """Delete models whose entire name is a year range such as '09-15'.

    A name qualifies when, after trimming surrounding whitespace, it is
    exactly ``NN-NN`` or ``NNNN-NNNN`` (digits only).

    Args:
        conn: Open DB-API connection. Committed once per deleted model when
            ``dry_run`` is false.
        dry_run: When true (the default) the matches are only printed.

    Returns:
        Number of year-range models processed (deleted, or in a dry run,
        that would be deleted).
    """
    year_range = re.compile(r'\d{2}-\d{2}|\d{4}-\d{4}')
    deleted = 0
    cur = conn.cursor()
    try:
        # The brand name is fetched in the same query. LEFT JOIN keeps a
        # model whose brand row is missing (brand shown as None) instead of
        # failing on it later.
        cur.execute("""
            SELECT m.id_model, m.name_model, b.name_brand
            FROM models m
            LEFT JOIN brands b ON b.id_brand = m.brand_id
        """)
        year_ranges = [
            (mid, name.strip(), bname)
            for mid, name, bname in cur.fetchall()
            if name and year_range.fullmatch(name.strip())
        ]

        print(f"\nFound {len(year_ranges)} year-range models")
        for mid, name, bname in year_ranges:
            cur.execute(
                "SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s",
                (mid,),
            )
            count = cur.fetchone()[0]
            print(f"[{bname}] '{name}' id={mid}, MYEs={count}")
            if not dry_run:
                delete_model_and_myes(conn, mid, dry_run=False)
                conn.commit()
            deleted += 1
        print(f"Year-range models deleted: {deleted}")
    finally:
        # Release the cursor even when a statement raises.
        cur.close()
    return deleted
|
|
|
|
|
|
def clean_torque_models(conn, dry_run=True):
    """Delete models whose name contains a torque spec such as '(60 Nm+90°)'.

    A name qualifies when it contains the unit ``Nm`` (any letter case) that
    is not part of a longer word, so '60 Nm', '25NM' and 'Nm+90' match while
    ordinary names that merely contain the letters "nm" (e.g. 'GRANMAX') do
    not.

    Args:
        conn: Open DB-API connection. Committed once per deleted model when
            ``dry_run`` is false.
        dry_run: When true (the default) the matches are only printed.

    Returns:
        Number of torque-spec models processed (deleted, or in a dry run,
        that would be deleted).
    """
    # "nm" with no ASCII letter directly before or after it.
    torque_unit = re.compile(r'(?<![A-Za-z])nm(?![A-Za-z])', re.IGNORECASE)
    deleted = 0
    cur = conn.cursor()
    try:
        # The brand name is fetched in the same query. LEFT JOIN keeps a
        # model whose brand row is missing (brand shown as None) instead of
        # failing on it later.
        cur.execute("""
            SELECT m.id_model, m.name_model, b.name_brand
            FROM models m
            LEFT JOIN brands b ON b.id_brand = m.brand_id
        """)
        torque_models = [
            (mid, name, bname)
            for mid, name, bname in cur.fetchall()
            if name and torque_unit.search(name)
        ]

        print(f"\nFound {len(torque_models)} torque-spec models")
        for mid, name, bname in torque_models:
            cur.execute(
                "SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s",
                (mid,),
            )
            count = cur.fetchone()[0]
            print(f"[{bname}] '{name}' id={mid}, MYEs={count}")
            if not dry_run:
                delete_model_and_myes(conn, mid, dry_run=False)
                conn.commit()
            deleted += 1
        print(f"Torque-spec models deleted: {deleted}")
    finally:
        # Release the cursor even when a statement raises.
        cur.close()
    return deleted
|
|
|
|
|
|
def main():
    """Run all cleanup steps against the master database.

    Runs as a dry run unless ``--execute`` appears in ``sys.argv``. The
    steps run in a fixed order (INT. merge, empty names, year ranges, torque
    specs) and the connection is closed even if a step raises.
    """
    rule = "=" * 60
    dry_run = '--execute' not in sys.argv

    if dry_run:
        for line in (
            rule,
            "DRY RUN MODE — no changes will be made",
            "Run with --execute to apply changes",
            rule,
        ):
            print(line)

    conn = get_connection()
    try:
        for step in (
            merge_int_models,
            clean_empty_models,
            clean_year_range_models,
            clean_torque_models,
        ):
            step(conn, dry_run=dry_run)
    finally:
        conn.close()

    if not dry_run:
        return
    print("\n" + rule)
    print("DRY RUN complete. Run with --execute to apply.")
    print(rule)
|
|
|
|
|
|
# Script entry point: run the cleanup only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
|