feat(catalog): supplier catalog cleanup, fuzzy matching, and navigation fixes

- Cleaned 137+ fake engine-displacement models from supplier imports
  (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. prefixes, year-suffix, torque specs,
  empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog,
  supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from
  master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal,
  compact matching, prefix/substring fallback, model aliases, and ±3 year
  proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500,
  LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year
  filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*,
  nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
This commit is contained in:
2026-06-09 07:47:42 +00:00
parent 5ea667b80e
commit ea29cc31c0
53 changed files with 7727 additions and 548 deletions

274
scripts/clean_fake_models.py Executable file
View File

@@ -0,0 +1,274 @@
#!/usr/bin/env python3
"""
Clean fake/corrupted models from master DB caused by supplier catalog imports.
Handles:
- Models ending in ' INT.' -> map to base model
- Empty-name models -> delete or merge
- Year-range models (09-15, etc.) -> delete
- Torque-spec models ((60 Nm+90°), etc.) -> delete
"""
import sys
import re
import psycopg2
from collections import defaultdict
# NOTE(security): this literal embeds a database password in source control.
# It is kept only as a backward-compatible fallback; prefer setting the
# MASTER_DB_URL environment variable and rotate/remove this credential.
MASTER_DSN = "host=localhost dbname=nexus_autoparts user=postgres password=1123517"


def get_connection():
    """Open a new psycopg2 connection to the master database.

    The MASTER_DB_URL environment variable takes precedence when it is set
    and non-empty; otherwise the module-level MASTER_DSN fallback is used.
    """
    import os

    return psycopg2.connect(os.environ.get("MASTER_DB_URL") or MASTER_DSN)
def delete_model_and_myes(conn, model_id, dry_run=True):
    """Delete all MYEs for a model, then the model itself.

    Args:
        conn: open DB connection; no commit is issued here.
        model_id: value of models.id_model to remove.
        dry_run: when true (default) only the SELECT runs and the planned
            actions are printed; when false the rows are actually changed.
    """
    # Report honestly: "Would delete" only when nothing is being written.
    verb = "Would delete" if dry_run else "Deleting"
    cur = conn.cursor()
    try:
        cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
        mye_ids = [row[0] for row in cur.fetchall()]
        if mye_ids:
            print(f" {verb} {len(mye_ids)} MYEs for model {model_id}")
            if not dry_run:
                # supplier_catalog_compat has no FK, just update to null
                cur.execute(
                    "UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)",
                    (mye_ids,),
                )
                cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", (mye_ids,))
        else:
            print(f" No MYEs for model {model_id}")
        print(f" {verb} model {model_id}")
        if not dry_run:
            cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    finally:
        # Release the cursor even when a statement fails.
        cur.close()
def merge_int_models(conn, dry_run=True):
    """Merge 'X INT.' models into their base equivalents.

    For every model whose name ends in ' INT.':
      * if a model with the same brand and the suffix-less name exists
        (case-insensitive), its MYEs are folded into that base model and the
        INT. model is deleted;
      * otherwise the INT. model is renamed to the suffix-less name.

    In dry-run mode only SELECT statements are issued; every write and every
    commit is skipped.

    Returns:
        Tuple ``(merged, renamed, skipped)`` of processed-model counts.
    """
    suffix = ' INT.'
    cur = conn.cursor()
    # Require the leading space so the suffix stripped below is exactly what
    # the query matched (a bare '%INT.' would also hit names like 'XINT.').
    cur.execute("""
        SELECT m.id_model, m.name_model, m.brand_id, b.name_brand
        FROM models m
        JOIN brands b ON b.id_brand = m.brand_id
        WHERE m.name_model LIKE '% INT.'
        ORDER BY m.brand_id, m.name_model
    """)
    int_models = cur.fetchall()
    print(f"Found {len(int_models)} INT. models to process")
    merged = 0
    renamed = 0
    skipped = 0
    for model_id, name_model, brand_id, brand_name in int_models:
        base_name = name_model[:-len(suffix)].rstrip()
        if not base_name:
            # Nothing meaningful remains; renaming would create an empty model.
            print(f"[{brand_name}] {name_model!r} -> SKIP (empty base name)")
            skipped += 1
            continue
        # Find base model (case-insensitive)
        cur.execute("""
            SELECT id_model, name_model FROM models
            WHERE brand_id = %s AND LOWER(name_model) = LOWER(%s)
            LIMIT 1
        """, (brand_id, base_name))
        base = cur.fetchone()
        if base is None:
            # No base exists: rename this model to base name
            print(f"[{brand_name}] {name_model} -> RENAME to '{base_name}' (no base found)")
            if not dry_run:
                cur.execute("UPDATE models SET name_model = %s WHERE id_model = %s", (base_name, model_id))
                conn.commit()
            renamed += 1
            continue
        base_id, base_name_exact = base
        print(f"[{brand_name}] {name_model} -> {base_name_exact} (id={base_id})")
        # Migrate MYEs from INT model to base model
        cur.execute("""
            SELECT id_mye, year_id, engine_id FROM model_year_engine
            WHERE model_id = %s
        """, (model_id,))
        myes = cur.fetchall()
        if dry_run:
            print(f" Would migrate {len(myes)} MYEs and delete model")
            merged += 1
            continue
        for mye_id, year_id, engine_id in myes:
            # Find equivalent MYE in base model (NULL engines match each other)
            cur.execute("""
                SELECT id_mye FROM model_year_engine
                WHERE model_id = %s AND year_id = %s
                AND (engine_id = %s OR (engine_id IS NULL AND %s IS NULL))
            """, (base_id, year_id, engine_id, engine_id))
            base_mye = cur.fetchone()
            if base_mye:
                # Re-point compat rows at the surviving MYE, then drop the duplicate.
                cur.execute("""
                    UPDATE supplier_catalog_compat
                    SET model_year_engine_id = %s
                    WHERE model_year_engine_id = %s
                """, (base_mye[0], mye_id))
                cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = %s", (mye_id,))
                cur.execute("DELETE FROM model_year_engine WHERE id_mye = %s", (mye_id,))
            else:
                # Move MYE to base model
                cur.execute("""
                    UPDATE model_year_engine SET model_id = %s WHERE id_mye = %s
                """, (base_id, mye_id))
        # Now delete the INT model (it has no MYEs left)
        cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
        conn.commit()
        print(f" Migrated {len(myes)} MYEs, deleted model")
        merged += 1
    print(f"\nINT. summary: merged={merged}, renamed={renamed}, skipped={skipped}")
    cur.close()
    return merged, renamed, skipped
def clean_empty_models(conn, dry_run=True):
    """Delete models whose name is NULL or blank.

    Models without MYEs are deleted directly. For models with MYEs, each MYE
    is listed together with up to three same-brand models that already have
    the same year/engine combination (informational only, to help a manual
    remap). When not in dry-run mode the MYE's supplier_catalog_compat rows
    are unlinked (set to NULL), its vin_cache rows and the MYE are deleted,
    and finally the model is deleted and the change committed.

    Returns:
        Number of empty-name models processed (counted in dry-run mode too).
    """
    cur = conn.cursor()
    cur.execute("""
        SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
        (SELECT COUNT(*) FROM model_year_engine mye WHERE mye.model_id = m.id_model) as mye_count
        FROM models m
        JOIN brands b ON b.id_brand = m.brand_id
        WHERE m.name_model IS NULL OR TRIM(m.name_model) = ''
        ORDER BY mye_count DESC
    """)
    empty_models = cur.fetchall()
    print(f"\nFound {len(empty_models)} empty-name models")
    deleted = 0
    for model_id, _name_model, brand_id, brand_name, mye_count in empty_models:
        print(f"[{brand_name}] empty model id={model_id}, MYEs={mye_count}")
        if mye_count == 0:
            print(" -> Safe to delete (no MYEs)")
        else:
            cur.execute("""
                SELECT mye.id_mye, mye.year_id, mye.engine_id, y.year_car, e.name_engine
                FROM model_year_engine mye
                LEFT JOIN years y ON y.id_year = mye.year_id
                LEFT JOIN engines e ON e.id_engine = mye.engine_id
                WHERE mye.model_id = %s
            """, (model_id,))
            myes = cur.fetchall()
            print(f" -> Has {len(myes)} MYEs. Details:")
            for mye_id, yid, eid, yname, ename in myes:
                print(f" MYE {mye_id}: year={yname}, engine={ename}")
                # List other models of the brand with this year+engine combo.
                # The empty model itself is excluded; otherwise its own MYE
                # would always show up as a "candidate".
                cur.execute("""
                    SELECT m2.id_model, m2.name_model FROM model_year_engine mye2
                    JOIN models m2 ON m2.id_model = mye2.model_id
                    WHERE m2.brand_id = %s AND m2.id_model <> %s AND mye2.year_id = %s
                    AND (mye2.engine_id = %s OR (mye2.engine_id IS NULL AND %s IS NULL))
                    LIMIT 3
                """, (brand_id, model_id, yid, eid, eid))
                candidates = cur.fetchall()
                print(f" Candidates: {candidates}")
                if not dry_run:
                    # The model is deleted below, so its MYEs must always go,
                    # whether or not a remap candidate exists.
                    cur.execute("""
                        UPDATE supplier_catalog_compat SET model_year_engine_id = NULL
                        WHERE model_year_engine_id = %s
                    """, (mye_id,))
                    cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = %s", (mye_id,))
                    cur.execute("DELETE FROM model_year_engine WHERE id_mye = %s", (mye_id,))
                    print(f" -> Cleared MYE {mye_id} (moved to NULL, manual remap needed)")
        if not dry_run:
            cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
            # One commit per model keeps its MYE cleanup and deletion atomic.
            conn.commit()
        deleted += 1
    print(f"Empty models processed: {deleted}")
    cur.close()
    return deleted
def clean_year_range_models(conn, dry_run=True):
    """Delete models whose whole name is a year range such as '09-15'.

    A name qualifies when, after trimming, it is exactly NN-NN or NNNN-NNNN.
    In dry-run mode the matches are only listed.

    Returns:
        Number of matching models (counted in dry-run mode as well).
    """
    range_re = re.compile(r'^(\d{2}-\d{2}|\d{4}-\d{4})$')
    cur = conn.cursor()
    cur.execute("SELECT id_model, name_model, brand_id FROM models")
    year_ranges = [
        (model_id, raw_name.strip(), brand_id)
        for model_id, raw_name, brand_id in cur.fetchall()
        if raw_name and range_re.match(raw_name.strip())
    ]
    print(f"\nFound {len(year_ranges)} year-range models")
    deleted = 0
    for model_id, label, brand_id in year_ranges:
        cur.execute("SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s", (model_id,))
        mye_total = cur.fetchone()[0]
        cur.execute("SELECT name_brand FROM brands WHERE id_brand = %s", (brand_id,))
        brand_label = cur.fetchone()[0]
        print(f"[{brand_label}] '{label}' id={model_id}, MYEs={mye_total}")
        if not dry_run:
            delete_model_and_myes(conn, model_id, dry_run=False)
            conn.commit()
        deleted += 1
    print(f"Year-range models deleted: {deleted}")
    cur.close()
    return deleted
# A torque spec is a number followed by the unit, e.g. "(60 Nm+90°)" or "120Nm".
# Requiring the digit keeps ordinary names that merely contain the letters
# "nm" from being flagged for deletion.
_TORQUE_SPEC_RE = re.compile(r'\d\s*Nm\b', re.IGNORECASE)


def clean_torque_models(conn, dry_run=True):
    """Delete models whose name contains a torque spec such as '60 Nm'.

    In dry-run mode the matches are only listed.

    Returns:
        Number of matching models (counted in dry-run mode as well).
    """
    cur = conn.cursor()
    cur.execute("SELECT id_model, name_model, brand_id FROM models")
    torque_models = [
        (mid, name, bid)
        for mid, name, bid in cur.fetchall()
        if name and _TORQUE_SPEC_RE.search(name)
    ]
    print(f"\nFound {len(torque_models)} torque-spec models")
    deleted = 0
    for mid, name, bid in torque_models:
        cur.execute("SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s", (mid,))
        count = cur.fetchone()[0]
        cur.execute("SELECT name_brand FROM brands WHERE id_brand = %s", (bid,))
        bname = cur.fetchone()[0]
        print(f"[{bname}] '{name}' id={mid}, MYEs={count}")
        if not dry_run:
            delete_model_and_myes(conn, mid, dry_run=False)
            conn.commit()
        deleted += 1
    print(f"Torque-spec models deleted: {deleted}")
    cur.close()
    return deleted
def main():
    """Run every cleanup pass against the master DB.

    Passing ``--execute`` on the command line applies the changes; without it
    each pass runs in dry-run mode. The connection is always closed.
    """
    dry_run = '--execute' not in sys.argv
    rule = "=" * 60
    if dry_run:
        print(rule)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print(rule)
    conn = get_connection()
    try:
        # Passes run in this fixed order.
        for cleanup_pass in (
            merge_int_models,
            clean_empty_models,
            clean_year_range_models,
            clean_torque_models,
        ):
            cleanup_pass(conn, dry_run=dry_run)
    finally:
        conn.close()
    if dry_run:
        print("\n" + rule)
        print("DRY RUN complete. Run with --execute to apply.")
        print(rule)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,240 @@
#!/usr/bin/env python3
"""
Clean supplier-corrupted models from master DB.
Handles trailing years, year ranges, engine specs, trim variants, etc.
Usage:
python scripts/clean_supplier_corrupted_models.py [--execute]
"""
import os
import re
import sys
import psycopg2
# Connection URL for the master DB; the environment variable wins over the default.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')


def connect():
    """Return a new psycopg2 connection opened with MASTER_DB_URL."""
    dsn = MASTER_DB_URL
    return psycopg2.connect(dsn)
def delete_model_and_myes(conn, model_id):
    """Remove a model together with all of its model_year_engine rows.

    supplier_catalog_compat rows pointing at those MYEs are unlinked (set to
    NULL) and matching vin_cache rows are deleted first. No commit is issued.
    """
    cur = conn.cursor()
    cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
    mye_ids = [row[0] for row in cur.fetchall()]
    if mye_ids:
        # Dependents first, the MYE rows last.
        per_mye_statements = (
            "UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)",
            "DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)",
            "DELETE FROM model_year_engine WHERE id_mye = ANY(%s)",
        )
        for statement in per_mye_statements:
            cur.execute(statement, (mye_ids,))
    cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    cur.close()
def normalize_for_match(name):
    """Return *name* upper-cased with whitespace runs collapsed to one space.

    Falsy input (None, '', 0) yields ''. Non-string values are converted with
    str() first.
    """
    if name:
        tokens = str(name).upper().split()
        return ' '.join(tokens)
    return ''
def find_base_model(cur, brand_id, base_name):
    """Look up a model of *brand_id* whose name matches *base_name*.

    Three lookups are tried in order, returning the first hit:
    case-insensitive equality, equality with spaces treated as hyphens, and
    equality with hyphens treated as spaces.

    Returns:
        The ``(id_model, name_model)`` row, or whatever the last lookup's
        ``fetchone()`` gave when nothing matched.
    """
    normalized = normalize_for_match(base_name)
    attempts = (
        # Try exact
        ("""
            SELECT id_model, name_model FROM models
            WHERE brand_id = %s AND LOWER(name_model) = LOWER(%s)
            LIMIT 1
        """, normalized),
        # Try with spaces replaced by hyphens
        ("""
            SELECT id_model, name_model FROM models
            WHERE brand_id = %s AND REPLACE(UPPER(name_model), ' ', '-') = %s
            LIMIT 1
        """, normalized.replace(' ', '-')),
        # Try with hyphens replaced by spaces
        ("""
            SELECT id_model, name_model FROM models
            WHERE brand_id = %s AND REPLACE(UPPER(name_model), '-', ' ') = %s
            LIMIT 1
        """, normalized.replace('-', ' ')),
    )
    found = None
    for query, wanted in attempts:
        cur.execute(query, (brand_id, wanted))
        found = cur.fetchone()
        if found:
            break
    return found
# Fold every model_year_engine (MYE) row of `model_id` into `base_id`, then
# delete the `model_id` row from models. Returns the `migrated` counter.
# No commit is issued in this function.
def merge_model_to_base(conn, model_id, base_id, base_name):
# NOTE(review): `base_name` is accepted but never read in this body.
cur = conn.cursor()
cur.execute("SELECT id_mye, year_id, engine_id FROM model_year_engine WHERE model_id = %s", (model_id,))
myes = cur.fetchall()
migrated = 0
for mye_id, year_id, engine_id in myes:
# Look for a base-model MYE with the same year and engine; two NULL engines
# are treated as equal.
cur.execute("""
SELECT id_mye FROM model_year_engine
WHERE model_id = %s AND year_id = %s
AND (engine_id = %s OR (engine_id IS NULL AND %s IS NULL))
""", (base_id, year_id, engine_id, engine_id))
base_mye = cur.fetchone()
if base_mye:
# Duplicate MYE: re-point compat rows to the base MYE, drop the vin_cache
# rows of the old MYE, then delete the old MYE itself.
base_mye_id = base_mye[0]
cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = %s WHERE model_year_engine_id = %s", (base_mye_id, mye_id))
cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = %s", (mye_id,))
cur.execute("DELETE FROM model_year_engine WHERE id_mye = %s", (mye_id,))
else:
# No equivalent on the base model: re-parent this MYE to it.
cur.execute("UPDATE model_year_engine SET model_id = %s WHERE id_mye = %s", (base_id, mye_id))
# Counter is only reported by the caller — TODO confirm whether it is meant to
# include MYEs folded into an existing base row or only re-parented ones.
migrated += 1
cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
cur.close()
return migrated
def extract_base_name(name, reason):
    """Return the model name with its corrupted suffix removed, or None.

    *reason* selects the extraction rule:
      * 'trailing_year'     – text before a final 19xx/20xx year;
      * 'year_range_parens' – text before 'NN-NN (';
      * 'hasta_tas'         – text before 'NN-NN Tas.' (an optional 'N.NL'
        displacement is dropped too), else text before 'hasta'.
    Any other reason, or a name the rule does not match, yields None.
    """
    text = name.strip()
    attempts = []
    if reason == 'trailing_year':
        attempts.append(re.search(r'^(.*?)\s+(19|20)\d{2}$', text))
    elif reason == 'year_range_parens':
        attempts.append(re.search(r'^(.*?)\s+\d{2}-\d{2}\s*\(', text))
    elif reason == 'hasta_tas':
        # The 'Tas.' rule takes priority over the 'hasta' rule.
        if 'Tas.' in text:
            attempts.append(re.search(r'^(.*?)(?:\s+\d+\.\d+L)?\s+\d{2}-\d{2}\s+Tas\.', text))
        if 'hasta' in text.lower():
            attempts.append(re.search(r'^(.*?)\s+hasta', text, re.IGNORECASE))
    for hit in attempts:
        if hit:
            return hit.group(1).strip()
    return None
def main():
    """Detect supplier-corrupted model names, then merge or delete them.

    Models are flagged by regex (trailing year, year range, engine spec,
    'hasta'/'Tas.' wording, bare displacement) or as a trim variant
    ('500 POP', 'FIESTA SE') of an existing base model. Flagged models with a
    resolvable base model are merged into it; the rest are deleted. Without
    ``--execute`` on the command line the plan is only printed.
    """
    dry_run = '--execute' not in sys.argv
    if dry_run:
        print("=" * 60)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print("=" * 60)
    conn = connect()
    try:
        cur = conn.cursor()
        cur.execute('SELECT id_model, name_model, brand_id FROM models')
        models = cur.fetchall()
        # reason -> (detector regex, optional extra check that must return True)
        patterns = {
            'trailing_year': (re.compile(r' (19|20)\d{2}$'), lambda b: b != 'MCLAREN'),
            'year_range_parens': (re.compile(r'[A-Za-z]+ \d{2}-\d{2} \('), None),
            'engine_spec': (re.compile(r',?\s*\(\d+ HP\)|DOHC|SOHC|Valv\.|Turbo L4|L4,\s*\(', re.IGNORECASE), None),
            'hasta_tas': (re.compile(r'hasta|Tas\.', re.IGNORECASE), None),
            'engine_only': (re.compile(r'^\d+\.\d+L$', re.IGNORECASE), None),
            'engine_config': (re.compile(r'^\d+\.\d+L\s+(?:L\d|V\d|R\s|Turbo|TDI|GSI)', re.IGNORECASE),
                              lambda n: not re.search(r'\([A-Z0-9_]{3,}\)$', n)),
        }
        suspicious = []
        for mid, name, bid in models:
            if not name:
                continue
            for reason, (pat, extra_check) in patterns.items():
                if not pat.search(name):
                    continue
                ok = True
                if extra_check:
                    if reason == 'trailing_year':
                        # This check is evaluated against the brand name.
                        cur.execute('SELECT name_brand FROM brands WHERE id_brand=%s', (bid,))
                        bname = cur.fetchone()[0]
                        ok = extra_check(bname)
                    else:
                        ok = extra_check(name)
                if ok:
                    suspicious.append((bid, name, mid, reason))
                    break
        # Set of already-flagged ids: O(1) membership instead of rescanning the list.
        flagged_ids = {entry[2] for entry in suspicious}
        # Trim variant detection: "500 POP", "FIESTA SE", etc.
        trim_variants = ['LOUNGE', 'POP', 'SPORT', 'ADVENTURE', 'FIRE', 'GT', 'GTV', 'STD', 'SE', 'LE', 'XLE', 'LIMITED', 'LX', 'EX', 'SX']
        # At least one space is required before the trim; without it a plain
        # name such as "ECLIPSE" would be read as base "ECLIP" + trim "SE".
        trim_pattern = re.compile(r'^(\S+?)\s+(' + '|'.join(trim_variants) + r')$')
        trim_matches = []
        for mid, name, bid in models:
            if not name or mid in flagged_ids:
                continue
            m = trim_pattern.match(name.upper())
            if m:
                base_model = find_base_model(cur, bid, m.group(1))
                if base_model:
                    trim_matches.append((bid, name, mid, 'trim_variant', base_model[0], base_model[1]))
        print(f"\nFound {len(suspicious)} suspicious models by pattern")
        print(f"Found {len(trim_matches)} trim variant models")
        to_merge = []
        to_delete = []
        for bid, name, mid, reason in suspicious:
            # Engine-only names carry no base model to merge into.
            if reason not in ('engine_spec', 'engine_only', 'engine_config'):
                base_name = extract_base_name(name, reason)
                if base_name:
                    base = find_base_model(cur, bid, base_name)
                    if base:
                        to_merge.append((bid, name, mid, reason, base[0], base[1]))
                        continue
            to_delete.append((bid, name, mid, reason))
        # Add trim matches to merge list
        to_merge.extend(trim_matches)
        print(f"\nTo merge: {len(to_merge)}")
        for bid, name, mid, reason, base_id, base_name in to_merge:
            print(f" [{bid}] '{name}' -> '{base_name}' (reason={reason})")
        print(f"\nTo delete: {len(to_delete)}")
        for bid, name, mid, reason in to_delete:
            print(f" [{bid}] '{name}' reason={reason}")
        if dry_run:
            print("\n" + "=" * 60)
            print("DRY RUN complete. Run with --execute to apply.")
            print("=" * 60)
            cur.close()
            return
        print("\nApplying merges...")
        for bid, name, mid, reason, base_id, base_name in to_merge:
            cur.execute('SELECT name_brand FROM brands WHERE id_brand=%s', (bid,))
            bname = cur.fetchone()[0]
            migrated = merge_model_to_base(conn, mid, base_id, base_name)
            print(f" [{bname}] '{name}' -> '{base_name}' ({migrated} MYEs migrated)")
        conn.commit()
        print("\nApplying deletes...")
        for bid, name, mid, reason in to_delete:
            cur.execute('SELECT name_brand FROM brands WHERE id_brand=%s', (bid,))
            bname = cur.fetchone()[0]
            delete_model_and_myes(conn, mid)
            print(f" [{bname}] '{name}' deleted")
        conn.commit()
        print(f"\nDone. Merged {len(to_merge)}, deleted {len(to_delete)}.")
        cur.close()
    finally:
        # Always release the connection, including when a statement raises.
        conn.close()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Second pass: clean remaining supplier-corrupted models.
More aggressive patterns for engine specs mixed with years.
Usage:
python scripts/clean_supplier_corrupted_models_v2.py [--execute]
"""
import os
import re
import sys
import psycopg2
# Master DB URL: taken from the environment, with a local default.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')


def connect():
    """Open and return a psycopg2 connection for MASTER_DB_URL."""
    connection = psycopg2.connect(MASTER_DB_URL)
    return connection
def delete_model_and_myes(conn, model_id):
    """Delete one model and every model_year_engine row that belongs to it.

    Before the MYE rows go, supplier_catalog_compat rows referencing them are
    set to NULL and vin_cache rows referencing them are deleted. The caller is
    left to commit.
    """
    cur = conn.cursor()
    cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
    mye_ids = []
    for (mye_id,) in cur.fetchall():
        mye_ids.append(mye_id)
    if len(mye_ids) > 0:
        id_param = (mye_ids,)
        cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)", id_param)
        cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", id_param)
        cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", id_param)
    cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    cur.close()
def main():
    """Second cleanup pass: delete models matching aggressive garbage patterns.

    A model is selected when its trimmed name is in the known-bad set, or when
    it matches any pattern and is not a legitimate 'NNNN-NNNN (CODE)' name.
    Without ``--execute`` the selection is only listed.
    """
    dry_run = '--execute' not in sys.argv
    rule = "=" * 60
    if dry_run:
        print(rule)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print(rule)
    conn = connect()
    cur = conn.cursor()
    # Aggressive patterns for remaining garbage
    # Pattern: starts with displacement and contains year range or engine config
    patterns = [
        re.compile(r'^\d+\.\d+L.*\d{2}-\d{2}', re.IGNORECASE),  # 2.2L 98-99 L4 Amigo
        re.compile(r'^\d+\.\d+L\s+[A-Za-z].*L\d', re.IGNORECASE),  # 1.5L March
        re.compile(r'^\d+\.\d+L\s+[A-Za-z]{3,}$', re.IGNORECASE),  # 1.8L R
        re.compile(r'^\d+\.\d+L\s+Datsun', re.IGNORECASE),  # 1.5L Datsun 1600
        re.compile(r'\d{2}-\d{2}.*L4,', re.IGNORECASE),  # ...98-99...L4,
        re.compile(r'\d{2}-\d{2}.*\d+\.\d+L.*Gasolina', re.IGNORECASE),  # ...07-16...2.4L Gasolina
        re.compile(r'^370 Z\s+\d+\.\d+L', re.IGNORECASE),  # 370 Z 1.5L
        re.compile(r'Brakes.*\d{2}-\d{2}', re.IGNORECASE),  # Cooper JC Works Brakes 09-15
        re.compile(r'Cabstar.*\d{2}-\d{2}', re.IGNORECASE),  # Cabstar 3.5T 07-16
        re.compile(r'X Terra.*Chevrolet', re.IGNORECASE),  # X Terra 05-15 Chevrolet City
    ]
    # Specific known-bad models by exact name
    known_bad = {
        '1.8L R', '2.5L 08 - 13', 'Eclipse 2.0L Aspiración Natural',
        'Cooper JC Works Brakes 09-15 Disco de 316mm',
        'Cabstar 3.5T 07-16 C/Sensor', 'X Terra 05-15 Chevrolet City Ex-',
        'NP-300 (D-22) 2WD 2.4L GASOLINA', 'NV-350 2.5L GAS',
    }
    # Legitimate TecDoc patterns like "1100-1900 (101_)" are never pattern-deleted.
    tecdoc_like = re.compile(r'^\d{4}-\d{4} \([A-Z0-9_]+\)$')
    cur.execute('SELECT id_model, name_model, brand_id FROM models')
    models = cur.fetchall()
    to_delete = []
    for mid, name, bid in models:
        if not name:
            continue
        trimmed = name.strip()
        if trimmed in known_bad:
            why = 'known_bad'
        elif not tecdoc_like.search(trimmed) and any(p.search(trimmed) for p in patterns):
            why = 'pattern'
        else:
            continue
        to_delete.append((bid, trimmed, mid, why))
    print(f"\nFound {len(to_delete)} remaining corrupted models")
    for bid, n, mid, reason in to_delete:
        cur.execute('SELECT name_brand FROM brands WHERE id_brand=%s', (bid,))
        bname = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM model_year_engine WHERE model_id=%s', (mid,))
        mye_count = cur.fetchone()[0]
        print(f" [{bname}] '{n}' id={mid} MYEs={mye_count} reason={reason}")
    if not dry_run:
        print("\nApplying deletes...")
        for bid, n, mid, reason in to_delete:
            cur.execute('SELECT name_brand FROM brands WHERE id_brand=%s', (bid,))
            bname = cur.fetchone()[0]
            delete_model_and_myes(conn, mid)
            print(f" [{bname}] '{n}' deleted")
        conn.commit()
        print(f"\nDone. Deleted {len(to_delete)} models.")
    else:
        print("\n" + rule)
        print("DRY RUN complete. Run with --execute to apply.")
        print(rule)
    cur.close()
    conn.close()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,228 @@
#!/usr/bin/env python3
"""
Third pass: clean specific remaining supplier-corrupted models.
Usage:
python scripts/clean_supplier_corrupted_models_v3.py [--execute]
"""
import os
import sys
import psycopg2
# Where the master DB lives; MASTER_DB_URL in the environment overrides the default.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')


def connect():
    """Create a psycopg2 connection to the DB identified by MASTER_DB_URL."""
    target = MASTER_DB_URL
    return psycopg2.connect(target)
def delete_model_and_myes(conn, model_id):
    """Drop a model row after clearing everything hanging off its MYEs.

    For the model's model_year_engine ids: compat rows are unlinked (NULL),
    vin_cache rows are deleted, then the MYE rows are deleted. Nothing is
    committed here.
    """
    cur = conn.cursor()
    cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
    fetched = cur.fetchall()
    mye_ids = [record[0] for record in fetched]
    if not mye_ids:
        # No MYEs: only the model row itself needs removing.
        cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
        cur.close()
        return
    cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
    cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
    cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", (mye_ids,))
    cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    cur.close()
# Fold every model_year_engine (MYE) row of `model_id` into `base_id`, then
# delete the `model_id` row from models. Returns the `migrated` counter.
# No commit is issued in this function.
def merge_model_to_base(conn, model_id, base_id):
cur = conn.cursor()
cur.execute("SELECT id_mye, year_id, engine_id FROM model_year_engine WHERE model_id = %s", (model_id,))
myes = cur.fetchall()
migrated = 0
for mye_id, year_id, engine_id in myes:
# Look for a base-model MYE with the same year and engine; two NULL engines
# are treated as equal.
cur.execute("""
SELECT id_mye FROM model_year_engine
WHERE model_id = %s AND year_id = %s
AND (engine_id = %s OR (engine_id IS NULL AND %s IS NULL))
""", (base_id, year_id, engine_id, engine_id))
base_mye = cur.fetchone()
if base_mye:
# Duplicate MYE: re-point compat rows to the base MYE, drop the vin_cache
# rows of the old MYE, then delete the old MYE itself.
cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = %s WHERE model_year_engine_id = %s", (base_mye[0], mye_id))
cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = %s", (mye_id,))
cur.execute("DELETE FROM model_year_engine WHERE id_mye = %s", (mye_id,))
else:
# No equivalent on the base model: re-parent this MYE to it.
cur.execute("UPDATE model_year_engine SET model_id = %s WHERE id_mye = %s", (base_id, mye_id))
# Counter is only reported by the caller — TODO confirm whether it is meant to
# include MYEs folded into an existing base row or only re-parented ones.
migrated += 1
cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
cur.close()
return migrated
def main():
    """Third cleanup pass: delete or merge an explicit list of bad models.

    ``delete_exact`` lists (brand, model) pairs to delete and ``merge_map``
    lists (brand, bad model, base model) triples to merge. Brand and model
    names are matched exactly. Entries that cannot be resolved are reported
    as NOT FOUND and skipped. Without ``--execute`` the plan is only printed.
    """
    dry_run = '--execute' not in sys.argv
    if dry_run:
        print("=" * 60)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print("=" * 60)
    # Exact (brand, model_name) pairs to delete
    delete_exact = [
        ('ISUZU', '1.9L 81 - 86 Pick-Up'),
        ('ISUZU', '2.2L 98 - 02 L4 2.3L Turbo 84 - 87'),
        ('ISUZU', '2.2L 98 - 99 L4 Amigo'),
        ('ISUZU', '2.3L 81 - 95 2.0L Turbo 84 - 87'),
        ('ISUZU', '2.3L 81 - 95 Pick-Up'),
        ('ISUZU', '3.2L 92 - 93 V6'),
        ('ISUZU', '3.2L 92 - 93 V6 2.3L Turbo 84 - 87'),
        ('CHEVROLET', '2.0L 1984 L4'),
        ('CHEVROLET', '2.0L 83 - 84 L4'),
        ('CHEVROLET', '2.0L 83 - 84 L4 Jimmy'),
        ('CHEVROLET', '2.8L 1984 V6'),
        ('CHEVROLET', '2.8L 85 - 93 V6'),
        ('CHEVROLET', '3.1L 91 - 95 V6'),
        ('CHEVROLET', '3.1L 91 - 98 V6'),
        ('CHEVROLET', '4.1L 81 - 84 L6'),
        ('CHEVROLET', '4.1L 81 - 84 L6 10'),
        ('CHEVROLET', '4.3L 10'),
        ('CHEVROLET', '4.3L 90 - 93 V6'),
        ('CHEVROLET', '4.3L 90 - 93 V6 10'),
        ('CHEVROLET', '4.3L 96 - 00 V6 10'),
        ('CHEVROLET', '5.0L 96 - 98 V8'),
        ('CHEVROLET', '5.0L 96 - 98 V8 10'),
        ('CHEVROLET', '5.1L 82 - 91 V8'),
        ('CHEVROLET', '5.7L (19.15 mm)'),
        ('CHEVROLET', '5.7L 10'),
        ('CHEVROLET', '5.7L 69 - 91 V8'),
        ('CHEVROLET', '5.7L 69 - 91 V8 10'),
        ('CHRYSLER / DODGE', '2.5L Chasis Cabina'),
        ('CHRYSLER / DODGE', '3.7L 10'),
        ('CHRYSLER / DODGE', '3.7L 86 - 87 L6 23'),
        ('CHRYSLER / DODGE', '3.9L 88 - 91 V6'),
        ('CHRYSLER / DODGE', '3.9L 88 - 91 V6 23'),
        ('CHRYSLER / DODGE', '4.7L 04 - 05 V8'),
        ('CHRYSLER / DODGE', '4.7L 04 - 05 V8 10'),
        ('CHRYSLER / DODGE', '4.7L 10'),
        ('CHRYSLER / DODGE', '5.2L 10'),
        ('CHRYSLER / DODGE', '5.2L 85 - 93 V8'),
        ('CHRYSLER / DODGE', '5.7L 10'),
        ('CHRYSLER / DODGE', '5.9L 10'),
        ('CHRYSLER / DODGE', '5.9L 19'),
        ('CHRYSLER / DODGE', '5.9L 1992 V8 19'),
        ('CHRYSLER / DODGE', '5.9L 88 - 91 V8 19'),
        ('CHRYSLER / DODGE', '5.9L 94 - 97 V8 Ram 7000'),
        ('FORD', '2.0L 97 - 03 L4'),
        ('FORD', '2.0L 97 - 03 L4 10'),
        ('FORD', '2.0L LX, SE'),
        ('FORD', '2.3L 87 - 88 L4'),
        ('FORD', '2.3L 87 - 88 L4 10'),
        ('FORD', '2.3L 87 - 88 L4 Aerostar'),
        ('FORD', '2.3L 87 - 88 L4 Bronco'),
        ('FORD', '3.0L 98 - 01 V6'),
        ('FORD', '4.0L 93 - 97 V6'),
        ('FORD', '4.6L 10'),
        ('FORD', '4.9L 83 - 93 L6 10'),
        ('FORD', '4.9L 88 - 89 L6'),
        ('FORD', '4.9L 88 - 92 L6 10'),
        ('FORD', '5.0L 10'),
        ('FORD', '5.0L 65 - 91 V8 10'),
        ('FORD', '5.0L 88 - 89 V8 10'),
        ('FORD', '5.7L 65 - 84 V8'),
        ('FORD', '5.8L 10'),
        ('FORD', '5.8L 1992 10'),
        ('JEEP', '4.0L 10'),
        ('JEEP', '4.0L 14'),
        ('JEEP', '4.0L 19'),
        ('JEEP', '4.0L 1”'),
        ('JEEP', '4.2L 87 - 89 L6 10'),
        ('JEEP', '4.2L 87 - 89 L6 14'),
        ('NISSAN', '2.4L 85 - 92 L4'),
        ('NISSAN', '2.4L 85 - 92 L4 2.0L 720 74 - 83 L4, L20B, Z22, SD22'),
        ('NISSAN', '2.4L 85 - 92 L4 2.4L 91 - 00 L4, D21'),
        ('NISSAN', '3.0L 89 - 94 Micra'),
        ('NISSAN', '3.0L 89 - 94 NX Coupé DE'),
        ('MAZDA', '4.0L B4000'),
        ('PONTIAC', '5.7L 1998 V8'),
        ('VW', '1.6L (23.40 mm)'),
        ('AUDI', '1.4L TFSI (17.0 mm)'),
        ('BMW', 'Q60 14-16 Sin Paq. Sport'),
        ('MERCEDES BENZ', 'Cayenne Turbo 05-10 R-19”'),
        ('MERCEDES BENZ', 'ne S 05-10 R-19”'),
        ('DODGE', 'Sienna 11-19 Lexux NX200t 15-'),
        ('PEUGEOT', '406 00-05 4Cil.'),
        ('PEUGEOT', 'RAV-4 06-18 Nacio-'),
        ('TOYOTA', 'Sonic RS 12-17 Che-'),
        ('SEAT', 'Ibiza 18. Volkswagen Golf 17-18 9193-D1968 SG'),
    ]
    # (brand, bad_model, base_model)
    merge_map = [
        ('KIA', 'Sorento 14-16 Latino', 'SORENTO'),
        ('HYUNDAI', 'IX20 11-16 Latino', 'ix20 (JC)'),
        ('TOYOTA', 'Corolla CE 07-11 Brasil', 'COROLLA'),
        ('SUZUKI', 'Grand Vitara 09-13 Na-', 'GRAND VITARA'),
        ('CHRYSLER', 'Voyager 00-04 Modelos Europeos', 'VOYAGER'),
    ]
    conn = connect()
    try:
        cur = conn.cursor()

        def lookup_model_id(brand_name, model_name):
            """Return id_model for an exact (brand, model) name pair, or None."""
            cur.execute("""
                SELECT b.id_brand, m.id_model
                FROM brands b JOIN models m ON m.brand_id = b.id_brand
                WHERE b.name_brand = %s AND m.name_model = %s
            """, (brand_name, model_name))
            row = cur.fetchone()
            return row[1] if row else None

        # Resolve IDs
        to_delete = []
        for bname, mname in delete_exact:
            mid = lookup_model_id(bname, mname)
            if mid is None:
                # Report rather than silently drop the entry.
                print(f" NOT FOUND: [{bname}] {mname!r}")
                continue
            to_delete.append((bname, mname, mid))
        to_merge = []
        for bname, bad_name, base_name in merge_map:
            bad_id = lookup_model_id(bname, bad_name)
            base_id = lookup_model_id(bname, base_name)
            if bad_id is None or base_id is None:
                missing = bad_name if bad_id is None else base_name
                print(f" NOT FOUND: [{bname}] {missing!r} (merge '{bad_name}' -> '{base_name}' skipped)")
                continue
            to_merge.append((bname, bad_name, bad_id, base_name, base_id))
        print(f"\nTo delete: {len(to_delete)}")
        for bname, mname, mid in to_delete:
            print(f" [{bname}] '{mname}' id={mid}")
        print(f"\nTo merge: {len(to_merge)}")
        for bname, bad_name, mid, base_name, base_id in to_merge:
            print(f" [{bname}] '{bad_name}' -> '{base_name}'")
        if dry_run:
            print("\n" + "=" * 60)
            print("DRY RUN complete. Run with --execute to apply.")
            print("=" * 60)
            cur.close()
            return
        print("\nApplying merges...")
        for bname, bad_name, mid, base_name, base_id in to_merge:
            migrated = merge_model_to_base(conn, mid, base_id)
            print(f" [{bname}] '{bad_name}' -> '{base_name}' ({migrated} MYEs migrated)")
        conn.commit()
        print("\nApplying deletes...")
        for bname, mname, mid in to_delete:
            delete_model_and_myes(conn, mid)
            print(f" [{bname}] '{mname}' deleted")
        conn.commit()
        print(f"\nDone. Merged {len(to_merge)}, deleted {len(to_delete)}.")
        cur.close()
    finally:
        # Always release the connection, including when a statement raises.
        conn.close()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""Final wave: delete remaining engine-displacement models left by supplier imports.
These are not real models (e.g. '5.7L 85 - 96 V8', '7.4L 73 - 91 V8', '5.8L 93 - 96 V8').
Deleting them removes fake MYEs; compat rows are unlinked (model_year_engine_id=NULL)
and remain searchable by SKU / part number.
"""
import argparse
import os
import psycopg2
# Master DB connection URL; the MASTER_DB_URL environment variable overrides
# the local default.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# Remaining engine-pattern models after v3 cleanup.
# Each entry is a (brand name, model name) pair; the names are engine
# displacement / year-range strings rather than real model names.
ENGINE_PATTERN_MODELS = [
# Chevrolet
("CHEVROLET", "5.7L 85 - 96 V8"),
("CHEVROLET", "5.7L 85 - 96 V8 Yukon"),
("CHEVROLET", "5.7L 99 - 00 V8"),
("CHEVROLET", "5.7L P-300 85 - 98 V8 10"),
("CHEVROLET", "7.4L 73 - 91 V8"),
("CHEVROLET", "7.4L 73 - 91 V8 10"),
("CHEVROLET", "7.4L 85 - 95 V8"),
("CHEVROLET", "7.4L 85 - 95 V8 10"),
("CHEVROLET", "7.4L 87 - 91 V8"),
("CHEVROLET", "7.4L 87 - 91 V8 10"),
# Ford
("FORD", "5.8L 1998 V8"),
("FORD", "5.8L 1998 V8 10"),
("FORD", "5.8L 84 - 87 V8"),
("FORD", "5.8L 84 - 87 V8 10"),
("FORD", "5.8L 84 - 87 V8 Pro"),
("FORD", "5.8L 88 - 89 V8"),
("FORD", "5.8L 88 - 95 V8 10"),
("FORD", "5.8L 89 - 91 V8"),
("FORD", "5.8L 93 - 96 V8"),
("FORD", "5.8L XLT 91 - 97 V8"),
("FORD", "6.2L 10"),
("FORD", "6.8L XL Super Duty 05 - 06"),
("FORD", "6.8L XL Super Duty 05 - 06 10"),
("FORD", "7.3L 1994 V8, FI, Turbo Diesel"),
("FORD", "7.3L 94 - 98 10"),
# Toyota
("TOYOTA", "2.2L 74 - 80 L4, 20R Engine"),
]
def delete_model_and_myes(conn, model_id):
    """Delete a model and its model_year_engine rows.

    Compat rows that reference the removed MYEs are kept but unlinked
    (model_year_engine_id set to NULL); vin_cache rows for them are deleted.
    The transaction is not committed here.
    """
    unlink_compat_sql = (
        "UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)"
    )
    cur = conn.cursor()
    cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
    mye_ids = [found[0] for found in cur.fetchall()]
    if mye_ids:
        cur.execute(unlink_compat_sql, (mye_ids,))
        cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
        cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", (mye_ids,))
    cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))
    cur.close()
def main():
    """Resolve ENGINE_PATTERN_MODELS against the DB and delete what is found.

    Brand names are compared upper-cased and model names exactly. Entries
    with no matching row are reported as NOT FOUND. Without ``--execute`` the
    plan and MYE totals are only printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--execute", action="store_true")
    args = parser.parse_args()
    apply_changes = args.execute
    rule = "=" * 60
    if not apply_changes:
        print(rule)
        print("DRY RUN MODE — no changes will be made")
        print("Run with --execute to apply changes")
        print(rule)
    conn = psycopg2.connect(MASTER_DB_URL)
    conn.autocommit = False
    cur = conn.cursor()
    lookup_sql = """
        SELECT m.id_model, b.name_brand, m.name_model
        FROM models m
        JOIN brands b ON b.id_brand = m.brand_id
        WHERE UPPER(b.name_brand) = %s AND m.name_model = %s
    """
    to_delete = []
    for brand_name, model_name in ENGINE_PATTERN_MODELS:
        cur.execute(lookup_sql, (brand_name, model_name))
        match = cur.fetchone()
        if not match:
            print(f" NOT FOUND: [{brand_name}] {model_name!r}")
            continue
        to_delete.append(match)
    print(f"\nTo delete: {len(to_delete)}")
    total_myes = 0
    for mid, bname, mname in to_delete:
        cur.execute("SELECT COUNT(*) FROM model_year_engine WHERE model_id = %s", (mid,))
        cnt = cur.fetchone()[0]
        total_myes += cnt
        print(f" [{bname}] {mname!r} id={mid} MYEs={cnt}")
    print(f"Total MYEs to remove: {total_myes}")
    if apply_changes:
        for mid, bname, mname in to_delete:
            delete_model_and_myes(conn, mid)
            print(f" Deleted [{bname}] {mname!r}")
        conn.commit()
        print(f"\nDone. Deleted {len(to_delete)} models ({total_myes} MYEs removed).")
    else:
        print("\n" + rule)
        print("DRY RUN complete. Run with --execute to apply.")
        print(rule)
    cur.close()
    conn.close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""Clean models with year suffixes like 'Model 17-18' or 'Model 2010-2015'."""
import os
import re
import sys

import psycopg2
# Master DB connection string. MASTER_DB_URL (the variable the importer
# scripts read) overrides it so the script can run without editing source.
# NOTE(review): the fallback embeds a database password in version control;
# rotate it and drop the literal once every environment sets MASTER_DB_URL.
MASTER_DSN = os.environ.get(
    "MASTER_DB_URL",
    "host=localhost dbname=nexus_autoparts user=postgres password=1123517",
)
def main():
    """Remove models whose name ends in a year range ('X 17-18', 'X 2010-2015').

    For every such model the dependent rows are handled first:
    ``supplier_catalog_compat`` links are nulled, ``vin_cache`` and
    ``model_year_engine`` rows are deleted, then the model itself is deleted.
    Nothing is written unless ``--execute`` is on the command line; the
    summary line is printed in both modes. The connection is always closed.
    """
    dry_run = '--execute' not in sys.argv
    conn = psycopg2.connect(MASTER_DSN)
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT m.id_model, m.name_model, m.brand_id, b.name_brand,
            (SELECT COUNT(*) FROM model_year_engine mye WHERE mye.model_id = m.id_model) as mye_count
            FROM models m
            JOIN brands b ON b.id_brand = m.brand_id
            WHERE m.name_model ~ ' [0-9]{2}-[0-9]{2}$' OR m.name_model ~ ' [0-9]{4}-[0-9]{4}$'
            ORDER BY mye_count DESC
        """)
        rows = cur.fetchall()
        print(f"Found {len(rows)} models with year suffix")

        total_myes = 0
        total_models = 0
        total_scc = 0
        for model_id, name_model, brand_id, brand_name, mye_count in rows:
            total_models += 1
            print(f"[{brand_name}] \"{name_model}\" id={model_id}, MYEs={mye_count}")
            if mye_count > 0:
                cur.execute("SELECT id_mye FROM model_year_engine WHERE model_id = %s", (model_id,))
                mye_ids = [r[0] for r in cur.fetchall()]
                total_myes += len(mye_ids)
                # Count supplier_catalog_compat affected
                cur.execute("SELECT COUNT(*) FROM supplier_catalog_compat WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                scc_count = cur.fetchone()[0]
                total_scc += scc_count
                print(f" -> {scc_count} supplier_catalog_compat rows will be nulled")
                if not dry_run:
                    # Children first: unlink compat rows, then drop the VIN
                    # cache and MYE rows that reference the model.
                    cur.execute("UPDATE supplier_catalog_compat SET model_year_engine_id = NULL WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                    cur.execute("DELETE FROM vin_cache WHERE model_year_engine_id = ANY(%s)", (mye_ids,))
                    cur.execute("DELETE FROM model_year_engine WHERE id_mye = ANY(%s)", (mye_ids,))
            if not dry_run:
                cur.execute("DELETE FROM models WHERE id_model = %s", (model_id,))

        if not dry_run:
            conn.commit()
        print(f"\n{'DRY RUN' if dry_run else 'EXECUTED'}: {total_models} models, {total_myes} MYEs, {total_scc} SCC rows affected")
        cur.close()
    finally:
        # Closing without a commit rolls back, so an error never half-applies.
        conn.close()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,240 @@
#!/usr/bin/env python3
"""
Import Keep Green (KG) catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_keepgreen_catalog.py
"""
import os
import re
import sys
from collections import defaultdict
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Master catalog DB connection string; the MASTER_DB_URL env var overrides it.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# Source workbook, resolved relative to this script's directory (../data/).
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'KG (1).xlsx')
# Value stored in supplier_catalog.supplier_name for every imported item.
SUPPLIER_NAME = 'KEEP GREEN'
# Two-word makes: (FIRST, SECOND) upper-cased tokens -> canonical make name.
MULTI_WORD_MAKES = {
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('LAND', 'ROVER'): 'LAND ROVER',
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('AMERICAN', 'MOTORS'): 'AMERICAN MOTORS',
    ('ROLLS', 'ROYCE'): 'ROLLS ROYCE',
    ('ASTON', 'MARTIN'): 'ASTON MARTIN',
    ('GREAT', 'WALL'): 'GREAT WALL',
}
def connect_master():
    """Open and return a new psycopg2 connection to the master database."""
    connection = psycopg2.connect(MASTER_DB_URL)
    return connection
def normalize_name(name):
    """Return *name* as text with every whitespace run collapsed to one space.

    Newlines count as whitespace. Falsy input (None, '', 0) gives ''.
    """
    if name:
        # split() with no argument already treats '\n' as a separator.
        return ' '.join(str(name).split())
    return ''
def parse_make(carro):
    """Return the vehicle make found at the start of a CARRO_PERTENECIENTE cell.

    The first word is returned as written, unless the first two words
    (compared upper-cased) form a key of MULTI_WORD_MAKES, in which case the
    canonical two-word make is returned. Empty or blank input gives None.
    """
    words = str(carro).strip().split() if carro else []
    if not words:
        return None
    if len(words) > 1:
        pair = (words[0].upper(), words[1].upper())
        if pair in MULTI_WORD_MAKES:
            return MULTI_WORD_MAKES[pair]
    return words[0]
def extract_interchanges(row):
    """Collect the (brand, part_number) interchange pairs of a KG row.

    Six brand/part-number column pairs are read: brand at columns
    5, 7, ..., 15 and the part number in the column right after each.
    A pair is kept only when both cells are non-empty after stripping.
    """
    pairs = []
    width = len(row)
    for marca_col in range(5, 17, 2):
        inter_col = marca_col + 1
        if marca_col >= width or not row[marca_col]:
            continue
        brand = str(row[marca_col]).strip()
        pn = ''
        if inter_col < width and row[inter_col]:
            pn = str(row[inter_col]).strip()
        if brand and pn:
            pairs.append((brand, pn))
    return pairs
def expand_year(year_val):
    """Return the list of integer years described by a spreadsheet year cell.

    Accepted forms:
      * a single year: 1998, '1998', 1998.0
      * a range written with '-' or '/': '1998-1999', '98-99', '1998/2001'
        (two-digit years 70-99 map to 19xx, 00-69 to 20xx; reversed ranges
        are swapped)
      * whitespace-separated four-digit years: '1998 1999'

    Anything else yields ``[None]``. That includes ranges whose endpoints
    fall outside 1900-2100 (e.g. three-digit tokens) or that span more than
    100 years, so malformed cells never expand into nonsense years.
    """
    if year_val is None:
        return [None]
    s = str(year_val).strip()
    if not s:
        return [None]

    # Single 4-digit year
    if re.fullmatch(r'(19|20)\d{2}', s):
        return [int(s)]

    # Range with dash or slash: 1998-1999, 98-99, 1998/1999
    m = re.fullmatch(r'(\d{2,4})\s*[-/]\s*(\d{2,4})', s)
    if m:
        start, end = (int(g) for g in m.groups())
        # Normalize 2-digit years
        if start < 100:
            start = 1900 + start if start >= 70 else 2000 + start
        if end < 100:
            end = 1900 + end if end >= 70 else 2000 + end
        if end < start:
            start, end = end, start
        # Sanity: plausible endpoints and a capped range length
        if start < 1900 or end > 2100 or end - start > 100:
            return [None]
        return list(range(start, end + 1))

    # Several explicit years separated by whitespace: '1998 1999'
    tokens = s.split()
    if len(tokens) > 1 and all(re.fullmatch(r'(19|20)\d{2}', t) for t in tokens):
        return list(dict.fromkeys(int(t) for t in tokens))

    # Plain number, e.g. '1998.0' from a numeric cell
    try:
        y = int(float(s))
    except (ValueError, OverflowError):
        # OverflowError covers 'inf'; ValueError covers text and 'nan'.
        return [None]
    return [y] if 1900 <= y <= 2100 else [None]
def main():
    """Import every sheet of the Keep Green workbook into the supplier tables.

    Each sheet is one category. For every data row the SKU is upserted into
    ``supplier_catalog`` (once per sheet), one ``supplier_catalog_compat`` row
    is inserted per expanded year, and the interchange pairs go to
    ``supplier_catalog_interchange``. Work is committed sheet by sheet.
    Exits with status 1 when the workbook is missing. The workbook and the
    connection are closed even if the import fails midway.
    """
    from contextlib import closing

    print(f"[{datetime.now().isoformat()}] Starting Keep Green import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category, is_active)
        VALUES (%s, %s, %s, %s, true)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category,
            is_active = true
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, NULL, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    stats = defaultdict(int)

    # Read-only workbooks keep the file open until close() is called, and an
    # unclosed connection keeps its transaction open; release both on any exit.
    with closing(wb), closing(connect_master()) as master_conn:
        master_cur = master_conn.cursor()
        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            rows = list(ws.iter_rows(values_only=True))
            if not rows:
                continue
            data_rows = rows[1:]  # first row is the header
            stats['sheets'] += 1
            print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")
            catalog_id_cache = {}
            for idx, row in enumerate(data_rows):
                if idx % 2000 == 0 and idx > 0:
                    print(f" ...{idx} rows processed")
                if not row or len(row) < 5 or not row[4]:
                    stats['skipped_no_sku'] += 1
                    continue
                make = str(row[0]).strip().upper() if row[0] else ''
                model = str(row[1]).strip() if row[1] else ''
                engine = normalize_name(row[2]) if row[2] else None
                year_raw = row[3]
                sku = str(row[4]).strip()
                name = normalize_name(row[17]) if len(row) > 17 and row[17] else sheet_name
                carro = str(row[18]).strip() if len(row) > 18 and row[18] else ''
                if not sku:
                    stats['skipped_no_sku'] += 1
                    continue
                if not make or not model:
                    stats['skipped_no_vehicle'] += 1
                    continue
                stats['rows'] += 1
                # The make parsed from CARRO_PERTENECIENTE wins whenever that
                # cell is non-empty; the MARCA column is only the fallback.
                # NOTE(review): the previous comment stated the opposite
                # precedence; confirm which one is intended.
                parsed_make = parse_make(carro) or make
                # Upsert catalog item (keyed by sku; category = sheet name)
                cache_key = sku
                catalog_id = catalog_id_cache.get(cache_key)
                if catalog_id is None:
                    master_cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, sheet_name))
                    row_result = master_cur.fetchone()
                    catalog_id = row_result[0] if row_result else None
                    catalog_id_cache[cache_key] = catalog_id
                    stats['catalog_items'] += 1
                if catalog_id is None:
                    stats['skipped_no_catalog'] += 1
                    continue
                # Expand years and insert compat rows
                years = expand_year(year_raw)
                for year in years:
                    master_cur.execute(insert_compat_sql, (
                        catalog_id,
                        parsed_make,
                        model,
                        year,
                        engine or None,  # '' (blank after normalising) -> NULL
                        'import_text',
                    ))
                    stats['compat_rows'] += 1
                # Insert interchanges
                interchanges = extract_interchanges(row)
                for brand, pn in interchanges:
                    master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                    stats['interchange_rows'] += 1
            master_conn.commit()
            print(f" Sheet '{sheet_name}' committed.")
        master_cur.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    for k, v in sorted(stats.items()):
        print(f"{k:25s}: {v}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,312 @@
#!/usr/bin/env python3
"""
Import KNADIAN catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_knadian_catalog.py
"""
import os
import re
import sys
from collections import defaultdict
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Master catalog DB connection string; the MASTER_DB_URL env var overrides it.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# Source workbook, resolved relative to this script's directory (../data/).
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'KNADIAN.xlsx')
# Value stored in supplier_catalog.supplier_name for every imported item.
SUPPLIER_NAME = 'KNADIAN'
# Evaluated once at import time: next calendar year is the latest year kept.
MAX_IMPORT_YEAR = datetime.now().year + 1 # reject future years from bad supplier data
# Two-word makes: (FIRST, SECOND) upper-cased tokens -> canonical make name.
MULTI_WORD_MAKES = {
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('LAND', 'ROVER'): 'LAND ROVER',
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('AMERICAN', 'MOTORS'): 'AMERICAN MOTORS',
    ('ROLLS', 'ROYCE'): 'ROLLS ROYCE',
    ('ASTON', 'MARTIN'): 'ASTON MARTIN',
    ('GREAT', 'WALL'): 'GREAT WALL',
}
def connect_master():
    """Return a fresh psycopg2 connection to the master DB (MASTER_DB_URL)."""
    master = psycopg2.connect(MASTER_DB_URL)
    return master
def normalize_name(name):
    """Collapse newlines and repeated whitespace in *name* to single spaces.

    Returns '' for any falsy value; other values are converted with str().
    """
    if not name:
        return ''
    flattened = str(name).replace('\n', ' ')
    words = flattened.split()
    return ' '.join(words)
def parse_year_token(token):
    """Convert a digits-only token such as '05', '98' or '2015' to a year.

    Values 0-69 map to 2000-2069, 70-99 to 1970-1999, and 1000-2100 are
    returned unchanged. Falsy tokens, non-digit text and every other number
    give None.
    """
    if not token:
        return None
    if re.match(r'^\d+$', str(token)) is None:
        return None
    val = int(token)
    # The pattern admits digits only, so val is never negative here.
    if val < 70:
        return 2000 + val
    if val < 100:
        return 1900 + val
    if 1000 <= val <= 2100:
        return val
    return None
def extract_years(text):
    """Split a trailing year expression off *text*.

    Returns ``(years, rest)``. Three trailing forms are tried in order, each
    of which must be preceded by whitespace:
      1. a range 'A/B' or 'A-B' of 2-4 digit tokens (swapped when reversed,
         accepted when both parse and the span is at most 100 years);
      2. a single four-digit 19xx/20xx year;
      3. four digits read as two merged two-digit years ('1315' ->
         2013..2015), accepted when ordered and at most 30 years apart.
    When nothing matches the result is ``([None], stripped_text)``; falsy
    input gives ``([None], '')``.
    """
    if not text:
        return [None], ''
    s = str(text).strip()

    # 1. Trailing range with / or -: YY/YY, YYYY-YYYY, YY-YY
    ranged = re.search(r'\s+(\d{2,4})\s*[-/]\s*(\d{2,4})$', s)
    if ranged:
        low = parse_year_token(ranged.group(1))
        high = parse_year_token(ranged.group(2))
        if low and high:
            low, high = min(low, high), max(low, high)
            if high - low <= 100:
                head = s[:ranged.start()].strip()
                return list(range(low, high + 1)), head

    # 2. Trailing 4-digit year
    single = re.search(r'\s+(19|20)\d{2}$', s)
    if single:
        only_year = int(single.group(0).strip())
        head = s[:single.start()].strip()
        return [only_year], head

    # 3. Four consecutive digits that look like a merged range
    merged = re.search(r'\s+(\d{4})$', s)
    if merged:
        block = merged.group(1)
        first = parse_year_token(block[:2])
        last = parse_year_token(block[2:])
        if first and last and first <= last and last - first <= 30:
            head = s[:merged.start()].strip()
            return list(range(first, last + 1)), head

    return [None], s
def parse_carro(carro):
    """Break a CARRO_PERTENECIENTE value ('ACURA TL 05/10') into its parts.

    Returns a dict with keys 'make', 'model', 'years' and 'raw'. The years
    come from extract_years(); the make is the first word upper-cased, or the
    canonical MULTI_WORD_MAKES entry when the first two words form a known
    pair; the model is whatever remains (None when nothing remains).
    """
    if not carro:
        return {'make': None, 'model': None, 'years': [None], 'raw': carro}

    s = str(carro).strip()
    years, rest = extract_years(s)
    tokens = rest.split()
    if not tokens:
        return {'make': None, 'model': None, 'years': years, 'raw': s}

    # Extract make: a known two-word make consumes two tokens, otherwise one.
    leading = tuple(tok.upper() for tok in tokens[:2])
    if len(leading) == 2 and leading in MULTI_WORD_MAKES:
        make = MULTI_WORD_MAKES[leading]
        tail = tokens[2:]
    else:
        make = tokens[0].upper()
        tail = tokens[1:]

    model = ' '.join(tail) if tail else None
    return dict(make=make, model=model, years=years, raw=s)
def extract_engine(name):
    """Return the text after the first word of a NOMBRE_PIEZA value.

    'BOMBA_REFRIGERANTE L4 2.0' -> 'L4 2.0'. Returns None when the name is
    empty, has a single word, or the remainder is just a position marker
    (DEL., TRAS., FRONT., ...).
    """
    if not name:
        return None
    # normalize_name() leaves single spaces only, so everything after the
    # first space is exactly the remaining words joined back together.
    _first, _sep, remainder = normalize_name(name).partition(' ')
    if not remainder:
        return None
    # Filter out meaningless tokens that should not be engines
    position_markers = {'DEL.', 'TRAS.', 'FRONT.', 'EXT.', 'IZQ.', 'DER.', 'INF.', 'SUP.', 'TRANS.'}
    if remainder.upper() in position_markers:
        return None
    return remainder
def extract_interchanges(row):
    """Collect the (brand, part_number) interchange pairs of a KNADIAN row.

    Six brand/part-number column pairs are read, spanning columns 3..14:
    brand at 3, 5, ..., 13 and the part number in the column right after.
    A pair is kept only when both cells are non-empty after stripping.
    """
    def cell_text(col):
        # Missing or empty cells read as ''.
        if col < len(row) and row[col]:
            return str(row[col]).strip()
        return ''

    found = []
    for pair_no in range(6):
        brand = cell_text(3 + pair_no * 2)
        pn = cell_text(4 + pair_no * 2)
        if brand and pn:
            found.append((brand, pn))
    return found
def main():
    """Import every sheet of the KNADIAN workbook into the supplier tables.

    Each sheet is one category. Per data row: the SKU is upserted into
    ``supplier_catalog`` (once per sheet), years are parsed from
    CARRO_PERTENECIENTE (or from the model column as a fallback), years
    beyond MAX_IMPORT_YEAR are dropped, and one ``supplier_catalog_compat``
    row per remaining year plus the interchange pairs are inserted. Work is
    committed sheet by sheet. Exits with status 1 when the workbook is
    missing. The workbook and the connection are closed even on failure.
    """
    from contextlib import closing

    print(f"[{datetime.now().isoformat()}] Starting KNADIAN import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category, is_active)
        VALUES (%s, %s, %s, %s, true)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category,
            is_active = true
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, NULL, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    stats = defaultdict(int)

    # Read-only workbooks keep the file open until close() is called, and an
    # unclosed connection keeps its transaction open; release both on any exit.
    with closing(wb), closing(connect_master()) as master_conn:
        master_cur = master_conn.cursor()
        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            rows = list(ws.iter_rows(values_only=True))
            if not rows:
                continue
            data_rows = rows[1:]  # first row is the header
            stats['sheets'] += 1
            print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")
            catalog_id_cache = {}
            for idx, row in enumerate(data_rows):
                if idx % 2000 == 0 and idx > 0:
                    print(f" ...{idx} rows processed")
                if not row or len(row) < 3 or not row[2]:
                    stats['skipped_no_sku'] += 1
                    continue
                make_col = str(row[0]).strip().upper() if row[0] else ''
                model_col = str(row[1]).strip() if row[1] else ''
                sku = str(row[2]).strip()
                part_name = normalize_name(row[15]) if len(row) > 15 and row[15] else ''
                # The catalog item still needs a name, so fall back to the sheet.
                name = part_name or sheet_name
                carro = str(row[16]).strip() if len(row) > 16 and row[16] else ''
                if not sku:
                    stats['skipped_no_sku'] += 1
                    continue
                # Always try to parse year from CARRO_PERTENECIENTE
                parsed = parse_carro(carro)
                years = parsed['years']
                # Prefer explicit make/model columns; fallback to parsed carro
                make = make_col if make_col else parsed['make']
                model = model_col if model_col else parsed['model']
                # If year still missing, maybe the model column itself contains a year
                if years == [None] and model_col:
                    years, _ = extract_years(model_col)
                if not make or not model:
                    stats['skipped_no_vehicle'] += 1
                    continue
                # Filter out future years and de-duplicate (order preserved)
                filtered_years = []
                for y in years:
                    if y is None:
                        if None not in filtered_years:
                            filtered_years.append(None)
                    elif y <= MAX_IMPORT_YEAR:
                        if y not in filtered_years:
                            filtered_years.append(y)
                years = filtered_years if filtered_years else [None]
                stats['rows'] += 1
                # Upsert catalog item (keyed by sku)
                cache_key = sku
                catalog_id = catalog_id_cache.get(cache_key)
                if catalog_id is None:
                    master_cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, sheet_name))
                    row_result = master_cur.fetchone()
                    catalog_id = row_result[0] if row_result else None
                    catalog_id_cache[cache_key] = catalog_id
                    stats['catalog_items'] += 1
                if catalog_id is None:
                    stats['skipped_no_catalog'] += 1
                    continue
                # Derive the engine from the real part name only: when the
                # name is the sheet-name fallback, its trailing words are a
                # category label, not an engine description.
                engine = extract_engine(part_name)
                for year in years:
                    master_cur.execute(insert_compat_sql, (
                        catalog_id,
                        make,
                        model,
                        year,
                        engine,
                        'import_text',
                    ))
                    stats['compat_rows'] += 1
                interchanges = extract_interchanges(row)
                for brand, pn in interchanges:
                    master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                    stats['interchange_rows'] += 1
            master_conn.commit()
            print(f" Sheet '{sheet_name}' committed.")
        master_cur.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    for k, v in sorted(stats.items()):
        print(f"{k:25s}: {v}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,235 @@
#!/usr/bin/env python3
"""
Import LUK catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_luk_catalog.py
"""
import os
import re
import sys
from collections import Counter
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Master catalog DB connection string; the MASTER_DB_URL env var overrides it.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# Tenant DB connection string, used only by connect_tenant().
# NOTE(review): neither connect_tenant() nor TENANT_ID is referenced by the
# import logic in this script; possibly left over from before the supplier
# tables moved to the master DB — confirm before removing.
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')
# Source workbook, resolved relative to this script's directory (../data/).
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'LUK.xlsx')
# Value stored in supplier_catalog.supplier_name for every imported item.
SUPPLIER_NAME = 'LUK'
TENANT_ID = 31
# Two-word makes: (FIRST, SECOND) upper-cased tokens -> canonical make name.
MULTI_WORD_MAKES = {
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('MG', 'ROVER'): 'MG ROVER',
}
# Upper-cased words that parse_luk() drops from the model text as notes.
NOTE_KEYWORDS = {
    'VOLANTE', 'SÓLIDO', 'SOLIDO', 'TIPO', 'CAJA', 'PLANO',
    'ESCALÓN', 'ESCALON', 'MOTOR', 'EMBRAGUE', 'DOBLE', 'HUMEDO',
}
def connect_master():
    """Create a psycopg2 connection to the master database and return it."""
    dsn = MASTER_DB_URL
    return psycopg2.connect(dsn)
def connect_tenant():
    """Create a psycopg2 connection to the tenant database and return it."""
    dsn = TENANT_DB_URL
    return psycopg2.connect(dsn)
def normalize_name(name):
    """Return *name* as single-spaced text; falsy values become ''.

    Newlines and any other whitespace runs are reduced to one space and the
    ends are trimmed.
    """
    return ' '.join(str(name).replace('\n', ' ').split()) if name else ''
def parse_luk(carro):
    """Split a CARRO_PERTENECIENTE value into ``(make, model, year)``.

    * year: the last token that is a four-digit 19xx/20xx number, or None.
    * make: the first token as written, a MULTI_WORD_MAKES entry when the
      first two tokens form a known pair, or 'CHRYSLER / DODGE' for the
      three-token 'CHRYSLER / DODGE' prefix.
    * model: the remaining tokens (year removed) without NOTE_KEYWORDS; if
      that leaves nothing, the remaining tokens are used unfiltered.

    Empty or blank input gives ``(None, None, None)``.
    """
    if not carro:
        return None, None, None
    parts = str(carro).split()
    if not parts:
        return None, None, None

    # Extract year (last occurrence of 19xx or 20xx): scan from the right.
    year = None
    year_idx = None
    for pos in range(len(parts) - 1, -1, -1):
        if re.match(r'^(19|20)\d{2}$', parts[pos]):
            year, year_idx = int(parts[pos]), pos
            break

    # Extract make
    make, make_len = parts[0], 1
    if len(parts) >= 2:
        first_two = (parts[0].upper(), parts[1].upper())
        if first_two in MULTI_WORD_MAKES:
            make, make_len = MULTI_WORD_MAKES[first_two], 2
        elif len(parts) >= 3 and parts[0].upper() == 'CHRYSLER' and parts[1] == '/' and parts[2].upper() == 'DODGE':
            make, make_len = 'CHRYSLER / DODGE', 3

    # Remaining parts between make and year, plus anything after the year
    if year_idx is None:
        remaining = parts[make_len:]
    else:
        remaining = parts[make_len:year_idx] + parts[year_idx + 1:]

    # Clean note keywords; fall back to the unfiltered text if nothing is left
    kept = [word for word in remaining if word.upper() not in NOTE_KEYWORDS]
    model = ' '.join(kept) or ' '.join(remaining)
    return make, model, year
def extract_interchanges(row):
    """Collect (brand, part_number) pairs from the 4 LUK interchange pairs.

    Brands sit at columns 2, 4, 6, 8 and part numbers at 3, 5, 7, 9. A pair
    is kept only when both cells are non-empty after stripping.
    """
    pairs = []
    # zip() stops at the shorter slice, which drops a brand whose part-number
    # column is missing: such a pair could never be complete anyway.
    for raw_brand, raw_pn in zip(row[2:10:2], row[3:10:2]):
        if not raw_brand or not raw_pn:
            continue
        brand = str(raw_brand).strip()
        pn = str(raw_pn).strip()
        if brand and pn:
            pairs.append((brand, pn))
    return pairs
def main():
    """Import the 'KIT_CLUTCH' sheet of the LUK workbook into the supplier tables.

    A first pass picks the most frequent name per SKU. The second pass
    upserts each SKU into ``supplier_catalog`` (category 'KIT_CLUTCH'),
    inserts one ``supplier_catalog_compat`` row per spreadsheet row whose
    vehicle text yields a make, and stores the interchange pairs. Everything
    is committed once at the end. Exits with status 1 when the workbook is
    missing. The workbook and the connection are closed even on failure.
    """
    from contextlib import closing

    print(f"[{datetime.now().isoformat()}] Starting LUK import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    # Four columns, four placeholders: the statement is executed with
    # (supplier_name, sku, name, category).
    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    stats = {
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'skipped_no_vehicle': 0,
    }

    # A single connection is opened; it and the read-only workbook are
    # released on every exit path.
    with closing(wb), closing(connect_master()) as master_conn:
        ws = wb['KIT_CLUTCH']
        master_cur = master_conn.cursor()

        # Pre-scan: determine most common name per SKU
        print("Pre-scanning SKUs...")
        sku_name_counter = Counter()
        for row in ws.iter_rows(min_row=2, values_only=True):
            if not row or len(row) <= 10:
                continue  # too short to hold SKU (col 1) and name (col 10)
            sku = str(row[1]).strip() if row[1] else ''
            name = normalize_name(row[10])
            if sku and name:
                sku_name_counter[(sku, name)] += 1
        sku_best_name = {}
        for (sku, name), count in sku_name_counter.items():
            if sku not in sku_best_name or count > sku_best_name[sku][1]:
                sku_best_name[sku] = (name, count)
        print(f" Found {len(sku_best_name)} unique SKUs")

        catalog_id_cache = {}
        for idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True)):
            if idx % 1000 == 0 and idx > 0:
                print(f" ...{idx} rows processed")
            if not row or len(row) < 2 or not row[1]:
                continue
            sku = str(row[1]).strip()
            name = sku_best_name.get(sku, ('', 0))[0]
            carro_raw = str(row[11]).strip() if len(row) > 11 and row[11] else ''
            if not sku or not name:
                continue
            stats['rows'] += 1

            cache_key = (sku, 'KIT_CLUTCH')
            catalog_id = catalog_id_cache.get(cache_key)
            if catalog_id is None:
                master_cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, 'KIT_CLUTCH'))
                catalog_id = master_cur.fetchone()[0]
                catalog_id_cache[cache_key] = catalog_id
                stats['catalog_items'] += 1

            make, model, year = parse_luk(carro_raw)
            if make:
                stats['vehicles_parsed'] += 1
                master_cur.execute(insert_compat_sql, (
                    catalog_id,
                    make,
                    model,
                    year,
                    None,
                    None,
                    'import_text',
                ))
                stats['compat_rows'] += 1
            else:
                # No vehicle text at all: a compat row with NULL make/model
                # can never be matched to a vehicle, so do not create it.
                stats['skipped_no_vehicle'] += 1

            interchanges = extract_interchanges(row)
            for brand, pn in interchanges:
                master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                stats['interchange_rows'] += 1

        master_conn.commit()
        master_cur.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
    print(f"Skipped (no vehicle): {stats['skipped_no_vehicle']}")


if __name__ == '__main__':
    main()

183
scripts/import_rached_excel.py Executable file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
"""
Importar inventario de refaccionaria_rached desde Excel.
Archivo fuente: /home/Autopartes/data/PRODUCTOS_RACHED_2026.xlsx
Hoja: Hoja1
Columnas:
A: Codigo -> part_number
B: CB -> barcode (ignored, mostly empty)
C: Cve -> sku_alias (inventory_sku_aliases)
D: Descripcion -> name
E: Precio Costo -> cost
F: Precio Venta -> price_1
No hay columnas de stock, marca, ni vehiculo. Stock se deja en 0.
"""
import os
import sys
import re
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'pos'))
import psycopg2
from services.barcode_generator import generate_barcodes_batch
# ─── Config ──────────────────────────────────────────
# Target tenant database and the branch whose inventory rows are loaded.
DB_NAME = "tenant_refaccionaria_rached"
BRANCH_ID = 1
EXCEL_PATH = "/home/Autopartes/data/PRODUCTOS_RACHED_2026.xlsx"
# Number of newly inserted rows between intermediate commits (see the
# insert loop further down).
BATCH_SIZE = 500
# Connect as local postgres user (peer auth)
conn = psycopg2.connect(f"dbname={DB_NAME} user=postgres")
conn.autocommit = False
cur = conn.cursor()
# ─── Read Excel ──────────────────────────────────────
import openpyxl
wb = openpyxl.load_workbook(EXCEL_PATH, data_only=True)
ws = wb["Hoja1"]
# Skip the header row; every remaining row is held in memory as a tuple.
rows = list(ws.iter_rows(min_row=2, values_only=True))
print(f"Filas leidas del Excel: {len(rows)}")
# ─── Pre-fetch existing part_numbers ─────────────────
# Maps upper-cased, stripped part_number -> inventory id for this branch.
# NOTE(review): assumes part_number is never NULL (.strip() would raise) —
# confirm against the table definition.
existing_map = {}
cur.execute("SELECT id, part_number FROM inventory WHERE branch_id = %s", (BRANCH_ID,))
for item_id, pn in cur.fetchall():
    existing_map[pn.strip().upper()] = item_id
cur.close()
# End the read transaction opened by the SELECT above.
conn.commit()
# ─── Prepare lists ───────────────────────────────────
# Turn the raw spreadsheet rows into the tuples consumed by the DB steps.
to_insert = []  # (part_number, name, cost, price_1)
to_alias = []  # (part_number, alias_sku)
skipped = 0
for row in rows:
    # Text cells (A, C, D): None becomes '', everything else is stripped.
    codigo, cve, descripcion = (
        "" if row[col] is None else str(row[col]).strip() for col in (0, 2, 3)
    )
    # Price cells (E, F): None becomes 0.0.
    precio_costo, precio_venta = (
        0.0 if row[col] is None else float(row[col]) for col in (4, 5)
    )
    if not (codigo and descripcion):
        skipped += 1
        continue
    # Clean description (remove weird chars)
    descripcion = descripcion.translate({0x81: None, 0x80: None}).strip()
    to_insert.append((codigo, descripcion, precio_costo, precio_venta))
    if cve:
        to_alias.append((codigo, cve))
print(f"Filas validas para importar: {len(to_insert)}")
print(f"Filas con SKU alternativo (Cve): {len(to_alias)}")
print(f"Filas saltadas (sin codigo/descripcion): {skipped}")
# ─── Batch insert / update inventory ─────────────────
cur = conn.cursor()
inserted_count = 0
updated_count = 0
# Split into new vs existing
# Rows whose upper-cased code is already in existing_map are updated by id;
# all others go through the INSERT ... ON CONFLICT path below.
new_items = []
update_items = []
for codigo, descripcion, cost, price in to_insert:
    key = codigo.upper()
    if key in existing_map:
        update_items.append((descripcion, cost, price, existing_map[key]))
    else:
        new_items.append((codigo, descripcion, cost, price))
print(f"Nuevos: {len(new_items)} | Existentes a actualizar: {len(update_items)}")
# Generate barcodes for new items in batch
# NOTE(review): assumes generate_barcodes_batch returns one barcode per
# requested item, since barcodes[i] is indexed per new item — confirm.
barcodes = []
if new_items:
    barcodes = generate_barcodes_batch(conn, DB_NAME, len(new_items))
# Insert new items
for i, (codigo, descripcion, cost, price) in enumerate(new_items):
    barcode = barcodes[i]
    # On a (branch_id, part_number) conflict the name is overwritten, while
    # cost and price_1 are only overwritten by values greater than zero.
    # "xmax = 0" is used to tell a fresh insert from a conflict update.
    cur.execute(
        """
INSERT INTO inventory
(branch_id, part_number, barcode, name, cost, price_1, unit, is_active)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (branch_id, part_number) DO UPDATE SET
name = EXCLUDED.name,
cost = CASE WHEN EXCLUDED.cost > 0 THEN EXCLUDED.cost ELSE inventory.cost END,
price_1 = CASE WHEN EXCLUDED.price_1 > 0 THEN EXCLUDED.price_1 ELSE inventory.price_1 END
RETURNING id, (xmax = 0) AS inserted
""",
        (BRANCH_ID, codigo, barcode, descripcion, cost, price, "PZA", True)
    )
    item_id, was_inserted = cur.fetchone()
    if was_inserted:
        inserted_count += 1
    else:
        updated_count += 1
    # Add to map for alias linking
    existing_map[codigo.upper()] = item_id
    # Commit every BATCH_SIZE rows so a late failure keeps earlier batches.
    if (i + 1) % BATCH_SIZE == 0:
        conn.commit()
        print(f" Procesados {i + 1}/{len(new_items)} nuevos...")
# Update existing items (that weren't caught by ON CONFLICT above, if any)
for descripcion, cost, price, item_id in update_items:
    # Same rule as above: zero cost/price keeps the stored value.
    cur.execute(
        """
UPDATE inventory SET
name = %s,
cost = CASE WHEN %s > 0 THEN %s ELSE cost END,
price_1 = CASE WHEN %s > 0 THEN %s ELSE price_1 END
WHERE id = %s
""",
        (descripcion, cost, cost, price, price, item_id)
    )
    updated_count += 1
conn.commit()
print(f"Insertados: {inserted_count} | Actualizados: {updated_count}")
# ─── Insert SKU aliases ──────────────────────────────
# Link each alternative SKU (Cve) to its inventory row. Every insert runs
# inside its own SAVEPOINT: in PostgreSQL a failed statement aborts the whole
# transaction, so without it one bad alias would make every later insert fail
# and the final commit would discard all aliases.
alias_inserted = 0
alias_skipped = 0
for codigo, cve in to_alias:
    item_id = existing_map.get(codigo.upper())
    if not item_id:
        alias_skipped += 1
        continue
    cur.execute("SAVEPOINT alias_insert")
    try:
        cur.execute(
            """
            INSERT INTO inventory_sku_aliases (inventory_id, sku, label)
            VALUES (%s, %s, %s)
            ON CONFLICT (inventory_id, sku) DO NOTHING
            """,
            (item_id, cve, "Cve")
        )
        # Read rowcount before RELEASE overwrites it.
        newly_added = cur.rowcount > 0
    except psycopg2.Error as e:
        # Undo only this alias and keep the transaction usable.
        cur.execute("ROLLBACK TO SAVEPOINT alias_insert")
        print(f" Alias error for {codigo}/{cve}: {e}")
        alias_skipped += 1
    else:
        cur.execute("RELEASE SAVEPOINT alias_insert")
        if newly_added:
            alias_inserted += 1
conn.commit()
cur.close()
conn.close()
print("\n========================================")
print("IMPORTACION RACHED COMPLETADA")
print("========================================")
print(f"Filas procesadas: {len(to_insert)}")
print(f"Nuevos insertados: {inserted_count}")
print(f"Exist. actualizados:{updated_count}")
print(f"SKU aliases creados:{alias_inserted}")
print(f"Aliases fallidos: {alias_skipped}")
print(f"Filas saltadas: {skipped}")
print("========================================")

View File

@@ -0,0 +1,303 @@
#!/usr/bin/env python3
"""
Import Raybestos catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_raybestos_catalog.py
"""
import os
import re
import sys
from collections import Counter
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Master catalog DB connection string; the MASTER_DB_URL env var overrides it.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# Tenant DB connection string, used by connect_tenant().
# NOTE(review): TENANT_DB_URL / TENANT_ID look like leftovers from before the
# supplier tables moved to the master DB — confirm they are still needed.
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')
# Source workbook, resolved relative to this script's directory (../data/).
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'RAYBESTOS.xlsx')
SUPPLIER_NAME = 'RAYBESTOS'
TENANT_ID = 31
# Upper-cased first words that extract_make() accepts as the start of a make.
# Includes the first word of two-word makes (ALFA, LAND, ...) and the
# spellings VOLSWAGEN / VW, which extract_make() maps to 'Volkswagen'.
KNOWN_MAKES = {
    'ACURA', 'ALFA', 'AMERICAN', 'ASTON', 'AUDI', 'BMW', 'BUICK', 'CADILLAC',
    'CHEVROLET', 'CHRYSLER', 'CITROEN', 'DAEWOO', 'DODGE', 'FIAT', 'FORD',
    'GMC', 'GREAT', 'HONDA', 'HYUNDAI', 'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP',
    'KIA', 'LAMBORGHINI', 'LAND', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES',
    'MERCURY', 'MINI', 'MITSUBISHI', 'NISSAN', 'PEUGEOT', 'PONTIAC', 'PORSCHE',
    'RENAULT', 'ROLLS', 'SATURN', 'SCION', 'SEAT', 'SKODA', 'SMART', 'SUBARU',
    'SUZUKI', 'TESLA', 'TOYOTA', 'VOLKSWAGEN', 'VOLSWAGEN', 'VOLVO', 'VW'
}
# Upper-cased words that parse_raybestos() reads as an axle/side position.
POS_KEYWORDS = {'DELANTERA', 'TRASERA', 'TAS', 'DEL', 'TRAS', 'FRONT', 'REAR', 'LAT', 'IZQ', 'DER'}
# Upper-cased market/origin notes that parse_raybestos() drops from the model.
NOTE_KEYWORDS = {'LATIN', 'AMERICA', 'NACIONAL', 'USA', 'EUROPA', 'IMPORTADO'}
def connect_master():
    """Connect to the master database and hand back the psycopg2 connection."""
    handle = psycopg2.connect(MASTER_DB_URL)
    return handle
def connect_tenant():
    """Connect to the tenant database and hand back the psycopg2 connection."""
    handle = psycopg2.connect(TENANT_DB_URL)
    return handle
def normalize_name(name):
    """Squeeze all whitespace in *name* (newlines too) down to single spaces.

    Any falsy value yields ''; other values are stringified first.
    """
    if not name:
        return ''
    pieces = str(name).split()
    return ' '.join(pieces)
def parse_abbr_year(token):
    """Expand an abbreviated year string: '05' -> 2005, '98' -> 1998.

    Numbers below 50 belong to the 2000s and 50-99 to the 1900s. Empty or
    non-digit tokens and numbers of 100 or more give None.
    """
    if not token or not token.isdigit():
        return None
    n = int(token)
    if n >= 100:
        return None
    century = 2000 if n < 50 else 1900
    return century + n
def extract_make(parts):
    """Return ``(make, words_used)`` for the make at the start of *parts*.

    ``(None, 0)`` is returned when *parts* is empty or its first word
    (upper-cased) is not in KNOWN_MAKES. Known two-word makes return their
    upper-case canonical name and 2; 'VOLSWAGEN' and 'VW' both return
    'Volkswagen'; any other known make returns the first word as written.
    """
    if not parts:
        return None, 0
    first = parts[0].upper()
    if first not in KNOWN_MAKES:
        return None, 0

    # First word -> second word that completes a two-word make.
    second_word = {
        'ALFA': 'ROMEO',
        'MERCEDES': 'BENZ',
        'ROLLS': 'ROYCE',
        'LAND': 'ROVER',
        'GREAT': 'WALL',
        'AMERICAN': 'MOTORS',
        'ASTON': 'MARTIN',
    }
    expected = second_word.get(first)
    if expected is not None and len(parts) >= 2 and parts[1].upper() == expected:
        return f'{first} {expected}', 2

    # Normalize common typos
    if first in ('VOLSWAGEN', 'VW'):
        return 'Volkswagen', 1
    return parts[0], 1
def parse_raybestos(carro, last_make):
    """Parse one vehicle cell into ``(make, model, position, year, last_make)``.

    * year: a trailing four-digit 19xx/20xx token; otherwise the first
      'NN-NN' range token (its end year is used) or two-digit token found
      after the make.
    * make: taken from extract_make(); when the text does not start with a
      known make, *last_make* is reused (forward fill). The returned
      last_make is updated whenever a make is found in the text.
    * position: derived from POS_KEYWORDS tokens; the last one wins.
    * model: the remaining tokens without position and NOTE_KEYWORDS words.

    Empty input returns ``(None, None, None, None, last_make)``.

    NOTE(review): abbreviations give upper-case positions ('DELANTERA',
    'TRASERA') while spelled-out or other keywords are title-cased
    ('Delantera', 'Izq') — confirm consumers compare case-insensitively.
    """
    if not carro:
        return None, None, None, None, last_make
    parts = str(carro).split()
    if not parts:
        return None, None, None, None, last_make

    # Extract 4-digit year from end
    year = None
    if re.match(r'^(19|20)\d{2}$', parts[-1]):
        year = int(parts.pop())

    # Extract make (or forward-fill the previous one)
    make, make_len = extract_make(parts)
    if make:
        last_make = make
        remaining = parts[make_len:]
    else:
        make = last_make if last_make else None
        remaining = parts[:]

    # Extract abbreviated year or year range from remaining
    if year is None and remaining:
        for pos, token in enumerate(remaining):
            # Year range like 17-18, 90-05
            span = re.match(r'^(\d{2})-(\d{2})$', token)
            if span:
                year = parse_abbr_year(span.group(2))  # use end year
                remaining = remaining[:pos] + remaining[pos + 1:]
                break
            # Single 2-digit year
            if re.match(r'^\d{2}$', token):
                short_year = parse_abbr_year(token)
                if short_year:
                    year = short_year
                    remaining = remaining[:pos] + remaining[pos + 1:]
                    break

    # Extract position keywords and notes
    abbreviations = {
        'TAS': 'TRASERA',
        'DEL': 'DELANTERA',
        'FRONT': 'DELANTERA',
        'TRAS': 'TRASERA',
        'REAR': 'TRASERA',
    }
    position = None
    model_words = []
    for word in remaining:
        upper_word = word.upper()
        if upper_word in POS_KEYWORDS:
            position = abbreviations.get(upper_word, upper_word.title())
        elif upper_word not in NOTE_KEYWORDS:
            model_words.append(word)
    model = ' '.join(model_words)
    return make, model, position, year, last_make
def extract_interchanges(row):
    """Collect (brand, part_number) pairs from the 2 Raybestos interchange pairs.

    Brand/part-number columns are (2, 3) and (4, 5). A pair is kept only
    when both cells are non-empty after stripping.
    """
    column_pairs = ((2, 3), (4, 5))
    size = len(row)
    result = []
    for marca_col, inter_col in column_pairs:
        if not (marca_col < size and row[marca_col]):
            continue
        brand = str(row[marca_col]).strip()
        has_pn = inter_col < size and row[inter_col]
        pn = str(row[inter_col]).strip() if has_pn else ''
        if brand and pn:
            result.append((brand, pn))
    return result
def main():
    """Import the Raybestos 'Freno_de_disco' sheet into the supplier catalog tables.

    Steps:
      1. Pre-scan the sheet and keep the most frequent name for every SKU.
      2. Upsert one ``supplier_catalog`` row per SKU (cached for the run).
      3. Insert one text-only ``supplier_catalog_compat`` row per sheet row;
         the parsed position goes into the ``engine`` column and
         ``model_year_engine_id`` is left NULL.
      4. Insert the interchange (brand, part number) pairs of each row.

    All writes are committed once at the end; the cursor and connection are
    always released. Exits with status 1 when the Excel file is missing.
    """
    print(f"[{datetime.now().isoformat()}] Starting Raybestos import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)
    ws = wb['Freno_de_disco']

    # Pre-scan: determine most common name per SKU
    print("Pre-scanning SKUs...")
    sku_name_counter = Counter()
    for row in ws.iter_rows(min_row=2, values_only=True):
        sku = str(row[1]).strip() if row[1] else ''
        name = normalize_name(row[6])
        if sku and name:
            sku_name_counter[(sku, name)] += 1
    sku_best_name = {}
    for (sku, name), count in sku_name_counter.items():
        if sku not in sku_best_name or count > sku_best_name[sku][1]:
            sku_best_name[sku] = (name, count)
    print(f" Found {len(sku_best_name)} unique SKUs")

    # Four columns, four placeholders: the execute() call below passes
    # exactly (supplier_name, sku, name, category).
    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    stats = {
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'forward_filled_make': 0,
    }

    # One connection only; released even if a row fails part-way through.
    master_conn = connect_master()
    try:
        master_cur = master_conn.cursor()
        try:
            catalog_id_cache = {}
            last_make = None
            for idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True)):
                if idx % 1000 == 0 and idx > 0:
                    print(f" ...{idx} rows processed")
                if not row or not row[1]:
                    continue
                sku = str(row[1]).strip()
                name = sku_best_name.get(sku, ('', 0))[0]
                carro_raw = str(row[7]).strip() if row[7] else ''
                if not sku or not name:
                    continue
                stats['rows'] += 1

                cache_key = (sku, 'Freno_de_disco')
                catalog_id = catalog_id_cache.get(cache_key)
                if catalog_id is None:
                    master_cur.execute(
                        upsert_catalog_sql,
                        (SUPPLIER_NAME, sku, name, 'Freno_de_disco'),
                    )
                    catalog_id = master_cur.fetchone()[0]
                    catalog_id_cache[cache_key] = catalog_id
                    stats['catalog_items'] += 1

                make, model, position, year, last_make = parse_raybestos(carro_raw, last_make)
                # A make was returned although the row text itself has none:
                # it was carried over from an earlier row.
                if make and carro_raw and not extract_make(carro_raw.split())[0]:
                    stats['forward_filled_make'] += 1
                stats['vehicles_parsed'] += 1

                master_cur.execute(insert_compat_sql, (
                    catalog_id,
                    make,
                    model,
                    year,
                    position,
                    None,
                    'import_text',
                ))
                stats['compat_rows'] += 1

                for brand, pn in extract_interchanges(row):
                    master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                    stats['interchange_rows'] += 1

            master_conn.commit()
        finally:
            master_cur.close()
    finally:
        master_conn.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
    print(f"Forward-filled makes: {stats['forward_filled_make']}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,285 @@
#!/usr/bin/env python3
"""
Import VAZLO catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_vazlo_catalog.py
"""
import os
import re
import sys
from collections import defaultdict
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Database URLs (overridable through the environment) and import settings.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'VAZLO (1).xlsx')
SUPPLIER_NAME = 'VAZLO'
TENANT_ID = 31

# Trailing tokens that are stripped from the model and kept as the position.
# Both dotted and undotted spellings are listed.
POS_KEYWORDS = {
    'DEL.', 'TRAS.', 'FRONT.', 'EXT.', 'IZQ.', 'DER.', 'RUEDA', 'CAJA',
    'INF.', 'SUP.', 'TRANS.', 'STD', 'AWD', '2/4WD', '4WD', 'FWD', 'RWD',
    '4X4', 'TURBO', 'GASOLINA', 'DIESEL',
    'DEL', 'TRAS', 'FRONT', 'EXT', 'IZQ', 'DER', 'INF', 'SUP', 'TRANS',
}

# Makes spelled with two words: (first, second) upper-cased -> canonical name.
MULTI_WORD_MAKES = {
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('LAND', 'ROVER'): 'LAND ROVER',
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('AMERICAN', 'MOTORS'): 'AMERICAN MOTORS',
    ('ROLLS', 'ROYCE'): 'ROLLS ROYCE',
    ('ASTON', 'MARTIN'): 'ASTON MARTIN',
    ('GREAT', 'WALL'): 'GREAT WALL',
}
def connect_master():
    """Open a new psycopg2 connection to the database at MASTER_DB_URL."""
    connection = psycopg2.connect(MASTER_DB_URL)
    return connection
def connect_tenant():
    """Open a new psycopg2 connection to the database at TENANT_DB_URL."""
    connection = psycopg2.connect(TENANT_DB_URL)
    return connection
def collect_all_skus(wb):
    """Return the set of every non-empty SKU found in any sheet of ``wb``.

    The SKU is read from the second cell of each row, starting at row 2,
    and stripped of surrounding whitespace. The result is used to detect
    SKUs that leaked into the model text.
    """
    found = set()
    for sheet_name in wb.sheetnames:
        sheet = wb[sheet_name]
        for row in sheet.iter_rows(min_row=2, values_only=True):
            cell = row[1]
            if not cell:
                continue
            sku = str(cell).strip()
            if sku:
                found.add(sku)
    return found
def parse_carro(carro, all_skus):
    """Parse a CARRO_PERTENECIENTE cell into make, model, year and position.

    Examples of accepted input:
        'ACURA TL DEL. 2015'
        'BMW X1 SDRIVE 20IA TRAS. 2018'
        'ACURA TL FRONT. DER. 2004'
        'AUDI 4000S CAJA 1980'
        'MERCEDES BENZ C350 E --'
        'ACURA TLX 3429'   (3429 is a SKU inserted into the model)

    Args:
        carro: Raw cell value; falsy values produce an all-None result.
        all_skus: Collection of known SKU strings, used to recognise a SKU
            that leaked into the end of the model text.

    Returns:
        dict with keys ``make``, ``model``, ``year``, ``position``, ``raw``.
        ``position`` is the space-joined trailing position keywords
        (an empty string when there are none).
    """
    if not carro:
        return {'make': None, 'model': None, 'year': None, 'position': None, 'raw': carro}
    s = str(carro).strip()
    parts = s.split()
    if not parts:
        return {'make': None, 'model': None, 'year': None, 'position': None, 'raw': s}

    # Extract year from end
    year = None
    if re.match(r'^(19|20)\d{2}$', parts[-1]):
        year = int(parts[-1])
        parts = parts[:-1]
    # Remove trailing '--' (no-year marker)
    if parts and parts[-1] == '--':
        parts = parts[:-1]

    # Extract make: two-word makes first, otherwise the first token.
    make = parts[0] if parts else ''
    two_word_key = (parts[0].upper(), parts[1].upper()) if len(parts) >= 2 else None
    if two_word_key in MULTI_WORD_MAKES:
        make = MULTI_WORD_MAKES[two_word_key]
        parts = parts[2:]
    else:
        parts = parts[1:]

    # Extract position keywords from the end
    position_parts = []
    while parts and parts[-1].upper() in POS_KEYWORDS:
        position_parts.insert(0, parts[-1])
        parts = parts[:-1]

    # Remove a trailing 3-4 digit token that is a known SKU, e.g.
    # "ACURA TLX 3429" -> model "TLX". Only done when other model words
    # remain: a lone numeric token is the model itself (626, 720, 5000...),
    # and stripping it would leave the model empty.
    model_parts = list(parts)
    if (
        len(model_parts) > 1
        and re.match(r'^\d{3,4}$', model_parts[-1])
        and model_parts[-1] in all_skus
    ):
        model_parts = model_parts[:-1]

    return {
        'make': make,
        'model': ' '.join(model_parts),
        'year': year,
        'position': ' '.join(position_parts),
        'raw': s,
    }
def extract_interchanges(row):
    """Collect (brand, part_number) pairs from the 11 interchange column pairs.

    Brand cells are at the even indexes 2..22, each followed by its
    part-number cell. A pair is kept only when both values are non-empty
    after stripping; columns beyond the end of ``row`` are treated as empty.
    """
    pairs = []
    width = len(row)
    for marca_col in range(2, 24, 2):
        inter_col = marca_col + 1
        if marca_col >= width or not row[marca_col]:
            continue
        raw_pn = row[inter_col] if inter_col < width else None
        brand = str(row[marca_col]).strip()
        pn = str(raw_pn).strip() if raw_pn else ''
        if brand and pn:
            pairs.append((brand, pn))
    return pairs
def normalize_name(name):
    """Return the piece name on a single line with whitespace runs collapsed.

    Falsy input yields an empty string; any other value is stringified.
    """
    if name:
        # str.split() with no argument splits on every whitespace run,
        # newlines included, so joining the pieces flattens the text.
        words = str(name).replace('\n', ' ').split()
        return ' '.join(words)
    return ''
def main():
    """Import every sheet of the VAZLO workbook into the supplier catalog tables.

    For each data row (the first row of a sheet is treated as the header):
      * upsert a ``supplier_catalog`` item keyed by (supplier, sku, sheet
        name), cached per sheet;
      * when the row has vehicle text, insert a text-only
        ``supplier_catalog_compat`` row (position stored in ``engine``,
        ``model_year_engine_id`` NULL); rows without vehicle text are
        counted in ``skipped_no_carro`` instead of inserting an all-NULL
        compat row;
      * insert the row's interchange (brand, part number) pairs.

    Work is committed once per sheet; the cursor and connection are always
    released. Exits with status 1 when the Excel file is missing.
    """
    print(f"[{datetime.now().isoformat()}] Starting VAZLO import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    # Pre-scan SKUs for SKU-in-model detection
    print("Pre-scanning SKUs...")
    all_skus = collect_all_skus(wb)
    print(f" Found {len(all_skus)} unique SKUs")

    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category, is_active)
        VALUES (%s, %s, %s, %s, true)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category,
            is_active = true
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    stats = {
        'sheets': 0,
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'skipped_no_sku': 0,
        'skipped_no_carro': 0,
    }

    # One connection only; released even if a row fails part-way through.
    master_conn = connect_master()
    try:
        master_cur = master_conn.cursor()
        try:
            for sheet_name in wb.sheetnames:
                ws = wb[sheet_name]
                rows = list(ws.iter_rows(values_only=True))
                if not rows:
                    continue
                data_rows = rows[1:]
                stats['sheets'] += 1
                print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")
                # Cache catalog_id per (sku, sheet_name) to avoid repeated upserts
                catalog_id_cache = {}
                for idx, row in enumerate(data_rows):
                    if idx % 2000 == 0 and idx > 0:
                        print(f" ...{idx} rows processed")
                    if not row or not row[1]:
                        stats['skipped_no_sku'] += 1
                        continue
                    sku = str(row[1]).strip()
                    name = normalize_name(row[24])
                    carro_raw = str(row[25]).strip() if row[25] else ''
                    if not sku:
                        stats['skipped_no_sku'] += 1
                        continue
                    stats['rows'] += 1

                    # Upsert catalog item (keyed by sku + category)
                    cache_key = (sku, sheet_name)
                    catalog_id = catalog_id_cache.get(cache_key)
                    if catalog_id is None:
                        master_cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, sheet_name))
                        catalog_id = master_cur.fetchone()[0]
                        catalog_id_cache[cache_key] = catalog_id
                        stats['catalog_items'] += 1

                    if carro_raw:
                        parsed = parse_carro(carro_raw, all_skus)
                        stats['vehicles_parsed'] += 1
                        # Text-only compatibility; MYE matching happens later.
                        master_cur.execute(insert_compat_sql, (
                            catalog_id,
                            parsed['make'],
                            parsed['model'],
                            parsed['year'],
                            parsed['position'] or None,
                            None,
                            'import_text',
                        ))
                        stats['compat_rows'] += 1
                    else:
                        # No vehicle text: nothing to store, and an all-NULL
                        # row would never hit the ON CONFLICT key, so it
                        # would be duplicated on every re-import.
                        stats['skipped_no_carro'] += 1

                    # Insert interchanges
                    for brand, pn in extract_interchanges(row):
                        master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                        stats['interchange_rows'] += 1

                # Commit per sheet
                master_conn.commit()
                print(f" Sheet '{sheet_name}' committed.")
        finally:
            master_cur.close()
    finally:
        master_conn.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Sheets processed: {stats['sheets']}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
    print(f"Skipped (no SKU): {stats['skipped_no_sku']}")
    print(f"Skipped (no vehicle): {stats['skipped_no_carro']}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,393 @@
#!/usr/bin/env python3
"""
Import Yokomitsu catalog from Excel into supplier_catalog tables.
Usage:
python scripts/import_yokomitsu_catalog.py
"""
import os
import re
import sys
from datetime import datetime
import psycopg2
from openpyxl import load_workbook
# Database URLs (overridable through the environment) and import settings.
MASTER_DB_URL = os.environ.get(
    'MASTER_DB_URL',
    'postgresql://postgres@localhost/nexus_autoparts',
)
TENANT_DB_URL = os.environ.get(
    'TENANT_DB_URL',
    'postgresql://postgres@localhost/tenant_refaccionaria_rached',
)
# The workbook is resolved relative to this script's directory.
EXCEL_PATH = os.path.join(
    os.path.dirname(__file__), '..', 'data', 'YOKOMITSU_CATALOGOS_COMPLETOS_TODOS.xlsx'
)
SUPPLIER_NAME = 'YOKOMITSU'
TENANT_ID = 31
def connect_master():
    """Open a new psycopg2 connection to the database at MASTER_DB_URL."""
    connection = psycopg2.connect(MASTER_DB_URL)
    return connection
def connect_tenant():
    """Open a new psycopg2 connection to the database at TENANT_DB_URL."""
    connection = psycopg2.connect(TENANT_DB_URL)
    return connection
def parse_year(token):
    """Convert a 2- or 4-digit year token to a full year, or return None.

    A range such as ``08-13`` resolves to its first year. Values below 50
    map to 20xx, values 50-99 to 19xx, and 4-digit values are accepted
    only between 1900 and 2050 inclusive.
    """
    text = token.strip()
    if not text:
        return None
    # Everything before the first dash; the whole text when there is none.
    head = text.partition('-')[0].strip()
    if not head.isdigit():
        return None
    value = int(head)
    if value < 100:
        century = 2000 if value < 50 else 1900
        return century + value
    return value if 1900 <= value <= 2050 else None
def parse_vehicle(vehicle_raw):
    """Parse a Yokomitsu vehicle string into make, model, year and engine.

    Examples of input:
        'Chevrolet AVEO 1.5L 18'
        'Audi A4 1.8L/2.0L 09'
        'Dodge GRAND CHEROKEE 2/4WD 3.0L/3.7L/4.7L 08'
        'Volkswagen JETTA A4/CLASICO 1.8L/2.0L 06 V'
        'NISSAN 720 1988'
        'Dodge CARAVAN/VOYAGER 00'
        'ER 08-15 10'   (garbage/unknown)

    Args:
        vehicle_raw: Raw cell value; falsy values produce an all-None result.

    Returns:
        dict with keys ``make``, ``model``, ``year``, ``engine`` and
        ``vehicle_raw`` (the stripped text without a trailing ' V' marker).
        When no year can be found, ``make`` is the first token and ``model``
        is the rest of the text.
    """
    if not vehicle_raw:
        return {'make': None, 'model': None, 'year': None, 'engine': None, 'vehicle_raw': vehicle_raw}
    s = str(vehicle_raw).strip()
    # Remove trailing 'V' (variant marker)
    s = re.sub(r'\s+V$', '', s)
    tokens = s.split()
    if len(tokens) < 2:
        return {'make': None, 'model': None, 'year': None, 'engine': None, 'vehicle_raw': s}

    # The year is normally the last token. If the last token is not a
    # year, the second-to-last one is tried and the last is treated as a
    # trailing extra that is dropped together with the year.
    year = parse_year(tokens[-1])
    tokens_without_year = tokens[:-1]
    if year is None and len(tokens) >= 3:
        year = parse_year(tokens[-2])
        if year is not None:
            # Keep the year found here; re-parsing a neighbouring token
            # would overwrite it with an unrelated value.
            tokens_without_year = tokens[:-2]
    if year is None:
        # No year found; keep raw and try best-effort
        return {'make': tokens[0], 'model': ' '.join(tokens[1:]),
                'year': None, 'engine': None, 'vehicle_raw': s}

    make = tokens_without_year[0] if tokens_without_year else None
    remaining = ' '.join(tokens_without_year[1:]) if len(tokens_without_year) > 1 else ''

    # Engine text is expected at the END of what remains, e.g.
    # "1.5L", "1.8L/2.0L", "2/4WD", "3.0L/3.7L/4.7L", "1.9L DIESEL".
    engine = None
    model = remaining
    engine_match = re.search(r'(\d+(?:\.\d+)?\s*L(?:/\d+(?:\.\d+)?\s*L)*|\d+/\d+WD|\d+\.\d+L\s+DIESEL|\d+\.\d+L\s+TURBO)$', remaining, re.IGNORECASE)
    if engine_match:
        engine = engine_match.group(1)
        model = remaining[:engine_match.start()].strip()
    else:
        # Simpler fallback: a last token with a digit and 'L' or 'WD'.
        parts = remaining.split()
        if parts and re.search(r'\d', parts[-1]) and ('L' in parts[-1].upper() or 'WD' in parts[-1].upper()):
            engine = parts[-1]
            model = ' '.join(parts[:-1])

    return {
        'make': make,
        'model': model,
        'year': year,
        'engine': engine,
        'vehicle_raw': s,
    }
def build_brand_cache(cur):
    """Return a mapping of upper-cased brand name -> brand id from ``brands``."""
    cur.execute("SELECT id_brand, name_brand FROM brands")
    cache = {}
    for brand_id, brand_name in cur.fetchall():
        cache[brand_name.upper()] = brand_id
    return cache
def build_model_cache(cur):
    """Return a mapping of brand id -> list of (model id, model name) from ``models``."""
    cur.execute("SELECT id_model, brand_id, name_model FROM models")
    # Grouped by brand so a lookup only scans that brand's models.
    by_brand = {}
    for model_id, brand_id, model_name in cur.fetchall():
        if brand_id not in by_brand:
            by_brand[brand_id] = []
        by_brand[brand_id].append((model_id, model_name))
    return by_brand
def build_year_cache(cur):
    """Return a mapping of calendar year -> year id from ``years``."""
    cur.execute("SELECT id_year, year_car FROM years")
    cache = {}
    for year_id, year_value in cur.fetchall():
        cache[year_value] = year_id
    return cache
def build_mye_cache(cur):
    """Return a mapping of (model id, year id) -> list of MYE ids."""
    cur.execute("SELECT id_mye, model_id, year_id FROM model_year_engine")
    by_model_year = {}
    for mye_id, model_id, year_id in cur.fetchall():
        key = (model_id, year_id)
        if key not in by_model_year:
            by_model_year[key] = []
        by_model_year[key].append(mye_id)
    return by_model_year
def fuzzy_match_vehicle(parsed, brand_cache, model_cache, year_cache, mye_cache):
    """Resolve a parsed vehicle to the MYE ids of matching master models.

    Args:
        parsed: dict with ``make``, ``model`` and ``year`` entries.
        brand_cache: upper-cased brand name -> brand id.
        model_cache: brand id -> list of (model id, model name).
        year_cache: calendar year -> year id.
        mye_cache: (model id, year id) -> list of MYE ids.

    Returns:
        List of MYE ids; empty when make, model or year is missing or any
        lookup step finds nothing.
    """
    make = parsed.get('make')
    model_text = parsed.get('model')
    year = parsed.get('year')
    if not (make and model_text and year):
        return []

    # Brand: exact upper-cased name first, then the first brand whose name
    # contains the make or is contained in it.
    make_up = make.upper()
    brand_id = brand_cache.get(make_up)
    if not brand_id:
        for brand_name, candidate_id in brand_cache.items():
            if make_up in brand_name or brand_name in make_up:
                brand_id = candidate_id
                break
    if not brand_id:
        return []

    brand_models = model_cache.get(brand_id, [])
    keyword = model_text.upper()

    # Models whose name contains the whole upper-cased model text.
    model_ids = [mid for mid, mname in brand_models if keyword in mname.upper()]
    if not model_ids:
        # Fallback: any single word of at least 3 characters.
        words = [w for w in keyword.split() if len(w) >= 3]
        model_ids = [
            mid for mid, mname in brand_models
            if any(w in mname.upper() for w in words)
        ]
    if not model_ids:
        return []

    year_id = year_cache.get(year)
    if not year_id:
        return []

    matches = []
    for mid in model_ids:
        matches.extend(mye_cache.get((mid, year_id), []))
    return matches
def extract_interchanges(row):
    """Collect (brand, part_number) pairs from the six interchange column pairs.

    Brand cells are at indexes 2, 4, ..., 12, each followed by its
    part-number cell (3, 5, ..., 13). A pair is kept only when both raw
    cells are truthy and both values are non-empty after stripping.
    Columns beyond the end of ``row`` are treated as empty, so short rows
    do not raise IndexError.
    """
    interchanges = []
    width = len(row)
    for marca_col in range(2, 14, 2):
        inter_col = marca_col + 1
        if inter_col >= width:
            # Neither this pair nor any later one is fully present.
            break
        brand, pn = row[marca_col], row[inter_col]
        if not (brand and pn):
            continue
        brand = str(brand).strip()
        pn = str(pn).strip()
        if brand and pn:
            interchanges.append((brand, pn))
    return interchanges
def main():
    """Import every sheet of the Yokomitsu workbook into the supplier catalog tables.

    For each data row with a SKU and a name (the first row of a sheet is
    treated as the header):
      * upsert the ``supplier_catalog`` item (category = sheet name);
      * parse the vehicle text and try to resolve it to MYE ids;
      * insert one ``supplier_catalog_compat`` row per matched MYE id
        (source 'fuzzy_match'), or one text-only row (source
        'import_text') when nothing matched;
      * insert the row's interchange (brand, part number) pairs.

    Work is committed once per sheet; the cursor and connection are always
    released. Exits with status 1 when the Excel file is missing.
    """
    print(f"[{datetime.now().isoformat()}] Starting import...")
    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)
    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    # Four columns, four placeholders: the execute() call below passes
    # exactly (supplier_name, sku, name, category).
    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category
        RETURNING id
    """
    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """
    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """
    # Track stats
    stats = {
        'sheets': 0,
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'vehicles_matched': 0,
        'mye_matches': 0,
    }

    # One connection and one cursor; released even if a row fails.
    master_conn = connect_master()
    try:
        master_cur = master_conn.cursor()
        try:
            print("Building caches...")
            brand_cache = build_brand_cache(master_cur)
            model_cache = build_model_cache(master_cur)
            year_cache = build_year_cache(master_cur)
            mye_cache = build_mye_cache(master_cur)
            print(f" Brands: {len(brand_cache)}, Models: {sum(len(v) for v in model_cache.values())}, Years: {len(year_cache)}, MYE combos: {len(mye_cache)}")

            # Process each sheet
            for sheet_name in wb.sheetnames:
                ws = wb[sheet_name]
                rows = list(ws.iter_rows(values_only=True))
                if not rows:
                    continue
                data_rows = rows[1:]
                stats['sheets'] += 1
                print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")
                for idx, row in enumerate(data_rows):
                    if idx % 1000 == 0 and idx > 0:
                        print(f" ...{idx} rows processed")
                    # Skip empty rows
                    if not row or not row[1]:
                        continue
                    sku = str(row[1]).strip()
                    name = str(row[14]).strip() if row[14] else ''
                    vehicle_raw = str(row[15]).strip() if row[15] else ''
                    if not sku or not name:
                        continue
                    stats['rows'] += 1

                    # Upsert catalog item
                    master_cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, sheet_name))
                    catalog_id = master_cur.fetchone()[0]
                    stats['catalog_items'] += 1

                    # Parse vehicle
                    parsed = parse_vehicle(vehicle_raw)
                    stats['vehicles_parsed'] += 1
                    mye_ids = fuzzy_match_vehicle(parsed, brand_cache, model_cache, year_cache, mye_cache)
                    if mye_ids:
                        stats['vehicles_matched'] += 1
                        stats['mye_matches'] += len(mye_ids)

                    # One compat row per matched MYE, or one text-only row.
                    if mye_ids:
                        targets = [(mye_id, 'fuzzy_match') for mye_id in mye_ids]
                    else:
                        targets = [(None, 'import_text')]
                    for mye_id, source in targets:
                        master_cur.execute(insert_compat_sql, (
                            catalog_id,
                            parsed['make'],
                            parsed['model'],
                            parsed['year'],
                            parsed['engine'],
                            mye_id,
                            source,
                        ))
                        stats['compat_rows'] += 1

                    # Insert interchanges
                    for brand, pn in extract_interchanges(row):
                        master_cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
                        stats['interchange_rows'] += 1

                # Commit per sheet
                master_conn.commit()
                print(f" Sheet '{sheet_name}' committed.")
        finally:
            master_cur.close()
    finally:
        master_conn.close()

    # Final stats
    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Sheets processed: {stats['sheets']}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
    print(f"Vehicles with MYE: {stats['vehicles_matched']}")
    print(f"Total MYE matches: {stats['mye_matches']}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,369 @@
#!/usr/bin/env python3
"""
Match supplier_catalog_compat rows to model_year_engine ids by fuzzy (make, model, year).
Supports exact match, parenthesis-stripped match, whitespace/dash normalization,
prefix/substring fallback, model aliases, and year proximity (±3 years).
Usage:
python scripts/match_supplier_compat_to_mye.py [--dry-run] <supplier_name|--all>
"""
import os
import re
import sys
from collections import defaultdict
import psycopg2
MASTER_DB_URL = os.environ.get(
    'MASTER_DB_URL',
    'postgresql://postgres@localhost/nexus_autoparts',
)

# Upper-cased make spelling -> canonical make key used by the indexes.
MAKE_ALIASES = {
    'VOLKSWAGEN': 'VW',
    'VOLKWAGEN': 'VW',
    'MERCEDES BENZ': 'MERCEDES BENZ',
    'MERCEDES-BENZ': 'MERCEDES BENZ',
    'BMW MOTORRAD': 'BMW',
}

# Words at which a model name is cut: body styles, markets, trim levels
# and drivetrains.
NOISE_SUFFIXES = {
    'SEDAN', 'SALOON', 'COUPE', 'HATCHBACK', 'HATCH', 'WAGON', 'ESTATE',
    'SUV', 'VAN', 'PICK', 'UP', 'PICKUP', 'CABRIOLET', 'CONVERTIBLE',
    'LATINO', 'BRASIL', 'MEXICO', 'USA', 'EUROPA', 'EUROPE', 'NACIO',
    'LIMITED', 'LTD', 'XLT', 'LE', 'SE', 'XLE', 'SPORT', 'LX', 'EX',
    '4X2', '4X4', '4WD', 'AWD', 'FWD', 'RWD', '2WD',
}

# Specific model aliases: (make, supplier_model) -> list of possible master model substrings
MODEL_ALIASES = {
    ('INFINITI', 'JX35'): ['JX SUV'],
    ('INFINITI', 'G35'): ['G Coupe', 'G Saloon', 'G37'],
    ('INFINITI', 'G37'): ['G Coupe', 'G Saloon', 'G37'],
    ('HONDA', 'CRX'): ['CRX'],
    ('MAZDA', 'PROTEGE'): ['PROTEGE'],
    ('MAZDA', 'PROTEGE5'): ['PROTEGE'],
    ('KIA', 'SPECTRA'): ['SPECTRA', 'SEPHIA'],
    ('KIA', 'FORTE5'): ['FORTE'],
    ('CHEVROLET', 'OPTRA'): ['OPTRA', 'LACETTI'],
    ('CHEVROLET', 'AGILE'): ['AGILE'],
    ('FIAT', 'SIENA'): ['SIENA'],
    ('PONTIAC', 'G4'): ['G4', 'PURSUIT'],
    ('FORD', 'FIVE HUNDRED'): ['FIVE HUNDRED', '500', 'TAURUS'],
    ('FORD', 'POLICE INTERCEPTOR UTILITY'): ['POLICE INTERCEPTOR UTILITY', 'EXPLORER'],
    ('FORD', 'POLICE INTERCEPTOR SEDAN'): ['POLICE INTERCEPTOR SEDAN', 'TAURUS'],
    ('SCION', 'XA'): ['XA'],
    ('SAAB', '9-2X'): ['9-2X'],
    ('BUICK', 'LACROSSE'): ['LACROSSE'],
    ('DODGE', 'CALIBER'): ['CALIBER'],
    ('SUZUKI', 'EQUATOR'): ['EQUATOR'],
    ('CHRYSLER', 'LEBARON K'): ['LEBARON'],
    ('MERCEDES BENZ', 'A170'): ['A-CLASS'],
    ('MERCEDES BENZ', 'A210'): ['A-CLASS'],
}

# Regex-based class extraction for Mercedes: e.g. C350E -> C-Class, SL600 -> SL.
# Entries are tried in order; the second element is either a marker
# ('CLASS' / 'LETTERS') or the literal master model substring.
MERCEDES_CLASS_PATTERNS = [
    # These Mercedes classes use "X-CLASS" in master (C-CLASS, E-CLASS, S-CLASS, etc.)
    (r'^(A|B|C|E|G|GL|GLA|GLB|GLC|GLE|GLK|GLS|M|R|S|V|X)\d', 'CLASS'),
    # These use just the letters (SL, SLK, CLS, CL, CLK) without -CLASS
    (r'^(SL|SLK|CLS|CL|CLK)\d', 'LETTERS'),
    (r'^(260E|300E|320E|400E|500E)$', 'E-CLASS'),
    (r'^(300SL|500SL)$', 'SL'),
    (r'^(400SEL|500SEL|600SEL)$', 'S-CLASS'),
]
def normalize_make(make):
    """Return the canonical make key: stripped, upper-cased and alias-resolved.

    Falsy input yields an empty string.
    """
    if not make:
        return ''
    key = str(make).strip().upper()
    if key in MAKE_ALIASES:
        return MAKE_ALIASES[key]
    return key
def normalize_model(model):
    """Return the model upper-cased with whitespace runs collapsed to one space.

    Falsy input yields an empty string.
    """
    if model:
        words = str(model).upper().split()
        return ' '.join(words)
    return ''
def strip_parentheses(text):
    """Remove every ``(...)`` group, with the whitespace before it, and trim the result."""
    without_groups = re.sub(r'\s*\([^)]*\)', '', text)
    return without_groups.strip()
def strip_noise_suffixes(text):
    """Cut ``text`` at the first word found in NOISE_SUFFIXES.

    The noise word and everything after it are dropped; the words before
    it are re-joined with single spaces.
    """
    words = text.split()
    cut = len(words)
    for position, word in enumerate(words):
        if word in NOISE_SUFFIXES:
            cut = position
            break
    return ' '.join(words[:cut])
def compact_alnum(text):
    """Drop every character except ASCII upper-case letters and digits.

    Lower-case letters are removed too, so callers should upper-case first.
    """
    disallowed = r'[^A-Z0-9]'
    return re.sub(disallowed, '', text)
def build_model_variants(model_name):
    """Return the set of spellings under which a model name is indexed.

    The variants are: the normalized name, the name without parenthesised
    groups, that name cut at the first noise word, and the compact
    (letters and digits only) form of each of the three. Empty variants
    are left out; an empty or falsy name yields an empty set.
    """
    base = normalize_model(model_name)
    if not base:
        return set()
    no_paren = strip_parentheses(base)
    no_noise = strip_noise_suffixes(no_paren)
    candidates = (
        base,
        no_paren,
        no_noise,
        compact_alnum(no_noise),
        compact_alnum(no_paren),
        compact_alnum(base),
    )
    return {candidate for candidate in candidates if candidate}
def mercedes_class_alias(model):
    """Return the master model substring for a Mercedes model code, or None.

    The first entry of MERCEDES_CLASS_PATTERNS that matches the normalized
    model decides the result: 'CLASS' entries give '<letters>-CLASS',
    'LETTERS' entries give the captured letters, and any other entry gives
    its replacement text unchanged.
    """
    name = normalize_model(model)
    for pattern, kind in MERCEDES_CLASS_PATTERNS:
        hit = re.match(pattern, name)
        if hit is None:
            continue
        if kind == 'CLASS':
            return hit.group(1) + '-CLASS'
        if kind == 'LETTERS':
            return hit.group(1)
        return kind
    return None
def connect():
    """Open a new psycopg2 connection to the database at MASTER_DB_URL."""
    connection = psycopg2.connect(MASTER_DB_URL)
    return connection
def build_mye_index(cur):
    """Load every (brand, model, year, MYE id) row and build the lookup indexes.

    Returns:
        Tuple ``(exact_index, compact_index, models_by_make, year_range_index)``:
          * exact_index: (make, model variant, year) -> [mye ids]
          * compact_index: (make, compact model, year) -> [mye ids]
          * models_by_make: make -> [(normalized model, mye id, year, compact model)]
          * year_range_index: make -> compact model -> year -> [mye ids]
        Rows without a make, model or year are skipped.
    """
    print('Building MYE index...')
    cur.execute('''
        SELECT b.name_brand, m.name_model, y.year_car, mye.id_mye
        FROM model_year_engine mye
        JOIN models m ON m.id_model = mye.model_id
        JOIN brands b ON b.id_brand = m.brand_id
        JOIN years y ON y.id_year = mye.year_id
    ''')
    exact_index = defaultdict(list)
    compact_index = defaultdict(list)
    models_by_make = defaultdict(list)
    # For year proximity: make -> compact_model -> {year: [mye_ids]}
    year_range_index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    for brand_name, model_name, model_year, mye_id in cur.fetchall():
        nmake = normalize_make(brand_name)
        if not (nmake and model_name) or model_year is None:
            continue
        for variant in build_model_variants(model_name):
            exact_index[(nmake, variant, model_year)].append(mye_id)
        normalized = normalize_model(model_name)
        compact = compact_alnum(strip_parentheses(normalized))
        if compact:
            compact_index[(nmake, compact, model_year)].append(mye_id)
            year_range_index[nmake][compact][model_year].append(mye_id)
        models_by_make[nmake].append((normalized, mye_id, model_year, compact))

    total_myes = sum(len(ids) for ids in exact_index.values())
    print(f' {len(exact_index):,} exact keys, {total_myes:,} MYE entries')
    return exact_index, compact_index, models_by_make, year_range_index
def find_by_alias(nmake, nmodel, year, models_by_make):
    """Match a supplier model through MODEL_ALIASES and Mercedes class patterns.

    Args:
        nmake: Normalized make.
        nmodel: Normalized supplier model.
        year: Model year that the master entry must have.
        models_by_make: make -> [(normalized model, mye id, year, compact model)].

    Returns:
        The MYE id of the first master model of that make and year whose
        name, or compact name, contains one of the aliases; None when there
        is no alias or no match. Aliases are tried in their listed order.
    """
    aliases = list(MODEL_ALIASES.get((nmake, nmodel), []))
    # Mercedes fallback
    if nmake == 'MERCEDES BENZ':
        cls = mercedes_class_alias(nmodel)
        if cls and cls not in aliases:
            aliases.append(cls)
    if not aliases:
        return None

    # Only master entries of the requested year can match.
    same_year = [entry for entry in models_by_make.get(nmake, []) if entry[2] == year]
    for alias in aliases:
        # Master names are upper-cased, so the alias must be too: written
        # as 'G Coupe' it would never be a substring of 'G COUPE', and its
        # compact form would shrink to 'GC'.
        alias_norm = normalize_model(alias)
        alias_compact = compact_alnum(alias_norm)
        for master_model, mye_id, _mye_year, master_compact in same_year:
            if alias_norm in master_model:
                return mye_id
            # An empty compact alias would be "contained" in every name.
            if alias_compact and alias_compact in master_compact:
                return mye_id
    return None
def find_by_year_proximity(nmake, supplier_compact, year, year_range_index, max_diff=2):
    """Return an MYE id for the same model in the closest year within ±max_diff.

    ``year_range_index`` maps make -> compact model -> year -> [mye ids].
    The first id of the nearest year is returned; when two years are
    equally near, the one that comes first in the index wins. Returns None
    when the model is unknown or no year is close enough.
    """
    by_year = year_range_index.get(nmake, {}).get(supplier_compact)
    if not by_year:
        return None
    in_range = [candidate for candidate in by_year if abs(candidate - year) <= max_diff]
    if not in_range:
        return None
    # min() keeps the first of equally distant years, in index order.
    nearest = min(in_range, key=lambda candidate: abs(candidate - year))
    return by_year[nearest][0]
def find_mye_id(make, model, year, exact_index, compact_index, models_by_make, year_range_index):
    """Resolve a supplier (make, model, year) to one MYE id, or None.

    Strategies are tried in order and the first hit wins:
      1. any model variant in ``exact_index``;
      2. the compact model in ``compact_index``;
      3. compact-name containment, in either direction, against master
         models of the same make and year;
      4. model aliases (requires a year);
      5. the same compact model within ±3 years (requires a year).
    """
    nmake = normalize_make(make)
    nmodel = normalize_model(model)
    if not nmake or not nmodel:
        return None

    # 1) Exact/near-exact on any variant. The variants come as a set, so
    # they are ordered (longest first) to make the result reproducible
    # between runs.
    variants = sorted(build_model_variants(nmodel), key=lambda v: (-len(v), v))
    for v in variants:
        myes = exact_index.get((nmake, v, year))
        if myes:
            return myes[0]

    supplier_compact = compact_alnum(strip_parentheses(nmodel))

    # An empty compact model (text made only of punctuation or of a
    # parenthesised group) must not be matched: '' is a substring of
    # every master name.
    if supplier_compact:
        # 2) Compact match
        myes = compact_index.get((nmake, supplier_compact, year))
        if myes:
            return myes[0]
        # 3) Prefix/substring containment
        for _master_model, mye_id, mye_year, master_compact in models_by_make.get(nmake, []):
            if mye_year != year or not master_compact:
                continue
            if supplier_compact in master_compact or master_compact in supplier_compact:
                return mye_id

    if year is None:
        return None

    # 4) Model aliases
    mye_id = find_by_alias(nmake, nmodel, year, models_by_make)
    if mye_id:
        return mye_id

    # 5) Year proximity ±3 years (same compact model)
    if supplier_compact:
        mye_id = find_by_year_proximity(nmake, supplier_compact, year, year_range_index, max_diff=3)
        if mye_id:
            return mye_id
    return None
def main():
    """Command-line entry point: match unmatched compat rows to MYE ids.

    Usage: ``match_supplier_compat_to_mye.py [--dry-run] <supplier_name|--all>``

    Loads the compat rows whose ``model_year_engine_id`` is NULL (for one
    supplier or for all), resolves each with ``find_mye_id``, prints totals
    and up to ten samples of each outcome, and, unless ``--dry-run`` was
    given, writes the matches back with source 'matched_fuzzy'.
    Exits with status 1 when no supplier argument is given.
    """
    args = sys.argv[1:]
    dry_run = '--dry-run' in args
    if dry_run:
        args.remove('--dry-run')
    if not args:
        print('Usage: match_supplier_compat_to_mye.py [--dry-run] <supplier_name|--all>')
        sys.exit(1)
    supplier_arg = args[0]
    suppliers = [supplier_arg] if supplier_arg != '--all' else None

    banner = '=' * 60
    if dry_run:
        print(banner)
        print('DRY RUN MODE — no changes will be made')
        print(banner)

    conn = connect()
    cur = conn.cursor()
    exact_index, compact_index, models_by_make, year_range_index = build_mye_index(cur)

    if suppliers:
        cur.execute('''
            SELECT scc.id, scc.make, scc.model, scc.year
            FROM supplier_catalog_compat scc
            JOIN supplier_catalog sc ON sc.id = scc.catalog_id
            WHERE sc.supplier_name = ANY(%s) AND scc.model_year_engine_id IS NULL
        ''', (suppliers,))
    else:
        cur.execute('''
            SELECT scc.id, scc.make, scc.model, scc.year
            FROM supplier_catalog_compat scc
            WHERE scc.model_year_engine_id IS NULL
        ''')
    rows = cur.fetchall()
    print(f'\nMatching {len(rows):,} compat rows...')

    matched = 0
    unmatched = 0
    sample_matches = []
    sample_unmatched = []
    updates = []
    for scc_id, make, model, year in rows:
        mye_id = find_mye_id(make, model, year, exact_index, compact_index, models_by_make, year_range_index)
        if not mye_id:
            unmatched += 1
            if len(sample_unmatched) < 10:
                sample_unmatched.append((make, model, year))
            continue
        updates.append((mye_id, scc_id))
        matched += 1
        if len(sample_matches) < 10:
            sample_matches.append((make, model, year, mye_id))

    print(f'Matched: {matched:,}')
    print(f'Unmatched: {unmatched:,}')
    if sample_matches:
        print('\nSample matches:')
        for make, model, year, mye_id in sample_matches:
            print(f' {make} {model} {year} -> mye_id={mye_id}')
    if sample_unmatched:
        print('\nSample unmatched:')
        for make, model, year in sample_unmatched:
            print(f' {make} {model} {year}')

    # Write only when there is something to write and this is a real run.
    if updates and not dry_run:
        print(f'\nApplying {len(updates):,} updates...')
        cur.executemany('''
            UPDATE supplier_catalog_compat
            SET model_year_engine_id = %s, source = 'matched_fuzzy'
            WHERE id = %s
        ''', updates)
        conn.commit()
        print('Updates committed.')

    cur.close()
    conn.close()
    if dry_run:
        print('\n' + banner)
        print('DRY RUN complete. Run without --dry-run to apply.')
        print(banner)


if __name__ == '__main__':
    main()