- Cleaned 137+ fake engine-displacement models from supplier imports (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. prefixes, year suffixes, torque specs, empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog, supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal, compact matching, prefix/substring fallback, model aliases, and ±3 year proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500, LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Added cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*, nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
236 lines
6.7 KiB
Python
236 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import LUK catalog from Excel into supplier_catalog tables.
|
|
|
|
Usage:
|
|
python scripts/import_luk_catalog.py
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from collections import Counter
|
|
from datetime import datetime
|
|
|
|
import psycopg2
|
|
from openpyxl import load_workbook
|
|
|
|
# Connection strings; each can be overridden through the environment variable
# of the same name, otherwise the local default below is used.
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')

# Workbook location, resolved relative to this script: ../data/LUK.xlsx
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'LUK.xlsx')
# Value written to supplier_catalog.supplier_name for every imported item.
SUPPLIER_NAME = 'LUK'
TENANT_ID = 31

# Makes spelled with two tokens, keyed by the upper-cased (first, second)
# token pair and mapped to the canonical make name used by parse_luk().
MULTI_WORD_MAKES = {
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('MG', 'ROVER'): 'MG ROVER',
}

# Upper-cased tokens that parse_luk() drops from the model text; accented and
# unaccented spellings are both listed because matching is exact.
NOTE_KEYWORDS = {
    'VOLANTE', 'SÓLIDO', 'SOLIDO', 'TIPO', 'CAJA', 'PLANO',
    'ESCALÓN', 'ESCALON', 'MOTOR', 'EMBRAGUE', 'DOBLE', 'HUMEDO',
}
|
|
|
|
|
|
def connect_master():
    """Open a new psycopg2 connection to the master database.

    The DSN comes from ``MASTER_DB_URL``. The connection is returned open;
    closing it is left to the caller.
    """
    return psycopg2.connect(MASTER_DB_URL)
|
|
|
|
|
|
def connect_tenant():
    """Open a new psycopg2 connection to the tenant database.

    The DSN comes from ``TENANT_DB_URL``. The connection is returned open;
    closing it is left to the caller.
    """
    return psycopg2.connect(TENANT_DB_URL)
|
|
|
|
|
|
def normalize_name(name):
    """Return *name* as a string with all whitespace runs collapsed.

    Leading/trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines) becomes a single space. Falsy input
    such as ``None`` or ``''`` yields ``''``; non-string values are converted
    with ``str()`` first.
    """
    if not name:
        return ''
    # str.split() without a separator already treats newlines as whitespace,
    # so no separate newline replacement is needed.
    return ' '.join(str(name).split())
|
|
|
|
|
|
def parse_luk(carro):
    """Parse a CARRO_PERTENECIENTE cell into ``(make, model, year)``.

    The text is split on whitespace and interpreted as follows:

    * year  -- the last token that is exactly a four-digit 19xx/20xx number,
      as an ``int``; ``None`` when there is no such token.
    * make  -- the first token, or a multi-token make when the leading tokens
      match ``MULTI_WORD_MAKES`` or the literal ``CHRYSLER / DODGE``.
    * model -- the remaining tokens (make and year removed) with
      ``NOTE_KEYWORDS`` filtered out, joined by single spaces. If filtering
      removes everything, the unfiltered remainder is used instead. May be
      ``''``.

    Returns ``(None, None, None)`` when *carro* is falsy or whitespace only.
    """
    if not carro:
        return None, None, None
    tokens = str(carro).split()
    if not tokens:
        return None, None, None

    # Keep overwriting so the LAST year-looking token wins.
    year = None
    year_idx = None
    for idx, token in enumerate(tokens):
        if re.fullmatch(r'(19|20)\d{2}', token):
            year = int(token)
            year_idx = idx

    # Default to a one-token make; widen it for known multi-token makes.
    make = tokens[0]
    make_len = 1
    if len(tokens) >= 2:
        leading_pair = (tokens[0].upper(), tokens[1].upper())
        if leading_pair in MULTI_WORD_MAKES:
            make = MULTI_WORD_MAKES[leading_pair]
            make_len = 2
        elif (
            len(tokens) >= 3
            and leading_pair[0] == 'CHRYSLER'
            and tokens[1] == '/'
            and tokens[2].upper() == 'DODGE'
        ):
            make = 'CHRYSLER / DODGE'
            make_len = 3

    # Everything that is neither the make nor the chosen year token.
    if year_idx is None:
        remaining = tokens[make_len:]
    else:
        remaining = tokens[make_len:year_idx] + tokens[year_idx + 1:]

    kept = [token for token in remaining if token.upper() not in NOTE_KEYWORDS]
    # Fall back to the unfiltered remainder rather than returning an empty
    # model when every remaining token was a note keyword.
    model = ' '.join(kept) or ' '.join(remaining)

    return make, model, year
|
|
|
|
|
|
def extract_interchanges(row):
    """Collect the ``(brand, part_number)`` interchange pairs from *row*.

    Brands are read from columns 2, 4, 6 and 8 (0-based) and each part number
    from the column immediately to the right of its brand. Both values are
    converted with ``str()`` and stripped; a pair is kept only when both are
    non-empty. Columns beyond the end of the row are treated as empty.

    Returns a list of tuples in column order; an empty or ``None`` row gives
    an empty list.
    """
    if not row:
        return []

    pairs = []
    width = len(row)
    for brand_col in range(2, 10, 2):
        if brand_col >= width or not row[brand_col]:
            continue
        pn_col = brand_col + 1
        raw_pn = row[pn_col] if pn_col < width else None
        brand = str(row[brand_col]).strip()
        pn = str(raw_pn).strip() if raw_pn else ''
        if brand and pn:
            pairs.append((brand, pn))
    return pairs
|
|
|
|
|
|
def _cell(row, index):
    """Return ``row[index]``, or ``None`` when the row is empty or too short."""
    if not row or index >= len(row):
        return None
    return row[index]


def _most_common_names(ws):
    """Map each SKU (column 1) to its most frequent normalized name (column 10).

    Ties are resolved in favour of the name that was seen first.
    """
    pair_counts = Counter()
    for row in ws.iter_rows(min_row=2, values_only=True):
        raw_sku = _cell(row, 1)
        sku = str(raw_sku).strip() if raw_sku else ''
        name = normalize_name(_cell(row, 10))
        if sku and name:
            pair_counts[(sku, name)] += 1

    best = {}
    for (sku, name), count in pair_counts.items():
        # Strict '>' keeps the earliest name when counts are equal.
        if sku not in best or count > best[sku][1]:
            best[sku] = (name, count)
    return {sku: name for sku, (name, _count) in best.items()}


def _import_rows(ws, cur, sku_names):
    """Write catalog, compat and interchange rows for every data row of *ws*.

    *cur* is an open cursor on the master database; nothing is committed
    here. Returns the dict of counters printed by ``main()``.
    """
    category = 'KIT_CLUTCH'

    # Four target columns, four placeholders, four parameters.
    upsert_catalog_sql = """
        INSERT INTO supplier_catalog (supplier_name, sku, name, category)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (supplier_name, sku, category) DO UPDATE SET
            name = EXCLUDED.name,
            category = EXCLUDED.category
        RETURNING id
    """

    insert_compat_sql = """
        INSERT INTO supplier_catalog_compat
            (catalog_id, make, model, year, engine, model_year_engine_id, source)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
    """

    insert_interchange_sql = """
        INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
        VALUES (%s, %s, %s)
        ON CONFLICT DO NOTHING
    """

    stats = {
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
    }

    # One upsert per SKU; later rows for the same SKU reuse the returned id.
    catalog_ids = {}

    for idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True)):
        if idx % 1000 == 0 and idx > 0:
            print(f" ...{idx} rows processed")

        raw_sku = _cell(row, 1)
        if not raw_sku:
            continue

        sku = str(raw_sku).strip()
        name = sku_names.get(sku, '')
        if not sku or not name:
            continue

        stats['rows'] += 1

        catalog_id = catalog_ids.get(sku)
        if catalog_id is None:
            cur.execute(upsert_catalog_sql, (SUPPLIER_NAME, sku, name, category))
            catalog_id = cur.fetchone()[0]
            catalog_ids[sku] = catalog_id
            stats['catalog_items'] += 1

        raw_carro = _cell(row, 11)
        carro_raw = str(raw_carro).strip() if raw_carro else ''
        make, model, year = parse_luk(carro_raw)

        # A row without vehicle text parses to (None, None, None); writing it
        # would only add an all-NULL compatibility row, so it is skipped.
        if make is not None:
            cur.execute(
                insert_compat_sql,
                (catalog_id, make, model, year, None, None, 'import_text'),
            )
            stats['vehicles_parsed'] += 1
            stats['compat_rows'] += 1

        for brand, pn in extract_interchanges(row):
            cur.execute(insert_interchange_sql, (catalog_id, brand, pn))
            stats['interchange_rows'] += 1

    return stats


def main():
    """Import the LUK ``KIT_CLUTCH`` sheet into the master supplier tables.

    The sheet is read twice: a pre-scan picks the most common name for each
    SKU, then a second pass upserts one ``supplier_catalog`` row per SKU and
    inserts the compatibility and interchange rows. All writes are committed
    once at the end; if anything fails the connection is closed without a
    commit, which discards the pending transaction.

    Exits with status 1 when ``EXCEL_PATH`` does not exist.
    """
    print(f"[{datetime.now().isoformat()}] Starting LUK import...")

    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)

    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)
    master_conn = None
    try:
        ws = wb['KIT_CLUTCH']

        # Connect exactly once; a second connect would leak the first handle.
        master_conn = connect_master()

        # Pre-scan: determine most common name per SKU
        print("Pre-scanning SKUs...")
        sku_names = _most_common_names(ws)
        print(f" Found {len(sku_names)} unique SKUs")

        with master_conn.cursor() as master_cur:
            stats = _import_rows(ws, master_cur, sku_names)

        master_conn.commit()
    finally:
        if master_conn is not None:
            master_conn.close()
        # Read-only workbooks keep the file handle open until closed.
        wb.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
|
|
|
|
|
|
# Run the import only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|