#!/usr/bin/env python3
"""
Import LUK catalog from Excel into supplier_catalog tables.

Usage:
    python scripts/import_luk_catalog.py
"""

import os
import re
import sys
from collections import Counter
from datetime import datetime

import psycopg2
from openpyxl import load_workbook

MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
# TENANT_DB_URL / TENANT_ID / connect_tenant() are defined but not used by main().
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'LUK.xlsx')
SUPPLIER_NAME = 'LUK'
TENANT_ID = 31

# Worksheet that is read, and the category stored for every imported item.
SHEET_NAME = 'KIT_CLUTCH'
CATEGORY = 'KIT_CLUTCH'

# Zero-based column positions inside each worksheet row tuple.
SKU_COL = 1
NAME_COL = 10
VEHICLE_COL = 11

# Makes spelled with two tokens, keyed by their upper-cased token pair.
MULTI_WORD_MAKES = {
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('MG', 'ROVER'): 'MG ROVER',
}

# Tokens dropped from the model text when they appear after the make.
NOTE_KEYWORDS = {
    'VOLANTE', 'SÓLIDO', 'SOLIDO', 'TIPO', 'CAJA', 'PLANO',
    'ESCALÓN', 'ESCALON', 'MOTOR', 'EMBRAGUE', 'DOBLE', 'HUMEDO',
}

# A whole token that looks like a year: 19xx or 20xx.
_YEAR_RE = re.compile(r'^(19|20)\d{2}$')

# Four columns, four placeholders: the parameter tuple passed to execute()
# must match the number of %s markers exactly.
_UPSERT_CATALOG_SQL = """
    INSERT INTO supplier_catalog (supplier_name, sku, name, category)
    VALUES (%s, %s, %s, %s)
    ON CONFLICT (supplier_name, sku, category)
    DO UPDATE SET name = EXCLUDED.name, category = EXCLUDED.category
    RETURNING id
"""

# NOTE(review): engine is always inserted as NULL and year may be NULL. With a
# plain unique index PostgreSQL treats NULLs as distinct, so this ON CONFLICT
# target may not de-duplicate on re-runs -- confirm the index definition.
_INSERT_COMPAT_SQL = """
    INSERT INTO supplier_catalog_compat
        (catalog_id, make, model, year, engine, model_year_engine_id, source)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
"""

_INSERT_INTERCHANGE_SQL = """
    INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
    VALUES (%s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def connect_master():
    """Open a new psycopg2 connection to MASTER_DB_URL."""
    return psycopg2.connect(MASTER_DB_URL)


def connect_tenant():
    """Open a new psycopg2 connection to TENANT_DB_URL."""
    return psycopg2.connect(TENANT_DB_URL)


def normalize_name(name):
    """Return *name* as a string with newlines and whitespace runs collapsed.

    Falsy input (None, '', 0) yields ''.
    """
    if not name:
        return ''
    return ' '.join(str(name).replace('\n', ' ').split())


def _cell(row, index):
    """Return row[index], or None when the row is missing or too short."""
    if not row or index >= len(row):
        return None
    return row[index]


def _clean_text(value):
    """Return the stripped string form of *value*, or '' for falsy values."""
    return str(value).strip() if value else ''


def parse_luk(carro):
    """Parse CARRO_PERTENECIENTE into make, model, year.

    Returns a ``(make, model, year)`` tuple. ``year`` is an int taken from the
    last token that looks like 19xx/20xx, or None when there is none. Empty
    or whitespace-only input returns ``(None, None, None)``.
    """
    if not carro:
        return None, None, None
    parts = str(carro).split()
    if not parts:
        return None, None, None

    # Extract year (last occurrence of 19xx or 20xx)
    year = None
    year_idx = None
    for i, token in enumerate(parts):
        if _YEAR_RE.match(token):
            year = int(token)
            year_idx = i

    # Extract make: one token unless it is a known multi-token make
    make = parts[0]
    make_len = 1
    if len(parts) >= 2:
        key2 = (parts[0].upper(), parts[1].upper())
        if key2 in MULTI_WORD_MAKES:
            make = MULTI_WORD_MAKES[key2]
            make_len = 2
        elif (len(parts) >= 3 and key2[0] == 'CHRYSLER'
              and parts[1] == '/' and parts[2].upper() == 'DODGE'):
            make = 'CHRYSLER / DODGE'
            make_len = 3

    # Everything after the make except the year token itself
    if year_idx is not None:
        remaining = parts[make_len:year_idx] + parts[year_idx + 1:]
    else:
        remaining = parts[make_len:]

    # Clean note keywords
    cleaned = [p for p in remaining if p.upper() not in NOTE_KEYWORDS]
    model = ' '.join(cleaned)

    # If empty after cleaning, use original remaining text
    if not model and remaining:
        model = ' '.join(remaining)

    return make, model, year


def extract_interchanges(row):
    """Extract (brand, part_number) pairs from 4 interchange columns.

    Brand columns are at indexes 2, 4, 6, 8 and the matching part numbers at
    3, 5, 7, 9. A pair is kept only when both cells are non-empty after
    stripping; columns beyond the end of a short row are treated as empty.
    """
    interchanges = []
    for i in range(4):
        brand = _clean_text(_cell(row, 2 + i * 2))
        pn = _clean_text(_cell(row, 3 + i * 2))
        if brand and pn:
            interchanges.append((brand, pn))
    return interchanges


def _most_common_names(rows):
    """Map each SKU to the name it appears with most often in *rows*.

    Ties keep the name that was seen first.
    """
    counts = Counter()
    for row in rows:
        sku = _clean_text(_cell(row, SKU_COL))
        name = normalize_name(_cell(row, NAME_COL))
        if sku and name:
            counts[(sku, name)] += 1

    best = {}
    for (sku, name), count in counts.items():
        # Strict '>' so the first-seen name wins a tie.
        if sku not in best or count > best[sku][1]:
            best[sku] = (name, count)
    return {sku: name for sku, (name, _count) in best.items()}


def _import_rows(ws, cur, sku_best_name):
    """Write catalog, compat and interchange rows for every sheet row.

    Executes statements on *cur* without committing; returns the stats dict.
    """
    stats = {
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
    }
    catalog_id_cache = {}

    for idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True)):
        if idx % 1000 == 0 and idx > 0:
            print(f" ...{idx} rows processed")

        sku = _clean_text(_cell(row, SKU_COL))
        name = sku_best_name.get(sku, '')
        if not sku or not name:
            continue
        stats['rows'] += 1

        # Upsert each (sku, category) once per run and reuse its id.
        cache_key = (sku, CATEGORY)
        catalog_id = catalog_id_cache.get(cache_key)
        if catalog_id is None:
            cur.execute(_UPSERT_CATALOG_SQL, (SUPPLIER_NAME, sku, name, CATEGORY))
            catalog_id = cur.fetchone()[0]
            catalog_id_cache[cache_key] = catalog_id
            stats['catalog_items'] += 1

        make, model, year = parse_luk(_clean_text(_cell(row, VEHICLE_COL)))
        # A row without vehicle text parses to all-None; inserting it would
        # create an empty compat row that ON CONFLICT can never match, so a
        # new copy would be added on every run.
        if make is not None:
            stats['vehicles_parsed'] += 1
            cur.execute(_INSERT_COMPAT_SQL, (
                catalog_id, make, model, year, None, None, 'import_text',
            ))
            # Counts attempted inserts; conflicts skipped by the DB still count.
            stats['compat_rows'] += 1

        for brand, pn in extract_interchanges(row):
            cur.execute(_INSERT_INTERCHANGE_SQL, (catalog_id, brand, pn))
            stats['interchange_rows'] += 1

    return stats


def _print_summary(stats):
    """Print the end-of-run counters."""
    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")


def main():
    """Load the LUK workbook and import it into the master database.

    Exits with status 1 when the Excel file is missing. All inserts run in
    one transaction: it is committed after the last row, or rolled back if
    any statement raises. The connection and workbook are always closed.
    """
    print(f"[{datetime.now().isoformat()}] Starting LUK import...")

    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)

    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)
    try:
        ws = wb[SHEET_NAME]

        master_conn = connect_master()
        try:
            master_cur = master_conn.cursor()
            try:
                # Pre-scan: determine most common name per SKU
                print("Pre-scanning SKUs...")
                sku_best_name = _most_common_names(
                    ws.iter_rows(min_row=2, values_only=True)
                )
                print(f" Found {len(sku_best_name)} unique SKUs")

                stats = _import_rows(ws, master_cur, sku_best_name)
            finally:
                master_cur.close()
            master_conn.commit()
        except Exception:
            master_conn.rollback()
            raise
        finally:
            master_conn.close()
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()

    _print_summary(stats)


if __name__ == '__main__':
    main()