#!/usr/bin/env python3
"""
Import Raybestos catalog from Excel into supplier_catalog tables.

Usage:
    python scripts/import_raybestos_catalog.py
"""

import os
import re
import sys
from collections import Counter
from datetime import datetime

# NOTE: psycopg2 and openpyxl are imported lazily inside the functions that
# need them, so the pure parsing helpers in this module stay importable (and
# unit-testable) on machines without the database / Excel packages installed.

MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'RAYBESTOS.xlsx')

SUPPLIER_NAME = 'RAYBESTOS'
TENANT_ID = 31

# The worksheet name doubles as the catalog category for every imported item.
SHEET_NAME = 'Freno_de_disco'
CATEGORY = SHEET_NAME

# Zero-based column positions read from each worksheet row.
COL_SKU = 1
COL_NAME = 6
COL_VEHICLE = 7
# (brand column, part-number column) pairs for the two interchange slots.
INTERCHANGE_COLUMNS = ((2, 3), (4, 5))

KNOWN_MAKES = {
    'ACURA', 'ALFA', 'AMERICAN', 'ASTON', 'AUDI', 'BMW', 'BUICK', 'CADILLAC',
    'CHEVROLET', 'CHRYSLER', 'CITROEN', 'DAEWOO', 'DODGE', 'FIAT', 'FORD',
    'GMC', 'GREAT', 'HONDA', 'HYUNDAI', 'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP',
    'KIA', 'LAMBORGHINI', 'LAND', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES',
    'MERCURY', 'MINI', 'MITSUBISHI', 'NISSAN', 'PEUGEOT', 'PONTIAC', 'PORSCHE',
    'RENAULT', 'ROLLS', 'SATURN', 'SCION', 'SEAT', 'SKODA', 'SMART', 'SUBARU',
    'SUZUKI', 'TESLA', 'TOYOTA', 'VOLKSWAGEN', 'VOLSWAGEN', 'VOLVO', 'VW',
}

POS_KEYWORDS = {'DELANTERA', 'TRASERA', 'TAS', 'DEL', 'TRAS', 'FRONT', 'REAR', 'LAT', 'IZQ', 'DER'}
NOTE_KEYWORDS = {'LATIN', 'AMERICA', 'NACIONAL', 'USA', 'EUROPA', 'IMPORTADO'}

# First word -> second word of the makes that are written as two words.
_TWO_WORD_MAKES = {
    'ALFA': 'ROMEO',
    'MERCEDES': 'BENZ',
    'ROLLS': 'ROYCE',
    'LAND': 'ROVER',
    'GREAT': 'WALL',
    'AMERICAN': 'MOTORS',
    'ASTON': 'MARTIN',
}

# Common typos / abbreviations mapped to the make name that gets stored.
_MAKE_ALIASES = {
    'VOLSWAGEN': 'Volkswagen',
    'VW': 'Volkswagen',
}

# Abbreviated position keywords mapped to the stored value. Keywords that are
# not listed here are stored title-cased (e.g. 'DELANTERA' -> 'Delantera').
# NOTE(review): this means the same position can be stored with two different
# casings ('DELANTERA' vs 'Delantera'); kept as-is to match existing data.
_POSITION_ALIASES = {
    'TAS': 'TRASERA',
    'DEL': 'DELANTERA',
    'FRONT': 'DELANTERA',
    'TRAS': 'TRASERA',
    'REAR': 'TRASERA',
}

_FULL_YEAR_RE = re.compile(r'^(19|20)\d{2}$')
_YEAR_RANGE_RE = re.compile(r'^(\d{2})-(\d{2})$')
_SHORT_YEAR_RE = re.compile(r'^\d{2}$')

# Four columns, four placeholders: (supplier_name, sku, name, category).
UPSERT_CATALOG_SQL = """
    INSERT INTO supplier_catalog (supplier_name, sku, name, category)
    VALUES (%s, %s, %s, %s)
    ON CONFLICT (supplier_name, sku, category)
    DO UPDATE SET name = EXCLUDED.name, category = EXCLUDED.category
    RETURNING id
"""

INSERT_COMPAT_SQL = """
    INSERT INTO supplier_catalog_compat
        (catalog_id, make, model, year, engine, model_year_engine_id, source)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
"""

INSERT_INTERCHANGE_SQL = """
    INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
    VALUES (%s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def connect_master():
    """Open a new psycopg2 connection to the master database (MASTER_DB_URL)."""
    import psycopg2

    return psycopg2.connect(MASTER_DB_URL)


def connect_tenant():
    """Open a new psycopg2 connection to the tenant database (TENANT_DB_URL)."""
    import psycopg2

    return psycopg2.connect(TENANT_DB_URL)


def _cell(row, index):
    """Return row[index], or None when the row is missing or too short."""
    if not row or index >= len(row):
        return None
    return row[index]


def _text(value):
    """Return the stripped string form of a cell value, or '' if it is falsy."""
    return str(value).strip() if value else ''


def normalize_name(name):
    """Collapse all whitespace runs (including newlines) into single spaces.

    Returns '' for a falsy input.
    """
    if not name:
        return ''
    return ' '.join(str(name).split())


def parse_abbr_year(token):
    """Expand a two-digit year string into a four-digit year.

    '00'-'49' map to 2000-2049 and '50'-'99' map to 1950-1999. Returns None
    for an empty or non-numeric token, or for a number of 100 or more.
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
    # as superscript digits, which int() cannot convert.
    if not token or not token.isdecimal():
        return None
    n = int(token)
    if n < 50:
        return 2000 + n
    if n < 100:
        return 1900 + n
    return None


def extract_make(parts):
    """Return (make, make_len) if first words form a known make, else (None, 0).

    ``parts`` is the vehicle description split into words. ``make_len`` is the
    number of leading words consumed by the make (1 or 2). Known typos and
    abbreviations of Volkswagen are normalized to 'Volkswagen'; any other
    single-word make is returned exactly as it was written.

    NOTE(review): the first word of a two-word make (e.g. 'LAND') is accepted
    as a make on its own when the second word does not follow.
    """
    if not parts:
        return None, 0

    first = parts[0].upper()
    if first not in KNOWN_MAKES:
        return None, 0

    second = _TWO_WORD_MAKES.get(first)
    if second and len(parts) >= 2 and parts[1].upper() == second:
        return f'{first} {second}', 2

    alias = _MAKE_ALIASES.get(first)
    if alias:
        return alias, 1

    return parts[0], 1


def parse_raybestos(carro, last_make):
    """Parse one free-text vehicle cell into its components.

    Args:
        carro: raw vehicle description, e.g. 'FORD FOCUS 12-18 DEL'.
        last_make: make carried over from the previous row; used when this
            description does not start with a known make.

    Returns:
        (make, model, position, year, last_make). The first four are None when
        the description is empty. ``last_make`` is updated only when this
        description itself starts with a known make.
    """
    if not carro:
        return None, None, None, None, last_make

    parts = str(carro).split()
    if not parts:
        return None, None, None, None, last_make

    # A trailing four-digit year (19xx / 20xx) takes precedence.
    year = None
    if _FULL_YEAR_RE.match(parts[-1]):
        year = int(parts[-1])
        parts = parts[:-1]

    make, make_len = extract_make(parts)
    if make:
        last_make = make
        remaining = parts[make_len:]
    else:
        # Forward-fill the make from the previous row (may still be None).
        make = last_make
        remaining = parts[:]

    # Otherwise take the first abbreviated year or year range, e.g. 17-18.
    if year is None:
        for i, token in enumerate(remaining):
            range_match = _YEAR_RANGE_RE.match(token)
            if range_match:
                found = parse_abbr_year(range_match.group(2))  # use end year
            elif _SHORT_YEAR_RE.match(token):
                found = parse_abbr_year(token)
            else:
                continue
            if found is None:
                continue
            year = found
            remaining = remaining[:i] + remaining[i + 1:]
            break

    # Pull out position keywords, drop market notes, keep the rest as model.
    position = None
    model_words = []
    for word in remaining:
        upper = word.upper()
        if upper in POS_KEYWORDS:
            position = _POSITION_ALIASES.get(upper, upper.title())
        elif upper not in NOTE_KEYWORDS:
            model_words.append(word)

    return make, ' '.join(model_words), position, year, last_make


def extract_interchanges(row):
    """Extract (brand, part_number) pairs from 2 interchange columns.

    A pair is returned only when both its brand and part-number cells are
    present and non-empty after stripping.
    """
    interchanges = []
    for brand_col, part_col in INTERCHANGE_COLUMNS:
        brand = _text(_cell(row, brand_col))
        part_number = _text(_cell(row, part_col))
        if brand and part_number:
            interchanges.append((brand, part_number))
    return interchanges


def _best_names_by_sku(rows):
    """Return {sku: name} using the most frequent normalized name per SKU.

    On a tie, the name that appeared first in ``rows`` wins.
    """
    pair_counts = Counter()
    for row in rows:
        sku = _text(_cell(row, COL_SKU))
        name = normalize_name(_cell(row, COL_NAME))
        if sku and name:
            pair_counts[(sku, name)] += 1

    best = {}
    for (sku, name), count in pair_counts.items():
        if sku not in best or count > best[sku][1]:
            best[sku] = (name, count)
    return {sku: name for sku, (name, _count) in best.items()}


def _import_rows(rows, cur, sku_best_name):
    """Write catalog, compatibility and interchange rows through ``cur``.

    Does not commit; the caller owns the transaction. Returns the stats dict.
    """
    stats = {
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'forward_filled_make': 0,
    }
    catalog_id_cache = {}
    last_make = None

    for idx, row in enumerate(rows):
        if idx % 1000 == 0 and idx > 0:
            print(f" ...{idx} rows processed")

        sku = _text(_cell(row, COL_SKU))
        name = sku_best_name.get(sku, '')
        if not sku or not name:
            continue
        carro_raw = _text(_cell(row, COL_VEHICLE))

        stats['rows'] += 1

        # Upsert each SKU once per run and reuse its id afterwards.
        cache_key = (sku, CATEGORY)
        catalog_id = catalog_id_cache.get(cache_key)
        if catalog_id is None:
            cur.execute(UPSERT_CATALOG_SQL, (SUPPLIER_NAME, sku, name, CATEGORY))
            catalog_id = cur.fetchone()[0]
            catalog_id_cache[cache_key] = catalog_id
            stats['catalog_items'] += 1

        make, model, position, year, last_make = parse_raybestos(carro_raw, last_make)
        if make and carro_raw and not extract_make(carro_raw.split())[0]:
            stats['forward_filled_make'] += 1

        # Skip rows where no vehicle was parsed: an all-NULL compat row carries
        # no information, and NULL keys never trigger ON CONFLICT, so such rows
        # would pile up on every re-run.
        if make or model:
            stats['vehicles_parsed'] += 1
            # The parsed position is stored in the `engine` column.
            cur.execute(INSERT_COMPAT_SQL, (
                catalog_id, make, model, year, position, None, 'import_text',
            ))
            stats['compat_rows'] += 1

        for brand, part_number in extract_interchanges(row):
            cur.execute(INSERT_INTERCHANGE_SQL, (catalog_id, brand, part_number))
            stats['interchange_rows'] += 1

    return stats


def main():
    """Run the import: read the worksheet, write to the master DB, print stats.

    Exits with status 1 when the Excel file or the expected worksheet is
    missing. All database writes are committed in a single transaction; on any
    error the connection is closed without committing.
    """
    from openpyxl import load_workbook

    print(f"[{datetime.now().isoformat()}] Starting Raybestos import...")

    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)

    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)
    try:
        if SHEET_NAME not in wb.sheetnames:
            print(f"ERROR: worksheet {SHEET_NAME} not found in {EXCEL_PATH}")
            sys.exit(1)
        ws = wb[SHEET_NAME]

        master_conn = connect_master()
        try:
            master_cur = master_conn.cursor()
            try:
                # Pre-scan: determine most common name per SKU
                print("Pre-scanning SKUs...")
                sku_best_name = _best_names_by_sku(
                    ws.iter_rows(min_row=2, values_only=True)
                )
                print(f" Found {len(sku_best_name)} unique SKUs")

                stats = _import_rows(
                    ws.iter_rows(min_row=2, values_only=True),
                    master_cur,
                    sku_best_name,
                )
                master_conn.commit()
            finally:
                master_cur.close()
        finally:
            # Closing without a commit discards any pending changes.
            master_conn.close()
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Total rows read: {stats['rows']}")
    print(f"Catalog items: {stats['catalog_items']}")
    print(f"Compat rows: {stats['compat_rows']}")
    print(f"Interchange rows: {stats['interchange_rows']}")
    print(f"Vehicles parsed: {stats['vehicles_parsed']}")
    print(f"Forward-filled makes: {stats['forward_filled_make']}")


if __name__ == '__main__':
    main()