#!/usr/bin/env python3
"""
Import VAZLO catalog from Excel into supplier_catalog tables.

Usage:
    python scripts/import_vazlo_catalog.py
"""

import os
import re
import sys
from collections import defaultdict
from datetime import datetime

import psycopg2
from openpyxl import load_workbook

# DB connections
MASTER_DB_URL = os.environ.get('MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts')
TENANT_DB_URL = os.environ.get('TENANT_DB_URL', 'postgresql://postgres@localhost/tenant_refaccionaria_rached')

EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'VAZLO (1).xlsx')
SUPPLIER_NAME = 'VAZLO'
TENANT_ID = 31

# Zero-based column layout of every sheet, as read by this script:
# column 1 holds the SKU, columns 2..23 hold 11 (brand, part number) pairs,
# column 24 the piece name and column 25 the vehicle description.
SKU_COL = 1
FIRST_INTERCHANGE_COL = 2
INTERCHANGE_PAIRS = 11
NAME_COL = 24
CARRO_COL = 25

POS_KEYWORDS = {
    'DEL.', 'TRAS.', 'FRONT.', 'EXT.', 'IZQ.', 'DER.', 'RUEDA', 'CAJA',
    'INF.', 'SUP.', 'TRANS.', 'STD', 'AWD', '2/4WD', '4WD', 'FWD', 'RWD',
    '4X4', 'TURBO', 'GASOLINA', 'DIESEL',
    'DEL', 'TRAS', 'FRONT', 'EXT', 'IZQ', 'DER', 'INF', 'SUP', 'TRANS',
}

MULTI_WORD_MAKES = {
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('LAND', 'ROVER'): 'LAND ROVER',
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('AMERICAN', 'MOTORS'): 'AMERICAN MOTORS',
    ('ROLLS', 'ROYCE'): 'ROLLS ROYCE',
    ('ASTON', 'MARTIN'): 'ASTON MARTIN',
    ('GREAT', 'WALL'): 'GREAT WALL',
}

# Compiled once; both are applied to every data row.
_YEAR_RE = re.compile(r'^(19|20)\d{2}$')
_SKU_SUFFIX_RE = re.compile(r'^\d{3,4}$')

# `category` is part of the conflict target, so on conflict it already equals
# EXCLUDED.category and does not need to be reassigned.
UPSERT_CATALOG_SQL = """
    INSERT INTO supplier_catalog (supplier_name, sku, name, category, is_active)
    VALUES (%s, %s, %s, %s, true)
    ON CONFLICT (supplier_name, sku, category)
    DO UPDATE SET name = EXCLUDED.name, is_active = true
    RETURNING id
"""

INSERT_COMPAT_SQL = """
    INSERT INTO supplier_catalog_compat
        (catalog_id, make, model, year, engine, model_year_engine_id, source)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
"""

INSERT_INTERCHANGE_SQL = """
    INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
    VALUES (%s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def connect_master():
    """Open a new psycopg2 connection to MASTER_DB_URL."""
    return psycopg2.connect(MASTER_DB_URL)


def connect_tenant():
    """Open a new psycopg2 connection to TENANT_DB_URL."""
    return psycopg2.connect(TENANT_DB_URL)


def _cell(row, index):
    """Return row[index], or None when the row is missing or too short."""
    if row and index < len(row):
        return row[index]
    return None


def _cell_text(row, index):
    """Return the stripped string form of a cell, or '' when empty/missing."""
    value = _cell(row, index)
    return str(value).strip() if value else ''


def collect_all_skus(wb):
    """Pre-scan all SKUs to detect SKU-in-model cases.

    Walks every sheet of ``wb`` (skipping the first row of each, which is
    treated as a header) and returns the set of non-empty SKU strings found
    in the SKU column. Rows too short to contain that column are ignored
    instead of raising IndexError.
    """
    skus = set()
    for sheet_name in wb.sheetnames:
        ws = wb[sheet_name]
        for row in ws.iter_rows(min_row=2, values_only=True):
            sku = _cell_text(row, SKU_COL)
            if sku:
                skus.add(sku)
    return skus


def parse_carro(carro, all_skus):
    """
    Parse CARRO_PERTENECIENTE like:
        'ACURA TL DEL. 2015'
        'BMW X1 SDRIVE 20IA TRAS. 2018'
        'ACURA TL FRONT. DER. 2004'
        'AUDI 4000S CAJA 1980'
        'MERCEDES BENZ C350 E --'
        'ACURA TLX 3429'  (3429 is a SKU inserted into model)

    Parsing order: trailing year, trailing '--' marker, make (one token, or
    two when the pair is in MULTI_WORD_MAKES), trailing position keywords,
    then a trailing SKU token.

    Args:
        carro: Raw cell value; any falsy value yields an all-None result.
        all_skus: Collection of known SKU strings (see collect_all_skus).

    Returns dict with make, model, year, position, raw. ``year`` is an int
    or None; ``position`` is '' when no position keyword was found.
    """
    if not carro:
        return {'make': None, 'model': None, 'year': None, 'position': None, 'raw': carro}

    s = str(carro).strip()
    parts = s.split()
    if not parts:
        return {'make': None, 'model': None, 'year': None, 'position': None, 'raw': s}

    # Extract year from end
    year = None
    if _YEAR_RE.match(parts[-1]):
        year = int(parts.pop())

    # Remove trailing '--' (no-year marker)
    if parts and parts[-1] == '--':
        parts.pop()

    # Extract make (two tokens for known multi-word makes, otherwise one)
    make = parts[0] if parts else ''
    if len(parts) >= 2 and (parts[0].upper(), parts[1].upper()) in MULTI_WORD_MAKES:
        make = MULTI_WORD_MAKES[(parts[0].upper(), parts[1].upper())]
        parts = parts[2:]
    else:
        parts = parts[1:]

    # Extract position keywords from the end, preserving their order
    position_parts = []
    while parts and parts[-1].upper() in POS_KEYWORDS:
        position_parts.insert(0, parts.pop())

    # Remove a trailing SKU number that matches a known VAZLO SKU,
    # e.g. "ACURA TLX 3429" -> model="TLX".
    # Only strip when another model token remains: a purely numeric model
    # ("MAZDA 626") that happens to collide with a SKU must not be erased.
    if len(parts) > 1 and _SKU_SUFFIX_RE.match(parts[-1]) and parts[-1] in all_skus:
        parts.pop()

    return {
        'make': make,
        'model': ' '.join(parts),
        'year': year,
        'position': ' '.join(position_parts),
        'raw': s,
    }


def extract_interchanges(row):
    """Extract (brand, part_number) pairs from all 11 interchange columns.

    Pairs start at FIRST_INTERCHANGE_COL and alternate brand / part number.
    A pair is kept only when both values are non-empty after stripping;
    columns beyond the end of ``row`` are treated as empty.
    """
    interchanges = []
    for i in range(INTERCHANGE_PAIRS):
        brand_col = FIRST_INTERCHANGE_COL + i * 2
        brand = _cell_text(row, brand_col)
        pn = _cell_text(row, brand_col + 1)
        if brand and pn:
            interchanges.append((brand, pn))
    return interchanges


def normalize_name(name):
    """Clean up piece name: collapse whitespace, replace newlines."""
    if not name:
        return ''
    # str.split() with no argument splits on any whitespace run, newlines
    # included, so joining with single spaces does both jobs.
    return ' '.join(str(name).split())


def _import_sheet(cur, sheet_name, data_rows, all_skus, stats):
    """Import the data rows of one sheet through ``cur`` and update ``stats``.

    The sheet name is used as the catalog category. The caller is
    responsible for committing.
    """
    # Cache catalog_id per (sku, sheet_name) to avoid repeated upserts
    catalog_id_cache = {}

    for idx, row in enumerate(data_rows):
        if idx % 2000 == 0 and idx > 0:
            print(f"  ...{idx} rows processed")

        sku = _cell_text(row, SKU_COL)
        if not sku:
            stats['skipped_no_sku'] += 1
            continue

        name = normalize_name(_cell(row, NAME_COL))
        carro_raw = _cell_text(row, CARRO_COL)
        stats['rows'] += 1

        # Upsert catalog item (keyed by sku + category)
        cache_key = (sku, sheet_name)
        catalog_id = catalog_id_cache.get(cache_key)
        if catalog_id is None:
            cur.execute(UPSERT_CATALOG_SQL, (SUPPLIER_NAME, sku, name, sheet_name))
            catalog_id = cur.fetchone()[0]
            catalog_id_cache[cache_key] = catalog_id
            stats['catalog_items'] += 1

        if carro_raw:
            # Parse vehicle
            parsed = parse_carro(carro_raw, all_skus)
            stats['vehicles_parsed'] += 1

            # Insert compatibility (text-only, no MYE matching during import)
            cur.execute(INSERT_COMPAT_SQL, (
                catalog_id,
                parsed['make'],
                parsed['model'],
                parsed['year'],
                parsed['position'] or None,
                None,
                'import_text',
            ))
            stats['compat_rows'] += 1
        else:
            # No vehicle text: a compat row would carry only NULLs, so skip
            # it and record the fact instead.
            stats['skipped_no_carro'] += 1

        # Insert interchanges
        for brand, pn in extract_interchanges(row):
            cur.execute(INSERT_INTERCHANGE_SQL, (catalog_id, brand, pn))
            stats['interchange_rows'] += 1


def main():
    """Load the VAZLO workbook and import every sheet into the master DB.

    Exits with status 1 when the Excel file is missing. Each sheet is
    committed separately; if an error occurs, sheets already committed stay
    committed and the connection is closed without committing the rest.
    """
    print(f"[{datetime.now().isoformat()}] Starting VAZLO import...")

    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)

    print(f"Loading {EXCEL_PATH}...")
    wb = load_workbook(EXCEL_PATH, read_only=True, data_only=True)

    stats = {
        'sheets': 0,
        'rows': 0,
        'catalog_items': 0,
        'compat_rows': 0,
        'interchange_rows': 0,
        'vehicles_parsed': 0,
        'skipped_no_sku': 0,
        'skipped_no_carro': 0,
    }

    master_conn = None
    master_cur = None
    try:
        # Pre-scan SKUs for SKU-in-model detection
        print("Pre-scanning SKUs...")
        all_skus = collect_all_skus(wb)
        print(f"  Found {len(all_skus)} unique SKUs")

        master_conn = connect_master()
        master_cur = master_conn.cursor()

        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            rows = list(ws.iter_rows(values_only=True))
            if not rows:
                continue

            data_rows = rows[1:]  # first row is the header
            stats['sheets'] += 1
            print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")

            _import_sheet(master_cur, sheet_name, data_rows, all_skus, stats)

            # Commit per sheet
            master_conn.commit()
            print(f"  Sheet '{sheet_name}' committed.")
    finally:
        # Always release resources, including on error paths.
        if master_cur is not None:
            master_cur.close()
        if master_conn is not None:
            master_conn.close()
        wb.close()

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    print(f"Sheets processed:  {stats['sheets']}")
    print(f"Total rows read:   {stats['rows']}")
    print(f"Catalog items:     {stats['catalog_items']}")
    print(f"Compat rows:       {stats['compat_rows']}")
    print(f"Interchange rows:  {stats['interchange_rows']}")
    print(f"Vehicles parsed:   {stats['vehicles_parsed']}")
    print(f"Skipped (no SKU):  {stats['skipped_no_sku']}")
    print(f"Skipped (no car):  {stats['skipped_no_carro']}")


if __name__ == '__main__':
    main()