#!/usr/bin/env python3
"""
Import Keep Green (KG) catalog from Excel into supplier_catalog tables.

Usage:
    python scripts/import_keepgreen_catalog.py
"""

import os
import re
import sys
from collections import defaultdict
from contextlib import closing
from datetime import datetime

import psycopg2
from openpyxl import load_workbook

MASTER_DB_URL = os.environ.get(
    'MASTER_DB_URL', 'postgresql://postgres@localhost/nexus_autoparts'
)
EXCEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'KG (1).xlsx')
SUPPLIER_NAME = 'KEEP GREEN'

# Makes made of two words, keyed by their upper-cased (first, second) tokens.
MULTI_WORD_MAKES = {
    ('MERCEDES', 'BENZ'): 'MERCEDES BENZ',
    ('LAND', 'ROVER'): 'LAND ROVER',
    ('ALFA', 'ROMEO'): 'ALFA ROMEO',
    ('AMERICAN', 'MOTORS'): 'AMERICAN MOTORS',
    ('ROLLS', 'ROYCE'): 'ROLLS ROYCE',
    ('ASTON', 'MARTIN'): 'ASTON MARTIN',
    ('GREAT', 'WALL'): 'GREAT WALL',
}

# Inclusive bounds for a year to be accepted by expand_year().
MIN_YEAR = 1900
MAX_YEAR = 2100

_SINGLE_YEAR_RE = re.compile(r'^(19|20)\d{2}$')
# Two 2-4 digit numbers separated by a dash, a slash, or plain whitespace.
_YEAR_RANGE_RE = re.compile(r'^(\d{2,4})(?:\s*[-/]\s*|\s+)(\d{2,4})$')

UPSERT_CATALOG_SQL = """
    INSERT INTO supplier_catalog (supplier_name, sku, name, category, is_active)
    VALUES (%s, %s, %s, %s, true)
    ON CONFLICT (supplier_name, sku, category)
    DO UPDATE SET name = EXCLUDED.name, category = EXCLUDED.category, is_active = true
    RETURNING id
"""

# NOTE(review): PostgreSQL unique indexes treat NULLs as distinct by default,
# so rows with a NULL year or engine may not hit this conflict target when the
# import is re-run -- confirm how the index on this table is declared.
INSERT_COMPAT_SQL = """
    INSERT INTO supplier_catalog_compat
        (catalog_id, make, model, year, engine, model_year_engine_id, source)
    VALUES (%s, %s, %s, %s, %s, NULL, %s)
    ON CONFLICT (catalog_id, make, model, year, engine) DO NOTHING
"""

INSERT_INTERCHANGE_SQL = """
    INSERT INTO supplier_catalog_interchange (catalog_id, brand, part_number)
    VALUES (%s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def connect_master():
    """Open a new psycopg2 connection to the database at MASTER_DB_URL."""
    return psycopg2.connect(MASTER_DB_URL)


def normalize_name(name):
    """Return *name* as a string with newlines and whitespace runs collapsed.

    Falsy input (None, '', 0) yields the empty string.
    """
    if not name:
        return ''
    return ' '.join(str(name).replace('\n', ' ').split())


def parse_make(carro):
    """Extract the make from CARRO_PERTENECIENTE text.

    The make is the first whitespace-separated word, or the first two words
    when they form an entry of MULTI_WORD_MAKES. The result is always
    upper-case so that single-word makes agree with the multi-word table
    (and with the upper-cased MARCA column used by the importer).

    Returns None when *carro* is empty or contains only whitespace.
    """
    if not carro:
        return None
    parts = str(carro).split()
    if not parts:
        return None
    if len(parts) >= 2:
        key = (parts[0].upper(), parts[1].upper())
        if key in MULTI_WORD_MAKES:
            return MULTI_WORD_MAKES[key]
    return parts[0].upper()


def extract_interchanges(row):
    """Extract (brand, part_number) pairs from interchange columns.

    KG: interchanges start at col 5 (MARCA.1) through col 16 (INTERCAMBIO.5),
    laid out as six consecutive (brand, part number) column pairs. A pair is
    kept only when both cells are present and non-blank after stripping.
    Rows shorter than the expected layout are handled gracefully.
    """
    interchanges = []
    for marca_col in range(5, 17, 2):
        inter_col = marca_col + 1
        if inter_col >= len(row):
            # Neither this pair nor any later one has a part-number cell.
            break
        if not row[marca_col] or not row[inter_col]:
            continue
        brand = str(row[marca_col]).strip()
        pn = str(row[inter_col]).strip()
        if brand and pn:
            interchanges.append((brand, pn))
    return interchanges


def _normalize_two_digit_year(value):
    """Expand a two-digit year: 70-99 -> 19xx, 00-69 -> 20xx; others unchanged."""
    if value < 100:
        return 1900 + value if value >= 70 else 2000 + value
    return value


def expand_year(year_val):
    """Return list of integer years from a year value.

    Handles: 1998, 1998-1999, 98-99, 1998/1999, '1998 1999', 1998.0, etc.
    Two numbers are always read as an inclusive range, in either order.

    Returns [None] when the value is missing, unparseable, outside
    MIN_YEAR..MAX_YEAR, or describes a range longer than 100 years.
    """
    if year_val is None:
        return [None]
    s = str(year_val).strip()
    if not s:
        return [None]

    # Single 4-digit year
    if _SINGLE_YEAR_RE.match(s):
        return [int(s)]

    # Range: 1998-1999, 98-99, 1998/1999, '1998 1999'
    m = _YEAR_RANGE_RE.match(s)
    if m:
        start, end = sorted(_normalize_two_digit_year(int(g)) for g in m.groups())
        # Sanity: reject implausible endpoints (e.g. '199-200') and huge spans.
        if start < MIN_YEAR or end > MAX_YEAR or end - start > 100:
            return [None]
        return list(range(start, end + 1))

    # Plain number, e.g. '1998.0' from a numeric Excel cell.
    try:
        y = int(float(s))
    except (ValueError, OverflowError):
        # OverflowError covers 'inf'; ValueError covers junk text and 'nan'.
        return [None]
    return [y] if MIN_YEAR <= y <= MAX_YEAR else [None]


def _import_sheet(cur, sheet_name, data_rows, stats):
    """Import the data rows of one sheet through cursor *cur*.

    *sheet_name* is used as the catalog category and as the fallback item
    name. *stats* is a counter mapping updated in place. Nothing is committed
    here; the caller owns the transaction.
    """
    # sku -> catalog id for this sheet, so each item is upserted only once.
    catalog_id_cache = {}

    for idx, row in enumerate(data_rows):
        if idx % 2000 == 0 and idx > 0:
            print(f" ...{idx} rows processed")

        if not row or len(row) < 5 or not row[4]:
            stats['skipped_no_sku'] += 1
            continue

        make = str(row[0]).strip().upper() if row[0] else ''
        model = str(row[1]).strip() if row[1] else ''
        engine = normalize_name(row[2]) or None
        year_raw = row[3]
        sku = str(row[4]).strip()
        # A missing or blank name cell falls back to the sheet name.
        name = (normalize_name(row[17]) if len(row) > 17 else '') or sheet_name
        carro = str(row[18]).strip() if len(row) > 18 and row[18] else ''

        if not sku:
            stats['skipped_no_sku'] += 1
            continue
        if not make or not model:
            stats['skipped_no_vehicle'] += 1
            continue

        stats['rows'] += 1

        # Prefer the make parsed from CARRO_PERTENECIENTE (col 18); fall back
        # to the MARCA column (col 0) when that text yields nothing.
        # NOTE(review): an earlier comment claimed the opposite precedence --
        # confirm which source is meant to win.
        parsed_make = parse_make(carro) or make

        # Upsert catalog item (keyed by sku; category = sheet name)
        catalog_id = catalog_id_cache.get(sku)
        if catalog_id is None:
            cur.execute(UPSERT_CATALOG_SQL, (SUPPLIER_NAME, sku, name, sheet_name))
            row_result = cur.fetchone()
            catalog_id = row_result[0] if row_result else None
            catalog_id_cache[sku] = catalog_id
            stats['catalog_items'] += 1

        if catalog_id is None:
            stats['skipped_no_catalog'] += 1
            continue

        # Expand years and insert compat rows (counter tracks attempts, since
        # ON CONFLICT DO NOTHING may skip the actual insert).
        for year in expand_year(year_raw):
            cur.execute(INSERT_COMPAT_SQL, (
                catalog_id,
                parsed_make,
                model,
                year,
                engine,
                'import_text',
            ))
            stats['compat_rows'] += 1

        # Insert interchanges
        for brand, pn in extract_interchanges(row):
            cur.execute(INSERT_INTERCHANGE_SQL, (catalog_id, brand, pn))
            stats['interchange_rows'] += 1


def main():
    """Load the KG workbook and import every sheet, committing once per sheet.

    Exits with status 1 when the Excel file is missing. If a sheet fails, the
    exception propagates; sheets committed earlier stay committed, and the
    workbook, cursor and connection are still closed (closing the connection
    discards the failed sheet's uncommitted work).
    """
    print(f"[{datetime.now().isoformat()}] Starting Keep Green import...")

    if not os.path.exists(EXCEL_PATH):
        print(f"ERROR: Excel not found at {EXCEL_PATH}")
        sys.exit(1)

    print(f"Loading {EXCEL_PATH}...")
    stats = defaultdict(int)

    # Read-only workbooks keep the file open until close() is called.
    with closing(load_workbook(EXCEL_PATH, read_only=True, data_only=True)) as wb, \
            closing(connect_master()) as master_conn, \
            closing(master_conn.cursor()) as master_cur:
        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            rows = list(ws.iter_rows(values_only=True))
            if not rows:
                continue

            data_rows = rows[1:]  # first row is the header
            stats['sheets'] += 1
            print(f"\nProcessing sheet '{sheet_name}' with {len(data_rows)} rows...")

            _import_sheet(master_cur, sheet_name, data_rows, stats)

            master_conn.commit()
            print(f" Sheet '{sheet_name}' committed.")

    print(f"\n{'='*60}")
    print("IMPORT COMPLETE")
    print(f"{'='*60}")
    for k, v in sorted(stats.items()):
        print(f"{k:25s}: {v}")


if __name__ == '__main__':
    main()