#!/usr/bin/env python3
"""
Full importer for the Gonher 2022 filter catalog.

- Creates missing vehicles
- Creates Gonher filter parts
- Creates cross-references to other brands (AC Delco, Fram, etc.)
- Creates fitments (links parts to vehicles)
"""

import sqlite3
import re
from pathlib import Path
from collections import defaultdict

try:
    import pypdf
except ImportError:
    # Keep the pure parsing helpers importable without pypdf installed;
    # main() reports the missing dependency before it touches the PDF.
    pypdf = None

# Paths
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
PDF_PATH = '/tmp/filtros_catalog.pdf'

# Filter type configuration, keyed by the value returned by classify_filter()
FILTER_TYPES = {
    'ACEITE': {'category': 'Engine', 'group': 'Oil Filters', 'prefix': 'Oil Filter', 'prefix_es': 'Filtro de Aceite'},
    'SINTÉTICO': {'category': 'Engine', 'group': 'Oil Filters', 'prefix': 'Synthetic Oil Filter', 'prefix_es': 'Filtro de Aceite Sintético'},
    'AIRE': {'category': 'Engine', 'group': 'Air Filters', 'prefix': 'Air Filter', 'prefix_es': 'Filtro de Aire'},
    'COMB.': {'category': 'Fuel & Air', 'group': 'Fuel Filters', 'prefix': 'Fuel Filter', 'prefix_es': 'Filtro de Combustible'},
    'CABINA': {'category': 'Heat & Air Conditioning', 'group': 'Cabin Air Filters', 'prefix': 'Cabin Air Filter', 'prefix_es': 'Filtro de Cabina'}
}

# Known brands in catalog
CATALOG_BRANDS = [
    'ACURA', 'ALFA ROMEO', 'AUDI', 'BMW', 'BUICK', 'CADILLAC', 'CHEVROLET',
    'CHRYSLER', 'DODGE', 'FIAT', 'FORD', 'GMC', 'HONDA', 'HYUNDAI', 'INFINITI',
    'JAGUAR', 'JEEP', 'KIA', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES BENZ',
    'MERCURY', 'MINI', 'MITSUBISHI', 'NISSAN', 'PEUGEOT', 'PONTIAC', 'PORSCHE',
    'RAM', 'RENAULT', 'SEAT', 'SMART', 'SUBARU', 'SUZUKI', 'TOYOTA',
    'VOLKSWAGEN', 'VOLVO'
]

# Cross-reference brands
XREF_BRANDS = ['AC DELCO', 'FRAM', 'INTERFIL', 'MANN', 'MOTORCRAFT']

# Column-header words that must never be mistaken for a model name
_NON_MODEL_WORDS = ('ACEITE', 'AIRE', 'COMB', 'CABINA', 'SINTÉTICO', 'AÑO', 'MOTOR')


def get_db():
    """Open the vehicle database with rows addressable by column name."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of manufacturer *name*, inserting it if missing.

    The lookup is case-insensitive; the extra attributes are only used when
    a new row has to be inserted.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    row = cursor.fetchone()
    if row:
        return row['id']

    cursor.execute(
        """
        INSERT INTO manufacturers (name, type, quality_tier, country)
        VALUES (?, ?, ?, ?)
        """,
        (name, type_, quality, country),
    )
    return cursor.lastrowid


def ensure_brand(cursor, name):
    """Return the id of brand *name* (case-insensitive), inserting it if missing."""
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    row = cursor.fetchone()
    if row:
        return row['id']

    cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
    return cursor.lastrowid


def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if missing."""
    cursor.execute(
        """
        SELECT id FROM models
        WHERE brand_id = ? AND UPPER(name) = UPPER(?)
        """,
        (brand_id, name),
    )
    row = cursor.fetchone()
    if row:
        return row['id']

    cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
    return cursor.lastrowid


def ensure_year(cursor, year):
    """Return the id of the row for *year*, inserting it if missing."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    row = cursor.fetchone()
    if row:
        return row['id']

    cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
    return cursor.lastrowid


def ensure_engine(cursor, name):
    """Return the id of engine *name* (exact match), inserting it if missing.

    When inserting, cylinder count and displacement (in cc) are parsed from
    names shaped like ``L4-2.0L`` or ``V6-3.5L``; both stay ``None`` when the
    name does not follow that pattern. The fuel type is derived from keywords
    in the name and defaults to ``'gasoline'``.
    """
    cursor.execute("SELECT id FROM engines WHERE name = ?", (name,))
    row = cursor.fetchone()
    if row:
        return row['id']

    displacement = None
    cylinders = None

    # Patterns like "L4-2.0L" or "V6-3.5L": layout letter, cylinders, litres
    match = re.match(r'([LV])(\d+)[-]?([\d.]+)L?', name)
    if match:
        cylinders = int(match.group(2))
        try:
            # round() rather than int(): avoids truncating float noise
            displacement = round(float(match.group(3)) * 1000)
        except ValueError:
            # e.g. "2.0.1" satisfies the regex but is not a number
            displacement = None

    # Fuel type values are lowercase to match the DB constraint.
    # 'TURBO' is a modifier, not a fuel type, so it falls through to gasoline.
    upper_name = name.upper()
    if 'DIESEL' in upper_name or 'DIÉSEL' in upper_name:
        fuel_type = 'diesel'
    elif 'ELECTRIC' in upper_name:
        fuel_type = 'electric'
    elif 'HYBRID' in upper_name:
        fuel_type = 'hybrid'
    else:
        fuel_type = 'gasoline'

    cursor.execute(
        """
        INSERT INTO engines (name, displacement_cc, cylinders, fuel_type)
        VALUES (?, ?, ?, ?)
        """,
        (name, displacement, cylinders, fuel_type),
    )
    return cursor.lastrowid


def ensure_mye(cursor, model_id, year_id, engine_id):
    """Return the id of the model/year/engine row, inserting it if missing."""
    cursor.execute(
        """
        SELECT id FROM model_year_engine
        WHERE model_id = ? AND year_id = ? AND engine_id = ?
        """,
        (model_id, year_id, engine_id),
    )
    row = cursor.fetchone()
    if row:
        return row['id']

    cursor.execute(
        """
        INSERT INTO model_year_engine (model_id, year_id, engine_id)
        VALUES (?, ?, ?)
        """,
        (model_id, year_id, engine_id),
    )
    return cursor.lastrowid


def get_or_create_group(cursor, category_name, group_name):
    """Return the id of part group *group_name* inside *category_name*.

    The group is inserted when missing (its Spanish name is set to the same
    text as its name). Categories are never created here: returns ``None``
    when *category_name* does not exist.
    """
    cursor.execute("SELECT id FROM part_categories WHERE name = ?", (category_name,))
    cat_row = cursor.fetchone()
    if not cat_row:
        return None
    category_id = cat_row['id']

    cursor.execute(
        """
        SELECT id FROM part_groups
        WHERE category_id = ? AND name = ?
        """,
        (category_id, group_name),
    )
    group_row = cursor.fetchone()
    if group_row:
        return group_row['id']

    cursor.execute(
        """
        INSERT INTO part_groups (category_id, name, name_es)
        VALUES (?, ?, ?)
        """,
        (category_id, group_name, group_name),
    )
    return cursor.lastrowid


def parse_year_range(year_str):
    """Expand ``"2015"`` or ``"2010-2015"`` into an ascending list of years.

    Ranges may use a hyphen or an en dash and may be written in either
    order. Returns an empty list when *year_str* is not recognized.
    """
    year_str = year_str.strip()

    if re.match(r'^\d{4}$', year_str):
        return [int(year_str)]

    match = re.match(r'(\d{4})\s*[-–]\s*(\d{4})', year_str)
    if match:
        start, end = int(match.group(1)), int(match.group(2))
        if start > end:
            start, end = end, start
        # Inclusive on both ends
        return list(range(start, end + 1))

    return []


def classify_filter(part_number):
    """Return the FILTER_TYPES key for a Gonher part number, or ``None``.

    Classification is by (case-insensitive) part-number prefix.
    """
    part_number = part_number.upper()

    if part_number.startswith('GPS-'):
        return 'SINTÉTICO'
    if part_number.startswith('GP-'):
        return 'ACEITE'
    # 'GA-' cannot collide with 'GAC-'/'GAVW-': the fourth character differs
    if part_number.startswith('GA-'):
        return 'AIRE'
    if part_number.startswith('GG-'):
        return 'COMB.'
    if part_number.startswith('GAC-'):
        return 'CABINA'
    if part_number.startswith('G-'):
        return 'ACEITE'  # Generic oil filter
    return None


def _looks_like_model(line):
    """Tell whether *line* is an all-caps model heading (not a header word or part number)."""
    if not re.match(r'^[A-Z][A-Z0-9\s\-/]+$', line):
        return False
    if line in _NON_MODEL_WORDS:
        return False
    # Part numbers such as "GP-45" are also all-caps; exclude them
    return not re.match(r'^G[APCS]?[-]?\d', line)


def _parse_data_line(line):
    """Parse a ``<year or range> <engine> <part numbers...>`` catalog row.

    Returns ``(years, motor, filters)`` where *filters* maps a filter type
    to its part number, or ``None`` when the line is not such a row.
    """
    match = re.match(r'^(\d{4}(?:\s*[-–]\s*\d{4})?)\s+(.+)$', line)
    if not match:
        return None
    year_str, rest = match.groups()

    motor_match = re.match(
        r'^([LV]\d+[-][\d.]+L(?:\s+(?:Turbo|TURBOCHARGED|diésel|ELECTRIC|HYBRID))?)\s*(.*)$',
        rest,
        re.IGNORECASE,
    )
    if not motor_match:
        return None
    motor = motor_match.group(1).strip()
    filters_str = motor_match.group(2).strip()

    filters = {}
    for raw in re.findall(r'G[A-Z]*[-]?[\dA-Z]+(?:\(\d+\))?', filters_str):
        # Drop the "(n)" suffix some part numbers carry in the catalog
        part_number = re.sub(r'\(\d+\)', '', raw)
        filter_type = classify_filter(part_number)
        if filter_type:
            # A later part of the same type on the row replaces the earlier one
            filters[filter_type] = part_number

    return parse_year_range(year_str), motor, filters


def extract_vehicle_entries(pdf):
    """Extract all vehicle entries from the catalog.

    *pdf* must expose ``pages``, each with an ``extract_text()`` method.
    Brand and model headings set the context for the data rows that follow.
    Returns one dict per (row, year) with keys ``brand``, ``model``,
    ``year``, ``motor`` and ``filters``; rows without a recognized filter
    are skipped.
    """
    entries = []
    current_brand = None
    current_model = None

    for page in pdf.pages:
        text = page.extract_text()
        if not text:
            continue

        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue

            # Skip header/footer
            if 'AÑO' in line and 'MOTOR' in line:
                continue
            if 'Los filtros Gonher' in line:
                continue

            # Brand heading: starts a new brand and resets the model
            if line in CATALOG_BRANDS:
                current_brand = line
                current_model = None
                continue

            # "<BRAND or MODEL> (Continúa)" headings restore context only
            if '(Continúa)' in line:
                match = re.match(r'^([A-Z][A-Z0-9\s\-]+)\s*\(Continúa\)', line)
                if match:
                    potential = match.group(1).strip()
                    if potential in CATALOG_BRANDS:
                        current_brand = potential
                    elif current_brand:
                        current_model = potential
                continue

            # Model heading
            if current_brand and _looks_like_model(line):
                current_model = line
                continue

            # Data row: only meaningful once brand and model are known
            if not (current_brand and current_model):
                continue
            parsed = _parse_data_line(line)
            if parsed is None:
                continue
            years, motor, filters = parsed
            if not filters:
                continue

            for year in years:
                entries.append({
                    'brand': current_brand,
                    'model': current_model,
                    'year': year,
                    'motor': motor,
                    'filters': dict(filters),  # one independent dict per entry
                })

    return entries


def extract_cross_references(pdf, start_page=117):
    """Extract cross-reference rows from the catalog.

    Scans pages from the zero-based index *start_page* onward (the
    cross-reference tables are typically at index 117 and later). A line
    equal to one of XREF_BRANDS sets the current brand; following lines of
    the form ``<other part number> <Gonher part number>`` become dicts with
    keys ``brand``, ``part_number`` and ``gonher_part``.
    """
    xrefs = []
    current_brand = None

    for i in range(start_page, len(pdf.pages)):
        text = pdf.pages[i].extract_text()
        if not text:
            continue

        for line in text.split('\n'):
            line = line.strip()

            # Brand header
            if line in XREF_BRANDS:
                current_brand = line
                continue

            # Cross-reference line
            if current_brand:
                match = re.match(r'^([A-Z0-9\-/]+)\s+(G[A-Z]*[-]?\d+[A-Z]*)$', line)
                if match:
                    xrefs.append({
                        'brand': current_brand,
                        'part_number': match.group(1),
                        'gonher_part': match.group(2)
                    })

    return xrefs


def _vehicle_key(entry):
    """Return the tuple that identifies the vehicle of a catalog entry."""
    return (entry['brand'], entry['model'], entry['year'], entry['motor'])


def _collect_unique_filters(vehicle_entries):
    """Map each part number to the filter type of its first occurrence."""
    unique_filters = {}
    for entry in vehicle_entries:
        for filter_type, part_num in entry['filters'].items():
            unique_filters.setdefault(part_num, filter_type)
    return unique_filters


def _import_vehicles(cursor, vehicle_entries):
    """Ensure a model_year_engine row exists for every distinct vehicle.

    Returns ``(mye_cache, created)``: the vehicle-key -> row id mapping and
    the number of vehicles that had to be created.
    """
    mye_cache = {}
    created = 0

    for entry in vehicle_entries:
        cache_key = _vehicle_key(entry)
        if cache_key in mye_cache:
            continue

        # Check if vehicle exists
        cursor.execute(
            """
            SELECT mye.id
            FROM model_year_engine mye
            JOIN models m ON mye.model_id = m.id
            JOIN brands b ON m.brand_id = b.id
            JOIN years y ON mye.year_id = y.id
            JOIN engines e ON mye.engine_id = e.id
            WHERE UPPER(b.name) = UPPER(?)
              AND UPPER(m.name) = UPPER(?)
              AND y.year = ?
              AND e.name = ?
            LIMIT 1
            """,
            cache_key,
        )
        existing = cursor.fetchone()
        if existing:
            mye_cache[cache_key] = existing['id']
            continue

        # Create vehicle
        brand_id = ensure_brand(cursor, entry['brand'])
        model_id = ensure_model(cursor, brand_id, entry['model'])
        year_id = ensure_year(cursor, entry['year'])
        engine_id = ensure_engine(cursor, entry['motor'])
        mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id, engine_id)
        created += 1

    return mye_cache, created


def _import_parts(cursor, unique_filters):
    """Ensure a parts row exists for every Gonher filter.

    Filters whose type is unknown or whose part category is missing from
    the database are skipped. Returns ``(filter_parts, created)``: the part
    number -> part id mapping and the number of parts inserted.
    """
    filter_parts = {}
    created = 0

    for part_num, filter_type in unique_filters.items():
        config = FILTER_TYPES.get(filter_type)
        if not config:
            continue

        group_id = get_or_create_group(cursor, config['category'], config['group'])
        if not group_id:
            continue

        # Check if part exists
        cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_num,))
        existing = cursor.fetchone()
        if existing:
            filter_parts[part_num] = existing['id']
            continue

        name = f"{config['prefix']} {part_num}"
        name_es = f"{config['prefix_es']} {part_num}"
        cursor.execute(
            """
            INSERT INTO parts (oem_part_number, name, name_es, group_id, description)
            VALUES (?, ?, ?, ?, ?)
            """,
            (part_num, name, name_es, group_id, f"Gonher {config['prefix']}"),
        )
        filter_parts[part_num] = cursor.lastrowid
        created += 1

    return filter_parts, created


def _import_fitments(cursor, vehicle_entries, mye_cache, filter_parts):
    """Link every imported part to its vehicles; return the number of new links."""
    created = 0

    for entry in vehicle_entries:
        mye_id = mye_cache.get(_vehicle_key(entry))
        if not mye_id:
            continue

        for filter_type, part_num in entry['filters'].items():
            part_id = filter_parts.get(part_num)
            if not part_id:
                continue

            # Check if fitment exists
            cursor.execute(
                """
                SELECT id FROM vehicle_parts
                WHERE model_year_engine_id = ? AND part_id = ?
                """,
                (mye_id, part_id),
            )
            if cursor.fetchone():
                continue

            cursor.execute(
                """
                INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes)
                VALUES (?, ?, 1, ?)
                """,
                (mye_id, part_id, f"Catálogo Gonher 2022 - {filter_type}"),
            )
            created += 1

    return created


def _import_cross_references(cursor, cross_refs, filter_parts):
    """Insert missing cross-references; return the number of rows inserted.

    Cross-references to Gonher parts that are not in the database are
    skipped.
    """
    created = 0

    for xref in cross_refs:
        gonher_part_id = filter_parts.get(xref['gonher_part'])
        if not gonher_part_id:
            # Not imported in this run; it may already be in the database
            cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (xref['gonher_part'],))
            row = cursor.fetchone()
            if not row:
                continue
            gonher_part_id = row['id']

        # Check if cross-reference exists
        cursor.execute(
            """
            SELECT id FROM part_cross_references
            WHERE part_id = ? AND cross_reference_number = ?
            """,
            (gonher_part_id, xref['part_number']),
        )
        if cursor.fetchone():
            continue

        # Map brand to reference type
        if xref['brand'] in ['AC DELCO', 'MOTORCRAFT']:
            ref_type = 'oem_alternate'
        else:
            ref_type = 'interchange'

        cursor.execute(
            """
            INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type)
            VALUES (?, ?, ?)
            """,
            (gonher_part_id, xref['part_number'], ref_type),
        )
        created += 1

    return created


def main():
    """Run the import: parse the PDF catalog, then populate the database.

    All database changes are committed together at the end; the connection
    is closed even if an error interrupts the import.
    """
    if pypdf is None:
        raise SystemExit("ERROR: se requiere el paquete 'pypdf' (pip install pypdf)")

    print("=" * 70)
    print("IMPORTADOR COMPLETO - CATÁLOGO GONHER 2022")
    print("=" * 70)

    # Read PDF
    print(f"\n[1/7] Leyendo PDF: {PDF_PATH}")
    pdf = pypdf.PdfReader(PDF_PATH)
    print(f" Total páginas: {len(pdf.pages)}")

    # Extract data
    print("\n[2/7] Extrayendo datos del catálogo...")
    vehicle_entries = extract_vehicle_entries(pdf)
    cross_refs = extract_cross_references(pdf)
    print(f" Entradas de vehículos: {len(vehicle_entries)}")
    print(f" Referencias cruzadas: {len(cross_refs)}")

    # Get unique filters
    unique_filters = _collect_unique_filters(vehicle_entries)
    print(f" Filtros únicos: {len(unique_filters)}")

    # Connect to database
    conn = get_db()
    try:
        cursor = conn.cursor()

        # Create manufacturers
        print("\n[3/7] Creando fabricantes...")
        manufacturers = {
            'Gonher': ensure_manufacturer(cursor, 'Gonher', 'aftermarket', 'standard', 'Mexico'),
            'AC Delco': ensure_manufacturer(cursor, 'AC Delco', 'oem', 'oem', 'USA'),
            'Fram': ensure_manufacturer(cursor, 'Fram', 'aftermarket', 'standard', 'USA'),
            'Interfil': ensure_manufacturer(cursor, 'Interfil', 'aftermarket', 'economy', 'Mexico'),
            'Mann': ensure_manufacturer(cursor, 'Mann', 'aftermarket', 'premium', 'Germany'),
            'Motorcraft': ensure_manufacturer(cursor, 'Motorcraft', 'oem', 'oem', 'USA'),
        }
        print(f" Fabricantes: {list(manufacturers.keys())}")

        # Create vehicles
        print("\n[4/7] Creando vehículos faltantes...")
        mye_cache, vehicles_created = _import_vehicles(cursor, vehicle_entries)
        print(f" Vehículos creados: {vehicles_created}")

        # Create filter parts
        print("\n[5/7] Creando partes de filtros...")
        filter_parts, parts_created = _import_parts(cursor, unique_filters)
        print(f" Partes creadas: {parts_created}")

        # Create fitments
        print("\n[6/7] Creando fitments (vehículo-parte)...")
        fitments_created = _import_fitments(cursor, vehicle_entries, mye_cache, filter_parts)
        print(f" Fitments creados: {fitments_created}")

        # Create cross-references
        print("\n[7/7] Creando referencias cruzadas...")
        xrefs_created = _import_cross_references(cursor, cross_refs, filter_parts)
        print(f" Referencias cruzadas creadas: {xrefs_created}")

        # Commit
        conn.commit()
    finally:
        # Closing without a commit discards the partial import
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN COMPLETADA")
    print("=" * 70)
    print(f"""
 RESUMEN:
 - Vehículos creados: {vehicles_created:,}
 - Partes creadas: {parts_created:,}
 - Fitments creados: {fitments_created:,}
 - Cross-refs creadas: {xrefs_created:,}
 - Fabricantes: {len(manufacturers)}
 """)


if __name__ == '__main__':
    main()