#!/usr/bin/env python3
"""
Cross-brand cross-reference generator.

Finds parts from different manufacturers that cover the same vehicles and
creates bidirectional cross-references between them.
"""

import sqlite3
from collections import defaultdict
from itertools import combinations
from pathlib import Path

DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'

# Number of leading part-number characters compared to decide whether two
# parts come from the same brand (heuristic: same prefix => same brand).
_BRAND_PREFIX_LEN = 3

# Rows sent to SQLite per executemany() call; also the progress-report step.
_INSERT_BATCH_SIZE = 5000

_INSERT_XREF_SQL = (
    "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'Vehicle Fitment Match')"
)


def get_db(db_path=None):
    """Open the SQLite database and return a connection yielding ``sqlite3.Row`` rows.

    Args:
        db_path: Path of the database file. When ``None`` (the default) the
            module-level ``DB_PATH`` is used, looked up at call time.

    Returns:
        An open ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    conn.row_factory = sqlite3.Row
    return conn


def _load_group_vehicle_parts(cursor):
    """Map ``(group_id, model_year_engine_id)`` to a set of ``(part_id, oem_part_number)``."""
    cursor.execute("""
        SELECT vp.model_year_engine_id, vp.part_id, p.oem_part_number, p.group_id
        FROM vehicle_parts vp
        JOIN parts p ON vp.part_id = p.id
        WHERE p.group_id IS NOT NULL
        ORDER BY p.group_id, vp.model_year_engine_id
    """)
    group_mye_parts = defaultdict(set)
    for row in cursor:
        key = (row['group_id'], row['model_year_engine_id'])
        group_mye_parts[key].add((row['part_id'], row['oem_part_number']))
    return group_mye_parts


def _load_existing_xrefs(cursor):
    """Return the set of ``(part_id, cross_reference_number)`` pairs already stored."""
    cursor.execute("SELECT part_id, cross_reference_number FROM part_cross_references")
    return {(row['part_id'], row['cross_reference_number']) for row in cursor}


def _collect_new_xrefs(group_mye_parts, existing):
    """Build the list of cross-reference pairs that are not stored yet.

    For every (group, vehicle) bucket, each pair of parts whose part numbers
    do not share the same leading ``_BRAND_PREFIX_LEN`` characters produces
    two links: A -> B's number and B -> A's number.

    Args:
        group_mye_parts: Mapping of bucket key to a set of
            ``(part_id, oem_part_number)`` tuples.
        existing: Set of ``(part_id, cross_reference_number)`` pairs already
            present. It is updated in place with every pair returned, so the
            same link is never emitted twice.

    Returns:
        List of new ``(part_id, cross_reference_number)`` tuples.
    """
    new_xrefs = []
    for parts_set in group_mye_parts.values():
        # Parts without a part number cannot be sliced for the brand check and
        # would yield a cross-reference to nothing, so they are left out.
        # Sorting makes the insertion order reproducible between runs.
        candidates = sorted(part for part in parts_set if part[1])
        if len(candidates) < 2:
            continue

        for (pid_a, pn_a), (pid_b, pn_b) in combinations(candidates, 2):
            # Same part-number prefix is treated as "same brand": skip.
            if pn_a[:_BRAND_PREFIX_LEN] == pn_b[:_BRAND_PREFIX_LEN]:
                continue

            for link in ((pid_a, pn_b), (pid_b, pn_a)):
                if link not in existing:
                    existing.add(link)
                    new_xrefs.append(link)
    return new_xrefs


def _insert_xrefs(cursor, new_xrefs):
    """Insert the pairs in batches, printing progress between batches.

    Returns:
        The number of rows submitted for insertion.
    """
    total = len(new_xrefs)
    for start in range(0, total, _INSERT_BATCH_SIZE):
        if start:
            print(f" Insertando {start}/{total}...")
        cursor.executemany(
            _INSERT_XREF_SQL,
            new_xrefs[start:start + _INSERT_BATCH_SIZE],
        )
    return total


def main(db_path=None):
    """Generate and store cross-references between parts of different brands.

    Reads ``vehicle_parts`` / ``parts``, pairs up parts of the same group that
    fit the same vehicle, and inserts the missing links into
    ``part_cross_references``. Progress and a summary are printed to stdout.

    Args:
        db_path: Optional database file to operate on; defaults to ``DB_PATH``.
    """
    print("=" * 70)
    print("GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS")
    print("=" * 70)

    conn = get_db(db_path)
    # try/finally so the connection is released even when a query fails;
    # closing without commit discards any partially inserted rows.
    try:
        cursor = conn.cursor()

        # Cross-reference count before this run, for the summary.
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        existing_xrefs = cursor.fetchone()[0]
        print(f"\nCross-refs existentes: {existing_xrefs:,}")

        # Step 1: bucket parts by (part group, vehicle).
        print("\n[1/3] Buscando partes que cubren los mismos vehículos...")
        group_mye_parts = _load_group_vehicle_parts(cursor)
        print(f" Combinaciones grupo+vehículo: {len(group_mye_parts):,}")

        # Step 2: derive the missing cross-reference pairs.
        print("\n[2/3] Generando pares de cross-reference...")
        existing = _load_existing_xrefs(cursor)
        print(f" Cross-refs existentes en set: {len(existing):,}")
        new_xrefs = _collect_new_xrefs(group_mye_parts, existing)
        print(f" Nuevas cross-refs a crear: {len(new_xrefs):,}")

        # Step 3: persist them in a single transaction.
        print("\n[3/3] Insertando cross-references...")
        inserted = _insert_xrefs(cursor, new_xrefs)
        conn.commit()

        # Final stats.
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        total_xrefs = cursor.fetchone()[0]
    finally:
        conn.close()

    print("\n" + "=" * 70)
    print("CROSS-REFERENCES COMPLETADAS")
    print("=" * 70)
    print(f"""
 RESUMEN:
 - Cross-refs antes: {existing_xrefs:,}
 - Nuevas cross-refs: {inserted:,}
 - Total cross-refs: {total_xrefs:,}
 """)


if __name__ == '__main__':
    main()