Files
Autoparts-DB/vehicle_database/scripts/create_cross_references.py
consultoria-as 7ecf1295a5 fix: performance improvements, shared UI, and cross-reference data quality
Backend (server.py):
- Fix N+1 query in /api/diagrams/<id>/parts with batch cross-ref query
- Add LIMIT safety nets to 15 endpoints (caps range from 50 to 5000 rows depending on data type)
- Add pagination to /api/vehicles, /api/model-year-engine, /api/vehicles/<id>/parts, /api/admin/export
- Optimize search_vehicles() by replacing the EXISTS subquery with a JOIN
- Restrict static route to /static/* subdir (security fix)
- Add detailed=true support to /api/brands and /api/models

Frontend:
- Extract shared CSS into shared.css (variables, reset, buttons, forms, scrollbar)
- Create shared nav.js component (logo + navigation links, auto-highlights)
- Update all 4 HTML pages to use shared CSS and nav
- Update JS to handle paginated API responses

Data quality:
- Fix cross-reference source field: map 72K records from catalog names to actual brands
- Fix aftermarket_parts manufacturer_id: correct 8K records with wrong brand attribution
- Delete 98MB backup file, orphan records, and garbage cross-references
- Add import scripts for DAR, FRAM, WIX, MOOG, Cartek catalogs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 03:09:22 +00:00

126 lines
3.9 KiB
Python

#!/usr/bin/env python3
"""
GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS
Encuentra partes de diferentes fabricantes que cubren los mismos vehículos
y crea referencias cruzadas bidireccionales entre ellas.
"""
import sqlite3
from pathlib import Path
from collections import defaultdict
# Location of the SQLite database file: it sits in the parent of the
# directory that contains this script.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
def get_db():
    """Open the database at DB_PATH and return the connection.

    The connection's row factory is set to ``sqlite3.Row`` so that result
    columns can be read by name as well as by position.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def main(db_path=None):
    """Create bidirectional cross-references between parts that fit the same vehicle.

    Parts that share a ``group_id`` and are linked to the same
    ``model_year_engine_id`` are paired with each other. For every pair whose
    part numbers do not start with the same three characters, a row is
    inserted into ``part_cross_references`` in each direction (A -> B's
    number and B -> A's number), unless that (part_id, number) row already
    exists. Progress and a final summary are printed to stdout.

    Args:
        db_path: Optional path of the SQLite database to work on. When
            omitted, the connection returned by ``get_db()`` is used.

    Raises:
        sqlite3.Error: Propagated unchanged if any query fails. The
            connection is still closed, and because the commit only happens
            after all inserts, no partial batch is persisted.
    """
    print("=" * 70)
    print("GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS")
    print("=" * 70)

    if db_path is None:
        conn = get_db()
    else:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row

    # try/finally guarantees the connection is released even when a query fails.
    try:
        cursor = conn.cursor()

        # Count the cross-references present before this run (for the summary).
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        existing_xrefs = cursor.fetchone()[0]
        print(f"\nCross-refs existentes: {existing_xrefs:,}")

        # Step 1: group parts by (part group, vehicle).
        print("\n[1/3] Buscando partes que cubren los mismos vehículos...")
        cursor.execute("""
            SELECT vp.model_year_engine_id, vp.part_id, p.oem_part_number, p.group_id
            FROM vehicle_parts vp
            JOIN parts p ON vp.part_id = p.id
            WHERE p.group_id IS NOT NULL
            ORDER BY p.group_id, vp.model_year_engine_id
        """)
        # (group_id, model_year_engine_id) -> {(part_id, part_number), ...}
        group_mye_parts = defaultdict(set)
        for row in cursor:
            part_number = row['oem_part_number']
            # A part without a number cannot be referenced by number, and a
            # NULL would break the prefix comparison below.
            if not part_number:
                continue
            key = (row['group_id'], row['model_year_engine_id'])
            group_mye_parts[key].add((row['part_id'], part_number))
        print(f" Combinaciones grupo+vehículo: {len(group_mye_parts):,}")

        # Step 2: build the list of cross-references that do not exist yet.
        print("\n[2/3] Generando pares de cross-reference...")
        cursor.execute("SELECT part_id, cross_reference_number FROM part_cross_references")
        existing = {(row['part_id'], row['cross_reference_number']) for row in cursor}
        print(f" Cross-refs existentes en set: {len(existing):,}")

        new_xrefs = []
        for parts_set in group_mye_parts.values():
            if len(parts_set) < 2:
                continue
            # Sorting makes the insertion order reproducible between runs.
            parts_list = sorted(parts_set)
            for i, (pid_a, pn_a) in enumerate(parts_list):
                for pid_b, pn_b in parts_list[i + 1:]:
                    # Heuristic: an identical 3-character prefix is treated
                    # as "same brand", so the pair is not cross-referenced.
                    if pn_a[:3] == pn_b[:3]:
                        continue
                    # Register both directions; `existing` also de-duplicates
                    # pairs that reappear under other vehicles.
                    for xref in ((pid_a, pn_b), (pid_b, pn_a)):
                        if xref not in existing:
                            new_xrefs.append(xref)
                            existing.add(xref)
        print(f" Nuevas cross-refs a crear: {len(new_xrefs):,}")

        # Step 3: insert in batches, reporting progress between batches.
        print("\n[3/3] Insertando cross-references...")
        batch_size = 5000
        for start in range(0, len(new_xrefs), batch_size):
            if start > 0:
                print(f" Insertando {start}/{len(new_xrefs)}...")
            cursor.executemany(
                "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'Vehicle Fitment Match')",
                new_xrefs[start:start + batch_size])
        inserted = len(new_xrefs)
        conn.commit()

        # Final stats
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        total_xrefs = cursor.fetchone()[0]
    finally:
        conn.close()

    print("\n" + "=" * 70)
    print("CROSS-REFERENCES COMPLETADAS")
    print("=" * 70)
    print(f"""
RESUMEN:
- Cross-refs antes: {existing_xrefs:,}
- Nuevas cross-refs: {inserted:,}
- Total cross-refs: {total_xrefs:,}
""")
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()