fix: performance improvements, shared UI, and cross-reference data quality
Backend (server.py):
- Fix N+1 query in /api/diagrams/<id>/parts with batch cross-ref query
- Add LIMIT safety nets to 15 endpoints (50-5000 per data type)
- Add pagination to /api/vehicles, /api/model-year-engine, /api/vehicles/<id>/parts, /api/admin/export
- Optimize search_vehicles() EXISTS subquery to JOIN
- Restrict static route to /static/* subdir (security fix)
- Add detailed=true support to /api/brands and /api/models

Frontend:
- Extract shared CSS into shared.css (variables, reset, buttons, forms, scrollbar)
- Create shared nav.js component (logo + navigation links, auto-highlights)
- Update all 4 HTML pages to use shared CSS and nav
- Update JS to handle paginated API responses

Data quality:
- Fix cross-reference source field: map 72K records from catalog names to actual brands
- Fix aftermarket_parts manufacturer_id: correct 8K records with wrong brand attribution
- Delete 98MB backup file, orphan records, and garbage cross-references
- Add import scripts for DAR, FRAM, WIX, MOOG, Cartek catalogs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
125
vehicle_database/scripts/create_cross_references.py
Normal file
125
vehicle_database/scripts/create_cross_references.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS
|
||||
Encuentra partes de diferentes fabricantes que cubren los mismos vehículos
|
||||
y crea referencias cruzadas bidireccionales entre ellas.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
|
||||
|
||||
|
||||
def get_db():
    """Open the SQLite database located at DB_PATH.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``,
        so result columns can be read by name as well as by index.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def main():
    """Generate bidirectional cross-references between parts sharing a fitment.

    Parts are bucketed by (part group, model_year_engine). Inside each bucket
    every pair of parts whose part numbers do not share their first three
    characters gets an A->B and a B->A row in ``part_cross_references``,
    unless that exact (part_id, cross_reference_number) row already exists.
    All inserts are committed in a single transaction and a summary is
    printed to stdout.
    """
    # Local import: the file's import header does not bring in itertools.
    from itertools import combinations

    print("=" * 70)
    print("GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS")
    print("=" * 70)

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Cross-reference count before this run, for the final summary.
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        existing_xrefs = cursor.fetchone()[0]
        print(f"\nCross-refs existentes: {existing_xrefs:,}")

        # Step 1: bucket parts by (group_id, model_year_engine_id).
        print("\n[1/3] Buscando partes que cubren los mismos vehículos...")
        cursor.execute("""
            SELECT vp.model_year_engine_id, vp.part_id, p.oem_part_number, p.group_id
            FROM vehicle_parts vp
            JOIN parts p ON vp.part_id = p.id
            WHERE p.group_id IS NOT NULL
            ORDER BY p.group_id, vp.model_year_engine_id
        """)

        group_mye_parts = defaultdict(set)
        for row in cursor:
            key = (row['group_id'], row['model_year_engine_id'])
            group_mye_parts[key].add((row['part_id'], row['oem_part_number']))

        print(f" Combinaciones grupo+vehículo: {len(group_mye_parts):,}")

        # Step 2: derive the missing cross-reference pairs.
        print("\n[2/3] Generando pares de cross-reference...")

        # Existing (part_id, number) pairs, for O(1) duplicate checks.
        cursor.execute("SELECT part_id, cross_reference_number FROM part_cross_references")
        existing = {(row['part_id'], row['cross_reference_number']) for row in cursor}
        print(f" Cross-refs existentes en set: {len(existing):,}")

        new_xrefs = []
        for parts_set in group_mye_parts.values():
            if len(parts_set) < 2:
                continue

            for (pid_a, pn_a), (pid_b, pn_b) in combinations(parts_set, 2):
                # A missing part number cannot be used as a reference, and
                # slicing None below would raise TypeError.
                if not pn_a or not pn_b:
                    continue

                # Same 3-character prefix is treated as "same brand": skip.
                if pn_a[:3] == pn_b[:3]:
                    continue

                # Record both directions, skipping rows that already exist
                # (in the table or earlier in this run).
                for pair in ((pid_a, pn_b), (pid_b, pn_a)):
                    if pair not in existing:
                        new_xrefs.append(pair)
                        existing.add(pair)

        print(f" Nuevas cross-refs a crear: {len(new_xrefs):,}")

        # Step 3: insert in batches; progress is reported between batches.
        print("\n[3/3] Insertando cross-references...")
        batch_size = 5000
        total_new = len(new_xrefs)
        for start in range(0, total_new, batch_size):
            if start:
                print(f" Insertando {start}/{total_new}...")
            cursor.executemany(
                "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'Vehicle Fitment Match')",
                new_xrefs[start:start + batch_size])
        inserted = total_new

        conn.commit()

        # Final stats
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        total_xrefs = cursor.fetchone()[0]
    finally:
        # Always release the connection; uncommitted work is discarded if an
        # exception interrupted the run.
        conn.close()

    print("\n" + "=" * 70)
    print("CROSS-REFERENCES COMPLETADAS")
    print("=" * 70)
    print(
        "\nRESUMEN:\n"
        f"- Cross-refs antes: {existing_xrefs:,}\n"
        f"- Nuevas cross-refs: {inserted:,}\n"
        f"- Total cross-refs: {total_xrefs:,}\n"
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
179
vehicle_database/scripts/extract_moog_diagrams.py
Normal file
179
vehicle_database/scripts/extract_moog_diagrams.py
Normal file
@@ -0,0 +1,179 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
EXTRACTOR DE IMÁGENES DE DIAGRAMAS MOOG
|
||||
Extrae las ilustraciones de suspensión/dirección de los PDFs MOOG
|
||||
y las guarda como archivos de imagen mapeados a sus figure codes.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import io
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
import pypdf
|
||||
|
||||
OUTPUT_DIR = Path(__file__).parent.parent.parent / 'dashboard' / 'static' / 'diagrams' / 'moog'
|
||||
|
||||
VOLUMES = {
|
||||
'1': {
|
||||
'path': '/tmp/catalogs/suspension/moog_vol1_1989back.pdf',
|
||||
'start_page': 3,
|
||||
'end_page': 1037,
|
||||
'label': 'Vol 1 (≤1989)',
|
||||
},
|
||||
'2': {
|
||||
'path': '/tmp/catalogs/suspension/moog_vol2_1990_2005.pdf',
|
||||
'start_page': 6,
|
||||
'end_page': 1641,
|
||||
'label': 'Vol 2 (1990-2005)',
|
||||
},
|
||||
'3': {
|
||||
'path': '/tmp/catalogs/suspension/moog_vol3_2006up.pdf',
|
||||
'start_page': 7,
|
||||
'end_page': 1089,
|
||||
'label': 'Vol 3 (2006+)',
|
||||
},
|
||||
}
|
||||
|
||||
FIGURE_RE = re.compile(r'\b([FSR]\d{3})\b')


def extract_figure_codes(text):
    """Return the distinct figure codes found in *text*, in first-seen order.

    A figure code is one of the letters F, S or R followed by exactly three
    digits, delimited by word boundaries on both sides.
    """
    # dict keys keep insertion order, so fromkeys() drops repeats while
    # preserving the position of each code's first occurrence.
    return list(dict.fromkeys(hit.group(1) for hit in FIGURE_RE.finditer(text)))
|
||||
|
||||
|
||||
# Images at or below this many bytes are ignored.
# NOTE(review): presumably this filters out small non-diagram images such as
# logos or icons -- confirm against the catalogs.
_MIN_IMAGE_BYTES = 5000


def _guess_image_extension(img_data):
    """Return 'png', 'jp2' or 'jpg' (the default) from the leading magic bytes."""
    magic = img_data[:4]
    if magic == b'\x89PNG':
        return 'png'
    if magic == b'\x00\x00\x00\x0c':
        return 'jp2'
    return 'jpg'


def _collect_page_images(page):
    """Return the bytes of every image on *page* larger than _MIN_IMAGE_BYTES.

    ``page.images`` is tried first. If it raises, everything it yielded so far
    is discarded and the page's /XObject resources are scanned instead, so the
    same image can never be collected twice.
    """
    try:
        images = []
        for img in page.images:
            img_data = img.data
            if len(img_data) > _MIN_IMAGE_BYTES:
                images.append(img_data)
        return images
    except Exception:
        # Deliberate best-effort: fall through to the XObject scan below.
        pass

    images = []
    try:
        resources = page['/Resources']
        if '/XObject' not in resources:
            return images
        xobjects = resources['/XObject'].get_object()
        for obj_name in sorted(xobjects.keys()):
            xobj = xobjects[obj_name].get_object()
            if xobj.get('/Subtype') != '/Image':
                continue
            width = int(xobj.get('/Width', 0))
            height = int(xobj.get('/Height', 0))
            if width <= 200 or height <= 100:
                continue
            try:
                img_data = xobj.get_data()
            except Exception:
                # One unreadable image must not discard the rest of the page.
                continue
            if len(img_data) > _MIN_IMAGE_BYTES:
                images.append(img_data)
    except Exception:
        # Best-effort: keep whatever was collected before the failure.
        pass
    return images


def extract_volume(vol_key, already_extracted):
    """Extract diagram images from one MOOG volume.

    Args:
        vol_key: Key into VOLUMES selecting the PDF and its page range.
        already_extracted: Set of figure codes that already have an image
            file. Updated in place with every code written by this call.

    Returns:
        Number of image files written. 0 if the volume's PDF does not exist.
    """
    vol = VOLUMES[vol_key]
    print(f"\n--- Procesando {vol['label']} ---")
    print(f" PDF: {vol['path']}")

    # A missing catalog should not abort the remaining volumes of the run.
    if not Path(vol['path']).is_file():
        print(f" PDF no encontrado, saltando: {vol['path']}")
        return 0

    pdf = pypdf.PdfReader(vol['path'])
    total_pages = len(pdf.pages)
    # Clamp the configured range to the pages the PDF actually has.
    end_page = min(vol['end_page'], total_pages - 1)

    extracted = 0
    skipped = 0
    errors = 0

    for page_idx in range(vol['start_page'], end_page + 1):
        if page_idx % 100 == 0:
            print(f" Página {page_idx}/{end_page}... (extraídas: {extracted})")

        try:
            page = pdf.pages[page_idx]
            text = page.extract_text() or ''

            # Figure codes mentioned in this page's text.
            fig_codes = extract_figure_codes(text)
            if not fig_codes:
                continue

            # Only codes that do not have an image yet need work.
            needed_codes = [c for c in fig_codes if c not in already_extracted]
            if not needed_codes:
                skipped += len(fig_codes)
                continue

            images = _collect_page_images(page)
            if not images:
                continue

            # Codes and images are paired positionally. If there are fewer
            # images than codes the surplus codes get no file; surplus images
            # are ignored.
            for code, img_data in zip(needed_codes, images):
                out_path = OUTPUT_DIR / f"{code}.{_guess_image_extension(img_data)}"
                out_path.write_bytes(img_data)
                already_extracted.add(code)
                extracted += 1

        except Exception as e:
            # Count every failing page but only report the first five.
            errors += 1
            if errors <= 5:
                print(f" Error en página {page_idx}: {e}")

    print(f" Resultado: {extracted} extraídas, {skipped} ya existentes, {errors} errores")
    return extracted
|
||||
|
||||
|
||||
def main():
    """Extract MOOG diagram images for the requested volumes.

    Volume keys come from the command line; with no arguments volumes
    '3', '2' and '1' are processed in that order. Figure codes that already
    have a .jpg/.png/.jp2 file in OUTPUT_DIR are passed on as already
    extracted.
    """
    requested = sys.argv[1:] or ['3', '2', '1']
    banner = "=" * 70

    print(banner)
    print("EXTRACTOR DE DIAGRAMAS MOOG")
    print(banner)

    # Make sure the destination exists before scanning it.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Directorio de salida: {OUTPUT_DIR}")

    # Figure codes that already have an image file on disk.
    already_extracted = {
        entry.stem
        for entry in OUTPUT_DIR.iterdir()
        if entry.suffix in ('.jpg', '.png', '.jp2')
    }
    print(f"Ya extraídas: {len(already_extracted)}")

    total = 0
    for vol_key in requested:
        if vol_key in VOLUMES:
            total += extract_volume(vol_key, already_extracted)
        else:
            print(f"Volumen {vol_key} no reconocido, saltando...")

    print(f"\n{banner}")
    print(f"EXTRACCIÓN COMPLETADA: {total} nuevas imágenes")
    print(f"Total en directorio: {sum(1 for _ in OUTPUT_DIR.iterdir())}")
    print(banner)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
362
vehicle_database/scripts/import_cartek_catalog.py
Normal file
362
vehicle_database/scripts/import_cartek_catalog.py
Normal file
@@ -0,0 +1,362 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IMPORTADOR DEL CATÁLOGO CARTEK - FILTROS DE ACEITE
|
||||
Formato: Brand → Model | YearFrom | YearTo | CTK#### | Observations
|
||||
Solo aceite. PDF: /tmp/catalogs/cartek_aceite.pdf
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import pypdf
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
|
||||
PDF_PATH = '/tmp/catalogs/cartek_aceite.pdf'
|
||||
|
||||
# Known brand headers in the Cartek catalog
|
||||
BRAND_HEADERS = {
|
||||
'ACURA', 'ALFA ROMEO', 'AM GENERAL', 'AMERICAN MOTORS', 'ASTON MARTIN',
|
||||
'ASUNA', 'AUDI', 'AUSTIN', 'AUSTIN HEALEY', 'AVANTI', 'BAIC', 'BENTLEY',
|
||||
'BERTONE', 'BMW', 'BRICKLIN', 'BUICK', 'CADILLAC', 'CHECKER', 'CHEVROLET',
|
||||
'CHRYSLER', 'DAEWOO', 'DAIHATSU', 'DATSUN', 'DELOREAN', 'DESOTO',
|
||||
'DETOMASO', 'DODGE', 'EAGLE', 'EDSEL', 'EXCALIBUR', 'FAW', 'FIAT', 'FORD',
|
||||
'FREIGHTLINER', 'GEO', 'GMC', 'HILLMAN', 'HONDA', 'HUMMER', 'HYUNDAI',
|
||||
'IC CORPORATION', 'INFINITI', 'INTERNATIONAL', 'ISUZU', 'JAC', 'JAGUAR',
|
||||
'JEEP', 'JENSEN', 'KARMA', 'KIA', 'KUBOTA', 'LAFORZA', 'LAND ROVER',
|
||||
'LEXUS', 'LINCOLN', 'LOTUS', 'MACK', 'MAZDA', 'MERCEDES-BENZ', 'MERCURY',
|
||||
'MERKUR', 'MINI', 'MITSUBISHI', 'MORGAN', 'NISSAN', 'NSU', 'OLDSMOBILE',
|
||||
'OPEL', 'OSHKOSH MOTOR TRUCK CO.', 'PETERBILT', 'PEUGEOT', 'PLYMOUTH',
|
||||
'POLARIS', 'PONTIAC', 'PORSCHE', 'QVALE', 'RAM', 'RENAULT', 'ROLLS ROYCE',
|
||||
'SAAB', 'SATURN', 'SCION', 'SEAT', 'SHELBY', 'SMART', 'SRT',
|
||||
'STERLING TRUCK', 'STUDEBAKER', 'SUBARU', 'SUNBEAM', 'SUZUKI', 'TOYOTA',
|
||||
'TRIUMPH', 'VAM', 'VOLKSWAGEN', 'VOLVO', 'VPG', 'WORKHORSE',
|
||||
'WORKHORSE CUSTOM CHASSIS', 'YAMAHA', 'YUGO',
|
||||
}
|
||||
|
||||
|
||||
def get_db():
    """Open the SQLite database located at DB_PATH.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``,
        so result columns can be read by name as well as by index.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called *name*, inserting it if absent.

    The lookup compares names case-insensitively. ``type_``, ``quality`` and
    ``country`` are only used when a new row has to be inserted.
    """
    match = cursor.execute(
        "SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is not None:
        return match['id']

    new_row = (name, type_, quality, country)
    cursor.execute(
        "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid
|
||||
|
||||
|
||||
def ensure_brand(cursor, name):
    """Return the id of the brand called *name* (case-insensitive), inserting it if absent."""
    match = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is None:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if absent.

    The model name is matched case-insensitively within the given brand.
    """
    match = cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name),
    ).fetchone()
    if match is None:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting it if absent."""
    match = cursor.execute("SELECT id FROM years WHERE year = ?", (year,)).fetchone()
    if match is None:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def get_generic_engine(cursor):
    """Return the id of the engine named 'Generic', inserting it if absent.

    Used as the engine for catalogs that carry no engine data. A newly
    inserted row gets fuel_type 'gasoline'.
    """
    match = cursor.execute("SELECT id FROM engines WHERE name = 'Generic'").fetchone()
    if match is None:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a model_year_engine row, inserting one if absent.

    With an ``engine_id`` the lookup matches that exact engine. Without one,
    any existing row for the model/year is accepted; if none exists the new
    row is attached to the generic engine.
    """
    lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
    lookup_args = (model_id, year_id)
    if engine_id:
        lookup_sql += " AND engine_id = ?"
        lookup_args += (engine_id,)

    match = cursor.execute(lookup_sql, lookup_args).fetchone()
    if match is not None:
        return match['id']

    # Nothing found: fall back to the generic engine when none was supplied.
    target_engine = engine_id if engine_id else get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, target_engine))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by ``oem_part_number``, inserting it if absent.

    Returns:
        ``(part_id, created)`` where ``created`` is True only when a new row
        was inserted. An existing row is returned unchanged; the remaining
        arguments are only used for the insert.
    """
    match = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if match is not None:
        return match['id'], False

    new_row = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid, True
|
||||
|
||||
|
||||
def get_oil_filter_group(cursor):
    """Return the id of the 'Oil Filters' part group, creating it if possible.

    A missing group is created under the 'Engine' part category. When that
    category does not exist either, nothing is inserted and None is returned.
    """
    group = cursor.execute(
        "SELECT id FROM part_groups WHERE name = 'Oil Filters' LIMIT 1").fetchone()
    if group is not None:
        return group['id']

    category = cursor.execute(
        "SELECT id FROM part_categories WHERE name = 'Engine' LIMIT 1").fetchone()
    if category is None:
        return None

    cursor.execute(
        "INSERT INTO part_groups (category_id, name, name_es) VALUES (?, 'Oil Filters', 'Filtros de Aceite')",
        (category['id'],))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def parse_cartek_pdf(pdf_path):
    """Parse the Cartek oil filter catalog PDF.

    Pages from index 4 onward are read line by line. A line equal to one of
    BRAND_HEADERS switches the current brand. A data line has the shape
    ``Model YearFrom YearTo CTK#### [Observations]``. A non-data line that
    does not start with a digit or 'CTK' is remembered as a model-name prefix
    for the next data line on the same page.

    Args:
        pdf_path: Path of the catalog PDF.

    Returns:
        A list of dicts with keys 'brand', 'model', 'year', 'part_number'
        and 'observations', one per year of every parsed year range.
    """
    # Observations are optional: each line is stripped before matching, so a
    # row ending right after the part number has no trailing whitespace left.
    data_line_re = re.compile(
        r'^(.+?)\s+(\d{4})\s+(\d{4})\s+(CTK\w+)(?:\s+(.*))?$')
    page_number_re = re.compile(r'^\d{1,3}$')
    leading_digit_re = re.compile(r'^\d')

    pdf = pypdf.PdfReader(pdf_path)
    entries = []
    current_brand = None  # carries over from one page to the next

    for page_num in range(4, len(pdf.pages)):  # Skip cover/index pages
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue

        pending_model = None  # never carried across a page break

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Skip header/footer lines
            if 'Marca/Modelo' in line or 'Observaciones' in line:
                continue
            # Skip page numbers
            if page_number_re.match(line):
                continue

            # Brand header: switch brand and drop any dangling model prefix.
            if line in BRAND_HEADERS:
                current_brand = line
                pending_model = None
                continue

            # Data seen before the first brand header cannot be attributed.
            if not current_brand:
                continue

            match = data_line_re.match(line)
            if match:
                model = match.group(1).strip()
                if pending_model:
                    model = f"{pending_model} {model}"
                    pending_model = None

                year_from = int(match.group(2))
                year_to = int(match.group(3))
                part_number = match.group(4).strip()
                observations = (match.group(5) or '').strip()

                # One entry per model year, both ends of the range included.
                for year in range(year_from, year_to + 1):
                    entries.append({
                        'brand': current_brand,
                        'model': model,
                        'year': year,
                        'part_number': part_number,
                        'observations': observations,
                    })
            elif leading_digit_re.match(line) or line.startswith('CTK'):
                # Stray year / part-number fragment: not a model name.
                pending_model = None
            else:
                # Could be a model name prefix (like "Avalanche" before
                # "1500") or a sub-brand header we don't recognize.
                pending_model = line

    return entries
|
||||
|
||||
|
||||
def main():
    """Import the Cartek oil filter catalog into the vehicle database.

    Steps:
        1. Parse the PDF at PDF_PATH into per-year fitment entries.
        2. Ensure the 'Cartek' manufacturer and the oil filter group exist.
        3. Create one part per distinct Cartek part number.
        4. Create missing vehicles (model/year/engine rows) and fitments.
        5. Cross-reference each Cartek part, in both directions, with up to
           20 non-Cartek parts of the same group that fit a shared vehicle.

    Everything is committed once at the end. The connection is closed even
    when a step fails, in which case nothing is committed.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO CARTEK FILTROS DE ACEITE")
    print("=" * 70)

    print(f"\n[1/5] Leyendo PDF: {PDF_PATH}")
    entries = parse_cartek_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries)}")

    # Get unique parts and brands
    unique_parts = {e['part_number'] for e in entries}
    unique_brands = {e['brand'] for e in entries}
    print(f" Partes únicas: {len(unique_parts)}")
    print(f" Marcas de vehículos: {len(unique_brands)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Create Cartek manufacturer
        print("\n[2/5] Creando fabricante Cartek...")
        cartek_mfr_id = ensure_manufacturer(cursor, 'Cartek', 'aftermarket', 'standard', 'Mexico')
        print(f" Cartek manufacturer_id: {cartek_mfr_id}")

        # Get oil filter group
        oil_group_id = get_oil_filter_group(cursor)
        print(f" Oil Filters group_id: {oil_group_id}")

        # Create parts
        print("\n[3/5] Creando partes de filtros...")
        part_ids = {}
        parts_created = 0
        for pn in sorted(unique_parts):
            part_id, created = get_or_create_part(
                cursor, pn, oil_group_id,
                f"Oil Filter {pn}", f"Filtro de Aceite {pn}", "Cartek Oil Filter")
            part_ids[pn] = part_id
            if created:
                parts_created += 1
        print(f" Partes creadas: {parts_created}")
        print(f" Partes existentes: {len(unique_parts) - parts_created}")

        # Create vehicles and fitments
        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}  # (brand, model, year) -> model_year_engine id

        for entry in entries:
            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])

                # Try to find existing MYE (any engine)
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                       JOIN models m ON mye.model_id = m.id
                       JOIN brands b ON m.brand_id = b.id
                       JOIN years y ON mye.year_id = y.id
                       WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                       LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()

                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id)
                    vehicles_created += 1

            mye_id = mye_cache[cache_key]
            part_id = part_ids.get(entry['part_number'])
            if not part_id:
                continue

            # Insert the fitment only if this vehicle/part pair is new.
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = "Catálogo Cartek - ACEITE"
                # '-' is treated as "no observation".
                if entry['observations'] and entry['observations'] != '-':
                    notes += f" ({entry['observations']})"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1

        print(f" Vehículos creados: {vehicles_created}")
        print(f" Fitments creados: {fitments_created}")

        # Create cross-references by matching Cartek parts to existing parts
        # (Gonher, etc.) that fit the same vehicle
        print("\n[5/5] Creando referencias cruzadas...")
        xrefs_created = 0

        for pn, part_id in part_ids.items():
            # Find other parts in the same group that fit the same vehicles
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number NOT LIKE 'CTK%'
                LIMIT 20
            """, (part_id, part_id, part_id))

            # fetchall() is required: the same cursor is reused inside the loop.
            for row in cursor.fetchall():
                # Cartek -> other brand first, then the reverse direction.
                directions = (
                    (part_id, row['oem_part_number']),
                    (row['id'], pn),
                )
                for owner_id, xref_number in directions:
                    cursor.execute(
                        "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                        (owner_id, xref_number))
                    if not cursor.fetchone():
                        cursor.execute(
                            "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'Cartek Catalog')",
                            (owner_id, xref_number))
                        xrefs_created += 1

        print(f" Cross-refs creadas: {xrefs_created}")

        conn.commit()
    finally:
        # Release the database file even when a step above raised.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN CARTEK COMPLETADA")
    print("=" * 70)
    print(
        "\nRESUMEN:\n"
        f"- Partes creadas: {parts_created:,}\n"
        f"- Vehículos creados: {vehicles_created:,}\n"
        f"- Fitments creados: {fitments_created:,}\n"
        f"- Cross-refs creadas: {xrefs_created:,}\n"
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
680
vehicle_database/scripts/import_dar_catalog.py
Normal file
680
vehicle_database/scripts/import_dar_catalog.py
Normal file
@@ -0,0 +1,680 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IMPORTADOR DEL CATÁLOGO DAR "LÍNEA AZUL" 2020
|
||||
Formato: Brand → Model → AÑO DESCRIPCIÓN SKU #PÁG
|
||||
Pages 27-571 contain vehicle application data.
|
||||
PDF: /tmp/catalogs/suspension/catalogo_azul_2020.pdf
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import pypdf
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
|
||||
PDF_PATH = '/tmp/catalogs/suspension/catalogo_azul_2020.pdf'
|
||||
|
||||
# Page range (0-indexed) for vehicle application data
|
||||
START_PAGE = 27
|
||||
END_PAGE = 571
|
||||
|
||||
# Known brand headers in the DAR catalog
|
||||
DAR_BRANDS = {
|
||||
'ACURA', 'ALFA ROMEO', 'AUDI', 'BMW', 'BUICK', 'CADILLAC',
|
||||
'CHEVROLET, GMC', 'CHRYSLER', 'DATSUN', 'DODGE', 'EAGLE',
|
||||
'FIAT', 'FORD, MERCURY', 'GEO', 'HONDA', 'HUMMER', 'HYUNDAI',
|
||||
'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP', 'KIA',
|
||||
'LAND ROVER', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES-BENZ',
|
||||
'MERKUR', 'MINI', 'MITSUBISHI', 'NISSAN', 'OLDSMOBILE',
|
||||
'OPEL', 'PEUGEOT', 'PLYMOUTH', 'PONTIAC', 'PORSCHE',
|
||||
'RAM', 'RENAULT', 'SAAB', 'SATURN', 'SCION', 'SEAT', 'SMART',
|
||||
'SUBARU', 'SUZUKI', 'TOYOTA', 'TRIUMPH', 'VOLKSWAGEN',
|
||||
'VOLVO', 'VOLVO/MASA',
|
||||
}
|
||||
|
||||
# Year range regex: 2-digit or 4-digit years, or TODOS
|
||||
YEAR_RE = re.compile(r'^(\d{2,4})\s*-\s*(\d{2,4})\b')
|
||||
YEAR_SINGLE_RE = re.compile(r'^(\d{2,4})\b')
|
||||
TODOS_RE = re.compile(r'^TODOS\b', re.IGNORECASE)
|
||||
|
||||
# Line ending with SKU + page ref: ...SKU_TOKEN 3-4_DIGIT_PAGEREF
|
||||
ENTRY_END_RE = re.compile(r'^(.+?)\s+(\S+)\s+(\d{3,4})\s*$')
|
||||
|
||||
# Skip patterns
|
||||
SKIP_PATTERNS = [
|
||||
'Línea Azul',
|
||||
'CATALOGO AZUL',
|
||||
'AÑO DESCRIPCIÓN SKU #PÁG',
|
||||
'AÑO DESCRIPCIÓN SKU',
|
||||
'.indb',
|
||||
]
|
||||
|
||||
|
||||
def get_db():
    """Open the SQLite database located at DB_PATH.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``,
        so result columns can be read by name as well as by index.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called *name*, inserting it if absent.

    The lookup compares names case-insensitively. ``type_``, ``quality`` and
    ``country`` are only used when a new row has to be inserted.
    """
    match = cursor.execute(
        "SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is not None:
        return match['id']

    new_row = (name, type_, quality, country)
    cursor.execute(
        "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid
|
||||
|
||||
|
||||
def ensure_brand(cursor, name):
    """Return the id of the brand called *name* (case-insensitive), inserting it if absent."""
    match = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is None:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if absent.

    The model name is matched case-insensitively within the given brand.
    """
    match = cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name),
    ).fetchone()
    if match is None:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting it if absent."""
    match = cursor.execute("SELECT id FROM years WHERE year = ?", (year,)).fetchone()
    if match is None:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def get_generic_engine(cursor):
    """Return the id of the engine named 'Generic', inserting it if absent.

    A newly inserted row gets fuel_type 'gasoline'.
    """
    match = cursor.execute("SELECT id FROM engines WHERE name = 'Generic'").fetchone()
    if match is None:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return match['id']
|
||||
|
||||
|
||||
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a model_year_engine row, inserting one if absent.

    With an ``engine_id`` the lookup matches that exact engine. Without one,
    any existing row for the model/year is accepted; if none exists the new
    row is attached to the generic engine.
    """
    lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
    lookup_args = (model_id, year_id)
    if engine_id:
        lookup_sql += " AND engine_id = ?"
        lookup_args += (engine_id,)

    match = cursor.execute(lookup_sql, lookup_args).fetchone()
    if match is not None:
        return match['id']

    # Nothing found: fall back to the generic engine when none was supplied.
    target_engine = engine_id if engine_id else get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, target_engine))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by ``oem_part_number``, inserting it if absent.

    Returns:
        ``(part_id, created)`` where ``created`` is True only when a new row
        was inserted. An existing row is returned unchanged; the remaining
        arguments are only used for the insert.
    """
    match = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if match is not None:
        return match['id'], False

    new_row = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid, True
|
||||
|
||||
|
||||
# --- Group ID lookup cache ---
# Maps a part group's English name to its id (or None when no such group was
# found at first lookup).
_group_cache = {}


def get_group_id(cursor, name_en):
    """Return the id of the part group named *name_en*, or None if absent.

    The first answer for each name, including a miss, is memoized in
    ``_group_cache``; later calls for that name do not query the database.
    """
    try:
        return _group_cache[name_en]
    except KeyError:
        pass

    found = cursor.execute(
        "SELECT id FROM part_groups WHERE name = ?", (name_en,)).fetchone()
    group_id = None if found is None else found['id']
    _group_cache[name_en] = group_id
    return group_id
|
||||
|
||||
|
||||
def _dar_group_name(d):
    """Return the part-group name for an upper-cased DAR description ``d``.

    Rules are evaluated top to bottom and the first match wins, so the more
    specific keyword combinations must stay ahead of the generic ones.
    """
    # Amortiguadores (shocks).  'BASE'/'SOPORTE' descriptions are mounts, not
    # shocks, so they are excluded here and picked up by the mount rules below.
    if 'AMORTIGUADOR' in d and 'BASE' not in d and 'SOPORTE' not in d:
        if 'CAJUELA' in d or 'COFRE' in d or 'VIDRIO' in d:
            return 'Struts'  # trunk/hood/glass struts
        if 'DIRECCIÓN' in d or 'DIRECCION' in d:
            return 'Steering Dampers'
        return 'Shocks'

    # Base amortiguador (strut mounts)
    if 'BASE AMORTIGUADOR' in d:
        return 'Strut Mounts'

    # Balero (bearings) / Maza (wheel hubs)
    if 'BALERO' in d:
        return 'Wheel Bearings'
    if 'MAZA' in d:
        return 'Wheel Hubs'

    # Soportes (mounts)
    if 'SOPORTE DE MOTOR' in d or 'SOPORTE MOTOR' in d:
        return 'Engine Mounts'
    if 'SOPORTE DE TRANSMIS' in d or 'SOPORTE TRANSMIS' in d:
        return 'Transmission Mounts'
    if 'SOPORTE' in d and 'AMORTIGUADOR' in d:
        return 'Strut Mounts'
    if 'SOPORTE BRAZO' in d:
        return 'Idler Arms'

    # Rótula (ball joints)
    if 'RÓTULA' in d or 'ROTULA' in d:
        return 'Ball Joints'

    # Terminales (tie rods)
    if 'TERMINAL EXTERIOR' in d or 'TERMINAL DIREC' in d:
        return 'Tie Rod Ends'
    if 'TERMINAL INTERIOR' in d:
        return 'Inner Tie Rods'

    # Horquilla (control arms); this also covers 'BARRA ... HORQUILLA'.
    if 'HORQUILLA' in d:
        return 'Control Arms'

    # Sway-bar bushings and links, checked before the generic 'BUJE' rule.
    if ('GOMA' in d or 'BUJE' in d) and 'ESTABILIZADORA' in d:
        return 'Sway Bar Bushings'
    if 'TORNILLO ESTABILIZADOR' in d:
        return 'Sway Bar Links'

    # Buje (bushings) / Resorte (springs)
    if 'BUJE' in d:
        return 'Bushings'
    if 'RESORTE' in d:
        return 'Coil Springs'

    # Steering linkage
    if 'BRAZO AUXILIAR' in d:
        return 'Idler Arms'
    if 'PITMAN' in d:
        return 'Pitman Arms'
    if 'BARRA CENTRAL' in d or 'VARILLA CENTRAL' in d:
        return 'Center Links'
    if 'VARILLA' in d:
        return 'Drag Links'

    # Steering gear
    if 'CREMALLERA' in d:
        return 'Steering Racks'
    if 'BOMBA DIREC' in d:
        return 'Power Steering Pumps'
    if 'COPLE DIREC' in d or 'FLECTOR' in d:
        return 'Steering Gearboxes'
    if 'NUDO DIREC' in d:
        return 'Steering Knuckles'

    # Excéntrico (camber/caster)
    if 'EXCÉNTRICO' in d or 'EXCENTRICO' in d or 'CAMBER' in d:
        return 'Camber/Caster Kits'

    # Junta CV / Macheta / Flecha
    if 'JUNTA' in d and ('RUEDA' in d or 'CAJA' in d):
        return 'CV Joints'
    if 'MACHETA' in d or 'FLECHA' in d:
        return 'CV Axles'

    # Tirante (trailing arm)
    if 'TIRANTE' in d:
        return 'Trailing Arms'

    # Barra de torsión, accepted with or without the accent like the other
    # accented keywords above.
    if 'BARRA' in d and ('TORSIÓN' in d or 'TORSION' in d):
        return 'Torsion Bars'

    # Default: Ball Joints
    return 'Ball Joints'


def classify_description(cursor, desc):
    """Map DAR description text to a DB group_id.

    The description is upper-cased and matched against Spanish keywords; the
    resulting part-group name is resolved through ``get_group_id``.
    Descriptions that match no keyword fall back to the 'Ball Joints' group.

    Args:
        cursor: Database cursor passed through to ``get_group_id``.
        desc: Description text from the catalog.

    Returns:
        Whatever ``get_group_id`` returns for the selected group name.
    """
    return get_group_id(cursor, _dar_group_name(desc.upper()))
|
||||
|
||||
|
||||
# --- Part type name from description ---
# Ordered (Spanish fragment, English name) pairs.  The first fragment found in
# the upper-cased description wins, so a specific fragment must come before any
# shorter fragment it contains (e.g. 'BASE AMORTIGUADOR' before 'AMORTIGUADOR').
_DAR_NAME_TRANSLATIONS = (
    ('BASE AMORTIGUADOR', 'Strut Mount'),
    ('AMORTIGUADOR DELANTERO', 'Front Shock'),
    ('AMORTIGUADOR TRASERO', 'Rear Shock'),
    ('AMORTIGUADOR', 'Shock Absorber'),
    ('BALERO DOBLE', 'Double Bearing'),
    ('BALERO CONICO', 'Tapered Bearing'),
    ('BALERO', 'Wheel Bearing'),
    ('BOMBA DIREC', 'Power Steering Pump'),
    ('BRAZO AUXILIAR', 'Idler Arm'),
    ('BRAZO PITMAN', 'Pitman Arm'),
    ('BUJE', 'Bushing'),
    ('CREMALLERA', 'Steering Rack'),
    ('COPLE DIREC', 'Steering Coupler'),
    ('FLECTOR', 'Steering Flex Disc'),
    ('GOMA VARILLA ESTABILIZADORA', 'Sway Bar Bushing'),
    ('HORQUILLA INFERIOR', 'Lower Control Arm'),
    ('HORQUILLA SUPERIOR', 'Upper Control Arm'),
    ('HORQUILLA', 'Control Arm'),
    ('MAZA DELANTERA', 'Front Wheel Hub'),
    ('MAZA TRASERA', 'Rear Wheel Hub'),
    ('MAZA', 'Wheel Hub'),
    ('RESORTE DELANTERO', 'Front Coil Spring'),
    ('RESORTE TRASERO', 'Rear Coil Spring'),
    ('RESORTE', 'Coil Spring'),
    ('RÓTULA INFERIOR', 'Lower Ball Joint'),
    ('RÓTULA SUPERIOR', 'Upper Ball Joint'),
    ('ROTULA INFERIOR', 'Lower Ball Joint'),
    ('ROTULA SUPERIOR', 'Upper Ball Joint'),
    ('RÓTULA', 'Ball Joint'),
    ('ROTULA', 'Ball Joint'),
    ('SOPORTE DE MOTOR', 'Engine Mount'),
    ('SOPORTE DE TRANSMIS', 'Transmission Mount'),
    ('TERMINAL EXTERIOR', 'Outer Tie Rod End'),
    ('TERMINAL INTERIOR', 'Inner Tie Rod'),
    ('TERMINAL DIREC', 'Tie Rod End'),
    ('TIRANTE', 'Trailing Arm'),
    ('TORNILLO ESTABILIZADOR', 'Sway Bar Link'),
    ('VARILLA', 'Drag Link'),
    ('EXCÉNTRICO', 'Camber Kit'),
    ('EXCENTRICO', 'Camber Kit'),
)


def part_names_from_desc(desc, sku):
    """Generate English and Spanish names from DAR description.

    Args:
        desc: Spanish description text from the catalog (any letter case).
        sku: Part number appended to both names.

    Returns:
        ``(name_en, name_es)``.  ``name_es`` is always ``"<desc> <sku>"``.
        ``name_en`` is ``"<English part type> <sku>"`` for the first known
        fragment found in the description, or the Spanish name when no
        fragment matches.
    """
    name_es = f"{desc} {sku}"
    upper_desc = desc.upper()  # hoisted: invariant across the table scan
    for fragment, english in _DAR_NAME_TRANSLATIONS:
        if fragment in upper_desc:
            return f"{english} {sku}", name_es
    return name_es, name_es
|
||||
|
||||
|
||||
def convert_year(yy):
    """Expand a 2-digit year to 4 digits.

    Values 0-30 map to 2000-2030 and 31-99 map to 1931-1999.  Anything that
    converts to 100 or more is assumed to be a full year already and is
    returned unchanged.

    Args:
        yy: Year as an int or a string accepted by ``int()``.
    """
    value = int(yy)
    if value >= 100:
        return value  # already 4-digit
    century = 2000 if value <= 30 else 1900
    return century + value
|
||||
|
||||
|
||||
def is_skip_line(line):
    """Return True for lines the parser should ignore.

    A line is skipped when it contains any of the SKIP_PATTERNS substrings or
    when, once stripped, it is just a 1-3 digit number (a bare page number).
    """
    if any(pattern in line for pattern in SKIP_PATTERNS):
        return True
    # Pure page numbers
    return re.match(r'^\d{1,3}$', line.strip()) is not None
|
||||
|
||||
|
||||
def is_brand_line(line):
    """Return True when the stripped line is one of the DAR_BRANDS headers.

    The line is compared as written first and then upper-cased, so a header
    that differs from a DAR_BRANDS entry only by letter case still matches.
    """
    candidate = line.strip()
    if candidate in DAR_BRANDS:
        return True
    # Some brands have extra whitespace or minor case variations
    upper_candidate = candidate.upper()
    return any(upper_candidate == brand for brand in DAR_BRANDS)
|
||||
|
||||
|
||||
def parse_dar_pdf(pdf_path):
    """Parse the vehicle-application pages of the DAR "Catalogo Azul" PDF.

    Pages START_PAGE..END_PAGE (clamped to the document length) are read line
    by line while tracking the current brand header(s) and model name.  A line
    starting with a year range, or with the marker matched by TODOS_RE, opens
    a new entry; later lines are appended to it until it is flushed.  A flushed
    entry is split by ENTRY_END_RE into description and SKU and expanded into
    one record per current brand and per year of its range.

    Args:
        pdf_path: Path of the catalog PDF, handed to ``pypdf.PdfReader``.

    Returns:
        A list of dicts with keys 'brand', 'model', 'year', 'description'
        and 'sku'.
    """
    reader = pypdf.PdfReader(pdf_path)
    entries = []
    current_brands = []  # a list because a header may name several brands ("CHEVROLET, GMC")
    current_model = None

    # Accumulator for the (possibly multi-line) entry currently being read.
    entry_year_from = None
    entry_year_to = None
    entry_lines = []

    def flush_entry():
        """Emit records for the accumulated entry, then reset the accumulator."""
        nonlocal entry_year_from, entry_year_to, entry_lines
        if entry_lines and entry_year_from is not None:
            # The SKU (and a page reference, unused for the import) sit at the
            # end of the joined text.
            parsed = ENTRY_END_RE.match(' '.join(entry_lines))
            if parsed:
                desc_text = parsed.group(1).strip()
                sku = parsed.group(2).strip()
                if sku and desc_text and current_model:
                    for brand_name in current_brands:
                        for year in range(entry_year_from, entry_year_to + 1):
                            entries.append({
                                'brand': brand_name,
                                'model': current_model,
                                'year': year,
                                'description': desc_text,
                                'sku': sku,
                            })
        entry_lines = []
        entry_year_from = None
        entry_year_to = None

    stop_page = min(END_PAGE + 1, len(reader.pages))
    for page_index in range(START_PAGE, stop_page):
        page_text = reader.pages[page_index].extract_text()
        if not page_text:
            continue

        for raw_line in page_text.split('\n'):
            line = raw_line.strip()
            if not line or is_skip_line(line):
                continue

            # Brand header: closes the open entry and resets the model.
            if is_brand_line(line):
                flush_entry()
                current_brands = [piece.strip() for piece in line.split(',')]
                current_model = None
                continue

            # Nothing below is meaningful until a brand has been seen.
            if not current_brands:
                continue

            range_match = YEAR_RE.match(line)
            todos_match = TODOS_RE.match(line)

            if range_match or todos_match:
                flush_entry()
                if todos_match:
                    # "TODOS" = all years, use a reasonable range
                    entry_year_from, entry_year_to = 1960, 2020
                    remainder = line[todos_match.end():].strip()
                else:
                    first = convert_year(range_match.group(1))
                    second = convert_year(range_match.group(2))
                    entry_year_from, entry_year_to = min(first, second), max(first, second)
                    remainder = line[range_match.end():].strip()
                if remainder:
                    entry_lines.append(remainder)
                continue

            # While an entry is open, any other line is a continuation of it.
            # NOTE(review): this appears to include model-name lines that follow
            # an entry directly - confirm against the catalog layout.
            if entry_year_from is not None:
                entry_lines.append(line)
                continue

            # Single year + data (rare).  Only reachable when YEAR_RE did not match.
            single_match = YEAR_SINGLE_RE.match(line)
            if single_match and len(line) > 4:
                digits = single_match.group(1)
                # Only a plausible 2-digit year counts (not a 4+ digit number)
                if int(digits) < 100 and len(digits) == 2:
                    flush_entry()
                    entry_year_from = entry_year_to = convert_year(digits)
                    remainder = line[single_match.end():].strip()
                    if remainder:
                        entry_lines.append(remainder)
                    continue

            # Otherwise the line is taken as a model name (minus a "(cont)" suffix).
            model_name = re.sub(r'\s*\(cont\)\s*$', '', line, flags=re.IGNORECASE).strip()
            if model_name and not model_name.startswith('AÑO') and len(model_name) > 1:
                flush_entry()
                current_model = model_name

    # Flush whatever entry is still open at end of input.
    flush_entry()
    return entries
|
||||
|
||||
|
||||
def _add_cross_reference(cursor, part_id, reference_number):
    """Insert an 'interchange' cross-reference unless it already exists.

    Returns True when a new row was inserted, False when it was already there.
    """
    cursor.execute(
        "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
        (part_id, reference_number))
    if cursor.fetchone():
        return False
    cursor.execute(
        "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'DAR Catalog')",
        (part_id, reference_number))
    return True


def main():
    """Import the DAR 'Línea Azul' 2020 catalog into the vehicle database.

    Steps: parse the PDF, ensure the DAR manufacturer exists, create parts and
    their aftermarket entries, create vehicles and fitments, then add
    bidirectional cross-references to other parts of the same group that fit
    the same vehicles.  All changes are committed once at the end; the
    connection is closed even when a step fails.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO DAR 'LÍNEA AZUL' 2020")
    print("=" * 70)

    print(f"\n[1/5] Leyendo PDF: {PDF_PATH}")
    entries = parse_dar_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries):,}")

    # First entry seen for each SKU; replaces a linear scan of `entries` per SKU.
    sample_by_sku = {}
    for e in entries:
        sample_by_sku.setdefault(e['sku'], e)

    unique_skus = set(sample_by_sku)
    unique_brands = set(e['brand'] for e in entries)
    unique_models = set((e['brand'], e['model']) for e in entries)
    print(f" SKUs únicos: {len(unique_skus):,}")
    print(f" Marcas de vehículos: {len(unique_brands):,}")
    print(f" Modelos únicos: {len(unique_models):,}")

    # Show sample entries
    print("\n Primeras 5 entradas:")
    for e in entries[:5]:
        print(f" {e['brand']} {e['model']} {e['year']} | {e['description']} | {e['sku']}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Create DAR manufacturer
        print("\n[2/5] Creando fabricante DAR...")
        dar_mfr_id = ensure_manufacturer(cursor, 'DAR', 'aftermarket', 'standard', 'Mexico')
        print(f" DAR manufacturer_id: {dar_mfr_id}")

        # Create parts
        print("\n[3/5] Creando partes...")
        part_ids = {}
        parts_created = 0
        for sku in sorted(unique_skus):
            sample = sample_by_sku[sku]
            group_id = classify_description(cursor, sample['description'])
            name_en, name_es = part_names_from_desc(sample['description'], sku)
            part_id, created = get_or_create_part(
                cursor, sku, group_id, name_en, name_es, 'DAR Línea Azul')
            part_ids[sku] = part_id
            if created:
                parts_created += 1

        print(f" Partes creadas: {parts_created:,}")
        print(f" Partes existentes: {len(unique_skus) - parts_created:,}")

        # Create aftermarket entries for DAR-specific parts
        print(" Creando aftermarket entries...")
        am_created = 0
        for sku in sorted(unique_skus):
            part_id = part_ids.get(sku)
            if not part_id:
                continue
            cursor.execute(
                "SELECT id FROM aftermarket_parts WHERE manufacturer_id = ? AND part_number = ?",
                (dar_mfr_id, sku))
            if not cursor.fetchone():
                name_en, name_es = part_names_from_desc(sample_by_sku[sku]['description'], sku)
                cursor.execute(
                    "INSERT INTO aftermarket_parts (oem_part_id, manufacturer_id, part_number, name, name_es) VALUES (?, ?, ?, ?, ?)",
                    (part_id, dar_mfr_id, sku, name_en, name_es))
                am_created += 1
        print(f" Aftermarket entries creadas: {am_created:,}")

        # Create vehicles and fitments
        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}  # (brand, model, year) -> model_year_engine id

        for i, entry in enumerate(entries):
            if i % 10000 == 0 and i > 0:
                print(f" Procesando {i:,}/{len(entries):,}...")

            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])

                # Try to find existing MYE
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                    JOIN models m ON mye.model_id = m.id
                    JOIN brands b ON m.brand_id = b.id
                    JOIN years y ON mye.year_id = y.id
                    WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                    LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()

                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id)
                    vehicles_created += 1

            mye_id = mye_cache[cache_key]
            part_id = part_ids.get(entry['sku'])
            if not part_id:
                continue

            # Check if fitment exists
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = "Catálogo DAR Línea Azul 2020"
                if entry.get('description'):
                    notes += f" - {entry['description']}"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1

        print(f" Vehículos creados: {vehicles_created:,}")
        print(f" Fitments creados: {fitments_created:,}")

        # Cross-references: match DAR parts to other parts on the same vehicles
        print("\n[5/5] Creando referencias cruzadas...")
        xrefs_created = 0

        for sku, part_id in part_ids.items():
            # Find other parts in same group fitting same vehicles
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number != ?
                LIMIT 30
            """, (part_id, part_id, part_id, sku))

            # fetchall() materialises the result before the cursor is reused below.
            for row in cursor.fetchall():
                other_pn = row['oem_part_number']
                # Skip if same part number prefix pattern (same brand)
                if other_pn[:3] == sku[:3]:
                    continue

                # A -> B
                if _add_cross_reference(cursor, part_id, other_pn):
                    xrefs_created += 1
                # B -> A
                if _add_cross_reference(cursor, row['id'], sku):
                    xrefs_created += 1

        print(f" Cross-refs creadas: {xrefs_created:,}")

        conn.commit()
    finally:
        # Close even on failure; uncommitted work is discarded with the connection.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN DAR COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Aftermarket entries: {am_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
""")


if __name__ == '__main__':
    main()
|
||||
548
vehicle_database/scripts/import_fram_catalog.py
Normal file
548
vehicle_database/scripts/import_fram_catalog.py
Normal file
@@ -0,0 +1,548 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IMPORTADOR DEL CATÁLOGO FRAM 2017
|
||||
- Sección de vehículos livianos (páginas 3-87): Brand → Model + Motor + Dates + Filters
|
||||
- Sección de equivalencias (páginas 149-199): Competitor → FRAM mappings
|
||||
- Filtros: PH/CH = Aceite, CA/PA = Aire, G/P/PS = Combustible, CF/CFA = Cabina
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import pypdf
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# Database file sits one directory above this script's folder.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
PDF_PATH = '/tmp/catalogs/fram_2017.pdf'

# Filter type classification by part number prefix.
# Each value is (part group name, English part name, Spanish part name).
_AIR_FILTER = ('Air Filters', 'Air Filter', 'Filtro de Aire')
_FUEL_FILTER = ('Fuel Filters', 'Fuel Filter', 'Filtro de Combustible')
_CABIN_FILTER = ('Cabin Air Filters', 'Cabin Air Filter', 'Filtro de Cabina')

FILTER_PREFIXES = {
    'PH': ('Oil Filters', 'Oil Filter', 'Filtro de Aceite'),
    'CH': ('Oil Filters', 'Oil Filter Cartridge', 'Filtro de Aceite Cartucho'),
    'CA': _AIR_FILTER,
    'PA': _AIR_FILTER,
    'G': _FUEL_FILTER,
    'P': _FUEL_FILTER,
    'PS': _FUEL_FILTER,
    'CF': _CABIN_FILTER,
    'CFA': _CABIN_FILTER,
}

# FRAM part number pattern: a known prefix immediately followed by a digit,
# then any run of word characters or hyphens.
FRAM_PART_RE = re.compile(r'\b(CFA?\d[\w-]*|PH\d[\w-]*|CH\d[\w-]*|CA\d[\w-]*|PA\d[\w-]*|PS\d[\w-]*|G\d[\w-]*|P\d[\w-]*)\b')

# Known brands that appear as headers in the FRAM catalog
KNOWN_BRANDS = {
    'ACURA', 'ALEKO', 'ALFA ROMEO', 'ASIA MOTORS', 'ASTON MARTIN', 'AUDI',
    'BEDFORD', 'BENTLEY', 'BMW', 'BUICK',
    'CADILLAC', 'CHANA', 'CHERY', 'CHEVROLET', 'CHRYSLER', 'CITROEN',
    'DAEWOO', 'DACIA', 'DAIHATSU', 'DODGE',
    'EAGLE',
    'FAW', 'FIAT', 'FORD',
    'GALLOPER', 'GEO', 'GEELY', 'GREAT WALL',
    'HONDA', 'HUMMER', 'HYUNDAI',
    'INFINITI', 'ISUZU', 'IVECO',
    'JAC', 'JAGUAR', 'JEEP',
    'KIA',
    'LADA', 'LANCIA', 'LAND ROVER', 'LEXUS', 'LIFAN', 'LINCOLN', 'LOTUS',
    'MAHINDRA', 'MASERATI', 'MAZDA', 'MERCEDES BENZ', 'MERCURY', 'MG', 'MINI',
    'MITSUBISHI',
    'NISSAN',
    'OLDSMOBILE', 'OPEL',
    'PEUGEOT', 'PLYMOUTH', 'PONTIAC', 'PORSCHE',
    'RAM', 'RENAULT', 'ROVER',
    'SAAB', 'SAMSUNG', 'SATURN', 'SCION', 'SEAT', 'SKODA', 'SMART',
    'SSANGYONG', 'SUBARU', 'SUZUKI',
    'TATA', 'TOYOTA', 'TRIUMPH',
    'VAUXHALL', 'VOLKSWAGEN', 'VOLVO',
}
|
||||
|
||||
|
||||
def get_db():
    """Open the database at DB_PATH and return the connection.

    Rows are produced as ``sqlite3.Row`` so columns can be read by name.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called ``name``, creating it if needed.

    The lookup ignores letter case.  ``type_``, ``quality`` and ``country`` are
    only used when a new row is inserted; an existing row is left unchanged.

    Args:
        cursor: Database cursor whose rows can be indexed by column name.
        name: Manufacturer name.
        type_: Value for the ``type`` column of a new row.
        quality: Value for the ``quality_tier`` column of a new row.
        country: Value for the ``country`` column of a new row.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            (name, type_, quality, country))
        return cursor.lastrowid
    return existing['id']
|
||||
|
||||
|
||||
def ensure_brand(cursor, name):
    """Return the id of the vehicle brand ``name``, inserting it when absent.

    The lookup ignores letter case; a new row stores ``name`` as given.
    """
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return existing['id']
|
||||
|
||||
|
||||
def ensure_model(cursor, brand_id, name):
    """Return the id of model ``name`` under ``brand_id``, inserting it when absent.

    The name comparison ignores letter case and is scoped to the given brand.
    """
    cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return existing['id']
|
||||
|
||||
|
||||
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for ``year``, inserting it when absent."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return existing['id']
|
||||
|
||||
|
||||
def ensure_engine(cursor, name):
    """Return the id of the engine called ``name``, creating it when absent.

    For a new engine the displacement is taken from a "<digits>cc" fragment in
    the name (None when there is none), the cylinder count is stored as None,
    and the fuel type is 'diesel' when the lower-cased name contains 'diesel',
    'td', 'tdi' or 'jtd', otherwise 'gasoline'.

    Args:
        cursor: Database cursor whose rows can be indexed by column name.
        name: Engine name; matched exactly against ``engines.name``.
    """
    cursor.execute("SELECT id FROM engines WHERE name = ?", (name,))
    existing = cursor.fetchone()
    if existing:
        return existing['id']

    cc_match = re.search(r'(\d+)cc', name)
    displacement = int(cc_match.group(1)) if cc_match else None

    lowered = name.lower()
    diesel_markers = ('diesel', 'td', 'tdi', 'jtd')
    fuel_type = 'diesel' if any(marker in lowered for marker in diesel_markers) else 'gasoline'

    cursor.execute(
        "INSERT INTO engines (name, displacement_cc, cylinders, fuel_type) VALUES (?, ?, ?, ?)",
        (name, displacement, None, fuel_type))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def get_generic_engine(cursor):
    """Return the id of the 'Generic' placeholder engine, creating it on first use.

    A newly created row gets fuel type 'gasoline'.
    """
    cursor.execute("SELECT id FROM engines WHERE name = 'Generic'")
    generic = cursor.fetchone()
    if not generic:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return generic['id']
|
||||
|
||||
|
||||
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a ``model_year_engine`` row, creating one when absent.

    With ``engine_id`` the row must match model, year and engine.  Without it,
    any row for the model/year is reused, and a row created here points at the
    generic engine.

    Args:
        cursor: Database cursor whose rows can be indexed by column name.
        model_id: Id of the model.
        year_id: Id of the year.
        engine_id: Optional engine id.
    """
    conditions = "model_id = ? AND year_id = ?"
    params = (model_id, year_id)
    if engine_id:
        conditions += " AND engine_id = ?"
        params += (engine_id,)
    cursor.execute("SELECT id FROM model_year_engine WHERE " + conditions, params)

    existing = cursor.fetchone()
    if existing:
        return existing['id']

    if not engine_id:
        engine_id = get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_id))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def classify_filter(part_number):
    """Classify a FRAM filter by its part number prefix.

    Args:
        part_number: Part number in any letter case.

    Returns:
        The ``(group_name, name_en, name_es)`` tuple from FILTER_PREFIXES, or
        None when no prefix applies.
    """
    code = part_number.upper()

    # Multi-letter prefixes, longest first so 'CFA' is tried before 'CF'.
    multi_letter = ('CFA', 'CF', 'PS', 'PH', 'CH', 'CA', 'PA')
    matched = next((prefix for prefix in multi_letter if code.startswith(prefix)), None)
    if matched is not None:
        return FILTER_PREFIXES[matched]

    # Single-letter prefixes only count when a digit follows immediately.
    if re.match(r'^G\d', code):
        return FILTER_PREFIXES['G']
    if re.match(r'^P\d', code):
        return FILTER_PREFIXES['P']
    return None
|
||||
|
||||
|
||||
def get_or_create_group(cursor, group_name):
    """Return the id of the part group ``group_name``, creating it if needed.

    A missing group is inserted under the category mapped to it below, or
    under 'Engine' for any group name not in that mapping.

    Args:
        cursor: Database cursor whose rows can be indexed by column name.
        group_name: Exact ``part_groups.name`` to find or create.

    Returns:
        The group id, or None when the group does not exist and its category
        is not present in ``part_categories``.
    """
    cursor.execute("SELECT id FROM part_groups WHERE name = ?", (group_name,))
    group = cursor.fetchone()
    if group:
        return group['id']

    # Category that a newly created group is filed under.
    category_for_group = {
        'Oil Filters': 'Engine',
        'Air Filters': 'Engine',
        'Fuel Filters': 'Fuel & Air',
        'Cabin Air Filters': 'Heat & Air Conditioning',
    }
    category_name = category_for_group.get(group_name, 'Engine')
    cursor.execute("SELECT id FROM part_categories WHERE name = ?", (category_name,))
    category = cursor.fetchone()
    if category:
        cursor.execute(
            "INSERT INTO part_groups (category_id, name) VALUES (?, ?)",
            (category['id'], group_name))
        return cursor.lastrowid
    return None
|
||||
|
||||
|
||||
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Find a part by part number or insert it.

    Args:
        cursor: Database cursor whose rows can be indexed by column name.
        part_number: Value matched against / stored in ``parts.oem_part_number``.
        group_id: Part-group id for a new row.
        name: English name for a new row.
        name_es: Spanish name for a new row.
        description: Description for a new row.

    Returns:
        ``(part_id, created)``; ``created`` is False when the part already
        existed, in which case the stored row is not modified.
    """
    cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_number,))
    hit = cursor.fetchone()
    if hit:
        return hit['id'], False

    values = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        values)
    return cursor.lastrowid, True
|
||||
|
||||
|
||||
def parse_date_range(date_str):
    """Turn a FRAM date range such as "(03/88 - 09/97)" into a list of years.

    Only the year part of each "MM/YY" date is used.  Years below 100 are
    expanded with a pivot at 50: 00-49 become 20xx, 50-99 become 19xx.

    Args:
        date_str: Date text, with or without the surrounding parentheses.

    Returns:
        Every year from start to end inclusive for a closed range (empty when
        the end precedes the start); a single-element list holding the start
        year when only one date is present (e.g. an open-ended "(03/05 - )");
        an empty list when nothing matches.
    """
    def full_year(text):
        year = int(text)
        if year < 100:
            year += 2000 if year < 50 else 1900
        return year

    closed = re.match(r'\(?\s*(\d{2})/(\d{2,4})\s*-\s*(\d{2})/(\d{2,4})\s*\)?', date_str)
    if closed:
        start, end = full_year(closed.group(2)), full_year(closed.group(4))
        return list(range(start, end + 1))

    # Try single year
    single = re.match(r'\(?\s*(\d{2})/(\d{2,4})\s*-?\s*\)?', date_str)
    if single:
        return [full_year(single.group(2))]
    return []
|
||||
|
||||
|
||||
def extract_fram_parts(text):
    """Return every FRAM part number found in ``text``, in order of appearance.

    Matching is delegated to FRAM_PART_RE; duplicates are kept.
    """
    return [found.group(1) for found in FRAM_PART_RE.finditer(text)]
|
||||
|
||||
|
||||
def parse_vehicle_entries(pdf):
    """Parse vehicle entries from FRAM catalog (light vehicles section).

    Reads catalog pages 3-87 line by line.  A line equal to a KNOWN_BRANDS
    entry sets the current brand.  Every later line that contains FRAM part
    numbers produces one record per (year, classifiable part number), using
    the line's date range for the years (2017, the catalog year, when none is
    found) and the leading upper-case token(s) as the model group.

    Args:
        pdf: Reader object exposing ``pages`` whose items provide
            ``extract_text()``.

    Returns:
        A list of dicts with keys 'brand', 'model', 'year', 'part_number' and
        'filter_type'.
    """
    entries = []
    current_brand = None
    current_model_group = None

    # Headers/footers and dimension notes that never carry application data.
    skip_prefixes = (
        'LIVIANOS', 'PESADOS',
        'H1=', 'Parcial', 'Panel', 'Redondo',
        'C/C.', 'Unidad Sellada',
    )

    # Pages 3-87 are indices 2..86; clamp to the document so a shorter PDF
    # does not raise IndexError.
    for page_num in range(2, min(87, len(pdf.pages))):
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue

        prev_line = ""
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue

            if (line.startswith(skip_prefixes)
                    or re.match(r'^\d{1,3}$', line)
                    or 'MARCA/CATEGORÍA' in line):
                continue

            # Brand detection
            if line in KNOWN_BRANDS:
                current_brand = line
                current_model_group = None
                continue

            # A line made only of brand names joined by " - " is an index header.
            if ' - ' in line and all(b.strip() in KNOWN_BRANDS for b in line.split(' - ') if b.strip()):
                continue

            if not current_brand:
                continue

            # Line format:
            # [MODEL_GROUP] description - Mot.CODE-DISPcc-Powerkw/hp (date_from - date_to) FILTER_CODES

            # Continuation of the previous line that carries no part numbers.
            if prev_line and not re.match(r'^[A-Z]', line) and not FRAM_PART_RE.search(line):
                prev_line = ""
                continue

            # Extract date range and parts
            date_match = re.search(r'\((\d{2}/\d{2,4}\s*-\s*(?:\d{2}/\d{2,4}\s*)?)\)', line)
            parts = extract_fram_parts(line)

            if parts:
                years = parse_date_range(date_match.group(1)) if date_match else []

                # A short all-caps lead-in becomes the current model group.
                model_match = re.match(r'^([A-Z][A-Z0-9\s/\-]+?)\s+\S', line)
                if model_match:
                    potential_model = model_match.group(1).strip()
                    if potential_model.isupper() and len(potential_model) < 30:
                        current_model_group = potential_model
                model_name = current_model_group or "Unknown"

                if not years:
                    years = [2017]  # Default to catalog year

                # Classify each part once rather than once per year.
                classified = []
                for part in parts:
                    info = classify_filter(part)
                    if info:
                        classified.append((part, info[0]))

                for year in years:
                    for part, filter_type in classified:
                        entries.append({
                            'brand': current_brand,
                            'model': model_name,
                            'year': year,
                            'part_number': part,
                            'filter_type': filter_type,
                        })

            prev_line = line

    return entries
|
||||
|
||||
|
||||
def parse_cross_references(pdf):
    """Parse the equivalencias (cross-reference) section of the FRAM catalog.

    Scans page indices 148-199 (clamped to the document length) and keeps only
    pages whose text contains 'EQUIVALENCIAS' or 'Código'.  A data line has the
    shape "<competitor number> <FRAM number>"; title lines, page numbers and
    brand headers are ignored, as are lines whose first token itself starts
    with a multi-letter FRAM prefix.

    Args:
        pdf: Reader object exposing ``pages`` whose items provide
            ``extract_text()``.

    Returns:
        A list of dicts with keys 'competitor' and 'fram'.
    """
    xrefs = []

    last_index = min(200, len(pdf.pages))
    for page_index in range(148, last_index):
        page_text = pdf.pages[page_index].extract_text()
        if not page_text:
            continue
        if not ('EQUIVALENCIAS' in page_text or 'Código' in page_text):
            continue

        for raw_line in page_text.split('\n'):
            line = raw_line.strip()

            # Blank lines, section titles/column headers and bare page numbers.
            if not line or 'EQUIVALENCIAS' in line or 'Código' in line:
                continue
            if re.match(r'^\d{1,3}$', line):
                continue

            # Skip brand header lines
            if ' - ' in line and re.match(r'^[A-Z][a-z]', line):
                continue
            short_capitalised = (
                line[0].isupper() and line[1:2].islower() and len(line.split()) <= 3
            )
            if line.istitle() or short_capitalised:
                continue

            # Parse: CompetitorNumber FRAMNumber
            # FRAM numbers start with PH, CH, CA, PA, G, P, PS, CF, CFA
            pair = re.match(r'^(\S+)\s+((?:PH|CH|CA|PA|PS|CF|CFA|G|P)\w+)', line)
            if not pair:
                continue

            competitor_pn = pair.group(1).strip()
            fram_pn = pair.group(2).strip()
            # Skip if competitor number looks like a FRAM number
            if re.match(r'^(PH|CH|CA|PA|PS|CF|CFA)', competitor_pn):
                continue
            xrefs.append({'competitor': competitor_pn, 'fram': fram_pn})

    return xrefs
|
||||
|
||||
|
||||
def main():
    """Import the FRAM 2017 filter catalog PDF into the vehicle database.

    Steps, as announced on stdout:
      1. read the PDF,
      2. extract vehicle entries and cross-references,
      3. make sure the FRAM manufacturer row exists,
      4. create the filter parts,
      5. create vehicles (model/year/engine rows) and fitments,
      6. create cross-references, both from the "equivalencias" section and
         by pairing parts of the same group that fit the same vehicle.

    All database changes are committed once at the end. The connection is
    closed even when a step raises, so a failed run does not leak it.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO FRAM 2017")
    print("=" * 70)

    print(f"\n[1/6] Leyendo PDF: {PDF_PATH}")
    pdf = pypdf.PdfReader(PDF_PATH)
    print(f" Total páginas: {len(pdf.pages)}")

    print("\n[2/6] Extrayendo datos del catálogo...")
    vehicle_entries = parse_vehicle_entries(pdf)
    cross_refs = parse_cross_references(pdf)
    print(f" Entradas de vehículos: {len(vehicle_entries)}")
    print(f" Equivalencias (cross-refs): {len(cross_refs)}")

    # Get unique parts (part number -> classification tuple)
    unique_parts = {}
    for e in vehicle_entries:
        if e['part_number'] not in unique_parts:
            info = classify_filter(e['part_number'])
            if info:
                unique_parts[e['part_number']] = info
    print(f" Partes únicas: {len(unique_parts)}")

    # Also get parts from cross-refs
    for xref in cross_refs:
        if xref['fram'] not in unique_parts:
            info = classify_filter(xref['fram'])
            if info:
                unique_parts[xref['fram']] = info

    print(f" Partes únicas (incl. cross-refs): {len(unique_parts)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Create FRAM manufacturer
        print("\n[3/6] Creando fabricante FRAM...")
        # Check if Fram already exists (from Gonher import)
        fram_mfr_id = ensure_manufacturer(cursor, 'FRAM', 'aftermarket', 'standard', 'USA')
        print(f" FRAM manufacturer_id: {fram_mfr_id}")

        # Create parts
        print("\n[4/6] Creando partes de filtros...")
        part_ids = {}
        parts_created = 0
        group_cache = {}

        for pn, (group_name, name_en, name_es) in unique_parts.items():
            if group_name not in group_cache:
                group_cache[group_name] = get_or_create_group(cursor, group_name)
            group_id = group_cache[group_name]
            if not group_id:
                # No usable group: the part is not imported.
                continue

            full_name = f"{name_en} {pn}"
            full_name_es = f"{name_es} {pn}"
            part_id, created = get_or_create_part(
                cursor, pn, group_id, full_name, full_name_es, "FRAM Filter")
            part_ids[pn] = part_id
            if created:
                parts_created += 1

        print(f" Partes creadas: {parts_created}")

        # Create vehicles and fitments
        print("\n[5/6] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}

        for entry in vehicle_entries:
            part_id = part_ids.get(entry['part_number'])
            if not part_id:
                continue

            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])

                # Reuse any existing model/year row before creating one.
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                       JOIN models m ON mye.model_id = m.id
                       JOIN brands b ON m.brand_id = b.id
                       JOIN years y ON mye.year_id = y.id
                       WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                       LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()

                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_id = ensure_mye(cursor, model_id, year_id)
                    mye_cache[cache_key] = mye_id
                    vehicles_created += 1

            mye_id = mye_cache[cache_key]

            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, f"Catálogo FRAM 2017 - {entry['filter_type']}"))
                fitments_created += 1

        print(f" Vehículos creados: {vehicles_created}")
        print(f" Fitments creados: {fitments_created}")

        # Create cross-references
        print("\n[6/6] Creando referencias cruzadas...")
        xrefs_created = 0

        # A) From equivalencias section
        for xref in cross_refs:
            fram_part_id = part_ids.get(xref['fram'])
            if not fram_part_id:
                continue

            cursor.execute(
                "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                (fram_part_id, xref['competitor']))
            if not cursor.fetchone():
                cursor.execute(
                    "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'FRAM Equivalencias 2017')",
                    (fram_part_id, xref['competitor']))
                xrefs_created += 1

        # B) Match FRAM parts to other brands' parts by vehicle fitment
        for pn, part_id in part_ids.items():
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number NOT LIKE 'PH%'
                AND p2.oem_part_number NOT LIKE 'CH%'
                AND p2.oem_part_number NOT LIKE 'CA%'
                AND p2.oem_part_number NOT LIKE 'PA%'
                AND p2.oem_part_number NOT LIKE 'CF%'
                AND p2.oem_part_number NOT LIKE 'CFA%'
                LIMIT 20
            """, (part_id, part_id, part_id))

            # fetchall() materialises the rows, so the cursor can be reused below.
            for row in cursor.fetchall():
                # Cross-ref FRAM → other
                cursor.execute(
                    "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                    (part_id, row['oem_part_number']))
                if not cursor.fetchone():
                    cursor.execute(
                        "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'FRAM Catalog 2017')",
                        (part_id, row['oem_part_number']))
                    xrefs_created += 1

                # Reverse cross-ref
                cursor.execute(
                    "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                    (row['id'], pn))
                if not cursor.fetchone():
                    cursor.execute(
                        "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'FRAM Catalog 2017')",
                        (row['id'], pn))
                    xrefs_created += 1

        print(f" Cross-refs creadas: {xrefs_created}")

        conn.commit()
    finally:
        # Always release the connection; uncommitted work is not saved.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN FRAM COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
- Equivalencias leídas: {len(cross_refs):,}
""")


if __name__ == '__main__':
    main()
|
||||
705
vehicle_database/scripts/import_moog_catalog.py
Normal file
705
vehicle_database/scripts/import_moog_catalog.py
Normal file
@@ -0,0 +1,705 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IMPORTADOR DEL CATÁLOGO MOOG - SUSPENSIÓN Y DIRECCIÓN
|
||||
Funciona para los 3 volúmenes:
|
||||
Vol 1: ≤1989 /tmp/catalogs/suspension/moog_vol1_1989back.pdf pages 4-1037
|
||||
Vol 2: 1990-2005 /tmp/catalogs/suspension/moog_vol2_1990_2005.pdf pages 7-1641
|
||||
Vol 3: 2006+ /tmp/catalogs/suspension/moog_vol3_2006up.pdf pages 8-1089
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import sys
|
||||
import pypdf
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# SQLite database file, located one directory above this script's directory.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'

# Per-volume import settings, keyed by the command-line argument.
# 'start_page' is a 0-based page index; 'end_page' is used as the exclusive
# upper bound of the page range.
VOLUMES = {
    '1': dict(
        path='/tmp/catalogs/suspension/moog_vol1_1989back.pdf',
        start_page=3,  # 0-indexed
        end_page=1037,
        label='Vol 1 (≤1989)',
    ),
    '2': dict(
        path='/tmp/catalogs/suspension/moog_vol2_1990_2005.pdf',
        start_page=6,
        end_page=1641,
        label='Vol 2 (1990-2005)',
    ),
    '3': dict(
        path='/tmp/catalogs/suspension/moog_vol3_2006up.pdf',
        start_page=7,
        end_page=1089,
        label='Vol 3 (2006+)',
    ),
}
|
||||
|
||||
# Vehicle brand names (upper case) used to recognise "brand - model" header
# lines. One row per initial letter.
MOOG_BRANDS = {
    # A
    'ACURA', 'ALFA ROMEO', 'AMERICAN MOTORS', 'AMERICAN MOTORS CORP.', 'ASTON MARTIN', 'AUDI',
    # B
    'BMW', 'BUICK',
    # C
    'CADILLAC', 'CHEVROLET', 'CHEVROLET TRUCK', 'CHRYSLER',
    # D
    'DATSUN', 'DODGE', 'DODGE TRUCK',
    # E
    'EAGLE',
    # F
    'FIAT', 'FORD', 'FORD TRUCK', 'FREIGHTLINER',
    # G
    'GENERAL MOTORS TRUCK', 'GEO', 'GEO TRUCK',
    # H
    'HONDA', 'HUMMER', 'HYUNDAI',
    # I
    'INFINITI', 'INTERNATIONAL', 'ISUZU', 'ISUZU TRUCK',
    # J
    'JAGUAR', 'JEEP',
    # K
    'KIA',
    # L
    'LAFORZA', 'LAND ROVER', 'LEXUS', 'LINCOLN', 'LOTUS',
    # M
    'MAZDA', 'MAZDA TRUCK', 'MERCEDES BENZ', 'MERCEDES-BENZ', 'MERCURY', 'MERKUR',
    'MINI', 'MITSUBISHI', 'MITSUBISHI TRUCK',
    # N
    'NISSAN', 'NISSAN TRUCK',
    # O
    'OLDSMOBILE', 'OPEL',
    # P
    'PEUGEOT', 'PLYMOUTH', 'PLYMOUTH TRUCK', 'PONTIAC', 'PORSCHE',
    # R
    'RAM TRUCK', 'RENAULT', 'ROLLS ROYCE',
    # S
    'SAAB', 'SATURN', 'SCION', 'SEAT', 'SHELBY', 'SMART', 'STERLING',
    'SUBARU', 'SUBARU TRUCK', 'SUZUKI', 'SUZUKI TRUCK',
    # T
    'TOYOTA', 'TOYOTA TRUCK', 'TRIUMPH',
    # V
    'VOLKSWAGEN', 'VOLKSWAGEN TRUCK', 'VOLVO', 'VOLVO TRUCK',
    # W
    'WILLYS MOTORS INC.',
}
|
||||
|
||||
# MOOG part number regex: one alternative per part-number family, tried in
# this order and wrapped in word boundaries.
_MOOG_PART_ALTERNATIVES = (
    r'K\d{3,7}T?',
    r'ES\d{3,7}[A-Z]{0,3}T?',
    r'EV\d{3,7}[A-Z]?',
    r'DS\d{3,7}',
    r'CC\d{3,6}',
    r'CK\d{3,7}',
    r'SSD\d{2,4}',
    r'BK\d{3,4}[A-Z]?',
    r'SB\d{3,4}',
    r'NIBJ\d+',
    r'VO[A-Z]{2}\d+',
    r'HY[A-Z]{2}\d+',
    r'AU[A-Z]{2}\d+',
    r'BM[A-Z]{2}\d+',
)
MOOG_PART_RE = re.compile(r'\b(' + '|'.join(_MOOG_PART_ALTERNATIVES) + r')\b')

# Numeric-only springs (only used within spring category context)
SPRING_NUM_RE = re.compile(r'\b(\d{4,6})\b')

# Figure code: F, S or R followed by three digits
FIGURE_RE = re.compile(r'\b([FSR]\d{3})\b')

# Year, or "year - year" range, at start of line
YEAR_RE = re.compile(r'^(\d{4})(?:\s*-\s*(\d{4}))?')
|
||||
|
||||
# System sections: header text (accented and unaccented spellings) -> system key
SYSTEM_PATTERNS = {
    header: system
    for system, headers in (
        ('front_suspension', ('SUSPENSION DELANTERA', 'SUSPENSIÓN DELANTERA')),
        ('steering', ('DIRECCIÓN', 'DIRECCION')),
        ('rear_suspension', ('SUSPENSION TRASERA', 'SUSPENSIÓN TRASERA')),
    )
    for header in headers
}

# Header/footer markers to skip (matched as substrings of a line)
SKIP_MARKERS = [
    'www.moogproblemsolver.com',
    'CATÁLOGO MASTER',
    'CATALOGO MASTER',
    'Solucionador de problemas',
    'búsqueda de piezas electrónicas',
    'FMe-cat.mx',
    'Año Observaciones',
    'Total Solución',
    # NOTE(review): this marker contains a newline, so it can never match
    # text that was already split into single lines - confirm intent.
    'P/C\nCTD',
    'Imagenes de piezas',
]
|
||||
|
||||
|
||||
def get_db():
    """Open the database at DB_PATH and return the connection.

    Rows are produced as ``sqlite3.Row`` objects, so columns can be read by
    name (``row['id']``).
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='premium', country=None):
    """Return the id of manufacturer *name*, inserting the row if it is missing.

    The lookup ignores case. ``type_``, ``quality`` and ``country`` are only
    used when a new row is inserted.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            (name, type_, quality, country))
        return cursor.lastrowid
    return existing['id']
|
||||
|
||||
|
||||
def ensure_brand(cursor, name):
    """Return the id of the brand called *name* (case-insensitive), creating it if needed."""
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    found = cursor.fetchone()
    if not found:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, creating the row if needed.

    The model name is compared case-insensitively within the given brand.
    """
    cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name))
    found = cursor.fetchone()
    if not found:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting it if absent."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    found = cursor.fetchone()
    if not found:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def get_generic_engine(cursor):
    """Return the id of the engine named 'Generic', creating it (fuel type 'gasoline') if missing."""
    cursor.execute("SELECT id FROM engines WHERE name = 'Generic'")
    found = cursor.fetchone()
    if not found:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a model/year/engine row, creating one when none matches.

    With a truthy *engine_id* the lookup is exact. Without it, any existing
    row for the model and year is accepted, and a new row (if one has to be
    inserted) uses the engine returned by ``get_generic_engine``.
    """
    if engine_id:
        query = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ? AND engine_id = ?"
        params = (model_id, year_id, engine_id)
    else:
        query = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
        params = (model_id, year_id)
    cursor.execute(query, params)
    found = cursor.fetchone()
    if found:
        return found['id']

    if not engine_id:
        engine_id = get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_id))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by its number, inserting it when it does not exist yet.

    Returns:
        ``(part_id, created)`` where ``created`` is True only when a new row
        was inserted. An existing row is returned untouched; the other
        arguments are then ignored.
    """
    cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_number,))
    found = cursor.fetchone()
    if not found:
        cursor.execute(
            "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
            (part_number, name, name_es, group_id, description))
        return cursor.lastrowid, True
    return found['id'], False
|
||||
|
||||
|
||||
# --- Group ID lookup cache ---
# Maps a part-group English name to its id, or to None when the lookup
# found no such group (misses are cached too).
_group_cache = {}


def get_group_id(cursor, name_en):
    """Return the id of the part group whose name equals *name_en*, or None.

    Each name is queried at most once per process; later calls are served
    from the module-level cache.
    """
    try:
        return _group_cache[name_en]
    except KeyError:
        cursor.execute("SELECT id FROM part_groups WHERE name = ?", (name_en,))
        found = cursor.fetchone()
        group_id = found['id'] if found else None
        _group_cache[name_en] = group_id
        return group_id
|
||||
|
||||
|
||||
# Ordered category rules: the first rule whose substrings ALL occur in the
# lower-cased category text decides the group. Order matters (for example
# 'brazo de control' is tested before the generic 'buje' rules).
_CATEGORY_RULES = (
    (('rótula', 'suspensión'), 'Ball Joints'),
    (('rótula', 'prensad'), 'Ball Joints'),
    (('brazo de control', 'rótula'), 'Control Arms'),
    (('ensamble de brazo',), 'Control Arms'),
    (('brazo de control',), 'Control Arms'),
    (('horquilla',), 'Control Arms'),
    (('buje', 'estabilizadora'), 'Sway Bar Bushings'),
    (('buje', 'brazo'), 'Bushings'),
    (('buje', 'amortiguador'), 'Bushings'),
    (('buje', 'tracción'), 'Bushings'),
    (('buje', 'camber'), 'Camber/Caster Kits'),
    (('buje',), 'Bushings'),
    (('cople', 'estabilizadora'), 'Sway Bar Links'),
    (('soporte', 'strut'), 'Strut Mounts'),
    (('soporte', 'amortiguador'), 'Strut Mounts'),
    (('montaje', 'amortiguador'), 'Strut Mounts'),
    (('fuelle',), 'Struts'),
    (('cubrepolvo',), 'Struts'),
    (('asiento', 'resorte'), 'Spring Seats'),
    (('ensamble de terminal',), 'Tie Rod Ends'),
    (('terminal', 'dirección'), 'Tie Rod Ends'),  # EV part numbers: see classify_part
    (('barra central',), 'Center Links'),
    (('barra de arrastre',), 'Drag Links'),
    (('barra de acoplamiento',), 'Drag Links'),
    (('varilla de dirección',), 'Drag Links'),
    (('resorte', 'suspensión'), 'Coil Springs'),
    (('camber',), 'Camber/Caster Kits'),
    (('caster',), 'Camber/Caster Kits'),
    (('brazo auxiliar',), 'Idler Arms'),
    (('brazo loco',), 'Idler Arms'),
    (('brazo pitman',), 'Pitman Arms'),
    (('amortiguador de dirección',), 'Steering Dampers'),
    (('pasador', 'dirección'), 'King Pin Sets'),
    (('muelle',), 'Leaf Springs'),
    (('barra de torsión',), 'Torsion Bars'),
)

# Fallback when the category text decides nothing: part-number prefix -> group.
_PREFIX_GROUPS = (
    ('ES', 'Tie Rod Ends'),
    ('EV', 'Inner Tie Rods'),
    ('DS', 'Center Links'),
    ('CC', 'Coil Springs'),
    ('SSD', 'Steering Dampers'),
    ('CK', 'Control Arms'),
    ('BK', 'King Pin Sets'),
    ('SB', 'Bushings'),
)


def classify_part(cursor, category_text, part_number):
    """Map a MOOG category text and part number to a part-group id.

    The Spanish category text is tried first (case-insensitively), then the
    part-number prefix, and finally 'Ball Joints' is used as the default.
    The id comes from ``get_group_id`` and is None when that lookup finds
    no group with the chosen name.
    """
    cat = category_text.lower() if category_text else ''

    # By category text (Spanish)
    for needles, group in _CATEGORY_RULES:
        if not all(needle in cat for needle in needles):
            continue
        # Under a steering-terminal category, EV numbers are inner tie rods.
        if needles == ('terminal', 'dirección') and part_number and part_number.startswith('EV'):
            group = 'Inner Tie Rods'
        return get_group_id(cursor, group)

    # Fallback by part prefix
    if part_number:
        # Purely numeric numbers of four or more digits are treated as springs.
        if part_number.isdigit() and len(part_number) >= 4:
            return get_group_id(cursor, 'Coil Springs')
        for prefix, group in _PREFIX_GROUPS:
            if part_number.startswith(prefix):
                return get_group_id(cursor, group)

    return get_group_id(cursor, 'Ball Joints')  # Default
|
||||
|
||||
|
||||
# --- Part type names for DB ---
# One row per part group: (group name, English singular name, Spanish name).
_PART_TYPE_ROWS = (
    ('Ball Joints', 'Ball Joint', 'Rótula de Suspensión'),
    ('Bushings', 'Bushing', 'Buje'),
    ('Sway Bar Bushings', 'Sway Bar Bushing', 'Buje de Barra Estabilizadora'),
    ('Control Arms', 'Control Arm', 'Brazo de Control'),
    ('Sway Bar Links', 'Sway Bar Link', 'Cople de Barra Estabilizadora'),
    ('Strut Mounts', 'Strut Mount', 'Soporte de Strut'),
    ('Struts', 'Strut Boot', 'Fuelle de Strut'),
    ('Spring Seats', 'Spring Seat', 'Asiento de Resorte'),
    ('Tie Rod Ends', 'Tie Rod End', 'Terminal de Dirección'),
    ('Inner Tie Rods', 'Inner Tie Rod', 'Terminal Interior de Dirección'),
    ('Center Links', 'Center Link', 'Barra Central'),
    ('Drag Links', 'Drag Link', 'Barra de Arrastre'),
    ('Coil Springs', 'Coil Spring', 'Resorte Helicoidal'),
    ('Camber/Caster Kits', 'Camber/Caster Kit', 'Kit de Camber/Caster'),
    ('Idler Arms', 'Idler Arm', 'Brazo Auxiliar'),
    ('Pitman Arms', 'Pitman Arm', 'Brazo Pitman'),
    ('Steering Dampers', 'Steering Damper', 'Amortiguador de Dirección'),
    ('King Pin Sets', 'King Pin Set', 'Juego de Pivote'),
    ('Leaf Springs', 'Leaf Spring', 'Muelle'),
    ('Torsion Bars', 'Torsion Bar', 'Barra de Torsión'),
)

# group name -> (English name, Spanish name)
PART_TYPE_NAMES = {group: (name_en, name_es) for group, name_en, name_es in _PART_TYPE_ROWS}
|
||||
|
||||
|
||||
# --- Parsing ---
|
||||
|
||||
def is_skip_line(line):
    """Return True when *line* contains any SKIP_MARKERS entry (page header/footer text)."""
    for marker in SKIP_MARKERS:
        if marker in line:
            return True
    return False
|
||||
|
||||
|
||||
def _is_known_brand(text_upper):
    """Return True if *text_upper* equals a MOOG_BRANDS name or starts with one followed by a space."""
    return any(
        text_upper == brand or text_upper.startswith(brand + ' ')
        for brand in MOOG_BRANDS
    )


def _looks_like_brand(text):
    """Heuristic for unknown brands: all upper case, over two characters, no digits."""
    return text.isupper() and len(text) > 2 and not any(ch.isdigit() for ch in text)


def parse_brand_model(line):
    """Try to parse a brand-model header line.

    The line is split at the first occurrence of each dash variant in turn
    (minus sign, en dash, em dash, ASCII hyphen). A "(Cont.)" marker is
    removed from both halves. The half that is a known brand, or failing
    that the half that looks like an upper-case brand name, is the brand;
    the other half is the model.

    The left half is always examined before the right one, so the result
    does not depend on set iteration order. Halves that contain digits are
    never accepted by the upper-case heuristic; otherwise a data row such
    as "1990-1995 4WD K8685" would be taken for a header.

    Returns:
        ``(brand, model)`` or ``(None, None)`` when no header is recognised.
    """
    for dash in ['−', '–', '—', '-']:
        if dash not in line:
            continue
        left_raw, right_raw = line.split(dash, 1)
        left = re.sub(r'\s*\(Cont\.?\)\.?\s*', '', left_raw).strip()
        right = re.sub(r'\s*\(Cont\.?\)\.?\s*', '', right_raw).strip()
        if not left or not right:
            continue

        # Check which side matches a known brand
        if _is_known_brand(left.upper()):
            return left, right
        if _is_known_brand(right.upper()):
            return right, left

        # Heuristic: an all-uppercase side is the brand, the other the model
        if _looks_like_brand(left):
            return left, right
        if _looks_like_brand(right):
            return right, left

    return None, None
|
||||
|
||||
|
||||
def detect_system(line):
    """Return the system key if *line* starts with a SYSTEM_PATTERNS header, else None.

    The comparison is made on the stripped, upper-cased line; the first
    matching pattern in dictionary order wins.
    """
    header = line.strip().upper()
    return next(
        (system for pattern, system in SYSTEM_PATTERNS.items()
         if header.startswith(pattern.upper())),
        None,
    )
|
||||
|
||||
|
||||
# Words/phrases (Spanish) whose presence marks a part-category header line.
CATEGORY_KEYWORDS = [
    # Ball joints, bushings, arms
    'Rótula', 'Rotula', 'Buje', 'Brazo de control', 'Brazo auxiliar',
    'Brazo pitman', 'Brazo loco',
    # Links, mounts, boots, seats
    'Cople', 'Soporte', 'Fuelle', 'Asiento del resorte',
    # Steering terminals and assemblies
    'Terminal de dirección', 'Terminal de direccion',
    'Ensamble de terminal', 'Ensamble de brazo',
    # Bars and rods
    'Barra central', 'Barra de arrastre', 'Barra de dirección', 'Varilla',
    # Springs and alignment
    'Juego de resortes', 'Resorte de suspensión', 'Juego para ajuste',
    'Placa para ajuste', 'Seguro guia',
    # Miscellaneous
    'Amortiguador de dirección', 'Pasador de dirección', 'Horquilla', 'Muelle',
    # Kits
    'Juego de coples', 'Juego de soporte', 'Juego de montaje',
    'Montaje del amortiguador',
]


def is_category_line(line):
    """Return True when *line* is a part-category header.

    A header contains at least one CATEGORY_KEYWORDS entry (compared
    case-insensitively) and no MOOG part number; a line that carries a part
    number is a data row instead.
    """
    lowered = line.lower()
    if not any(keyword.lower() in lowered for keyword in CATEGORY_KEYWORDS):
        return False
    return not MOOG_PART_RE.search(line)
|
||||
|
||||
|
||||
def parse_moog_pdf(pdf_path, start_page, end_page):
    """Parse a MOOG catalog PDF and return one entry per (part, model year).

    The extracted text is read line by line while tracking the current
    vehicle (brand/model), system section, figure code, part category and
    year range. A row that carries part numbers produces one entry for
    every year of the current range; rows without a leading year reuse the
    range of the preceding row of the same vehicle.

    The year range is cleared whenever a header names a different vehicle,
    so year-less rows of a new vehicle are skipped instead of inheriting
    the previous vehicle's years. A repeated header for the same vehicle
    (for example a "(Cont.)" header) keeps the range.

    Args:
        pdf_path: Path of the PDF, passed to ``pypdf.PdfReader``.
        start_page: 0-based index of the first page to read.
        end_page: Exclusive upper bound of the page range (clamped to the
            number of pages in the document).

    Returns:
        A list of dicts with the keys ``brand``, ``model``, ``year``,
        ``system``, ``figure``, ``category``, ``part_number`` and ``notes``.
    """
    pdf = pypdf.PdfReader(pdf_path)
    entries = []

    current_brand = None
    current_model = None
    current_system = None
    current_figure = None
    current_category = None
    current_year_from = None
    current_year_to = None

    total = min(len(pdf.pages), end_page)

    for page_num in range(start_page, total):
        if (page_num - start_page) % 100 == 0:
            print(f" Página {page_num + 1}/{total}...")

        text = pdf.pages[page_num].extract_text()
        if not text:
            continue

        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue
            if is_skip_line(line):
                continue

            # Skip standalone page numbers
            if re.match(r'^\d{1,4}$', line) and not current_category:
                continue

            # Brand-model line
            brand, model = parse_brand_model(line)
            if brand and model:
                if (brand, model) != (current_brand, current_model):
                    # Different vehicle: the previous year range no longer applies.
                    current_year_from = None
                    current_year_to = None
                current_brand = brand
                current_model = model
                current_system = None
                current_figure = None
                current_category = None
                continue

            # System section
            system = detect_system(line)
            if system:
                current_system = system
                current_category = None
                # Check for figure code on same line
                fig = FIGURE_RE.search(line)
                if fig:
                    current_figure = fig.group(1)
                continue

            # Standalone figure code line, optionally followed by a comma
            # (e.g. "F530," when several figures are listed)
            fig_match = re.match(r'^([FSR]\d{3}),?$', line)
            if fig_match:
                current_figure = fig_match.group(1)
                continue

            if not current_brand or not current_model:
                continue

            # Part category header
            if is_category_line(line):
                current_category = line
                continue

            # Data line with year
            year_match = YEAR_RE.match(line)
            if year_match:
                y1 = int(year_match.group(1))
                y2 = int(year_match.group(2)) if year_match.group(2) else y1
                if 1930 <= y1 <= 2025 and 1930 <= y2 <= 2025:
                    current_year_from = min(y1, y2)
                    current_year_to = max(y1, y2)

            # Extract MOOG part numbers from line
            parts_found = MOOG_PART_RE.findall(line)

            # Also check for numeric springs in spring context
            if current_category and 'resorte' in current_category.lower():
                for m in SPRING_NUM_RE.finditer(line):
                    num = m.group(1)
                    if num in parts_found:
                        continue
                    # Avoid matching years
                    if not (1930 <= int(num) <= 2025):
                        parts_found.append(num)

            if not parts_found or not current_year_from:
                continue

            # Build entries for each part found
            for pn in parts_found:
                # Clean part number (remove trailing T for Problem Solver)
                clean_pn = pn.rstrip('T') if pn.endswith('T') and len(pn) > 4 else pn

                for year in range(current_year_from, current_year_to + 1):
                    entries.append({
                        'brand': current_brand,
                        'model': current_model,
                        'year': year,
                        'system': current_system or 'front_suspension',
                        'figure': current_figure,
                        'category': current_category or '',
                        'part_number': clean_pn,
                        'notes': line,
                    })

    return entries
|
||||
|
||||
|
||||
# Catalog brand spelling (upper case) -> brand name used in the database.
_BRAND_ALIASES = {
    'CHEVROLET TRUCK': 'CHEVROLET',
    'DODGE TRUCK': 'DODGE',
    'FORD TRUCK': 'FORD',
    'GENERAL MOTORS TRUCK': 'GMC',
    'GEO TRUCK': 'GEO',
    'ISUZU TRUCK': 'ISUZU',
    'MAZDA TRUCK': 'MAZDA',
    'MITSUBISHI TRUCK': 'MITSUBISHI',
    'NISSAN TRUCK': 'NISSAN',
    'PLYMOUTH TRUCK': 'PLYMOUTH',
    'SUBARU TRUCK': 'SUBARU',
    'SUZUKI TRUCK': 'SUZUKI',
    'TOYOTA TRUCK': 'TOYOTA',
    'VOLKSWAGEN TRUCK': 'VOLKSWAGEN',
    'VOLVO TRUCK': 'VOLVO',
    'AMERICAN MOTORS CORP.': 'AMERICAN MOTORS',
    'AMERICAN MOTORS': 'AMERICAN MOTORS',
    'MERCEDES BENZ': 'MERCEDES-BENZ',
    'WILLYS MOTORS INC.': 'WILLYS',
    'RAM TRUCK': 'RAM',
}


def normalize_brand(brand):
    """Normalize a MOOG brand name to its standard form.

    Brands listed in the alias table (matched case-insensitively after
    trimming) are replaced by their upper-case standard name; any other
    brand is returned trimmed but otherwise unchanged.
    """
    key = brand.upper().strip()
    if key in _BRAND_ALIASES:
        return _BRAND_ALIASES[key]
    return brand.strip()
|
||||
|
||||
|
||||
def main():
    """Import one volume of the MOOG catalog into the vehicle database.

    The volume ('1', '2' or '3') is read from the first command-line
    argument; with a missing or unknown argument the usage text is printed
    and the process exits with status 1.

    Steps, as announced on stdout: parse the PDF, make sure the MOOG
    manufacturer row exists, create parts, create vehicles and fitments,
    and register the diagram (figure) references. All changes are committed
    once at the end. The connection is closed even when a step raises, so a
    failed run does not leak it.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in VOLUMES:
        print("Uso: python3 import_moog_catalog.py <1|2|3>")
        print(" 1 = Vol 1 (≤1989)")
        print(" 2 = Vol 2 (1990-2005)")
        print(" 3 = Vol 3 (2006+)")
        sys.exit(1)

    vol = sys.argv[1]
    config = VOLUMES[vol]

    print("=" * 70)
    print(f"IMPORTADOR - CATÁLOGO MOOG {config['label']}")
    print("=" * 70)

    print(f"\n[1/5] Leyendo PDF: {config['path']}")
    entries = parse_moog_pdf(config['path'], config['start_page'], config['end_page'])
    print(f" Entradas parseadas: {len(entries):,}")

    # part number -> category text of the first entry that mentions it
    unique_parts = {}
    for e in entries:
        if e['part_number'] not in unique_parts:
            unique_parts[e['part_number']] = e['category']

    unique_brands = set(normalize_brand(e['brand']) for e in entries)
    print(f" Partes únicas: {len(unique_parts):,}")
    print(f" Marcas de vehículos: {len(unique_brands)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        print("\n[2/5] Creando fabricante MOOG...")
        moog_mfr_id = ensure_manufacturer(cursor, 'MOOG', 'aftermarket', 'premium', 'USA')
        print(f" MOOG manufacturer_id: {moog_mfr_id}")

        print("\n[3/5] Creando partes...")
        part_ids = {}
        parts_created = 0

        for pn, cat_text in sorted(unique_parts.items()):
            group_id = classify_part(cursor, cat_text, pn)
            if not group_id:
                group_id = get_group_id(cursor, 'Ball Joints')

            # Get group name for part description
            cursor.execute("SELECT name FROM part_groups WHERE id = ?", (group_id,))
            group_row = cursor.fetchone()
            group_name = group_row['name'] if group_row else 'Suspension Part'

            names = PART_TYPE_NAMES.get(group_name, (group_name, group_name))
            name_en = f"{names[0]} {pn}"
            name_es = f"{names[1]} {pn}"

            part_id, created = get_or_create_part(
                cursor, pn, group_id, name_en, name_es, f"MOOG {names[0]}")
            part_ids[pn] = part_id
            if created:
                parts_created += 1

        print(f" Partes creadas: {parts_created:,}")
        print(f" Partes existentes: {len(unique_parts) - parts_created:,}")

        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}

        for i, entry in enumerate(entries):
            if i % 10000 == 0 and i > 0:
                print(f" Procesando {i:,}/{len(entries):,}...")

            brand_name = normalize_brand(entry['brand'])
            cache_key = (brand_name.upper(), entry['model'].upper(), entry['year'])

            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, brand_name)
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])

                # Reuse any existing model/year row before creating one.
                cursor.execute("""
                    SELECT mye.id FROM model_year_engine mye
                    JOIN models m ON mye.model_id = m.id
                    JOIN brands b ON m.brand_id = b.id
                    JOIN years y ON mye.year_id = y.id
                    WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                    LIMIT 1
                """, (brand_name, entry['model'], entry['year']))
                existing = cursor.fetchone()

                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_id = ensure_mye(cursor, model_id, year_id)
                    mye_cache[cache_key] = mye_id
                    vehicles_created += 1

            mye_id = mye_cache[cache_key]
            part_id = part_ids.get(entry['part_number'])
            if not part_id:
                continue

            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = f"MOOG Catalog {config['label']}"
                if entry['figure']:
                    notes += f" - Fig {entry['figure']}"
                if entry['system']:
                    notes += f" - {entry['system']}"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1

        print(f" Vehículos creados: {vehicles_created:,}")
        print(f" Fitments creados: {fitments_created:,}")

        # Store diagram references
        print("\n[5/5] Guardando referencias de diagramas...")
        figures_seen = set()
        # Get a default group_id for diagrams
        # NOTE(review): 164 is a hard-coded fallback id - confirm it exists in part_groups.
        susp_group = get_group_id(cursor, 'Ball Joints') or 164
        system_labels = {
            'front_suspension': 'Suspensión Delantera',
            'steering': 'Dirección',
            'rear_suspension': 'Suspensión Trasera',
        }
        for entry in entries:
            figure = entry['figure']
            if not figure or figure in figures_seen:
                continue
            figures_seen.add(figure)
            cursor.execute("SELECT id FROM diagrams WHERE name = ?", (figure,))
            if not cursor.fetchone():
                sys_label = system_labels.get(entry.get('system'), 'Suspensión')
                cursor.execute(
                    "INSERT INTO diagrams (name, name_es, group_id, image_path, source) VALUES (?, ?, ?, ?, ?)",
                    (figure, f"MOOG {sys_label} - {figure}",
                     susp_group, f"moog/{figure}.png", 'MOOG Catalog'))

        print(f" Diagramas registrados: {len(figures_seen)}")

        conn.commit()
    finally:
        # Always release the connection; uncommitted work is not saved.
        conn.close()

    print("\n" + "=" * 70)
    print(f"IMPORTACIÓN MOOG {config['label']} COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Diagramas: {len(figures_seen)}
""")


if __name__ == '__main__':
    main()
|
||||
554
vehicle_database/scripts/import_wix_catalog.py
Normal file
554
vehicle_database/scripts/import_wix_catalog.py
Normal file
@@ -0,0 +1,554 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IMPORTADOR DEL CATÁLOGO WIX 2021 - FILTROS
|
||||
Formato: Brand → Year → Model → Engine + filter columns
|
||||
Páginas 77-687: Autos de pasajeros / camionetas ligeras
|
||||
PDF: /tmp/catalogs/wix_2021.pdf
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import pypdf
|
||||
from pathlib import Path
|
||||
|
||||
# SQLite database file: two directory levels above this script file.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'

# Catalog PDF read by this importer.
PDF_PATH = '/tmp/catalogs/wix_2021.pdf'

# Vehicle brand names. A catalog line whose upper-cased text equals one of
# these is treated as the start of a new brand section.
BRAND_HEADERS = {
    'ACURA', 'ALFA ROMEO', 'AM GENERAL', 'AMERICAN MOTORS',
    'ASTON MARTIN', 'ASUNA', 'AUDI', 'AUSTIN',
    'AUSTIN HEALEY', 'AVANTI', 'BENTLEY', 'BMW',
    'BUICK', 'CADILLAC', 'CHECKER', 'CHEVROLET',
    'CHRYSLER', 'DAEWOO', 'DAIHATSU', 'DATSUN',
    'DELOREAN', 'DODGE', 'EAGLE', 'FIAT',
    'FORD', 'FREIGHTLINER', 'GEO', 'GMC',
    'HILLMAN', 'HONDA', 'HUMMER', 'HYUNDAI',
    'INFINITI', 'INTERNATIONAL', 'ISUZU', 'JAGUAR',
    'JEEP', 'KIA', 'LAFORZA', 'LAND ROVER',
    'LEXUS', 'LINCOLN', 'LOTUS', 'MACK',
    'MAZDA', 'MERCEDES-BENZ', 'MERCURY', 'MERKUR',
    'MINI', 'MITSUBISHI', 'MORGAN', 'NISSAN',
    'OLDSMOBILE', 'OPEL', 'PEUGEOT', 'PLYMOUTH',
    'PONTIAC', 'PORSCHE', 'RAM', 'RENAULT',
    'ROLLS ROYCE', 'SAAB', 'SATURN', 'SCION',
    'SEAT', 'SHELBY', 'SMART', 'SRT',
    'STUDEBAKER', 'SUBARU', 'SUNBEAM', 'SUZUKI',
    'TOYOTA', 'TRIUMPH', 'VOLKSWAGEN', 'VOLVO',
    'WORKHORSE', 'WORKHORSE CUSTOM CHASSIS',
}

# Start of an engine row: a V/L/H letter, a number, whitespace, then a
# decimal displacement followed by "L" (matched case-insensitively).
ENGINE_RE = re.compile(r'^[VLH]\s*\d+\s+\d+\.\d+L', flags=re.IGNORECASE)

# Text fragments of page headers/footers and column captions. Any line
# containing one of them is discarded by is_footer_line().
FOOTER_MARKERS = [
    'Pass Car/Light Truck',
    'Year/Año/Année',
    'Model/Modelo/Modèle',
    'N/A = Not Available',
    'N/A = Non disponible',
    'N/A = No disponible',
    'Italicized Part Numbers',
    'Las piezas con números',
    'Les numéros de pièc',
    'Engine/Motor/Moteur',
    'Eng. Code',
    'Código de',
    'Code moteur',
    'Oil XP',
    'Aceite XP',
    'Cabina Aire',
    'Cabin Air XP',
    'Combustible',
    'Transmisión',
    'Carburant',
]

# filter type -> (part group name, part group name in Spanish, category name)
FILTER_GROUPS = {
    'oil': (
        'Oil Filters', 'Filtros de Aceite', 'Engine'),
    'air': (
        'Air Filters', 'Filtros de Aire', 'Engine'),
    'cabin_air': (
        'Cabin Air Filters', 'Filtros de Aire de Cabina', 'HVAC'),
    'fuel': (
        'Fuel Filters', 'Filtros de Combustible', 'Fuel System'),
    'transmission': (
        'Transmission Filters', 'Filtros de Transmisión', 'Transmission'),
}

# filter type (including the "_xp" variants) -> (English name, Spanish name)
TYPE_NAMES = {
    'oil': (
        'Oil Filter', 'Filtro de Aceite'),
    'oil_xp': (
        'Oil Filter XP', 'Filtro de Aceite XP'),
    'air': (
        'Air Filter', 'Filtro de Aire'),
    'air_xp': (
        'Air Filter XP', 'Filtro de Aire XP'),
    'cabin_air': (
        'Cabin Air Filter', 'Filtro de Aire de Cabina'),
    'cabin_air_xp': (
        'Cabin Air Filter XP', 'Filtro de Aire de Cabina XP'),
    'fuel': (
        'Fuel Filter', 'Filtro de Combustible'),
    'fuel_xp': (
        'Fuel Filter XP', 'Filtro de Combustible XP'),
    'transmission': (
        'Transmission Filter', 'Filtro de Transmisión'),
    'transmission_xp': (
        'Transmission Filter XP', 'Filtro de Transmisión XP'),
}

# Tokens that extract_wix_part() rejects outright instead of parsing.
SKIP_VALUES = {'N/A', 'N/R', 'N/S', 'MT72', '-'}
|
||||
|
||||
|
||||
def get_db():
    """Open the SQLite database at ``DB_PATH`` and return the connection.

    The connection's row factory is ``sqlite3.Row``, so fetched rows can be
    indexed by column name (for example ``row['id']``).
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of manufacturer *name*, inserting a row if none exists.

    The lookup compares ``UPPER(name)`` on both sides. ``type_``, ``quality``
    and ``country`` are only used when a new row has to be inserted.
    Rows must be addressable by column name (``sqlite3.Row``).
    """
    found = cursor.execute(
        "SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if not found:
        new_row = (name, type_, quality, country)
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            new_row)
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_brand(cursor, name):
    """Return the id of vehicle brand *name*, inserting it when missing.

    Existing brands are matched by comparing ``UPPER(name)`` on both sides.
    """
    found = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if not found:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if absent.

    A model matches when its ``brand_id`` is equal and ``UPPER(name)`` is
    equal on both sides.
    """
    found = cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name),
    ).fetchone()
    if not found:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting it if absent."""
    found = cursor.execute("SELECT id FROM years WHERE year = ?", (year,)).fetchone()
    if not found:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def get_generic_engine(cursor):
    """Return the id of the placeholder engine named 'Generic'.

    When no such engine exists yet, one is inserted with fuel type
    'gasoline' and its new id is returned.
    """
    found = cursor.execute("SELECT id FROM engines WHERE name = 'Generic'").fetchone()
    if not found:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return found['id']
|
||||
|
||||
|
||||
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a ``model_year_engine`` row, creating one if needed.

    With a truthy *engine_id* the lookup requires the exact
    (model, year, engine) combination. Without one, any existing row for the
    (model, year) pair is accepted; if there is none, the new row is linked
    to the placeholder engine returned by ``get_generic_engine``.
    """
    if engine_id:
        query = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ? AND engine_id = ?"
        params = (model_id, year_id, engine_id)
    else:
        query = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
        params = (model_id, year_id)

    found = cursor.execute(query, params).fetchone()
    if found:
        return found['id']

    # Nothing matched: fall back to the generic engine only when the caller
    # did not name a specific one.
    engine_for_insert = engine_id if engine_id else get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_for_insert))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by ``oem_part_number``, inserting it when missing.

    Returns a ``(part_id, created)`` pair; ``created`` is True only when a
    new row was inserted. For an existing part the other arguments are
    ignored and the stored row is left unchanged.
    """
    found = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if not found:
        new_row = (part_number, name, name_es, group_id, description)
        cursor.execute(
            "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
            new_row)
        return cursor.lastrowid, True
    return found['id'], False
|
||||
|
||||
|
||||
def get_filter_group(cursor, filter_type):
    """Return the ``part_groups`` id for *filter_type*, creating it if needed.

    *filter_type* must be a key of ``FILTER_GROUPS`` (a ``KeyError`` is
    raised otherwise). The group is looked up by its English name. When it
    is missing, its category is looked up by name and created if absent,
    then the group is inserted under that category.
    """
    group_en, group_es, category = FILTER_GROUPS[filter_type]

    group_row = cursor.execute(
        "SELECT id FROM part_groups WHERE name = ? LIMIT 1", (group_en,)
    ).fetchone()
    if group_row:
        return group_row['id']

    category_row = cursor.execute(
        "SELECT id FROM part_categories WHERE name = ? LIMIT 1", (category,)
    ).fetchone()
    if category_row:
        category_id = category_row['id']
    else:
        # A new category stores the same text in both name and name_es.
        cursor.execute(
            "INSERT INTO part_categories (name, name_es) VALUES (?, ?)",
            (category, category))
        category_id = cursor.lastrowid

    cursor.execute(
        "INSERT INTO part_groups (category_id, name, name_es) VALUES (?, ?, ?)",
        (category_id, group_en, group_es))
    return cursor.lastrowid
|
||||
|
||||
|
||||
# --- Part number extraction ---
|
||||
|
||||
def extract_wix_part(token):
    """Return the WIX part number at the start of *token*, or None.

    Surrounding whitespace and trailing dots are removed first. Anything
    after the recognised number (such as a footnote suffix) is dropped.
    Empty tokens and members of ``SKIP_VALUES`` yield None.
    """
    cleaned = token.strip().rstrip('.')
    if not cleaned or cleaned in SKIP_VALUES:
        return None

    # Tried in order: the 5-digit XP variant first, then the
    # alpha-prefixed families (WL, WA, WP, WF).
    anchored_patterns = (
        r'^(\d{5}XP)',
        r'^(WL\d{4,6})',
        r'^(WA\d{4,5})',
        r'^(WP\d{4,5})',
        r'^(WF\d{4})',
    )
    for pattern in anchored_patterns:
        hit = re.match(pattern, cleaned)
        if hit:
            return hit.group(1)

    # Plain 5-digit numbers are accepted only for these two-digit prefixes.
    digits = re.match(r'^(\d{5})', cleaned)
    if digits is None:
        return None
    candidate = digits.group(1)
    accepted_prefixes = ('51', '57', '42', '43', '44', '45', '46', '47',
                         '48', '49', '24', '33', '58')
    return candidate if candidate[:2] in accepted_prefixes else None
|
||||
|
||||
|
||||
def classify_filter(pn):
    """Map a WIX part number to its filter type, or None if unrecognised.

    Returns 'oil', 'air', 'cabin_air', 'fuel' or 'transmission'. A number
    ending in 'XP' is classified by the text before that suffix and gets
    '_xp' appended to the result.
    """
    if not pn:
        return None

    if pn.endswith('XP'):
        base_kind = classify_filter(pn[:-2])
        if not base_kind:
            return None
        return f"{base_kind}_xp"

    # Alpha-prefixed families.
    for prefix, kind in (('WL', 'oil'), ('WA', 'air'),
                         ('WP', 'cabin_air'), ('WF', 'fuel')):
        if pn.startswith(prefix):
            return kind

    # Purely numeric 5-digit families, keyed by their leading digits.
    numeric_families = (
        (r'^5[17]\d{3}$', 'oil'),
        (r'^4[2-9]\d{3}$', 'air'),
        (r'^24\d{3}$', 'cabin_air'),
        (r'^33\d{3}$', 'fuel'),
        (r'^58\d{3}$', 'transmission'),
    )
    for pattern, kind in numeric_families:
        if re.match(pattern, pn):
            return kind

    return None
|
||||
|
||||
|
||||
def extract_parts_from_tokens(tokens):
    """Collect the distinct, classifiable WIX part numbers found in *tokens*.

    Returns a list of ``(part_number, filter_type)`` tuples in order of
    first appearance. Tokens that yield no part number, or whose number
    cannot be classified, are ignored.
    """
    collected = []
    already_listed = set()
    for raw in tokens:
        number = extract_wix_part(raw)
        if not number or number in already_listed:
            continue
        kind = classify_filter(number)
        if not kind:
            continue
        already_listed.add(number)
        collected.append((number, kind))
    return collected
|
||||
|
||||
|
||||
# --- Line classification ---
|
||||
|
||||
def is_footer_line(line):
    """Return True when *line* contains any entry of ``FOOTER_MARKERS``."""
    for marker in FOOTER_MARKERS:
        if marker in line:
            return True
    return False
|
||||
|
||||
|
||||
def is_continuation(line):
    """Tell whether *line* continues the current engine row.

    The decision is made from the first whitespace-separated token: a known
    keyword, a token starting with 'N/' or 'MT', a WIX-style part number, or
    a lone '-'. A 1-2 digit first token also counts when one of the next
    three tokens is a WIX part number. Blank lines are not continuations.
    """
    words = line.split()
    if not words:
        return False

    head = words[0]
    if head in {'Electric/Gas', 'Turbo', 'Diesel', 'Hybrid', 'O', '-'}:
        return True
    if head.startswith(('N/', 'MT')):
        return True
    if re.match(r'^(WL|WA|WP|WF)\d', head) or re.match(r'^\d{5}', head):
        return True

    # A 1-2 digit first token followed by more tokens: look for a part
    # number among the next three.
    if re.match(r'^\d{1,2}$', head) and len(words) > 1:
        return any(extract_wix_part(word) for word in words[1:4])
    return False
|
||||
|
||||
|
||||
# --- PDF parsing ---
|
||||
|
||||
def parse_wix_pdf(pdf_path):
    """Parse pages 77-687 of the WIX 2021 catalog into fitment entries.

    The page text is walked line by line while tracking the current brand,
    year and model. Each engine row (plus its continuation lines) is
    tokenised, and the WIX part numbers found in it are recorded.

    Returns a list of dicts with keys 'brand', 'model', 'year' and 'parts',
    where 'parts' is the list produced by ``extract_parts_from_tokens``.
    """
    reader = pypdf.PdfReader(pdf_path)
    entries = []

    brand = None
    year = None
    model = None
    pending = []  # tokens of the engine row currently being collected

    def flush_engine():
        """Emit an entry for the buffered engine row, then reset the buffer."""
        nonlocal pending
        tokens, pending = pending, []
        if not (brand and year and model and tokens):
            return
        found = extract_parts_from_tokens(tokens)
        if found:
            entries.append({
                'brand': brand,
                'model': model,
                'year': year,
                'parts': found,
            })

    first_index = 76  # zero-based index of catalog page 77
    total_pages = min(len(reader.pages), 687)
    for page_index in range(first_index, total_pages):
        if (page_index - first_index) % 50 == 0:
            print(f" Procesando página {page_index + 1}/{total_pages}...")

        page_text = reader.pages[page_index].extract_text()
        if not page_text:
            continue

        for raw_line in page_text.split('\n'):
            stripped = raw_line.strip()
            # Skip blank lines and page header/footer fragments.
            if not stripped or is_footer_line(stripped):
                continue

            # Remove "(Cont'd/Suite)" continuation markers.
            clean = re.sub(r"\s*\(Cont'd/Suite\)\s*", '', stripped).strip()
            if not clean:
                continue

            # Brand header: starts a new section and clears year and model.
            if clean.upper() in BRAND_HEADERS:
                flush_engine()
                brand = clean
                year = None
                model = None
                continue

            # A bare 4-digit line within 1940-2025 is a model year.
            year_match = re.match(r'^(\d{4})$', clean)
            if year_match:
                candidate_year = int(year_match.group(1))
                if 1940 <= candidate_year <= 2025:
                    flush_engine()
                    year = candidate_year
                    model = None
                    continue

            # Lines before the first brand and year carry no usable data.
            if not brand or not year:
                continue

            # Engine row: starts a fresh token buffer.
            if ENGINE_RE.match(clean):
                flush_engine()
                pending = clean.split()
                continue

            # Continuation of the engine row being collected.
            if pending and is_continuation(clean):
                pending.extend(clean.split())
                continue

            # Any other line containing a letter is taken as a model name.
            if re.search(r'[A-Za-z]', clean):
                flush_engine()
                model = clean
                continue

    flush_engine()
    return entries
|
||||
|
||||
|
||||
def _create_filter_parts(cursor, unique_parts):
    """Ensure a part group per filter type and a ``parts`` row per number.

    *unique_parts* maps part number -> filter type. Returns
    ``(part_ids, parts_created)`` where *part_ids* maps part number -> id.
    """
    group_ids = {}
    for ftype in FILTER_GROUPS:
        group_ids[ftype] = get_filter_group(cursor, ftype)
        # XP variants share the group of their base filter type.
        group_ids[f"{ftype}_xp"] = group_ids[ftype]

    part_ids = {}
    parts_created = 0
    for pn, ftype in sorted(unique_parts.items()):
        gid = group_ids.get(ftype)
        if not gid:
            continue
        name_en, name_es = TYPE_NAMES.get(ftype, ('Filter', 'Filtro'))
        part_id, created = get_or_create_part(
            cursor, pn, gid,
            f"{name_en} {pn}", f"{name_es} {pn}",
            f"WIX {name_en}")
        part_ids[pn] = part_id
        if created:
            parts_created += 1
    return part_ids, parts_created


def _create_fitments(cursor, entries, part_ids):
    """Create missing vehicles and ``vehicle_parts`` rows for *entries*.

    Returns ``(vehicles_created, fitments_created)``.
    """
    vehicles_created = 0
    fitments_created = 0
    mye_cache = {}  # (BRAND, MODEL, year) -> model_year_engine id

    for i, entry in enumerate(entries):
        if i % 5000 == 0 and i > 0:
            print(f" Procesando entrada {i}/{len(entries)}...")

        cache_key = (entry['brand'].upper(), entry['model'].upper(), entry['year'])
        if cache_key not in mye_cache:
            brand_id = ensure_brand(cursor, entry['brand'])
            model_id = ensure_model(cursor, brand_id, entry['model'])
            year_id = ensure_year(cursor, entry['year'])

            # Reuse any existing vehicle row for this brand/model/year.
            cursor.execute("""
                SELECT mye.id FROM model_year_engine mye
                JOIN models m ON mye.model_id = m.id
                JOIN brands b ON m.brand_id = b.id
                JOIN years y ON mye.year_id = y.id
                WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                LIMIT 1
            """, (entry['brand'], entry['model'], entry['year']))
            existing = cursor.fetchone()

            if existing:
                mye_cache[cache_key] = existing['id']
            else:
                mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id)
                vehicles_created += 1

        mye_id = mye_cache[cache_key]

        for pn, ftype in entry['parts']:
            part_id = part_ids.get(pn)
            if not part_id:
                continue

            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = f"Catálogo WIX 2021 - {ftype.replace('_', ' ').upper()}"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1

    return vehicles_created, fitments_created


def _add_cross_reference(cursor, part_id, number):
    """Insert one interchange cross-reference unless it already exists.

    Returns True when a row was inserted.
    """
    cursor.execute(
        "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
        (part_id, number))
    if cursor.fetchone():
        return False
    cursor.execute(
        "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'WIX 2021 Catalog')",
        (part_id, number))
    return True


def _create_cross_references(cursor, part_ids):
    """Cross-reference each imported part with co-fitted parts, both ways.

    For every imported part, up to 50 other parts are selected that belong
    to the same part group and are fitted to at least one of the same
    vehicles. Parts that are themselves in *part_ids* are skipped. Returns
    the number of cross-reference rows inserted.
    """
    xrefs_created = 0
    wix_part_id_set = set(part_ids.values())

    for i, (pn, part_id) in enumerate(part_ids.items()):
        if i % 200 == 0 and i > 0:
            print(f" Procesando cross-ref {i}/{len(part_ids)}...")

        cursor.execute("""
            SELECT DISTINCT p2.id, p2.oem_part_number
            FROM vehicle_parts vp1
            JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
            JOIN parts p2 ON vp2.part_id = p2.id
            WHERE vp1.part_id = ?
            AND p2.id != ?
            AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
            LIMIT 50
        """, (part_id, part_id, part_id))

        # fetchall() materialises the result, so reusing the cursor for the
        # inserts inside the loop is safe.
        for row in cursor.fetchall():
            if row['id'] in wix_part_id_set:
                continue
            if _add_cross_reference(cursor, part_id, row['oem_part_number']):
                xrefs_created += 1
            if _add_cross_reference(cursor, row['id'], pn):
                xrefs_created += 1

    return xrefs_created


def main():
    """Import the WIX 2021 catalog into the vehicle database.

    Steps: parse the PDF, ensure the WIX manufacturer, create filter parts,
    create vehicles and fitments, then create cross-references. All changes
    are committed once at the end. The connection is always closed, and if
    any step raises, nothing is committed.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO WIX 2021")
    print("=" * 70)

    print(f"\n[1/5] Leyendo PDF: {PDF_PATH}")
    entries = parse_wix_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries)}")

    # The first filter type seen for a part number wins.
    unique_parts = {}
    for entry in entries:
        for pn, ftype in entry['parts']:
            unique_parts.setdefault(pn, ftype)

    unique_brands = {e['brand'] for e in entries}
    print(f" Partes únicas: {len(unique_parts)}")
    print(f" Marcas de vehículos: {len(unique_brands)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        print("\n[2/5] Creando fabricante WIX...")
        wix_mfr_id = ensure_manufacturer(cursor, 'WIX', 'aftermarket', 'premium', 'USA')
        print(f" WIX manufacturer_id: {wix_mfr_id}")

        print("\n[3/5] Creando partes de filtros...")
        part_ids, parts_created = _create_filter_parts(cursor, unique_parts)
        print(f" Partes creadas: {parts_created}")
        print(f" Partes existentes: {len(unique_parts) - parts_created}")

        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created, fitments_created = _create_fitments(cursor, entries, part_ids)
        print(f" Vehículos creados: {vehicles_created}")
        print(f" Fitments creados: {fitments_created}")

        print("\n[5/5] Creando referencias cruzadas...")
        xrefs_created = _create_cross_references(cursor, part_ids)
        print(f" Cross-refs creadas: {xrefs_created}")

        conn.commit()
    finally:
        # Closing without a commit discards the uncommitted changes.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN WIX COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
""")
|
||||
|
||||
|
||||
# Run the WIX import only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user