fix: performance improvements, shared UI, and cross-reference data quality

Backend (server.py):
- Fix N+1 query in /api/diagrams/<id>/parts with batch cross-ref query
- Add LIMIT safety nets to 15 endpoints (50-5000 per data type)
- Add pagination to /api/vehicles, /api/model-year-engine, /api/vehicles/<id>/parts, /api/admin/export
- Optimize search_vehicles() EXISTS subquery to JOIN
- Restrict static route to /static/* subdir (security fix)
- Add detailed=true support to /api/brands and /api/models

Frontend:
- Extract shared CSS into shared.css (variables, reset, buttons, forms, scrollbar)
- Create shared nav.js component (logo + navigation links, auto-highlights)
- Update all 4 HTML pages to use shared CSS and nav
- Update JS to handle paginated API responses

Data quality:
- Fix cross-reference source field: map 72K records from catalog names to actual brands
- Fix aftermarket_parts manufacturer_id: correct 8K records with wrong brand attribution
- Delete 98MB backup file, orphan records, and garbage cross-references
- Add import scripts for DAR, FRAM, WIX, MOOG, Cartek catalogs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-17 03:09:22 +00:00
parent 3ea2de61e2
commit 7ecf1295a5
17 changed files with 6605 additions and 848 deletions

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS
Encuentra partes de diferentes fabricantes que cubren los mismos vehículos
y crea referencias cruzadas bidireccionales entre ellas.
"""
import sqlite3
from pathlib import Path
from collections import defaultdict
# Path of the SQLite database file: it sits in the directory one level above
# the folder that contains this script.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
def get_db():
    """Open the database at DB_PATH and return the connection.

    Rows produced by the connection are sqlite3.Row objects, so columns can be
    read by name (row['id']) as well as by position.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def main():
    """Create bidirectional cross-references between parts that fit the same vehicle.

    Parts are bucketed by (group_id, model_year_engine_id). Every pair of parts
    in a bucket whose part numbers do not share their first three characters
    (used here as a "same brand" heuristic) gets an A->B and a B->A row in
    part_cross_references, unless that row already exists. Everything is
    committed in a single transaction at the end, and the connection is closed
    even when a step fails.
    """
    from itertools import combinations

    print("=" * 70)
    print("GENERADOR DE REFERENCIAS CRUZADAS ENTRE MARCAS")
    print("=" * 70)

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Count before inserting so the summary can show before/after numbers.
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        existing_xrefs = cursor.fetchone()[0]
        print(f"\nCross-refs existentes: {existing_xrefs:,}")

        # Step 1: bucket parts by (group, vehicle).
        print("\n[1/3] Buscando partes que cubren los mismos vehículos...")
        cursor.execute("""
            SELECT vp.model_year_engine_id, vp.part_id, p.oem_part_number, p.group_id
            FROM vehicle_parts vp
            JOIN parts p ON vp.part_id = p.id
            WHERE p.group_id IS NOT NULL
            ORDER BY p.group_id, vp.model_year_engine_id
        """)
        group_mye_parts = defaultdict(set)
        for row in cursor.fetchall():
            key = (row['group_id'], row['model_year_engine_id'])
            group_mye_parts[key].add((row['part_id'], row['oem_part_number']))
        print(f" Combinaciones grupo+vehículo: {len(group_mye_parts):,}")

        # Step 2: derive the missing cross-reference pairs.
        print("\n[2/3] Generando pares de cross-reference...")
        cursor.execute("SELECT part_id, cross_reference_number FROM part_cross_references")
        existing = {
            (row['part_id'], row['cross_reference_number'])
            for row in cursor.fetchall()
        }
        print(f" Cross-refs existentes en set: {len(existing):,}")

        new_xrefs = []
        for parts_set in group_mye_parts.values():
            if len(parts_set) < 2:
                continue
            for (pid_a, pn_a), (pid_b, pn_b) in combinations(parts_set, 2):
                # A missing/empty part number cannot be sliced or referenced.
                if not pn_a or not pn_b:
                    continue
                # Same 3-character prefix is treated as "same brand": skip.
                if pn_a[:3] == pn_b[:3]:
                    continue
                # Register both directions; `existing` also de-duplicates pairs
                # that show up again under another vehicle.
                for pair in ((pid_a, pn_b), (pid_b, pn_a)):
                    if pair not in existing:
                        existing.add(pair)
                        new_xrefs.append(pair)
        print(f" Nuevas cross-refs a crear: {len(new_xrefs):,}")

        # Step 3: insert in batches (one executemany per 5000 rows).
        print("\n[3/3] Insertando cross-references...")
        insert_sql = (
            "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) "
            "VALUES (?, ?, 'interchange', 'Vehicle Fitment Match')"
        )
        batch_size = 5000
        inserted = 0
        for start in range(0, len(new_xrefs), batch_size):
            if start > 0:
                print(f" Insertando {start}/{len(new_xrefs)}...")
            batch = new_xrefs[start:start + batch_size]
            cursor.executemany(insert_sql, batch)
            inserted += len(batch)
        conn.commit()

        # Final stats
        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        total_xrefs = cursor.fetchone()[0]
    finally:
        conn.close()

    print("\n" + "=" * 70)
    print("CROSS-REFERENCES COMPLETADAS")
    print("=" * 70)
    print(f"""
RESUMEN:
- Cross-refs antes: {existing_xrefs:,}
- Nuevas cross-refs: {inserted:,}
- Total cross-refs: {total_xrefs:,}
""")
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,179 @@
#!/usr/bin/env python3
"""
EXTRACTOR DE IMÁGENES DE DIAGRAMAS MOOG
Extrae las ilustraciones de suspensión/dirección de los PDFs MOOG
y las guarda como archivos de imagen mapeados a sus figure codes.
"""
import re
import sys
import io
import hashlib
from pathlib import Path
import pypdf
# Directory that receives the extracted images: three levels above this
# script's folder, under dashboard/static/diagrams/moog.
OUTPUT_DIR = Path(__file__).parent.parent.parent / 'dashboard' / 'static' / 'diagrams' / 'moog'
# Per-volume settings keyed by the volume id given on the command line.
# 'start_page' and 'end_page' are used as inclusive indices into pdf.pages
# (so they are 0-based); 'label' is only printed.
VOLUMES = {
    '1': {
        'path': '/tmp/catalogs/suspension/moog_vol1_1989back.pdf',
        'start_page': 3,
        'end_page': 1037,
        'label': 'Vol 1 (≤1989)',
    },
    '2': {
        'path': '/tmp/catalogs/suspension/moog_vol2_1990_2005.pdf',
        'start_page': 6,
        'end_page': 1641,
        'label': 'Vol 2 (1990-2005)',
    },
    '3': {
        'path': '/tmp/catalogs/suspension/moog_vol3_2006up.pdf',
        'start_page': 7,
        'end_page': 1089,
        'label': 'Vol 3 (2006+)',
    },
}
# A figure code is the letter F, S or R followed by exactly three digits,
# standing as a whole word.
FIGURE_RE = re.compile(r'\b([FSR]\d{3})\b')
def extract_figure_codes(text):
    """Return the distinct figure codes in *text*, ordered by first appearance."""
    # FIGURE_RE has a single capture group, so findall() yields the codes
    # themselves; dict.fromkeys() removes repeats while keeping first-seen order.
    return list(dict.fromkeys(FIGURE_RE.findall(text)))
# Images at or below this many bytes are ignored when looking for diagrams.
_MIN_IMAGE_BYTES = 5000


def _image_extension(img_data):
    """Pick a file extension from the leading magic bytes; defaults to 'jpg'."""
    signature = img_data[:4]
    if signature == b'\x89PNG':
        return 'png'
    if signature == b'\x00\x00\x00\x0c':
        return 'jp2'
    return 'jpg'


def _page_images(page):
    """Return the raw bytes of the large images on *page*.

    First tries ``page.images``. If that raises, the page's /XObject resources
    are scanned instead (sorted by name, images wider than 200 and taller than
    100 only). The fallback result replaces whatever the failed first pass had
    collected, rather than being appended to it, so no image is listed twice;
    the partial first-pass result is used only when the fallback finds nothing.
    Errors are swallowed on purpose: extraction is best-effort per page.
    """
    partial = []
    try:
        for img in page.images:
            data = img.data
            if len(data) > _MIN_IMAGE_BYTES:
                partial.append(data)
        return partial
    except Exception:
        pass  # fall through to the XObject scan below

    fallback = []
    try:
        resources = page['/Resources']
        if '/XObject' in resources:
            xobjects = resources['/XObject'].get_object()
            for obj_name in sorted(xobjects.keys()):
                xobj = xobjects[obj_name].get_object()
                if xobj.get('/Subtype') != '/Image':
                    continue
                w = int(xobj.get('/Width', 0))
                h = int(xobj.get('/Height', 0))
                if w > 200 and h > 100:
                    try:
                        data = xobj.get_data()
                        if len(data) > _MIN_IMAGE_BYTES:
                            fallback.append(data)
                    except Exception:
                        pass  # an undecodable image should not stop the page
    except Exception:
        pass
    return fallback or partial


def extract_volume(vol_key, already_extracted):
    """Extract diagram images from one MOOG volume.

    Args:
        vol_key: key into VOLUMES selecting the PDF and its page range.
        already_extracted: set of figure codes that already have an image;
            it is updated in place with every code written by this call.

    Returns:
        The number of image files written.

    A failure on one page is counted (the first five are printed) and the
    scan continues with the next page.
    """
    vol = VOLUMES[vol_key]
    print(f"\n--- Procesando {vol['label']} ---")
    print(f" PDF: {vol['path']}")
    pdf = pypdf.PdfReader(vol['path'])
    total_pages = len(pdf.pages)
    # Never index past the last page of the actual document.
    end_page = min(vol['end_page'], total_pages - 1)
    extracted = 0
    skipped = 0
    errors = 0
    for page_idx in range(vol['start_page'], end_page + 1):
        if page_idx % 100 == 0:
            print(f" Página {page_idx}/{end_page}... (extraídas: {extracted})")
        try:
            page = pdf.pages[page_idx]
            text = page.extract_text() or ''
            fig_codes = extract_figure_codes(text)
            if not fig_codes:
                continue
            needed_codes = [c for c in fig_codes if c not in already_extracted]
            if not needed_codes:
                skipped += len(fig_codes)
                continue
            images = _page_images(page)
            if not images:
                continue
            # Pair the still-missing codes with the page's large images in
            # order; surplus codes or surplus images are left unmatched.
            # NOTE(review): codes already extracted are removed before pairing
            # but their images are not, so the pairing can shift on pages that
            # mix old and new codes -- confirm against the catalog layout.
            for code, img_data in zip(needed_codes, images):
                out_path = OUTPUT_DIR / f"{code}.{_image_extension(img_data)}"
                out_path.write_bytes(img_data)
                already_extracted.add(code)
                extracted += 1
        except Exception as e:
            errors += 1
            if errors <= 5:
                print(f" Error en página {page_idx}: {e}")
    print(f" Resultado: {extracted} extraídas, {skipped} ya existentes, {errors} errores")
    return extracted
def main():
    """Extract diagrams for the volumes named on the command line.

    With no arguments the volumes are processed in the order 3, 2, 1. Codes
    that already have a .jpg/.png/.jp2 file in OUTPUT_DIR are passed to
    extract_volume() as already extracted.
    """
    requested = sys.argv[1:] or ['3', '2', '1']
    banner = "=" * 70
    print(banner)
    print("EXTRACTOR DE DIAGRAMAS MOOG")
    print(banner)

    # Create output directory
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Directorio de salida: {OUTPUT_DIR}")

    # The file stem is the figure code of an image written earlier.
    already_extracted = {
        entry.stem
        for entry in OUTPUT_DIR.iterdir()
        if entry.suffix in ('.jpg', '.png', '.jp2')
    }
    print(f"Ya extraídas: {len(already_extracted)}")

    new_images = 0
    for vol_key in requested:
        if vol_key in VOLUMES:
            new_images += extract_volume(vol_key, already_extracted)
        else:
            print(f"Volumen {vol_key} no reconocido, saltando...")

    print(f"\n{banner}")
    print(f"EXTRACCIÓN COMPLETADA: {new_images} nuevas imágenes")
    print(f"Total en directorio: {len(list(OUTPUT_DIR.iterdir()))}")
    print(banner)
# Run the extractor only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,362 @@
#!/usr/bin/env python3
"""
IMPORTADOR DEL CATÁLOGO CARTEK - FILTROS DE ACEITE
Formato: Brand → Model | YearFrom | YearTo | CTK#### | Observations
Solo aceite. PDF: /tmp/catalogs/cartek_aceite.pdf
"""
import sqlite3
import re
import pypdf
from pathlib import Path
# Path of the SQLite database file, one directory above this script's folder.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
# Catalog PDF that parse_cartek_pdf() reads.
PDF_PATH = '/tmp/catalogs/cartek_aceite.pdf'
# Known brand headers in the Cartek catalog. A text line is treated as a brand
# header only when it equals one of these strings exactly.
BRAND_HEADERS = {
    'ACURA', 'ALFA ROMEO', 'AM GENERAL', 'AMERICAN MOTORS', 'ASTON MARTIN',
    'ASUNA', 'AUDI', 'AUSTIN', 'AUSTIN HEALEY', 'AVANTI', 'BAIC', 'BENTLEY',
    'BERTONE', 'BMW', 'BRICKLIN', 'BUICK', 'CADILLAC', 'CHECKER', 'CHEVROLET',
    'CHRYSLER', 'DAEWOO', 'DAIHATSU', 'DATSUN', 'DELOREAN', 'DESOTO',
    'DETOMASO', 'DODGE', 'EAGLE', 'EDSEL', 'EXCALIBUR', 'FAW', 'FIAT', 'FORD',
    'FREIGHTLINER', 'GEO', 'GMC', 'HILLMAN', 'HONDA', 'HUMMER', 'HYUNDAI',
    'IC CORPORATION', 'INFINITI', 'INTERNATIONAL', 'ISUZU', 'JAC', 'JAGUAR',
    'JEEP', 'JENSEN', 'KARMA', 'KIA', 'KUBOTA', 'LAFORZA', 'LAND ROVER',
    'LEXUS', 'LINCOLN', 'LOTUS', 'MACK', 'MAZDA', 'MERCEDES-BENZ', 'MERCURY',
    'MERKUR', 'MINI', 'MITSUBISHI', 'MORGAN', 'NISSAN', 'NSU', 'OLDSMOBILE',
    'OPEL', 'OSHKOSH MOTOR TRUCK CO.', 'PETERBILT', 'PEUGEOT', 'PLYMOUTH',
    'POLARIS', 'PONTIAC', 'PORSCHE', 'QVALE', 'RAM', 'RENAULT', 'ROLLS ROYCE',
    'SAAB', 'SATURN', 'SCION', 'SEAT', 'SHELBY', 'SMART', 'SRT',
    'STERLING TRUCK', 'STUDEBAKER', 'SUBARU', 'SUNBEAM', 'SUZUKI', 'TOYOTA',
    'TRIUMPH', 'VAM', 'VOLKSWAGEN', 'VOLVO', 'VPG', 'WORKHORSE',
    'WORKHORSE CUSTOM CHASSIS', 'YAMAHA', 'YUGO',
}
def get_db():
    """Connect to the database at DB_PATH with name-addressable rows.

    The connection's row factory is sqlite3.Row, which the helpers in this
    file rely on when they read row['id'].
    """
    db = sqlite3.connect(DB_PATH)
    db.row_factory = sqlite3.Row
    return db
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer named *name*, inserting it if missing.

    The lookup compares names through SQL UPPER(). type_, quality and country
    are written only when a new row is inserted; an existing row is returned
    untouched.
    """
    found = cursor.execute(
        "SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if found is None:
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            (name, type_, quality, country))
        return cursor.lastrowid
    return found['id']
def ensure_brand(cursor, name):
    """Return the id of the brand named *name* (matched via SQL UPPER()), creating it if absent."""
    found = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if found is None:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return found['id']
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, creating the row if absent.

    The name comparison goes through SQL UPPER(); the brand must match exactly.
    """
    found = cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name),
    ).fetchone()
    if found is None:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return found['id']
def ensure_year(cursor, year):
    """Return the id of the years row for *year*, inserting the row if it does not exist."""
    found = cursor.execute("SELECT id FROM years WHERE year = ?", (year,)).fetchone()
    if found is None:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return found['id']
def get_generic_engine(cursor):
    """Return the id of the engine named 'Generic', creating it if needed.

    The placeholder row is inserted with fuel_type 'gasoline'.
    """
    found = cursor.execute("SELECT id FROM engines WHERE name = 'Generic'").fetchone()
    if found is None:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return found['id']
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a model_year_engine row, inserting one if none matches.

    With a truthy *engine_id* the row must match model, year and engine.
    Without one, any row for the model and year is accepted; if there is none,
    a new row is inserted using the engine from get_generic_engine().
    """
    lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
    lookup_args = (model_id, year_id)
    if engine_id:
        lookup_sql += " AND engine_id = ?"
        lookup_args += (engine_id,)
    found = cursor.execute(lookup_sql, lookup_args).fetchone()
    if found:
        return found['id']

    # Nothing matched: fall back to the placeholder engine when none was given.
    engine_for_insert = engine_id if engine_id else get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_for_insert))
    return cursor.lastrowid
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by oem_part_number, inserting it when it does not exist.

    Returns:
        (part_id, created): *created* is True only when a new row was inserted.
        An existing row is returned as-is; the other arguments are ignored then.
    """
    found = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if found is not None:
        return found['id'], False
    new_row = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid, True
def get_oil_filter_group(cursor):
    """Return the id of the 'Oil Filters' part group, creating it when missing.

    A new group is attached to the 'Engine' category. Returns None when the
    group does not exist and there is no 'Engine' category to attach it to.
    """
    group = cursor.execute(
        "SELECT id FROM part_groups WHERE name = 'Oil Filters' LIMIT 1").fetchone()
    if group:
        return group['id']

    category = cursor.execute(
        "SELECT id FROM part_categories WHERE name = 'Engine' LIMIT 1").fetchone()
    if category:
        cursor.execute(
            "INSERT INTO part_groups (category_id, name, name_es) VALUES (?, 'Oil Filters', 'Filtros de Aceite')",
            (category['id'],))
        return cursor.lastrowid
    return None
def parse_cartek_pdf(pdf_path):
    """Parse the Cartek oil filter catalog PDF.

    Pages are read from index 4 onward. A line equal to one of BRAND_HEADERS
    sets the current brand; data lines have the shape
    ``Model YearFrom YearTo CTK#### [Observations]``. A non-data line that
    starts with neither a digit nor 'CTK' is remembered as a model-name prefix
    for the next data line on the same page.

    Args:
        pdf_path: path of the catalog PDF.

    Returns:
        A list of dicts with keys 'brand', 'model', 'year', 'part_number' and
        'observations' -- one dict per year in each row's inclusive range.
    """
    # Observations are optional: lines are stripped before matching, so a row
    # without observations ends right after the CTK number.
    data_line_re = re.compile(r'^(.+?)\s+(\d{4})\s+(\d{4})\s+(CTK\w+)(?:\s+(.*))?$')
    page_number_re = re.compile(r'^\d{1,3}$')
    starts_with_digit_re = re.compile(r'^\d')

    pdf = pypdf.PdfReader(pdf_path)
    entries = []
    current_brand = None
    for page_num in range(4, len(pdf.pages)):  # Skip cover/index pages
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue
        # A pending model prefix never carries over to the next page.
        pending_model = None
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            # Skip header/footer lines
            if 'Marca/Modelo' in line or 'Observaciones' in line:
                continue
            # Skip page numbers
            if page_number_re.match(line):
                continue
            # Check for brand header
            if line in BRAND_HEADERS:
                current_brand = line
                pending_model = None
                continue
            if not current_brand:
                continue

            match = data_line_re.match(line)
            if match is None:
                # Possibly a model-name prefix (e.g. "Avalanche" before "1500")
                # or a sub-brand header that is not in BRAND_HEADERS.
                if not starts_with_digit_re.match(line) and not line.startswith('CTK'):
                    pending_model = line
                else:
                    pending_model = None
                continue

            model = match.group(1).strip()
            if pending_model:
                model = f"{pending_model} {model}"
                pending_model = None
            year_from = int(match.group(2))
            year_to = int(match.group(3))
            part_number = match.group(4).strip()
            observations = (match.group(5) or '').strip()
            for year in range(year_from, year_to + 1):
                entries.append({
                    'brand': current_brand,
                    'model': model,
                    'year': year,
                    'part_number': part_number,
                    'observations': observations,
                })
    return entries
def _add_xref_if_missing(cursor, part_id, xref_number):
    """Insert a 'Cartek Catalog' interchange cross-reference unless it exists; return True if inserted."""
    cursor.execute(
        "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
        (part_id, xref_number))
    if cursor.fetchone():
        return False
    cursor.execute(
        "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'Cartek Catalog')",
        (part_id, xref_number))
    return True


def main():
    """Import the Cartek oil filter catalog into the vehicle database.

    Steps: parse the PDF; ensure the Cartek manufacturer and the Oil Filters
    group; create one part per CTK number; create vehicles
    (model/year/engine rows) and fitments; finally cross-reference each Cartek
    part, in both directions, with up to 20 non-CTK parts of the same group
    that fit a shared vehicle. All changes are committed once at the end, and
    the connection is closed even when a step fails.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO CARTEK FILTROS DE ACEITE")
    print("=" * 70)
    print(f"\n[1/5] Leyendo PDF: {PDF_PATH}")
    entries = parse_cartek_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries)}")
    # Get unique parts and brands
    unique_parts = {e['part_number'] for e in entries}
    unique_brands = {e['brand'] for e in entries}
    print(f" Partes únicas: {len(unique_parts)}")
    print(f" Marcas de vehículos: {len(unique_brands)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        # Create Cartek manufacturer
        print("\n[2/5] Creando fabricante Cartek...")
        cartek_mfr_id = ensure_manufacturer(cursor, 'Cartek', 'aftermarket', 'standard', 'Mexico')
        print(f" Cartek manufacturer_id: {cartek_mfr_id}")
        # Get oil filter group (may be None when no 'Engine' category exists)
        oil_group_id = get_oil_filter_group(cursor)
        print(f" Oil Filters group_id: {oil_group_id}")

        # Create parts
        print("\n[3/5] Creando partes de filtros...")
        part_ids = {}
        parts_created = 0
        for pn in sorted(unique_parts):
            part_id, created = get_or_create_part(
                cursor, pn, oil_group_id,
                f"Oil Filter {pn}", f"Filtro de Aceite {pn}", "Cartek Oil Filter")
            part_ids[pn] = part_id
            if created:
                parts_created += 1
        print(f" Partes creadas: {parts_created}")
        print(f" Partes existentes: {len(unique_parts) - parts_created}")

        # Create vehicles and fitments
        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}  # (brand, model, year) -> model_year_engine id
        for entry in entries:
            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])
                # Try to find existing MYE (any engine) so that only genuinely
                # new vehicles are counted as created.
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                    JOIN models m ON mye.model_id = m.id
                    JOIN brands b ON m.brand_id = b.id
                    JOIN years y ON mye.year_id = y.id
                    WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                    LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()
                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id)
                    vehicles_created += 1
            mye_id = mye_cache[cache_key]
            part_id = part_ids.get(entry['part_number'])
            if not part_id:
                continue
            # Check if fitment exists
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = "Catálogo Cartek - ACEITE"
                if entry['observations'] and entry['observations'] != '-':
                    notes += f" ({entry['observations']})"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1
        print(f" Vehículos creados: {vehicles_created}")
        print(f" Fitments creados: {fitments_created}")

        # Create cross-references by matching Cartek parts to existing parts
        # (Gonher, etc.) that fit the same vehicle
        print("\n[5/5] Creando referencias cruzadas...")
        xrefs_created = 0
        for pn, part_id in part_ids.items():
            # Find other parts in the same group that fit the same vehicles
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number NOT LIKE 'CTK%'
                LIMIT 20
            """, (part_id, part_id, part_id))
            # fetchall() first: the helper reuses this cursor for its own queries.
            for row in cursor.fetchall():
                # Cartek -> other brand
                if _add_xref_if_missing(cursor, part_id, row['oem_part_number']):
                    xrefs_created += 1
                # other brand -> Cartek
                if _add_xref_if_missing(cursor, row['id'], pn):
                    xrefs_created += 1
        print(f" Cross-refs creadas: {xrefs_created}")
        conn.commit()
    finally:
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN CARTEK COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
""")
# Run the importer only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,680 @@
#!/usr/bin/env python3
"""
IMPORTADOR DEL CATÁLOGO DAR "LÍNEA AZUL" 2020
Formato: Brand → Model → AÑO DESCRIPCIÓN SKU #PÁG
Pages 27-571 contain vehicle application data.
PDF: /tmp/catalogs/suspension/catalogo_azul_2020.pdf
"""
import sqlite3
import re
import pypdf
from pathlib import Path
from collections import defaultdict
# Path of the SQLite database file, one directory above this script's folder.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
# Catalog PDF read by parse_dar_pdf().
PDF_PATH = '/tmp/catalogs/suspension/catalogo_azul_2020.pdf'
# Page range (0-indexed) for vehicle application data
START_PAGE = 27
END_PAGE = 571
# Known brand headers in the DAR catalog. Some headers name two brands
# separated by a comma (e.g. 'CHEVROLET, GMC').
DAR_BRANDS = {
    'ACURA', 'ALFA ROMEO', 'AUDI', 'BMW', 'BUICK', 'CADILLAC',
    'CHEVROLET, GMC', 'CHRYSLER', 'DATSUN', 'DODGE', 'EAGLE',
    'FIAT', 'FORD, MERCURY', 'GEO', 'HONDA', 'HUMMER', 'HYUNDAI',
    'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP', 'KIA',
    'LAND ROVER', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES-BENZ',
    'MERKUR', 'MINI', 'MITSUBISHI', 'NISSAN', 'OLDSMOBILE',
    'OPEL', 'PEUGEOT', 'PLYMOUTH', 'PONTIAC', 'PORSCHE',
    'RAM', 'RENAULT', 'SAAB', 'SATURN', 'SCION', 'SEAT', 'SMART',
    'SUBARU', 'SUZUKI', 'TOYOTA', 'TRIUMPH', 'VOLKSWAGEN',
    'VOLVO', 'VOLVO/MASA',
}
# Year range at the start of a line: two numbers of 2 to 4 digits each,
# separated by a hyphen (optional spaces around it).
YEAR_RE = re.compile(r'^(\d{2,4})\s*-\s*(\d{2,4})\b')
# A single number of 2 to 4 digits at the start of a line.
YEAR_SINGLE_RE = re.compile(r'^(\d{2,4})\b')
# The word TODOS at the start of a line, in any letter case.
TODOS_RE = re.compile(r'^TODOS\b', re.IGNORECASE)
# Line ending with SKU + page ref: ...SKU_TOKEN 3-4_DIGIT_PAGEREF
# Groups: 1 = everything before the SKU, 2 = SKU token, 3 = page reference.
ENTRY_END_RE = re.compile(r'^(.+?)\s+(\S+)\s+(\d{3,4})\s*$')
# Skip patterns: a line containing any of these substrings is ignored.
SKIP_PATTERNS = [
    'Línea Azul',
    'CATALOGO AZUL',
    'AÑO DESCRIPCIÓN SKU #PÁG',
    'AÑO DESCRIPCIÓN SKU',
    '.indb',
]
def get_db():
    """Return a connection to the database at DB_PATH whose rows are sqlite3.Row objects."""
    handle = sqlite3.connect(DB_PATH)
    # Row objects allow the row['id'] access used by the helpers in this file.
    handle.row_factory = sqlite3.Row
    return handle
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Get-or-create a manufacturer and return its id.

    Existing rows are found by comparing names through SQL UPPER() and are
    returned unchanged; type_, quality and country apply to new rows only.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    existing = cursor.fetchone()
    if not existing:
        new_row = (name, type_, quality, country)
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            new_row)
        return cursor.lastrowid
    return existing['id']
def ensure_brand(cursor, name):
    """Get-or-create a brand (names compared via SQL UPPER()) and return its id."""
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return existing['id']
def ensure_model(cursor, brand_id, name):
    """Get-or-create the model *name* under *brand_id* and return its id.

    Model names are compared through SQL UPPER().
    """
    cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return existing['id']
def ensure_year(cursor, year):
    """Get-or-create the years row for *year* and return its id."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return existing['id']
def get_generic_engine(cursor):
    """Return the id of the engine row named 'Generic', inserting it (fuel_type 'gasoline') if absent."""
    cursor.execute("SELECT id FROM engines WHERE name = 'Generic'")
    existing = cursor.fetchone()
    if not existing:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return existing['id']
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Get-or-create a model_year_engine row and return its id.

    When *engine_id* is truthy the lookup requires that exact engine;
    otherwise any row for the model and year is accepted. If a row has to be
    inserted without an engine, the one from get_generic_engine() is used.
    """
    conditions = ["model_id = ?", "year_id = ?"]
    values = [model_id, year_id]
    if engine_id:
        conditions.append("engine_id = ?")
        values.append(engine_id)
    cursor.execute(
        "SELECT id FROM model_year_engine WHERE " + " AND ".join(conditions),
        tuple(values))
    existing = cursor.fetchone()
    if existing:
        return existing['id']

    if not engine_id:
        engine_id = get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_id))
    return cursor.lastrowid
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Return (part_id, created) for the part whose oem_part_number is *part_number*.

    *created* is False when the part already existed (its stored fields are
    left untouched) and True when a new row was inserted from the arguments.
    """
    cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_number,))
    existing = cursor.fetchone()
    if not existing:
        cursor.execute(
            "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
            (part_number, name, name_es, group_id, description))
        return cursor.lastrowid, True
    return existing['id'], False
# --- Group ID lookup cache ---
# Maps an English group name to its part_groups id, or to None when the lookup
# found no such group. Entries are never invalidated.
_group_cache = {}


def get_group_id(cursor, name_en):
    """Return the part_groups id for the group named *name_en*, or None if there is none.

    Each name is queried at most once; later calls (misses included) are
    answered from the module-level cache.
    """
    try:
        return _group_cache[name_en]
    except KeyError:
        pass
    cursor.execute("SELECT id FROM part_groups WHERE name = ?", (name_en,))
    found = cursor.fetchone()
    group_id = found['id'] if found else None
    _group_cache[name_en] = group_id
    return group_id
def classify_description(cursor, desc):
    """Map DAR description text to a DB group_id.

    The description is upper-cased and tested against Spanish keywords in a
    fixed order; the first rule that matches decides the group. The order
    matters (e.g. 'BUJE ... ESTABILIZADORA' must be tested before plain
    'BUJE'). Descriptions that match no rule fall back to 'Ball Joints'.

    Args:
        cursor: database cursor handed to get_group_id().
        desc: description text from the catalog.

    Returns:
        Whatever get_group_id() returns for the chosen group name (None when
        that group does not exist in the database).
    """
    d = desc.upper()
    # Amortiguadores (Shocks)
    if 'AMORTIGUADOR' in d and 'BASE' not in d:
        if 'CAJUELA' in d or 'COFRE' in d or 'VIDRIO' in d:
            return get_group_id(cursor, 'Struts')  # trunk/hood/glass struts
        if 'DIRECCIÓN' in d or 'DIRECCION' in d:
            return get_group_id(cursor, 'Steering Dampers')
        return get_group_id(cursor, 'Shocks')
    # Base amortiguador (Strut Mounts)
    if 'BASE AMORTIGUADOR' in d:
        return get_group_id(cursor, 'Strut Mounts')
    # Balero (Bearings)
    if 'BALERO' in d:
        return get_group_id(cursor, 'Wheel Bearings')
    # Maza (Wheel Hubs)
    if 'MAZA' in d:
        return get_group_id(cursor, 'Wheel Hubs')
    # Soporte de Motor / Transmisión (Mounts)
    if 'SOPORTE DE MOTOR' in d or 'SOPORTE MOTOR' in d:
        return get_group_id(cursor, 'Engine Mounts')
    if 'SOPORTE DE TRANSMIS' in d or 'SOPORTE TRANSMIS' in d:
        return get_group_id(cursor, 'Transmission Mounts')
    if 'SOPORTE' in d and 'AMORTIGUADOR' in d:
        return get_group_id(cursor, 'Strut Mounts')
    if 'SOPORTE BRAZO' in d:
        return get_group_id(cursor, 'Idler Arms')
    # Rotula (Ball Joint)
    if 'RÓTULA' in d or 'ROTULA' in d:
        return get_group_id(cursor, 'Ball Joints')
    # Terminal exterior / dirección (Tie Rod Ends)
    if 'TERMINAL EXTERIOR' in d or 'TERMINAL DIREC' in d:
        return get_group_id(cursor, 'Tie Rod Ends')
    # Terminal interior (Inner Tie Rods)
    if 'TERMINAL INTERIOR' in d:
        return get_group_id(cursor, 'Inner Tie Rods')
    # Horquilla (Control Arms) -- this also covers 'BARRA ... HORQUILLA'
    if 'HORQUILLA' in d:
        return get_group_id(cursor, 'Control Arms')
    # Goma / buje de varilla estabilizadora (Sway Bar Bushings)
    if ('GOMA' in d or 'BUJE' in d) and 'ESTABILIZADORA' in d:
        return get_group_id(cursor, 'Sway Bar Bushings')
    # Tornillo estabilizador (Sway Bar Links)
    if 'TORNILLO ESTABILIZADOR' in d:
        return get_group_id(cursor, 'Sway Bar Links')
    # Buje (Bushings)
    if 'BUJE' in d:
        return get_group_id(cursor, 'Bushings')
    # Resorte (Springs)
    if 'RESORTE' in d:
        return get_group_id(cursor, 'Coil Springs')
    # Brazo auxiliar (Idler Arm)
    if 'BRAZO AUXILIAR' in d:
        return get_group_id(cursor, 'Idler Arms')
    # Brazo Pitman ('PITMAN' alone already covers 'BRAZO PITMAN')
    if 'PITMAN' in d:
        return get_group_id(cursor, 'Pitman Arms')
    # Varilla / Barra central (Center Links)
    if 'BARRA CENTRAL' in d or 'VARILLA CENTRAL' in d:
        return get_group_id(cursor, 'Center Links')
    # Varilla lateral / Barra de arrastre (Drag Links)
    if 'VARILLA' in d:
        return get_group_id(cursor, 'Drag Links')
    # Cremallera (Steering Rack)
    if 'CREMALLERA' in d:
        return get_group_id(cursor, 'Steering Racks')
    # Bomba dirección (Power Steering Pump)
    if 'BOMBA DIREC' in d:
        return get_group_id(cursor, 'Power Steering Pumps')
    # Cople dirección / flector dirección (Steering Gearbox / Coupling)
    if 'COPLE DIREC' in d or 'FLECTOR' in d:
        return get_group_id(cursor, 'Steering Gearboxes')
    # Nudo dirección (Steering Knuckle)
    if 'NUDO DIREC' in d:
        return get_group_id(cursor, 'Steering Knuckles')
    # Excéntrico (Camber/Caster)
    if 'EXCÉNTRICO' in d or 'EXCENTRICO' in d or 'CAMBER' in d:
        return get_group_id(cursor, 'Camber/Caster Kits')
    # Junta CV
    if 'JUNTA' in d and ('RUEDA' in d or 'CAJA' in d):
        return get_group_id(cursor, 'CV Joints')
    # Macheta / Flecha
    if 'MACHETA' in d or 'FLECHA' in d:
        return get_group_id(cursor, 'CV Axles')
    # Tirante (Trailing Arm)
    if 'TIRANTE' in d:
        return get_group_id(cursor, 'Trailing Arms')
    # Barra de torsión: accept the unaccented spelling too, as the other
    # accented keywords in this function do.
    if 'BARRA' in d and ('TORSIÓN' in d or 'TORSION' in d):
        return get_group_id(cursor, 'Torsion Bars')
    # Default: Ball Joints
    return get_group_id(cursor, 'Ball Joints')
# --- Part type name from description ---
# (Spanish keyword, English name) pairs, tested in order against the
# upper-cased description; the first keyword found wins. More specific
# keywords must therefore come before any shorter keyword they contain --
# in particular 'BASE AMORTIGUADOR' before every 'AMORTIGUADOR...' entry.
_DAR_NAME_TRANSLATIONS = (
    ('BASE AMORTIGUADOR', 'Strut Mount'),
    ('AMORTIGUADOR DELANTERO', 'Front Shock'),
    ('AMORTIGUADOR TRASERO', 'Rear Shock'),
    ('AMORTIGUADOR', 'Shock Absorber'),
    ('BALERO DOBLE', 'Double Bearing'),
    ('BALERO CONICO', 'Tapered Bearing'),
    ('BALERO', 'Wheel Bearing'),
    ('BOMBA DIREC', 'Power Steering Pump'),
    ('BRAZO AUXILIAR', 'Idler Arm'),
    ('BRAZO PITMAN', 'Pitman Arm'),
    ('BUJE', 'Bushing'),
    ('CREMALLERA', 'Steering Rack'),
    ('COPLE DIREC', 'Steering Coupler'),
    ('FLECTOR', 'Steering Flex Disc'),
    ('GOMA VARILLA ESTABILIZADORA', 'Sway Bar Bushing'),
    ('HORQUILLA INFERIOR', 'Lower Control Arm'),
    ('HORQUILLA SUPERIOR', 'Upper Control Arm'),
    ('HORQUILLA', 'Control Arm'),
    ('MAZA DELANTERA', 'Front Wheel Hub'),
    ('MAZA TRASERA', 'Rear Wheel Hub'),
    ('MAZA', 'Wheel Hub'),
    ('RESORTE DELANTERO', 'Front Coil Spring'),
    ('RESORTE TRASERO', 'Rear Coil Spring'),
    ('RESORTE', 'Coil Spring'),
    ('RÓTULA INFERIOR', 'Lower Ball Joint'),
    ('RÓTULA SUPERIOR', 'Upper Ball Joint'),
    ('ROTULA INFERIOR', 'Lower Ball Joint'),
    ('ROTULA SUPERIOR', 'Upper Ball Joint'),
    ('RÓTULA', 'Ball Joint'),
    ('ROTULA', 'Ball Joint'),
    ('SOPORTE DE MOTOR', 'Engine Mount'),
    ('SOPORTE DE TRANSMIS', 'Transmission Mount'),
    ('TERMINAL EXTERIOR', 'Outer Tie Rod End'),
    ('TERMINAL INTERIOR', 'Inner Tie Rod'),
    ('TERMINAL DIREC', 'Tie Rod End'),
    ('TIRANTE', 'Trailing Arm'),
    ('TORNILLO ESTABILIZADOR', 'Sway Bar Link'),
    ('VARILLA', 'Drag Link'),
    ('EXCÉNTRICO', 'Camber Kit'),
    ('EXCENTRICO', 'Camber Kit'),
)


def part_names_from_desc(desc, sku):
    """Generate English and Spanish names from DAR description.

    Args:
        desc: Spanish description text from the catalog (any letter case).
        sku: part number, appended to both names.

    Returns:
        (name_en, name_es). name_es is always "<desc> <sku>". name_en is
        "<English name> <sku>" for the first keyword found in the description,
        or the same text as name_es when no keyword matches.
    """
    name_es = f"{desc} {sku}"
    upper_desc = desc.upper()
    for es, en in _DAR_NAME_TRANSLATIONS:
        if es in upper_desc:
            return f"{en} {sku}", name_es
    # No keyword recognised: keep the Spanish text as the English name too.
    return name_es, name_es
def convert_year(yy):
    """Expand a two-digit year to four digits; larger values pass through.

    Values up to 30 are placed in the 2000s (00 -> 2000, 30 -> 2030) and the
    remaining values below 100 in the 1900s (31 -> 1931, 99 -> 1999). Anything
    from 100 upward is returned as the integer it already is.
    """
    value = int(yy)
    if value < 100:
        century = 2000 if value <= 30 else 1900
        return century + value
    return value
def is_skip_line(line):
    """Return True when *line* should be ignored by the parser.

    That is the case when the line contains any SKIP_PATTERNS substring, or
    when, after stripping, it consists of one to three digits only (a bare
    page number).
    """
    if any(marker in line for marker in SKIP_PATTERNS):
        return True
    # Pure page numbers
    return re.match(r'^\d{1,3}$', line.strip()) is not None
def is_brand_line(line):
    """Return True when *line* is a brand header.

    Surrounding whitespace is ignored, and the stripped text matches if it
    equals a DAR_BRANDS entry either as written or after upper-casing.
    """
    candidate = line.strip()
    return candidate in DAR_BRANDS or candidate.upper() in DAR_BRANDS
def parse_dar_pdf(pdf_path):
    """Parse the DAR Catalogo Azul vehicle application pages.

    Walks pages ``START_PAGE`` .. ``END_PAGE`` (clamped to the document
    length) line by line, tracking the current brand header(s) and model
    name, and accumulating the text of each year-prefixed application entry
    until the next year line, brand header, or end of input.

    Args:
        pdf_path: Path of the catalog PDF, handed to ``pypdf.PdfReader``.

    Returns:
        A list of dicts with keys ``brand``, ``model``, ``year``,
        ``description`` and ``sku`` -- one dict per current brand and per
        year in the entry's (inclusive) year range.
    """
    pdf = pypdf.PdfReader(pdf_path)
    entries = []
    current_brands = []  # List because some pages have "CHEVROLET, GMC"
    current_model = None
    # Accumulator for multi-line entries
    entry_year_from = None
    entry_year_to = None
    entry_lines = []

    def flush_entry():
        """Emit the accumulated entry (if it parses) and reset the accumulator."""
        nonlocal entry_year_from, entry_year_to, entry_lines
        # Nothing usable accumulated: just reset the state.
        if not entry_lines or entry_year_from is None:
            entry_lines = []
            entry_year_from = None
            entry_year_to = None
            return
        # Join accumulated lines
        full_text = ' '.join(entry_lines)
        # Try to extract SKU and page ref from the end
        m = ENTRY_END_RE.match(full_text)
        if m:
            desc_text = m.group(1).strip()
            sku = m.group(2).strip()
            # page_ref = m.group(3)  # not used for import
            # Entries seen before any model header, or without SKU/description,
            # are dropped silently.
            if sku and desc_text and current_model:
                # Expand the entry into one record per brand and per year.
                for brand_name in current_brands:
                    for year in range(entry_year_from, entry_year_to + 1):
                        entries.append({
                            'brand': brand_name,
                            'model': current_model,
                            'year': year,
                            'description': desc_text,
                            'sku': sku,
                        })
        entry_lines = []
        entry_year_from = None
        entry_year_to = None

    for page_num in range(START_PAGE, min(END_PAGE + 1, len(pdf.pages))):
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue
        lines = text.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if is_skip_line(line):
                continue
            # Check for brand header
            if is_brand_line(line):
                flush_entry()
                # Split combined brands like "CHEVROLET, GMC"
                current_brands = [b.strip() for b in line.split(',')]
                current_model = None
                continue
            # Check for model line
            # A model line is: not starting with a digit, not a data entry,
            # not a brand, and we already have a brand
            if not current_brands:
                continue
            # Check if this line starts with a year range
            m_year = YEAR_RE.match(line)
            m_single = YEAR_SINGLE_RE.match(line) if not m_year else None
            m_todos = TODOS_RE.match(line)
            if m_year or m_todos:
                # Flush previous entry
                flush_entry()
                if m_todos:
                    # "TODOS" = all years, use a reasonable range
                    entry_year_from = 1960
                    entry_year_to = 2020
                    rest = line[m_todos.end():].strip()
                else:
                    y1 = convert_year(m_year.group(1))
                    y2 = convert_year(m_year.group(2))
                    # Tolerate reversed ranges by ordering the two bounds.
                    entry_year_from = min(y1, y2)
                    entry_year_to = max(y1, y2)
                    rest = line[m_year.end():].strip()
                if rest:
                    entry_lines.append(rest)
                continue
            # If we're accumulating an entry, add continuation line
            # NOTE(review): while an entry is open, every line that is not a
            # year or brand line lands here, so the single-year and model-name
            # branches below are only reached when no entry is open. Confirm
            # that model headers following an entry are handled as intended.
            if entry_year_from is not None:
                entry_lines.append(line)
                continue
            # Check if it's a single year + data (rare)
            if m_single and len(line) > 4:
                y_val = int(m_single.group(1))
                # Only treat as year if it's a plausible 2-digit year (not a 4+ digit number)
                if y_val < 100 and len(m_single.group(1)) == 2:
                    flush_entry()
                    entry_year_from = convert_year(m_single.group(1))
                    entry_year_to = entry_year_from
                    rest = line[m_single.end():].strip()
                    if rest:
                        entry_lines.append(rest)
                    continue
            # If we get here, it's likely a model name
            # Strip "(cont)" suffix
            model_name = re.sub(r'\s*\(cont\)\s*$', '', line, flags=re.IGNORECASE).strip()
            # Ignore the "AÑO" column header and one-character fragments.
            if model_name and not model_name.startswith('AÑO') and len(model_name) > 1:
                flush_entry()
                current_model = model_name
    # Flush last entry
    flush_entry()
    return entries
def main():
    """Import the DAR 'Línea Azul' 2020 catalog into the vehicle database.

    Parses the PDF, then: ensures the DAR manufacturer row, creates one part
    and one aftermarket entry per SKU, creates missing vehicles
    (model/year/engine rows) and fitments, and links each DAR part to other
    same-group parts fitted to the same vehicles with bidirectional
    cross-references.  Changes are committed once at the end; the connection
    is closed even if a step raises, in which case nothing is committed.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO DAR 'LÍNEA AZUL' 2020")
    print("=" * 70)
    print(f"\n[1/5] Leyendo PDF: {PDF_PATH}")
    entries = parse_dar_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries):,}")

    # Index the first entry seen for each SKU in one pass; rescanning
    # ``entries`` for every SKU would be quadratic.
    first_entry_by_sku = {}
    for e in entries:
        first_entry_by_sku.setdefault(e['sku'], e)

    unique_skus = set(first_entry_by_sku)
    unique_brands = set(e['brand'] for e in entries)
    unique_models = set((e['brand'], e['model']) for e in entries)
    print(f" SKUs únicos: {len(unique_skus):,}")
    print(f" Marcas de vehículos: {len(unique_brands):,}")
    print(f" Modelos únicos: {len(unique_models):,}")
    # Show sample entries
    print("\n Primeras 5 entradas:")
    for e in entries[:5]:
        print(f" {e['brand']} {e['model']} {e['year']} | {e['description']} | {e['sku']}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        def add_xref(owner_part_id, number):
            """Insert an interchange cross-reference unless present; return rows added."""
            cursor.execute(
                "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                (owner_part_id, number))
            if cursor.fetchone():
                return 0
            cursor.execute(
                "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'DAR Catalog')",
                (owner_part_id, number))
            return 1

        # Create DAR manufacturer
        print("\n[2/5] Creando fabricante DAR...")
        dar_mfr_id = ensure_manufacturer(cursor, 'DAR', 'aftermarket', 'standard', 'Mexico')
        print(f" DAR manufacturer_id: {dar_mfr_id}")

        # Create parts
        print("\n[3/5] Creando partes...")
        part_ids = {}
        names_by_sku = {}
        parts_created = 0
        for sku in sorted(unique_skus):
            # The first entry carrying this SKU supplies the description.
            sample = first_entry_by_sku[sku]
            group_id = classify_description(cursor, sample['description'])
            name_en, name_es = part_names_from_desc(sample['description'], sku)
            names_by_sku[sku] = (name_en, name_es)
            part_id, created = get_or_create_part(
                cursor, sku, group_id, name_en, name_es, 'DAR Línea Azul')
            part_ids[sku] = part_id
            if created:
                parts_created += 1
        print(f" Partes creadas: {parts_created:,}")
        print(f" Partes existentes: {len(unique_skus) - parts_created:,}")

        # Create aftermarket entries for DAR-specific parts
        print(" Creando aftermarket entries...")
        am_created = 0
        for sku in sorted(unique_skus):
            part_id = part_ids.get(sku)
            if not part_id:
                continue
            cursor.execute(
                "SELECT id FROM aftermarket_parts WHERE manufacturer_id = ? AND part_number = ?",
                (dar_mfr_id, sku))
            if not cursor.fetchone():
                name_en, name_es = names_by_sku[sku]
                cursor.execute(
                    "INSERT INTO aftermarket_parts (oem_part_id, manufacturer_id, part_number, name, name_es) VALUES (?, ?, ?, ?, ?)",
                    (part_id, dar_mfr_id, sku, name_en, name_es))
                am_created += 1
        print(f" Aftermarket entries creadas: {am_created:,}")

        # Create vehicles and fitments
        print("\n[4/5] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}
        for i, entry in enumerate(entries):
            if i % 10000 == 0 and i > 0:
                print(f" Procesando {i:,}/{len(entries):,}...")
            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])
                # Try to find existing MYE
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                    JOIN models m ON mye.model_id = m.id
                    JOIN brands b ON m.brand_id = b.id
                    JOIN years y ON mye.year_id = y.id
                    WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                    LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()
                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_id = ensure_mye(cursor, model_id, year_id)
                    mye_cache[cache_key] = mye_id
                    vehicles_created += 1
            mye_id = mye_cache[cache_key]
            part_id = part_ids.get(entry['sku'])
            if not part_id:
                continue
            # Check if fitment exists
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = "Catálogo DAR Línea Azul 2020"
                if entry.get('description'):
                    notes += f" - {entry['description']}"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1
        print(f" Vehículos creados: {vehicles_created:,}")
        print(f" Fitments creados: {fitments_created:,}")

        # Cross-references: match DAR parts to other parts on the same vehicles
        print("\n[5/5] Creando referencias cruzadas...")
        xrefs_created = 0
        for sku, part_id in part_ids.items():
            # Find other parts in the same group fitting the same vehicles
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number != ?
                LIMIT 30
            """, (part_id, part_id, part_id, sku))
            # Materialise the result first: the same cursor is reused below.
            for row in cursor.fetchall():
                other_pn = row['oem_part_number']
                # Skip if same part number prefix pattern (same brand)
                if other_pn[:3] == sku[:3]:
                    continue
                xrefs_created += add_xref(part_id, other_pn)   # A -> B
                xrefs_created += add_xref(row['id'], sku)      # B -> A
        print(f" Cross-refs creadas: {xrefs_created:,}")
        conn.commit()
    finally:
        # Always release the database handle, even when a step above raised.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN DAR COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Aftermarket entries: {am_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
""")
# Run the import only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,548 @@
#!/usr/bin/env python3
"""
IMPORTADOR DEL CATÁLOGO FRAM 2017
- Sección de vehículos livianos (páginas 3-87): Brand → Model + Motor + Dates + Filters
- Sección de equivalencias (páginas 149-199): Competitor → FRAM mappings
- Filtros: PH/CH = Aceite, CA/PA = Aire, G/P/PS = Combustible, CF/CFA = Cabina
"""
import sqlite3
import re
import pypdf
from pathlib import Path
from collections import defaultdict
# SQLite database file, located in the parent of the directory holding this script.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
# Location of the FRAM 2017 catalog PDF read by main().
PDF_PATH = '/tmp/catalogs/fram_2017.pdf'
# Filter type classification by part number prefix.
# Each value is (part group name, English part name, Spanish part name).
FILTER_PREFIXES = {
    'PH': ('Oil Filters', 'Oil Filter', 'Filtro de Aceite'),
    'CH': ('Oil Filters', 'Oil Filter Cartridge', 'Filtro de Aceite Cartucho'),
    'CA': ('Air Filters', 'Air Filter', 'Filtro de Aire'),
    'PA': ('Air Filters', 'Air Filter', 'Filtro de Aire'),
    'G': ('Fuel Filters', 'Fuel Filter', 'Filtro de Combustible'),
    'P': ('Fuel Filters', 'Fuel Filter', 'Filtro de Combustible'),
    'PS': ('Fuel Filters', 'Fuel Filter', 'Filtro de Combustible'),
    'CF': ('Cabin Air Filters', 'Cabin Air Filter', 'Filtro de Cabina'),
    'CFA': ('Cabin Air Filters', 'Cabin Air Filter', 'Filtro de Cabina'),
}
# FRAM part number pattern: one of the prefixes above, immediately followed by
# a digit and then any run of word characters or hyphens.
FRAM_PART_RE = re.compile(r'\b(CFA?\d[\w-]*|PH\d[\w-]*|CH\d[\w-]*|CA\d[\w-]*|PA\d[\w-]*|PS\d[\w-]*|G\d[\w-]*|P\d[\w-]*)\b')
# Known brands that appear as headers in the FRAM catalog.
# Lines are compared against this set verbatim, so names are upper-case.
KNOWN_BRANDS = {
    'ACURA', 'ALEKO', 'ALFA ROMEO', 'ASIA MOTORS', 'ASTON MARTIN', 'AUDI',
    'BEDFORD', 'BENTLEY', 'BMW', 'BUICK', 'CADILLAC', 'CHANA', 'CHERY',
    'CHEVROLET', 'CHRYSLER', 'CITROEN', 'DAEWOO', 'DACIA', 'DAIHATSU',
    'DODGE', 'EAGLE', 'FAW', 'FIAT', 'FORD', 'GALLOPER', 'GEO', 'GEELY',
    'GREAT WALL', 'HONDA', 'HUMMER', 'HYUNDAI', 'INFINITI', 'ISUZU',
    'IVECO', 'JAC', 'JAGUAR', 'JEEP', 'KIA', 'LADA', 'LANCIA', 'LAND ROVER',
    'LEXUS', 'LIFAN', 'LINCOLN', 'LOTUS', 'MAHINDRA', 'MASERATI', 'MAZDA',
    'MERCEDES BENZ', 'MERCURY', 'MG', 'MINI', 'MITSUBISHI', 'NISSAN',
    'OLDSMOBILE', 'OPEL', 'PEUGEOT', 'PLYMOUTH', 'PONTIAC', 'PORSCHE',
    'RAM', 'RENAULT', 'ROVER', 'SAAB', 'SAMSUNG', 'SATURN', 'SCION',
    'SEAT', 'SKODA', 'SMART', 'SSANGYONG', 'SUBARU', 'SUZUKI', 'TATA',
    'TOYOTA', 'TRIUMPH', 'VAUXHALL', 'VOLKSWAGEN', 'VOLVO',
}
def get_db():
    """Open the database at ``DB_PATH`` with rows returned as ``sqlite3.Row``."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called *name*, inserting it if absent.

    The lookup ignores case.  ``type_``, ``quality`` and ``country`` are only
    used when a new row has to be created.
    """
    match = cursor.execute(
        "SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is not None:
        return match['id']
    new_row = (name, type_, quality, country)
    cursor.execute(
        "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid
def ensure_brand(cursor, name):
    """Return the id of the vehicle brand *name* (case-insensitive), creating it if needed."""
    match = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if match is not None:
        return match['id']
    cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
    return cursor.lastrowid
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, creating it if needed.

    The model name is matched case-insensitively within the brand.
    """
    match = cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name),
    ).fetchone()
    if match is not None:
        return match['id']
    cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
    return cursor.lastrowid
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting the row if missing."""
    match = cursor.execute("SELECT id FROM years WHERE year = ?", (year,)).fetchone()
    if match is not None:
        return match['id']
    cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
    return cursor.lastrowid
def ensure_engine(cursor, name):
    """Return the id of the engine named exactly *name*, creating it if needed.

    For a new row the displacement is taken from a ``<digits>cc`` token in
    the name (``None`` when absent), the cylinder count is stored as
    ``None``, and the fuel type is ``'diesel'`` when the lower-cased name
    contains one of the diesel markers, otherwise ``'gasoline'``.
    """
    match = cursor.execute("SELECT id FROM engines WHERE name = ?", (name,)).fetchone()
    if match is not None:
        return match['id']

    cc_token = re.search(r'(\d+)cc', name)
    displacement = int(cc_token.group(1)) if cc_token else None

    lowered = name.lower()
    diesel_markers = ('diesel', 'td', 'tdi', 'jtd')
    fuel_type = 'diesel' if any(tag in lowered for tag in diesel_markers) else 'gasoline'

    cursor.execute(
        "INSERT INTO engines (name, displacement_cc, cylinders, fuel_type) VALUES (?, ?, ?, ?)",
        (name, displacement, None, fuel_type))
    return cursor.lastrowid
def get_generic_engine(cursor):
    """Return the id of the placeholder engine named 'Generic', creating it on first use."""
    match = cursor.execute("SELECT id FROM engines WHERE name = 'Generic'").fetchone()
    if match is not None:
        return match['id']
    cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
    return cursor.lastrowid
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Return the id of a model/year/engine row, creating one if none matches.

    With *engine_id* the lookup requires that exact engine; without it, any
    row for the model and year is accepted.  When a row has to be created
    and no engine was given, the generic placeholder engine is used.
    """
    lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
    lookup_args = (model_id, year_id)
    if engine_id:
        lookup_sql += " AND engine_id = ?"
        lookup_args += (engine_id,)

    match = cursor.execute(lookup_sql, lookup_args).fetchone()
    if match is not None:
        return match['id']

    engine_id = engine_id or get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_id))
    return cursor.lastrowid
def classify_filter(part_number):
    """Classify a FRAM filter by its part-number prefix.

    Returns the ``FILTER_PREFIXES`` entry ``(group_name, name_en, name_es)``
    for the first matching prefix, or ``None`` when nothing matches.  The
    comparison is done on the upper-cased part number.
    """
    code = part_number.upper()
    # Multi-letter prefixes, longest first so 'CFA' wins over 'CF'.
    # NOTE(review): unlike FRAM_PART_RE these do not require a digit after the
    # prefix, so any text starting with e.g. 'CA' is classified -- confirm.
    multi_letter = next(
        (p for p in ('CFA', 'CF', 'PS', 'PH', 'CH', 'CA', 'PA') if code.startswith(p)),
        None)
    if multi_letter is not None:
        return FILTER_PREFIXES[multi_letter]
    # Single-letter prefixes only count when a digit follows immediately.
    if re.match(r'^G\d', code):
        return FILTER_PREFIXES['G']
    if re.match(r'^P\d', code):
        return FILTER_PREFIXES['P']
    return None
def get_or_create_group(cursor, group_name):
    """Return the id of part group *group_name*, creating it when missing.

    A new group is attached to the category mapped to it below ('Engine' for
    unmapped names).  Returns ``None`` when that category does not exist, in
    which case nothing is inserted.
    """
    group = cursor.execute(
        "SELECT id FROM part_groups WHERE name = ?", (group_name,)
    ).fetchone()
    if group is not None:
        return group['id']

    # Category that hosts each filter group.
    category_for_group = {
        'Oil Filters': 'Engine',
        'Air Filters': 'Engine',
        'Fuel Filters': 'Fuel & Air',
        'Cabin Air Filters': 'Heat & Air Conditioning',
    }
    category_name = category_for_group.get(group_name, 'Engine')
    category = cursor.execute(
        "SELECT id FROM part_categories WHERE name = ?", (category_name,)
    ).fetchone()
    if category is None:
        return None

    cursor.execute(
        "INSERT INTO part_groups (category_id, name) VALUES (?, ?)",
        (category['id'], group_name))
    return cursor.lastrowid
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Look up a part by number, inserting it when absent.

    Returns ``(part_id, created)`` where *created* is True only when a new
    row was inserted.  An existing row is returned untouched.
    """
    match = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if match is not None:
        return match['id'], False
    new_row = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid, True
def parse_date_range(date_str):
    """Turn a FRAM date range such as ``(03/88 - 09/97)`` into a list of years.

    Only the year halves of the ``MM/YY`` tokens are used.  Two-digit years
    below 50 are read as 20xx, the rest as 19xx.  A full range yields every
    year from start to end inclusive (empty when the end precedes the
    start); a lone start date yields just that year; unparseable text yields
    an empty list.

    NOTE(review): an open-ended range like ``(03/05 - )`` therefore produces
    only the start year -- confirm that is the intended coverage.
    """
    def to_full_year(token):
        # Expand 2-digit years around the 1950/2049 pivot; keep longer ones.
        number = int(token)
        if number >= 100:
            return number
        return number + (2000 if number < 50 else 1900)

    both = re.match(r'\(?\s*(\d{2})/(\d{2,4})\s*-\s*(\d{2})/(\d{2,4})\s*\)?', date_str)
    if both:
        first = to_full_year(both.group(2))
        last = to_full_year(both.group(4))
        return list(range(first, last + 1))

    # Try single year
    single = re.match(r'\(?\s*(\d{2})/(\d{2,4})\s*-?\s*\)?', date_str)
    return [to_full_year(single.group(2))] if single else []
def extract_fram_parts(text):
    """Return every FRAM part number found in *text*, in order of appearance."""
    return [hit.group(1) for hit in FRAM_PART_RE.finditer(text)]
def parse_vehicle_entries(pdf):
    """Parse vehicle applications from the light-vehicle section of the FRAM catalog.

    Scans catalog pages 3-87 line by line.  A line equal to a ``KNOWN_BRANDS``
    entry switches the current brand; every later line containing at least
    one FRAM part number produces entries for that brand.

    Args:
        pdf: An opened reader exposing ``pages`` whose items provide
            ``extract_text()`` (main() passes a ``pypdf.PdfReader``).

    Returns:
        A list of dicts with keys ``brand``, ``model``, ``year``,
        ``part_number`` and ``filter_type`` -- one per year and classifiable
        part number on each application line.  Lines without a date range
        are attributed to the catalog year 2017; lines without a
        recognisable model inherit the last model group, or ``"Unknown"``.
    """
    entries = []
    current_brand = None
    current_model_group = None
    # Page headers and filter-dimension notes start with one of these.
    noise_prefixes = ('LIVIANOS', 'PESADOS', 'H1=', 'Parcial', 'Panel',
                      'Redondo', 'C/C.', 'Unidad Sellada')

    # Pages 3-87 (0-indexed 2..86), clamped so that a shorter document does
    # not raise IndexError.
    for page_num in range(2, min(87, len(pdf.pages))):
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue
            # Skip headers/footers, bare page numbers and dimension notes
            if line.startswith(noise_prefixes):
                continue
            if re.match(r'^\d{1,3}$', line) or 'MARCA/CATEGORÍA' in line:
                continue
            # Brand detection
            if line in KNOWN_BRANDS:
                current_brand = line
                current_model_group = None
                continue
            # Index lines listing several brands (e.g. "ACURA - ALEKO") carry no data.
            if ' - ' in line and all(
                    b.strip() in KNOWN_BRANDS for b in line.split(' - ') if b.strip()):
                continue
            if not current_brand:
                continue

            # Expected format:
            # [MODEL_GROUP] description - Mot.CODE-DISPcc-Powerkw/hp (date_from - date_to) FILTER_CODES
            # Only lines that carry at least one part number contribute.
            parts = extract_fram_parts(line)
            if not parts:
                continue

            date_match = re.search(r'\((\d{2}/\d{2,4}\s*-\s*(?:\d{2}/\d{2,4}\s*)?)\)', line)
            years = parse_date_range(date_match.group(1)) if date_match else []
            if not years:
                years = [2017]  # Default to catalog year

            # A leading all-caps token of fewer than 30 characters starts a
            # new model group; otherwise the previous group carries over.
            model_match = re.match(r'^([A-Z][A-Z0-9\s/\-]+?)\s+\S', line)
            if model_match:
                potential_model = model_match.group(1).strip()
                if potential_model.isupper() and len(potential_model) < 30:
                    current_model_group = potential_model
            model_name = current_model_group or "Unknown"

            # Classify each part once per line rather than once per year.
            classified = []
            for part in parts:
                info = classify_filter(part)
                if info:
                    classified.append((part, info[0]))

            for year in years:
                for part, filter_type in classified:
                    entries.append({
                        'brand': current_brand,
                        'model': model_name,
                        'year': year,
                        'part_number': part,
                        'filter_type': filter_type,
                    })
    return entries
def parse_cross_references(pdf):
    """Parse the equivalences (cross-reference) section of the FRAM catalog.

    Scans 0-indexed pages 148-199 (clamped to the document length) and keeps
    only pages mentioning 'EQUIVALENCIAS' or 'Código'.  Each remaining data
    line of the form ``<competitor number> <FRAM number>`` yields a dict
    with keys ``competitor`` and ``fram``.  Heading lines, page numbers,
    brand captions and rows whose first token itself looks like a FRAM
    number are ignored.
    """
    collected = []
    last_page = min(200, len(pdf.pages))
    for index in range(148, last_page):
        page_text = pdf.pages[index].extract_text()
        if not page_text:
            continue
        if not ('EQUIVALENCIAS' in page_text or 'Código' in page_text):
            continue
        for raw in page_text.split('\n'):
            row = raw.strip()
            if not row or 'EQUIVALENCIAS' in row or 'Código' in row:
                continue
            # Bare page number
            if re.match(r'^\d{1,3}$', row):
                continue
            # Skip brand header lines
            if ' - ' in row and re.match(r'^[A-Z][a-z]', row):
                continue
            short_capitalized = (
                row[0].isupper() and row[1:2].islower() and len(row.split()) <= 3)
            if row.istitle() or short_capitalized:
                continue
            # Parse: CompetitorNumber FRAMNumber
            # FRAM numbers start with PH, CH, CA, PA, G, P, PS, CF, CFA
            pair = re.match(r'^(\S+)\s+((?:PH|CH|CA|PA|PS|CF|CFA|G|P)\w+)', row)
            if pair is None:
                continue
            competitor_pn, fram_pn = (token.strip() for token in pair.groups())
            # Skip if competitor number looks like a FRAM number
            if re.match(r'^(PH|CH|CA|PA|PS|CF|CFA)', competitor_pn):
                continue
            collected.append({'competitor': competitor_pn, 'fram': fram_pn})
    return collected
def main():
    """Import the FRAM 2017 catalog into the vehicle database.

    Reads the PDF, extracts vehicle applications and competitor
    equivalences, then: ensures the FRAM manufacturer row, creates the
    filter parts, creates missing vehicles and fitments, and stores
    cross-references (from the equivalences section and by matching
    same-group parts fitted to the same vehicles).  Changes are committed
    once at the end; the connection is closed even if a step raises, in
    which case nothing is committed.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO FRAM 2017")
    print("=" * 70)
    print(f"\n[1/6] Leyendo PDF: {PDF_PATH}")
    pdf = pypdf.PdfReader(PDF_PATH)
    print(f" Total páginas: {len(pdf.pages)}")
    print("\n[2/6] Extrayendo datos del catálogo...")
    vehicle_entries = parse_vehicle_entries(pdf)
    cross_refs = parse_cross_references(pdf)
    print(f" Entradas de vehículos: {len(vehicle_entries)}")
    print(f" Equivalencias (cross-refs): {len(cross_refs)}")

    # Get unique parts: part number -> (group name, English name, Spanish name)
    unique_parts = {}
    for e in vehicle_entries:
        if e['part_number'] not in unique_parts:
            info = classify_filter(e['part_number'])
            if info:
                unique_parts[e['part_number']] = info
    print(f" Partes únicas: {len(unique_parts)}")
    # Also get parts from cross-refs
    for xref in cross_refs:
        if xref['fram'] not in unique_parts:
            info = classify_filter(xref['fram'])
            if info:
                unique_parts[xref['fram']] = info
    print(f" Partes únicas (incl. cross-refs): {len(unique_parts)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        def add_xref(owner_part_id, number, source):
            """Insert an interchange cross-reference unless present; return rows added."""
            cursor.execute(
                "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                (owner_part_id, number))
            if cursor.fetchone():
                return 0
            cursor.execute(
                "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', ?)",
                (owner_part_id, number, source))
            return 1

        # Create FRAM manufacturer
        print("\n[3/6] Creando fabricante FRAM...")
        # ensure_manufacturer reuses an existing row (e.g. from the Gonher import)
        fram_mfr_id = ensure_manufacturer(cursor, 'FRAM', 'aftermarket', 'standard', 'USA')
        print(f" FRAM manufacturer_id: {fram_mfr_id}")

        # Create parts
        print("\n[4/6] Creando partes de filtros...")
        part_ids = {}
        parts_created = 0
        group_cache = {}
        for pn, (group_name, name_en, name_es) in unique_parts.items():
            if group_name not in group_cache:
                group_cache[group_name] = get_or_create_group(cursor, group_name)
            group_id = group_cache[group_name]
            # No group (its category is missing): the part cannot be stored.
            if not group_id:
                continue
            full_name = f"{name_en} {pn}"
            full_name_es = f"{name_es} {pn}"
            part_id, created = get_or_create_part(
                cursor, pn, group_id, full_name, full_name_es, "FRAM Filter")
            part_ids[pn] = part_id
            if created:
                parts_created += 1
        print(f" Partes creadas: {parts_created}")

        # Create vehicles and fitments
        print("\n[5/6] Creando vehículos y fitments...")
        vehicles_created = 0
        fitments_created = 0
        mye_cache = {}
        for entry in vehicle_entries:
            part_id = part_ids.get(entry['part_number'])
            if not part_id:
                continue
            cache_key = (entry['brand'], entry['model'], entry['year'])
            if cache_key not in mye_cache:
                brand_id = ensure_brand(cursor, entry['brand'])
                model_id = ensure_model(cursor, brand_id, entry['model'])
                year_id = ensure_year(cursor, entry['year'])
                cursor.execute(
                    """SELECT mye.id FROM model_year_engine mye
                    JOIN models m ON mye.model_id = m.id
                    JOIN brands b ON m.brand_id = b.id
                    JOIN years y ON mye.year_id = y.id
                    WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
                    LIMIT 1""",
                    (entry['brand'], entry['model'], entry['year']))
                existing = cursor.fetchone()
                if existing:
                    mye_cache[cache_key] = existing['id']
                else:
                    mye_id = ensure_mye(cursor, model_id, year_id)
                    mye_cache[cache_key] = mye_id
                    vehicles_created += 1
            mye_id = mye_cache[cache_key]
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, f"Catálogo FRAM 2017 - {entry['filter_type']}"))
                fitments_created += 1
        print(f" Vehículos creados: {vehicles_created}")
        print(f" Fitments creados: {fitments_created}")

        # Create cross-references
        print("\n[6/6] Creando referencias cruzadas...")
        xrefs_created = 0
        # A) From equivalencias section
        for xref in cross_refs:
            fram_part_id = part_ids.get(xref['fram'])
            if not fram_part_id:
                continue
            xrefs_created += add_xref(
                fram_part_id, xref['competitor'], 'FRAM Equivalencias 2017')

        # B) Match FRAM parts to other brands' parts by vehicle fitment
        for pn, part_id in part_ids.items():
            cursor.execute("""
                SELECT DISTINCT p2.id, p2.oem_part_number
                FROM vehicle_parts vp1
                JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
                JOIN parts p2 ON vp2.part_id = p2.id
                WHERE vp1.part_id = ?
                AND p2.id != ?
                AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
                AND p2.oem_part_number NOT LIKE 'PH%'
                AND p2.oem_part_number NOT LIKE 'CH%'
                AND p2.oem_part_number NOT LIKE 'CA%'
                AND p2.oem_part_number NOT LIKE 'PA%'
                AND p2.oem_part_number NOT LIKE 'CF%'
                AND p2.oem_part_number NOT LIKE 'CFA%'
                LIMIT 20
            """, (part_id, part_id, part_id))
            # Materialise the result first: the same cursor is reused below.
            for row in cursor.fetchall():
                # Cross-ref FRAM -> other
                xrefs_created += add_xref(
                    part_id, row['oem_part_number'], 'FRAM Catalog 2017')
                # Reverse cross-ref
                xrefs_created += add_xref(row['id'], pn, 'FRAM Catalog 2017')
        print(f" Cross-refs creadas: {xrefs_created}")
        conn.commit()
    finally:
        # Always release the database handle, even when a step above raised.
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN FRAM COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
- Equivalencias leídas: {len(cross_refs):,}
""")
# Run the import only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,705 @@
#!/usr/bin/env python3
"""
IMPORTADOR DEL CATÁLOGO MOOG - SUSPENSIÓN Y DIRECCIÓN
Funciona para los 3 volúmenes:
Vol 1: ≤1989 /tmp/catalogs/suspension/moog_vol1_1989back.pdf pages 4-1037
Vol 2: 1990-2005 /tmp/catalogs/suspension/moog_vol2_1990_2005.pdf pages 7-1641
Vol 3: 2006+ /tmp/catalogs/suspension/moog_vol3_2006up.pdf pages 8-1089
"""
import sqlite3
import re
import sys
import pypdf
from pathlib import Path
from collections import defaultdict
# SQLite database file, located in the parent of the directory holding this script.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
# Per-volume settings keyed by volume number (as a string): PDF path, page
# bounds and a display label.
VOLUMES = {
    '1': {
        'path': '/tmp/catalogs/suspension/moog_vol1_1989back.pdf',
        'start_page': 3,  # 0-indexed
        'end_page': 1037,
        'label': 'Vol 1 (≤1989)',
    },
    '2': {
        'path': '/tmp/catalogs/suspension/moog_vol2_1990_2005.pdf',
        'start_page': 6,
        'end_page': 1641,
        'label': 'Vol 2 (1990-2005)',
    },
    '3': {
        'path': '/tmp/catalogs/suspension/moog_vol3_2006up.pdf',
        'start_page': 7,
        'end_page': 1089,
        'label': 'Vol 3 (2006+)',
    },
}
# Vehicle brand names as they appear in the MOOG catalog (upper-case,
# including the separate "... TRUCK" variants).
MOOG_BRANDS = {
    'ACURA', 'ALFA ROMEO', 'AMERICAN MOTORS', 'AMERICAN MOTORS CORP.',
    'ASTON MARTIN', 'AUDI', 'BMW', 'BUICK', 'CADILLAC',
    'CHEVROLET', 'CHEVROLET TRUCK', 'CHRYSLER',
    'DATSUN', 'DODGE', 'DODGE TRUCK',
    'EAGLE', 'FIAT', 'FORD', 'FORD TRUCK', 'FREIGHTLINER',
    'GEO', 'GEO TRUCK', 'GENERAL MOTORS TRUCK',
    'HONDA', 'HUMMER', 'HYUNDAI',
    'INFINITI', 'INTERNATIONAL', 'ISUZU', 'ISUZU TRUCK',
    'JAGUAR', 'JEEP', 'KIA',
    'LAFORZA', 'LAND ROVER', 'LEXUS', 'LINCOLN', 'LOTUS',
    'MAZDA', 'MAZDA TRUCK', 'MERCEDES BENZ', 'MERCEDES-BENZ',
    'MERCURY', 'MERKUR', 'MINI', 'MITSUBISHI', 'MITSUBISHI TRUCK',
    'NISSAN', 'NISSAN TRUCK',
    'OLDSMOBILE', 'OPEL',
    'PEUGEOT', 'PLYMOUTH', 'PLYMOUTH TRUCK', 'PONTIAC', 'PORSCHE',
    'RAM TRUCK', 'RENAULT', 'ROLLS ROYCE',
    'SAAB', 'SATURN', 'SCION', 'SEAT', 'SHELBY', 'SMART', 'STERLING',
    'SUBARU', 'SUBARU TRUCK', 'SUZUKI', 'SUZUKI TRUCK',
    'TOYOTA', 'TOYOTA TRUCK', 'TRIUMPH',
    'VOLKSWAGEN', 'VOLKSWAGEN TRUCK', 'VOLVO', 'VOLVO TRUCK',
    'WILLYS MOTORS INC.',
}
# MOOG part number regex: a letter prefix (K, ES, EV, DS, CC, CK, SSD, BK, SB,
# NIBJ, or VO/HY/AU/BM plus two letters) followed by digits, with an optional
# letter suffix on some families.
MOOG_PART_RE = re.compile(
    r'\b(K\d{3,7}T?|ES\d{3,7}[A-Z]{0,3}T?|EV\d{3,7}[A-Z]?|DS\d{3,7}'
    r'|CC\d{3,6}|CK\d{3,7}|SSD\d{2,4}|BK\d{3,4}[A-Z]?'
    r'|SB\d{3,4}|NIBJ\d+|VO[A-Z]{2}\d+|HY[A-Z]{2}\d+|AU[A-Z]{2}\d+|BM[A-Z]{2}\d+)\b'
)
# Numeric-only springs (only used within spring category context)
SPRING_NUM_RE = re.compile(r'\b(\d{4,6})\b')
# Figure code: F, S or R followed by exactly three digits
FIGURE_RE = re.compile(r'\b([FSR]\d{3})\b')
# Year range at start of line: a 4-digit year, optionally "- <4-digit year>"
YEAR_RE = re.compile(r'^(\d{4})(?:\s*-\s*(\d{4}))?')
# System sections: catalog heading text (with and without accents) -> system key
SYSTEM_PATTERNS = {
    'SUSPENSION DELANTERA': 'front_suspension',
    'SUSPENSIÓN DELANTERA': 'front_suspension',
    'DIRECCIÓN': 'steering',
    'DIRECCION': 'steering',
    'SUSPENSION TRASERA': 'rear_suspension',
    'SUSPENSIÓN TRASERA': 'rear_suspension',
}
# Header/footer markers to skip
# NOTE(review): 'P/C\nCTD' contains a newline, so it can only match text that
# has not yet been split into lines -- confirm how these markers are applied.
SKIP_MARKERS = [
    'www.moogproblemsolver.com',
    'CATÁLOGO MASTER',
    'CATALOGO MASTER',
    'Solucionador de problemas',
    'búsqueda de piezas electrónicas',
    'FMe-cat.mx',
    'Año Observaciones',
    'Total Solución',
    'P/C\nCTD',
    'Imagenes de piezas',
]
def get_db():
    """Connect to the database at ``DB_PATH``; rows come back as ``sqlite3.Row``."""
    handle = sqlite3.connect(DB_PATH)
    handle.row_factory = sqlite3.Row
    return handle
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='premium', country=None):
    """Fetch the id of manufacturer *name* (case-insensitive), inserting it if absent.

    ``type_``, ``quality`` and ``country`` only matter for a newly created row.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            (name, type_, quality, country))
        return cursor.lastrowid
    return hit['id']
def ensure_brand(cursor, name):
    """Fetch the id of vehicle brand *name* (case-insensitive), inserting it if absent."""
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return hit['id']
def ensure_model(cursor, brand_id, name):
    """Fetch the id of model *name* within *brand_id*, inserting it if absent.

    Names are compared case-insensitively.
    """
    cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name))
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return hit['id']
def ensure_year(cursor, year):
    """Fetch the id of the ``years`` row for *year*, inserting it if absent."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return hit['id']
def get_generic_engine(cursor):
    """Fetch the id of the placeholder 'Generic' engine, inserting it on first use."""
    cursor.execute("SELECT id FROM engines WHERE name = 'Generic'")
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
        return cursor.lastrowid
    return hit['id']
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Fetch the id of a model/year/engine row, inserting one when none matches.

    When *engine_id* is given the lookup is restricted to that engine;
    otherwise any row for the model and year qualifies.  A row created
    without an explicit engine is linked to the generic placeholder engine.
    """
    if engine_id:
        criteria = (model_id, year_id, engine_id)
        sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ? AND engine_id = ?"
    else:
        criteria = (model_id, year_id)
        sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
    cursor.execute(sql, criteria)
    hit = cursor.fetchone()
    if hit is not None:
        return hit['id']
    chosen_engine = engine_id if engine_id else get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, chosen_engine))
    return cursor.lastrowid
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Find a part by its number or insert it.

    Returns ``(part_id, created)``; *created* is True only for a fresh
    insert.  An existing row is left unchanged.
    """
    cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_number,))
    hit = cursor.fetchone()
    if hit is None:
        cursor.execute(
            "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
            (part_number, name, name_es, group_id, description))
        return cursor.lastrowid, True
    return hit['id'], False
# --- Group ID lookup cache ---
# Maps an English group name to its id, or to None when the lookup found nothing.
_group_cache = {}
def get_group_id(cursor, name_en):
    """Return the id of the part group named *name_en*, or ``None`` if there is none.

    Results, including misses, are remembered in ``_group_cache`` so each
    name hits the database at most once per process.
    """
    try:
        return _group_cache[name_en]
    except KeyError:
        pass
    cursor.execute("SELECT id FROM part_groups WHERE name = ?", (name_en,))
    hit = cursor.fetchone()
    group_id = hit['id'] if hit else None
    _group_cache[name_en] = group_id
    return group_id
def classify_part(cursor, category_text, part_number):
    """Map a MOOG category heading plus part number to a DB group id.

    The (Spanish) category text is tried first against an ordered rule
    table; the first rule that matches wins. If nothing matches, the part
    number prefix is used, and finally 'Ball Joints' is the default.

    Returns:
        Whatever ``get_group_id`` returns for the chosen group name (an id,
        or None when that group does not exist).
    """
    text = category_text.lower() if category_text else ''

    def matches(alternatives):
        # A rule fires when every keyword of at least one alternative
        # occurs in the lowercased category text.
        return any(all(word in text for word in words) for words in alternatives)

    # Order matters: specific combinations precede their generic keyword.
    rules_before_tie_rod = (
        ('Ball Joints', (('rótula', 'suspensión'),)),
        ('Ball Joints', (('rótula', 'prensad'),)),
        ('Control Arms', (('brazo de control', 'rótula'),)),
        ('Control Arms', (('ensamble de brazo',),)),
        ('Control Arms', (('brazo de control',),)),
        ('Control Arms', (('horquilla',),)),
        ('Sway Bar Bushings', (('buje', 'estabilizadora'),)),
        ('Bushings', (('buje', 'brazo'),)),
        ('Bushings', (('buje', 'amortiguador'),)),
        ('Bushings', (('buje', 'tracción'),)),
        ('Camber/Caster Kits', (('buje', 'camber'),)),
        ('Bushings', (('buje',),)),
        ('Sway Bar Links', (('cople', 'estabilizadora'),)),
        ('Strut Mounts', (('soporte', 'strut'), ('soporte', 'amortiguador'))),
        ('Strut Mounts', (('montaje', 'amortiguador'),)),
        ('Struts', (('fuelle',), ('cubrepolvo',))),
        ('Spring Seats', (('asiento', 'resorte'),)),
        ('Tie Rod Ends', (('ensamble de terminal',),)),
    )
    rules_after_tie_rod = (
        ('Center Links', (('barra central',),)),
        ('Drag Links', (('barra de arrastre',), ('barra de acoplamiento',))),
        ('Drag Links', (('varilla de dirección',),)),
        ('Coil Springs', (('resorte', 'suspensión'),)),
        ('Camber/Caster Kits', (('camber',), ('caster',))),
        ('Idler Arms', (('brazo auxiliar',), ('brazo loco',))),
        ('Pitman Arms', (('brazo pitman',),)),
        ('Steering Dampers', (('amortiguador de dirección',),)),
        ('King Pin Sets', (('pasador', 'dirección'),)),
        ('Leaf Springs', (('muelle',),)),
        ('Torsion Bars', (('barra de torsión',),)),
    )

    for group_name, alternatives in rules_before_tie_rod:
        if matches(alternatives):
            return get_group_id(cursor, group_name)

    # Steering tie rods: the 'EV' part prefix distinguishes inner tie rods.
    if 'terminal' in text and 'dirección' in text:
        is_inner = bool(part_number) and part_number.startswith('EV')
        return get_group_id(cursor, 'Inner Tie Rods' if is_inner else 'Tie Rod Ends')

    for group_name, alternatives in rules_after_tie_rod:
        if matches(alternatives):
            return get_group_id(cursor, group_name)

    # Fallback by part prefix. The prefixes are mutually exclusive and a
    # purely numeric number cannot carry one, so check order is irrelevant.
    if part_number:
        if part_number.isdigit() and len(part_number) >= 4:
            return get_group_id(cursor, 'Coil Springs')
        prefix_groups = (
            ('ES', 'Tie Rod Ends'),
            ('EV', 'Inner Tie Rods'),
            ('DS', 'Center Links'),
            ('CC', 'Coil Springs'),
            ('SSD', 'Steering Dampers'),
            ('CK', 'Control Arms'),
            ('BK', 'King Pin Sets'),
            ('SB', 'Bushings'),
        )
        for prefix, group_name in prefix_groups:
            if part_number.startswith(prefix):
                return get_group_id(cursor, group_name)

    return get_group_id(cursor, 'Ball Joints')  # Default
# --- Part type names for DB ---
# Keyed by part group name; each value is the pair
# (English singular label, Spanish label). main() looks the group name up
# here and prefixes the part number with these labels to build the part's
# name / name_es columns.
PART_TYPE_NAMES = {
    'Ball Joints': ('Ball Joint', 'Rótula de Suspensión'),
    'Bushings': ('Bushing', 'Buje'),
    'Sway Bar Bushings': ('Sway Bar Bushing', 'Buje de Barra Estabilizadora'),
    'Control Arms': ('Control Arm', 'Brazo de Control'),
    'Sway Bar Links': ('Sway Bar Link', 'Cople de Barra Estabilizadora'),
    'Strut Mounts': ('Strut Mount', 'Soporte de Strut'),
    'Struts': ('Strut Boot', 'Fuelle de Strut'),
    'Spring Seats': ('Spring Seat', 'Asiento de Resorte'),
    'Tie Rod Ends': ('Tie Rod End', 'Terminal de Dirección'),
    'Inner Tie Rods': ('Inner Tie Rod', 'Terminal Interior de Dirección'),
    'Center Links': ('Center Link', 'Barra Central'),
    'Drag Links': ('Drag Link', 'Barra de Arrastre'),
    'Coil Springs': ('Coil Spring', 'Resorte Helicoidal'),
    'Camber/Caster Kits': ('Camber/Caster Kit', 'Kit de Camber/Caster'),
    'Idler Arms': ('Idler Arm', 'Brazo Auxiliar'),
    'Pitman Arms': ('Pitman Arm', 'Brazo Pitman'),
    'Steering Dampers': ('Steering Damper', 'Amortiguador de Dirección'),
    'King Pin Sets': ('King Pin Set', 'Juego de Pivote'),
    'Leaf Springs': ('Leaf Spring', 'Muelle'),
    'Torsion Bars': ('Torsion Bar', 'Barra de Torsión'),
}
# --- Parsing ---
def is_skip_line(line):
    """Return True when *line* contains any of the SKIP_MARKERS substrings."""
    for marker in SKIP_MARKERS:
        if marker in line:
            return True
    return False
def parse_brand_model(line):
    """Try to parse a "BRAND <dash> MODEL" heading line.

    The line is split once on the first dash-like separator it contains.
    Whichever side matches a known MOOG brand (exactly, or as a leading
    word) is taken as the brand; failing that, an all-uppercase side longer
    than two characters is assumed to be the brand.

    Args:
        line: One stripped text line from the catalog.

    Returns:
        ``(brand, model)`` with "(Cont.)" markers removed, or
        ``(None, None)`` when the line is not a brand/model heading.
    """
    # NOTE(review): three of the four separator literals in the previous
    # revision were empty strings (the dash characters appear to have been
    # lost). An empty separator is "in" every string and makes str.split
    # raise ValueError, so every call crashed. The list below is a
    # reconstruction: Unicode/C1 dashes first, ASCII hyphen last so that
    # hyphenated model names are only split as a last resort. Confirm
    # against the characters pypdf actually emits for this catalog.
    separators = (
        '\u2013',  # en dash
        '\u2014',  # em dash
        '\u2012',  # figure dash
        '\u2010',  # hyphen
        '\u2011',  # non-breaking hyphen
        '\u2212',  # minus sign
        '\x96',    # C1 control often produced for a cp1252 en dash
        '\x97',    # C1 control often produced for a cp1252 em dash
        '-',
    )
    cont_marker = re.compile(r'\s*\(Cont\.?\)\.?\s*')

    for dash in separators:
        if dash not in line:
            continue
        parts = line.split(dash, 1)
        if len(parts) != 2:
            continue
        left = cont_marker.sub('', parts[0]).strip()
        right = cont_marker.sub('', parts[1]).strip()
        if not left or not right:
            continue
        left_up = left.upper()
        right_up = right.upper()
        # Check which side matches a known brand
        for brand in MOOG_BRANDS:
            if left_up == brand or left_up.startswith(brand + ' '):
                return left, right
            if right_up == brand or right_up.startswith(brand + ' '):
                return right, left
        # Heuristic: an all-uppercase side is taken to be the brand
        if left.isupper() and len(left) > 2:
            return left, right
        if right.isupper() and len(right) > 2:
            return right, left
    return None, None
def detect_system(line):
    """Return the system for a section-header line, or None.

    The stripped, uppercased line is compared against each key of
    SYSTEM_PATTERNS (also uppercased); the value of the first key the line
    starts with is returned.
    """
    heading = line.strip().upper()
    return next(
        (system for pattern, system in SYSTEM_PATTERNS.items()
         if heading.startswith(pattern.upper())),
        None,
    )
# Substrings (matched case-insensitively by is_category_line) that mark a
# line as a part-category header. Accented and unaccented spellings are both
# listed for some terms (e.g. 'Rótula' / 'Rotula').
CATEGORY_KEYWORDS = [
    'Rótula', 'Rotula', 'Buje', 'Brazo de control', 'Brazo auxiliar',
    'Brazo pitman', 'Brazo loco', 'Cople', 'Soporte', 'Fuelle',
    'Asiento del resorte', 'Terminal de dirección', 'Terminal de direccion',
    'Ensamble de terminal', 'Ensamble de brazo', 'Barra central',
    'Barra de arrastre', 'Barra de dirección', 'Varilla',
    'Juego de resortes', 'Resorte de suspensión', 'Juego para ajuste',
    'Placa para ajuste', 'Seguro guia', 'Amortiguador de dirección',
    'Pasador de dirección', 'Horquilla', 'Muelle',
    'Juego de coples', 'Juego de soporte', 'Juego de montaje',
    'Montaje del amortiguador',
]
def is_category_line(line):
    """Return True when *line* is a part-category header.

    A header contains at least one CATEGORY_KEYWORDS entry (compared
    case-insensitively) and no MOOG part number; a line that carries a part
    number is a data row even if it mentions a category keyword.
    """
    lowered = line.lower()
    if not any(keyword.lower() in lowered for keyword in CATEGORY_KEYWORDS):
        return False
    return not MOOG_PART_RE.search(line)
def parse_moog_pdf(pdf_path, start_page, end_page):
    """Parse a MOOG catalog PDF and return one entry per part/vehicle/year.

    The catalog text is read line by line as a state machine: brand/model
    headings, system headings, figure codes, category headings and year
    ranges update the "current" context, and every line that carries part
    numbers emits entries using that context.

    Args:
        pdf_path: Path of the PDF, passed to ``pypdf.PdfReader``.
        start_page: 0-based index of the first page to read.
        end_page: 0-based index one past the last page to read (clamped to
            the document length).

    Returns:
        A list of dicts with keys 'brand', 'model', 'year', 'system',
        'figure', 'category', 'part_number' and 'notes'. A year range
        produces one dict per year.
    """
    pdf = pypdf.PdfReader(pdf_path)
    entries = []
    # Parser context carried from line to line.
    current_brand = None
    current_model = None
    # Never assigned a non-None value in this function, so the submodel
    # suffix below is currently never applied.
    current_submodel = None
    current_system = None
    current_figure = None
    current_category = None
    # The year range is NOT reset on brand/system changes; a part line
    # without its own year reuses the most recent valid range.
    current_year_from = None
    current_year_to = None
    total = min(len(pdf.pages), end_page)
    for page_num in range(start_page, total):
        # Progress message every 100 pages.
        if (page_num - start_page) % 100 == 0:
            print(f" Página {page_num + 1}/{total}...")
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue
        lines = text.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if is_skip_line(line):
                continue
            # Skip standalone page numbers (only while no category is
            # active; inside a category a bare number may be data).
            if re.match(r'^\d{1,4}$', line) and not current_category:
                continue
            # Brand-model line: starts a new vehicle and clears the
            # system/figure/category context.
            brand, model = parse_brand_model(line)
            if brand and model:
                current_brand = brand
                current_model = model
                current_submodel = None
                current_system = None
                current_figure = None
                current_category = None
                continue
            # System section
            system = detect_system(line)
            if system:
                current_system = system
                current_category = None
                current_submodel = None
                # Pick up a figure code if one appears on the same line
                fig = FIGURE_RE.search(line)
                if fig:
                    current_figure = fig.group(1)
                continue
            # Standalone figure code line
            fig_match = re.match(r'^([FSR]\d{3})$', line.strip())
            if fig_match:
                current_figure = fig_match.group(1)
                continue
            # Figure code with comma (e.g., "F530,\nF531")
            fig_multi = re.match(r'^([FSR]\d{3}),?$', line.strip())
            if fig_multi and not YEAR_RE.match(line):
                current_figure = fig_multi.group(1)
                continue
            # Everything below needs a vehicle context.
            if not current_brand or not current_model:
                continue
            # Part category header
            if is_category_line(line):
                current_category = line.strip()
                continue
            # Data line with year: update the active range when both ends
            # are plausible; a single year yields a one-year range.
            year_match = YEAR_RE.match(line)
            if year_match:
                y1 = int(year_match.group(1))
                y2 = int(year_match.group(2)) if year_match.group(2) else y1
                if 1930 <= y1 <= 2025 and 1930 <= y2 <= 2025:
                    current_year_from = min(y1, y2)
                    current_year_to = max(y1, y2)
            # Extract MOOG part numbers from line
            parts_found = MOOG_PART_RE.findall(line)
            # Also check for numeric springs in spring context
            if current_category and 'resorte' in current_category.lower():
                for m in SPRING_NUM_RE.finditer(line):
                    num = m.group(1)
                    if len(num) >= 4 and not any(num == p for p in parts_found):
                        # Avoid matching years
                        n = int(num)
                        if not (1930 <= n <= 2025):
                            parts_found.append(num)
            if not parts_found or not current_year_from:
                continue
            # Build entries for each part found
            model_name = current_model
            if current_submodel:
                model_name = f"{current_model} {current_submodel}"
            for pn in parts_found:
                # Clean part number (remove trailing T for Problem Solver).
                # rstrip removes every trailing 'T', not just one.
                clean_pn = pn.rstrip('T') if pn.endswith('T') and len(pn) > 4 else pn
                for year in range(current_year_from, current_year_to + 1):
                    entries.append({
                        'brand': current_brand,
                        'model': model_name,
                        'year': year,
                        # Lines seen before any system heading default to
                        # front suspension.
                        'system': current_system or 'front_suspension',
                        'figure': current_figure,
                        'category': current_category or '',
                        'part_number': clean_pn,
                        'notes': line.strip(),
                    })
    return entries
def normalize_brand(brand):
    """Normalize a MOOG brand heading to the standard brand name.

    "<MAKE> TRUCK" headings and a few corporate spellings are folded into
    the canonical make (matched case-insensitively). Any other brand is
    returned stripped of surrounding whitespace with its case untouched.
    """
    truck_makes = (
        'CHEVROLET', 'DODGE', 'FORD', 'GEO', 'ISUZU', 'MAZDA', 'MITSUBISHI',
        'NISSAN', 'PLYMOUTH', 'SUBARU', 'SUZUKI', 'TOYOTA', 'VOLKSWAGEN',
        'VOLVO', 'RAM',
    )
    aliases = {f"{make} TRUCK": make for make in truck_makes}
    aliases.update({
        'GENERAL MOTORS TRUCK': 'GMC',
        'AMERICAN MOTORS CORP.': 'AMERICAN MOTORS',
        'AMERICAN MOTORS': 'AMERICAN MOTORS',
        'MERCEDES BENZ': 'MERCEDES-BENZ',
        'WILLYS MOTORS INC.': 'WILLYS',
    })
    key = brand.upper().strip()
    if key in aliases:
        return aliases[key]
    return brand.strip()
def main():
    """Import one MOOG catalog volume into the vehicle database.

    The volume key is taken from ``sys.argv[1]`` and must be a key of
    VOLUMES; otherwise usage is printed and the process exits with status 1.
    Steps: parse the PDF, ensure the MOOG manufacturer row, create parts,
    create vehicles and fitments, register diagram references. Everything
    is committed once at the end.
    """
    if len(sys.argv) < 2 or sys.argv[1] not in VOLUMES:
        print("Uso: python3 import_moog_catalog.py <1|2|3>")
        print(" 1 = Vol 1 (≤1989)")
        print(" 2 = Vol 2 (1990-2005)")
        print(" 3 = Vol 3 (2006+)")
        sys.exit(1)
    vol = sys.argv[1]
    config = VOLUMES[vol]
    print("=" * 70)
    print(f"IMPORTADOR - CATÁLOGO MOOG {config['label']}")
    print("=" * 70)
    print(f"\n[1/5] Leyendo PDF: {config['path']}")
    entries = parse_moog_pdf(config['path'], config['start_page'], config['end_page'])
    print(f" Entradas parseadas: {len(entries):,}")
    # part number -> category text of the first entry that mentions it
    unique_parts = {}
    for e in entries:
        if e['part_number'] not in unique_parts:
            unique_parts[e['part_number']] = e['category']
    unique_brands = set(normalize_brand(e['brand']) for e in entries)
    print(f" Partes únicas: {len(unique_parts):,}")
    print(f" Marcas de vehículos: {len(unique_brands)}")
    conn = get_db()
    cursor = conn.cursor()
    print("\n[2/5] Creando fabricante MOOG...")
    # The manufacturer row is ensured and its id printed; the id is not
    # attached to the parts created below.
    moog_mfr_id = ensure_manufacturer(cursor, 'MOOG', 'aftermarket', 'premium', 'USA')
    print(f" MOOG manufacturer_id: {moog_mfr_id}")
    print("\n[3/5] Creando partes...")
    part_ids = {}
    parts_created = 0
    for pn, cat_text in sorted(unique_parts.items()):
        group_id = classify_part(cursor, cat_text, pn)
        if not group_id:
            # The classified group does not exist in the DB: fall back.
            group_id = get_group_id(cursor, 'Ball Joints')
        # Get group name for part description
        cursor.execute("SELECT name FROM part_groups WHERE id = ?", (group_id,))
        group_row = cursor.fetchone()
        group_name = group_row['name'] if group_row else 'Suspension Part'
        names = PART_TYPE_NAMES.get(group_name, (group_name, group_name))
        name_en = f"{names[0]} {pn}"
        name_es = f"{names[1]} {pn}"
        part_id, created = get_or_create_part(
            cursor, pn, group_id, name_en, name_es, f"MOOG {names[0]}")
        part_ids[pn] = part_id
        if created:
            parts_created += 1
    print(f" Partes creadas: {parts_created:,}")
    print(f" Partes existentes: {len(unique_parts) - parts_created:,}")
    print("\n[4/5] Creando vehículos y fitments...")
    vehicles_created = 0
    fitments_created = 0
    # (BRAND, MODEL, year) -> model_year_engine id, so each vehicle is
    # resolved against the DB only once.
    mye_cache = {}
    for i, entry in enumerate(entries):
        if i % 10000 == 0 and i > 0:
            print(f" Procesando {i:,}/{len(entries):,}...")
        brand_name = normalize_brand(entry['brand'])
        cache_key = (brand_name.upper(), entry['model'].upper(), entry['year'])
        if cache_key not in mye_cache:
            brand_id = ensure_brand(cursor, brand_name)
            model_id = ensure_model(cursor, brand_id, entry['model'])
            year_id = ensure_year(cursor, entry['year'])
            # Reuse any existing engine variant of this brand/model/year.
            cursor.execute("""
SELECT mye.id FROM model_year_engine mye
JOIN models m ON mye.model_id = m.id
JOIN brands b ON m.brand_id = b.id
JOIN years y ON mye.year_id = y.id
WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
LIMIT 1
""", (brand_name, entry['model'], entry['year']))
            existing = cursor.fetchone()
            if existing:
                mye_cache[cache_key] = existing['id']
            else:
                mye_id = ensure_mye(cursor, model_id, year_id)
                mye_cache[cache_key] = mye_id
                vehicles_created += 1
        mye_id = mye_cache[cache_key]
        part_id = part_ids.get(entry['part_number'])
        if not part_id:
            continue
        # Insert the fitment only when this vehicle/part pair is new.
        cursor.execute(
            "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
            (mye_id, part_id))
        if not cursor.fetchone():
            notes = f"MOOG Catalog {config['label']}"
            if entry['figure']:
                notes += f" - Fig {entry['figure']}"
            if entry['system']:
                notes += f" - {entry['system']}"
            cursor.execute(
                "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                (mye_id, part_id, notes))
            fitments_created += 1
    print(f" Vehículos creados: {vehicles_created:,}")
    print(f" Fitments creados: {fitments_created:,}")
    # Store diagram references
    print("\n[5/5] Guardando referencias de diagramas...")
    figures_seen = set()
    # Get a default group_id for diagrams.
    # NOTE(review): 164 is a hard-coded fallback id used when the
    # 'Ball Joints' group is missing - confirm it is valid for the target DB.
    susp_group = get_group_id(cursor, 'Ball Joints') or 164
    for entry in entries:
        if entry['figure'] and entry['figure'] not in figures_seen:
            figures_seen.add(entry['figure'])
            cursor.execute("SELECT id FROM diagrams WHERE name = ?", (entry['figure'],))
            if not cursor.fetchone():
                # The label comes from the first entry seen for the figure.
                sys_label = {
                    'front_suspension': 'Suspensión Delantera',
                    'steering': 'Dirección',
                    'rear_suspension': 'Suspensión Trasera',
                }.get(entry.get('system'), 'Suspensión')
                cursor.execute(
                    "INSERT INTO diagrams (name, name_es, group_id, image_path, source) VALUES (?, ?, ?, ?, ?)",
                    (entry['figure'], f"MOOG {sys_label} - {entry['figure']}",
                     susp_group, f"moog/{entry['figure']}.png", 'MOOG Catalog'))
    # This counts distinct figures seen, including ones already in the DB.
    print(f" Diagramas registrados: {len(figures_seen)}")
    conn.commit()
    conn.close()
    print("\n" + "=" * 70)
    print(f"IMPORTACIÓN MOOG {config['label']} COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Diagramas: {len(figures_seen)}
""")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,554 @@
#!/usr/bin/env python3
"""
IMPORTADOR DEL CATÁLOGO WIX 2021 - FILTROS
Formato: Brand → Year → Model → Engine + filter columns
Páginas 77-687: Autos de pasajeros / camionetas ligeras
PDF: /tmp/catalogs/wix_2021.pdf
"""
import sqlite3
import re
import pypdf
from pathlib import Path
# SQLite database file, located one directory above this script's directory.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
# Location of the WIX 2021 catalog PDF read by main().
PDF_PATH = '/tmp/catalogs/wix_2021.pdf'
# Uppercase vehicle brand names; a catalog line whose uppercased text equals
# one of these starts a new brand section in parse_wix_pdf().
BRAND_HEADERS = {
    'ACURA', 'ALFA ROMEO', 'AM GENERAL', 'AMERICAN MOTORS', 'ASTON MARTIN',
    'ASUNA', 'AUDI', 'AUSTIN', 'AUSTIN HEALEY', 'AVANTI', 'BENTLEY', 'BMW',
    'BUICK', 'CADILLAC', 'CHECKER', 'CHEVROLET', 'CHRYSLER', 'DAEWOO',
    'DAIHATSU', 'DATSUN', 'DELOREAN', 'DODGE', 'EAGLE', 'FIAT', 'FORD',
    'FREIGHTLINER', 'GEO', 'GMC', 'HILLMAN', 'HONDA', 'HUMMER', 'HYUNDAI',
    'INFINITI', 'INTERNATIONAL', 'ISUZU', 'JAGUAR', 'JEEP', 'KIA',
    'LAFORZA', 'LAND ROVER', 'LEXUS', 'LINCOLN', 'LOTUS', 'MACK', 'MAZDA',
    'MERCEDES-BENZ', 'MERCURY', 'MERKUR', 'MINI', 'MITSUBISHI', 'MORGAN',
    'NISSAN', 'OLDSMOBILE', 'OPEL', 'PEUGEOT', 'PLYMOUTH', 'PONTIAC',
    'PORSCHE', 'RAM', 'RENAULT', 'ROLLS ROYCE', 'SAAB', 'SATURN', 'SCION',
    'SEAT', 'SHELBY', 'SMART', 'SRT', 'STUDEBAKER', 'SUBARU', 'SUNBEAM',
    'SUZUKI', 'TOYOTA', 'TRIUMPH', 'VOLKSWAGEN', 'VOLVO', 'WORKHORSE',
    'WORKHORSE CUSTOM CHASSIS',
}
# Matches the start of an engine row: V, L or H, optional spaces, digits,
# whitespace, then a decimal displacement followed by "L" (case-insensitive).
ENGINE_RE = re.compile(r'^[VLH]\s*\d+\s+\d+\.\d+L', re.IGNORECASE)
# Substrings of page header/footer/legend text; any line containing one of
# them is discarded by is_footer_line().
FOOTER_MARKERS = [
    'Pass Car/Light Truck',
    'Year/Año/Année',
    'Model/Modelo/Modèle',
    'N/A = Not Available',
    'N/A = Non disponible',
    'N/A = No disponible',
    'Italicized Part Numbers',
    'Las piezas con números',
    'Les numéros de pièc',
    'Engine/Motor/Moteur',
    'Eng. Code',
    'Código de',
    'Code moteur',
    'Oil XP',
    'Aceite XP',
    'Cabina Aire',
    'Cabin Air XP',
    'Combustible',
    'Transmisión',
    'Carburant',
]
# filter type -> (part group name, Spanish group name, part category name),
# consumed by get_filter_group() to find or create the group.
FILTER_GROUPS = {
    'oil': ('Oil Filters', 'Filtros de Aceite', 'Engine'),
    'air': ('Air Filters', 'Filtros de Aire', 'Engine'),
    'cabin_air': ('Cabin Air Filters', 'Filtros de Aire de Cabina', 'HVAC'),
    'fuel': ('Fuel Filters', 'Filtros de Combustible', 'Fuel System'),
    'transmission': ('Transmission Filters', 'Filtros de Transmisión', 'Transmission'),
}
# filter type (including "_xp" variants) -> (English label, Spanish label)
# used to build part names.
TYPE_NAMES = {
    'oil': ('Oil Filter', 'Filtro de Aceite'),
    'oil_xp': ('Oil Filter XP', 'Filtro de Aceite XP'),
    'air': ('Air Filter', 'Filtro de Aire'),
    'air_xp': ('Air Filter XP', 'Filtro de Aire XP'),
    'cabin_air': ('Cabin Air Filter', 'Filtro de Aire de Cabina'),
    'cabin_air_xp': ('Cabin Air Filter XP', 'Filtro de Aire de Cabina XP'),
    'fuel': ('Fuel Filter', 'Filtro de Combustible'),
    'fuel_xp': ('Fuel Filter XP', 'Filtro de Combustible XP'),
    'transmission': ('Transmission Filter', 'Filtro de Transmisión'),
    'transmission_xp': ('Transmission Filter XP', 'Filtro de Transmisión XP'),
}
# Cell values that extract_wix_part() treats as "no part number".
SKIP_VALUES = {'N/A', 'N/R', 'N/S', 'MT72', '-'}
def get_db():
    """Open the SQLite database at DB_PATH with name-addressable rows."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers read columns as row['column'].
    connection.row_factory = sqlite3.Row
    return connection
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called *name*, creating it if needed.

    The lookup ignores case. *type_*, *quality* and *country* are only used
    when a new row has to be inserted.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    known = cursor.fetchone()
    if not known:
        cursor.execute(
            "INSERT INTO manufacturers (name, type, quality_tier, country) VALUES (?, ?, ?, ?)",
            (name, type_, quality, country))
        return cursor.lastrowid
    return known['id']
def ensure_brand(cursor, name):
    """Return the id of the brand called *name* (case-insensitive), inserting it if absent."""
    known = cursor.execute(
        "SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,)
    ).fetchone()
    if known:
        return known['id']
    cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
    return cursor.lastrowid
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if absent.

    Model names are compared case-insensitively within the brand.
    """
    cursor.execute(
        "SELECT id FROM models WHERE brand_id = ? AND UPPER(name) = UPPER(?)",
        (brand_id, name))
    known = cursor.fetchone()
    if not known:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return known['id']
def ensure_year(cursor, year):
    """Return the ``years`` row id for *year*, inserting the row when missing."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    known = cursor.fetchone()
    if known:
        year_id = known['id']
    else:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        year_id = cursor.lastrowid
    return year_id
def get_generic_engine(cursor):
    """Return the id of the 'Generic' placeholder engine, creating it on first use."""
    placeholder = cursor.execute(
        "SELECT id FROM engines WHERE name = 'Generic'"
    ).fetchone()
    if placeholder:
        return placeholder['id']
    cursor.execute("INSERT INTO engines (name, fuel_type) VALUES ('Generic', 'gasoline')")
    return cursor.lastrowid
def ensure_mye(cursor, model_id, year_id, engine_id=None):
    """Find or create the ``model_year_engine`` row for a vehicle.

    With *engine_id* the match must be exact. Without it, the first row for
    the model/year is reused whatever its engine; if none exists, a row
    bound to the 'Generic' engine is inserted.

    Returns:
        The id of the matching or newly inserted row.
    """
    if engine_id:
        lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ? AND engine_id = ?"
        lookup_args = (model_id, year_id, engine_id)
    else:
        lookup_sql = "SELECT id FROM model_year_engine WHERE model_id = ? AND year_id = ?"
        lookup_args = (model_id, year_id)
    cursor.execute(lookup_sql, lookup_args)
    hit = cursor.fetchone()
    if hit:
        return hit['id']

    if not engine_id:
        engine_id = get_generic_engine(cursor)
    cursor.execute(
        "INSERT INTO model_year_engine (model_id, year_id, engine_id) VALUES (?, ?, ?)",
        (model_id, year_id, engine_id))
    return cursor.lastrowid
def get_or_create_part(cursor, part_number, group_id, name, name_es, description):
    """Return ``(part_id, created)`` for the part with the given number.

    An existing part is returned as-is with ``created`` False; otherwise a
    new row is inserted from the remaining arguments and ``created`` is True.
    """
    existing = cursor.execute(
        "SELECT id FROM parts WHERE oem_part_number = ?", (part_number,)
    ).fetchone()
    if existing:
        return existing['id'], False
    new_row = (part_number, name, name_es, group_id, description)
    cursor.execute(
        "INSERT INTO parts (oem_part_number, name, name_es, group_id, description) VALUES (?, ?, ?, ?, ?)",
        new_row)
    return cursor.lastrowid, True
def get_filter_group(cursor, filter_type):
    """Return the part-group id for a filter type, creating rows as needed.

    *filter_type* must be a key of FILTER_GROUPS (KeyError otherwise). If
    the group is missing it is inserted under its category, and the
    category itself is inserted first when it does not exist either (with
    the English name reused for ``name_es``).
    """
    group_en, group_es, category = FILTER_GROUPS[filter_type]

    cursor.execute("SELECT id FROM part_groups WHERE name = ? LIMIT 1", (group_en,))
    group_row = cursor.fetchone()
    if group_row:
        return group_row['id']

    cursor.execute("SELECT id FROM part_categories WHERE name = ? LIMIT 1", (category,))
    category_row = cursor.fetchone()
    if category_row:
        category_id = category_row['id']
    else:
        cursor.execute(
            "INSERT INTO part_categories (name, name_es) VALUES (?, ?)",
            (category, category))
        category_id = cursor.lastrowid

    cursor.execute(
        "INSERT INTO part_groups (category_id, name, name_es) VALUES (?, ?, ?)",
        (category_id, group_en, group_es))
    return cursor.lastrowid
# --- Part number extraction ---
def extract_wix_part(token):
    """Return the WIX part number at the start of *token*, or None.

    Surrounding whitespace and trailing dots are removed first. Anything
    after the recognized number (e.g. a footnote suffix) is dropped. Plain
    5-digit numbers are only accepted when their first two digits belong to
    a known WIX filter series.
    """
    candidate = token.strip().rstrip('.')
    if not candidate or candidate in SKIP_VALUES:
        return None

    # Tried in order: XP variants first, then the alpha-prefixed families.
    prefixed_patterns = (
        r'^(\d{5}XP)',
        r'^(WL\d{4,6})',
        r'^(WA\d{4,5})',
        r'^(WP\d{4,5})',
        r'^(WF\d{4})',
    )
    for pattern in prefixed_patterns:
        hit = re.match(pattern, candidate)
        if hit:
            return hit.group(1)

    # Numeric 5-digit WIX parts
    digits = re.match(r'^(\d{5})', candidate)
    if digits:
        number = digits.group(1)
        known_series = {'51', '57', '42', '43', '44', '45', '46', '47', '48',
                        '49', '24', '33', '58'}
        if number[:2] in known_series:
            return number
    return None
def classify_filter(pn):
    """Classify a WIX part number by filter type.

    Returns one of 'oil', 'air', 'cabin_air', 'fuel', 'transmission', the
    same with an '_xp' suffix for numbers ending in 'XP', or None when the
    number is empty or not recognized.
    """
    if not pn:
        return None

    if pn.endswith('XP'):
        # Classify the base number, then tag it as the XP variant.
        inner = classify_filter(pn[:-2])
        return f"{inner}_xp" if inner else None

    by_prefix = (
        ('WL', 'oil'),
        ('WA', 'air'),
        ('WP', 'cabin_air'),
        ('WF', 'fuel'),
    )
    for prefix, kind in by_prefix:
        if pn.startswith(prefix):
            return kind

    by_series = (
        (r'^5[17]\d{3}$', 'oil'),
        (r'^4[2-9]\d{3}$', 'air'),
        (r'^24\d{3}$', 'cabin_air'),
        (r'^33\d{3}$', 'fuel'),
        (r'^58\d{3}$', 'transmission'),
    )
    for pattern, kind in by_series:
        if re.match(pattern, pn):
            return kind
    return None
def extract_parts_from_tokens(tokens):
    """Collect the distinct, classifiable WIX part numbers found in *tokens*.

    Returns:
        A list of ``(part_number, filter_type)`` tuples in first-seen
        order. Tokens that yield no part number, or a number that cannot
        be classified, are skipped.
    """
    classified = {}  # part number -> filter type, insertion-ordered
    for token in tokens:
        pn = extract_wix_part(token)
        if not pn or pn in classified:
            continue
        ftype = classify_filter(pn)
        if ftype:
            classified[pn] = ftype
    return list(classified.items())
# --- Line classification ---
def is_footer_line(line):
    """Return True when *line* contains any FOOTER_MARKERS substring."""
    for marker in FOOTER_MARKERS:
        if marker in line:
            return True
    return False
def is_continuation(line):
    """Return True when *line* continues the current engine row.

    A continuation starts with an engine qualifier word, a placeholder
    (``N/...``, ``MT...``, ``-``), or a WIX part number. A line starting
    with a one- or two-digit number also counts if one of the next three
    tokens is a WIX part number. Anything else (a new model, brand or
    year) is not a continuation.
    """
    words = line.split()
    if not words:
        return False
    head = words[0]

    if head in {'Electric/Gas', 'Turbo', 'Diesel', 'Hybrid', 'O', '-'}:
        return True
    if head.startswith(('N/', 'MT')):
        return True
    if re.match(r'^(WL|WA|WP|WF)\d', head) or re.match(r'^\d{5}', head):
        return True

    # Single/double digit + more tokens with part numbers
    if re.match(r'^\d{1,2}$', head) and len(words) > 1:
        return any(extract_wix_part(word) for word in words[1:4])
    return False
# --- PDF parsing ---
def parse_wix_pdf(pdf_path, start_page=76, end_page=687):
    """Parse the application section of the WIX 2021 catalog.

    The text is read as a hierarchy Brand -> Year -> Model -> engine rows.
    Each engine row (plus its continuation lines) is tokenized and its WIX
    part numbers are attached to the current brand/model/year.

    Args:
        pdf_path: Path of the PDF, passed to ``pypdf.PdfReader``.
        start_page: 0-based index of the first page to read. The default
            76 is catalog page 77.
        end_page: 0-based index one past the last page to read, clamped to
            the document length. The default 687 stops after catalog
            page 687.

    Returns:
        A list of dicts with keys 'brand', 'model', 'year' and 'parts',
        where 'parts' is a list of ``(part_number, filter_type)`` tuples.
        Engine rows without any recognized part are dropped.
    """
    pdf = pypdf.PdfReader(pdf_path)
    entries = []
    current_brand = None
    current_year = None
    current_model = None
    current_tokens = []  # tokens of the engine row being accumulated

    def flush_engine():
        """Emit the pending engine row, if complete, and reset the buffer."""
        nonlocal current_tokens
        if current_brand and current_year and current_model and current_tokens:
            parts = extract_parts_from_tokens(current_tokens)
            if parts:
                entries.append({
                    'brand': current_brand,
                    'model': current_model,
                    'year': current_year,
                    'parts': parts,
                })
        current_tokens = []

    total_pages = min(len(pdf.pages), end_page)
    for page_num in range(start_page, total_pages):
        # Progress message every 50 pages.
        if (page_num - start_page) % 50 == 0:
            print(f" Procesando página {page_num + 1}/{total_pages}...")
        text = pdf.pages[page_num].extract_text()
        if not text:
            continue
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue
            # Skip footer lines
            if is_footer_line(line):
                continue
            # Clean continuation markers
            clean = re.sub(r"\s*\(Cont'd/Suite\)\s*", '', line).strip()
            if not clean:
                continue
            # Brand header: resets year and model
            upper_clean = clean.upper()
            if upper_clean in BRAND_HEADERS:
                flush_engine()
                current_brand = clean
                current_year = None
                current_model = None
                continue
            # Year: a bare 4-digit number in the plausible range
            year_match = re.match(r'^(\d{4})$', clean)
            if year_match:
                y = int(year_match.group(1))
                if 1940 <= y <= 2025:
                    flush_engine()
                    current_year = y
                    current_model = None
                    continue
            if not current_brand or not current_year:
                continue
            # Engine line: starts a new token buffer
            if ENGINE_RE.match(clean):
                flush_engine()
                current_tokens = clean.split()
                continue
            # Continuation of engine data
            if current_tokens and is_continuation(clean):
                current_tokens.extend(clean.split())
                continue
            # Model name (must contain alpha characters)
            if re.search(r'[A-Za-z]', clean):
                flush_engine()
                current_model = clean
                continue
    # Emit whatever engine row was still pending at end of input.
    flush_engine()
    return entries
def main():
    """Import the WIX 2021 filter catalog into the vehicle database.

    Steps: parse the PDF, ensure the WIX manufacturer row, create filter
    parts, create vehicles and fitments, then derive cross-references from
    shared fitments. Everything is committed once at the end.
    """
    print("=" * 70)
    print("IMPORTADOR - CATÁLOGO WIX 2021")
    print("=" * 70)
    print(f"\n[1/6] Leyendo PDF: {PDF_PATH}")
    entries = parse_wix_pdf(PDF_PATH)
    print(f" Entradas parseadas: {len(entries)}")
    # part number -> filter type from the first entry that mentions it
    unique_parts = {}
    for entry in entries:
        for pn, ftype in entry['parts']:
            if pn not in unique_parts:
                unique_parts[pn] = ftype
    unique_brands = set(e['brand'] for e in entries)
    print(f" Partes únicas: {len(unique_parts)}")
    print(f" Marcas de vehículos: {len(unique_brands)}")
    conn = get_db()
    cursor = conn.cursor()
    print("\n[2/6] Creando fabricante WIX...")
    # The manufacturer row is ensured and its id printed; the id is not
    # attached to the parts created below.
    wix_mfr_id = ensure_manufacturer(cursor, 'WIX', 'aftermarket', 'premium', 'USA')
    print(f" WIX manufacturer_id: {wix_mfr_id}")
    print("\n[3/6] Creando partes de filtros...")
    # XP variants share the part group of their base filter type.
    group_ids = {}
    for ftype in FILTER_GROUPS:
        group_ids[ftype] = get_filter_group(cursor, ftype)
        group_ids[f"{ftype}_xp"] = group_ids[ftype]
    part_ids = {}
    parts_created = 0
    for pn, ftype in sorted(unique_parts.items()):
        gid = group_ids.get(ftype)
        if not gid:
            continue
        name_en, name_es = TYPE_NAMES.get(ftype, ('Filter', 'Filtro'))
        part_id, created = get_or_create_part(
            cursor, pn, gid,
            f"{name_en} {pn}", f"{name_es} {pn}",
            f"WIX {name_en}")
        part_ids[pn] = part_id
        if created:
            parts_created += 1
    print(f" Partes creadas: {parts_created}")
    print(f" Partes existentes: {len(unique_parts) - parts_created}")
    print("\n[4/6] Creando vehículos y fitments...")
    vehicles_created = 0
    fitments_created = 0
    # (BRAND, MODEL, year) -> model_year_engine id, so each vehicle is
    # resolved against the DB only once.
    mye_cache = {}
    for i, entry in enumerate(entries):
        if i % 5000 == 0 and i > 0:
            print(f" Procesando entrada {i}/{len(entries)}...")
        cache_key = (entry['brand'].upper(), entry['model'].upper(), entry['year'])
        if cache_key not in mye_cache:
            brand_id = ensure_brand(cursor, entry['brand'])
            model_id = ensure_model(cursor, brand_id, entry['model'])
            year_id = ensure_year(cursor, entry['year'])
            # Reuse any existing engine variant of this brand/model/year.
            cursor.execute("""
SELECT mye.id FROM model_year_engine mye
JOIN models m ON mye.model_id = m.id
JOIN brands b ON m.brand_id = b.id
JOIN years y ON mye.year_id = y.id
WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?) AND y.year = ?
LIMIT 1
""", (entry['brand'], entry['model'], entry['year']))
            existing = cursor.fetchone()
            if existing:
                mye_cache[cache_key] = existing['id']
            else:
                mye_id = ensure_mye(cursor, model_id, year_id)
                mye_cache[cache_key] = mye_id
                vehicles_created += 1
        mye_id = mye_cache[cache_key]
        for pn, ftype in entry['parts']:
            part_id = part_ids.get(pn)
            if not part_id:
                continue
            # Insert the fitment only when this vehicle/part pair is new.
            cursor.execute(
                "SELECT id FROM vehicle_parts WHERE model_year_engine_id = ? AND part_id = ?",
                (mye_id, part_id))
            if not cursor.fetchone():
                notes = f"Catálogo WIX 2021 - {ftype.replace('_', ' ').upper()}"
                cursor.execute(
                    "INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes) VALUES (?, ?, 1, ?)",
                    (mye_id, part_id, notes))
                fitments_created += 1
    print(f" Vehículos creados: {vehicles_created}")
    print(f" Fitments creados: {fitments_created}")
    print("\n[5/6] Creando referencias cruzadas...")
    xrefs_created = 0
    wix_part_id_set = set(part_ids.values())
    for i, (pn, part_id) in enumerate(part_ids.items()):
        if i % 200 == 0 and i > 0:
            print(f" Procesando cross-ref {i}/{len(part_ids)}...")
        # Candidate equivalents: other parts of the same group fitted to
        # at least one vehicle this WIX part fits. The LIMIT is applied
        # before the WIX-part filter below, so fewer than 50 non-WIX
        # candidates may be considered.
        cursor.execute("""
SELECT DISTINCT p2.id, p2.oem_part_number
FROM vehicle_parts vp1
JOIN vehicle_parts vp2 ON vp1.model_year_engine_id = vp2.model_year_engine_id
JOIN parts p2 ON vp2.part_id = p2.id
WHERE vp1.part_id = ?
AND p2.id != ?
AND p2.group_id = (SELECT group_id FROM parts WHERE id = ?)
LIMIT 50
""", (part_id, part_id, part_id))
        for row in cursor.fetchall():
            # Do not cross-reference WIX parts with each other.
            if row['id'] in wix_part_id_set:
                continue
            # Forward reference: WIX part -> other part number.
            cursor.execute(
                "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                (part_id, row['oem_part_number']))
            if not cursor.fetchone():
                cursor.execute(
                    "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'WIX 2021 Catalog')",
                    (part_id, row['oem_part_number']))
                xrefs_created += 1
            # Reverse reference: other part -> WIX part number.
            cursor.execute(
                "SELECT id FROM part_cross_references WHERE part_id = ? AND cross_reference_number = ?",
                (row['id'], pn))
            if not cursor.fetchone():
                cursor.execute(
                    "INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type, source) VALUES (?, ?, 'interchange', 'WIX 2021 Catalog')",
                    (row['id'], pn))
                xrefs_created += 1
    print(f" Cross-refs creadas: {xrefs_created}")
    conn.commit()
    conn.close()
    print("\n" + "=" * 70)
    print("IMPORTACIÓN WIX COMPLETADA")
    print("=" * 70)
    print(f"""
RESUMEN:
- Partes creadas: {parts_created:,}
- Vehículos creados: {vehicles_created:,}
- Fitments creados: {fitments_created:,}
- Cross-refs creadas: {xrefs_created:,}
""")


if __name__ == '__main__':
    main()

Binary file not shown.