Files
Autoparts-DB/vehicle_database/scripts/import_gonher_complete.py
consultoria-as e66b18f6ae Add admin panel, enhanced search, Gonher import and expand API
- Add admin interface (admin.html, admin.js) for managing catalog data
- Add enhanced search module with advanced filtering capabilities
- Expand server.py with new API endpoints and admin functionality
- Add Gonher catalog import scripts (import_gonher_catalog.py, import_gonher_complete.py)
- Add demo data population script and sample CSV data
- Update customer landing page and dashboard with UI improvements
- Update database with enriched vehicle and parts data

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 00:35:05 +00:00

512 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
IMPORTADOR COMPLETO DEL CATÁLOGO GONHER 2022
- Crea vehículos faltantes
- Crea partes de filtros Gonher
- Crea referencias cruzadas con otras marcas (AC Delco, Fram, etc.)
- Crea fitments (vincula partes a vehículos)
"""
import sqlite3
import re
import pypdf
from pathlib import Path
from collections import defaultdict
# Filesystem locations. The SQLite database is expected in the parent of this
# script's directory; the catalog PDF is read from a fixed temporary path.
DB_PATH = Path(__file__).parent.parent / 'vehicle_database.db'
PDF_PATH = '/tmp/filtros_catalog.pdf'

# Per filter type (the labels returned by classify_filter): the part category
# and group the filter is filed under, plus the English and Spanish prefixes
# used to build part names.
FILTER_TYPES = {
    'ACEITE': {
        'category': 'Engine',
        'group': 'Oil Filters',
        'prefix': 'Oil Filter',
        'prefix_es': 'Filtro de Aceite',
    },
    'SINTÉTICO': {
        'category': 'Engine',
        'group': 'Oil Filters',
        'prefix': 'Synthetic Oil Filter',
        'prefix_es': 'Filtro de Aceite Sintético',
    },
    'AIRE': {
        'category': 'Engine',
        'group': 'Air Filters',
        'prefix': 'Air Filter',
        'prefix_es': 'Filtro de Aire',
    },
    'COMB.': {
        'category': 'Fuel & Air',
        'group': 'Fuel Filters',
        'prefix': 'Fuel Filter',
        'prefix_es': 'Filtro de Combustible',
    },
    'CABINA': {
        'category': 'Heat & Air Conditioning',
        'group': 'Cabin Air Filters',
        'prefix': 'Cabin Air Filter',
        'prefix_es': 'Filtro de Cabina',
    },
}

# Vehicle brand headings as they appear (upper-case) in the catalog text.
CATALOG_BRANDS = [
    'ACURA',
    'ALFA ROMEO',
    'AUDI',
    'BMW',
    'BUICK',
    'CADILLAC',
    'CHEVROLET',
    'CHRYSLER',
    'DODGE',
    'FIAT',
    'FORD',
    'GMC',
    'HONDA',
    'HYUNDAI',
    'INFINITI',
    'JAGUAR',
    'JEEP',
    'KIA',
    'LEXUS',
    'LINCOLN',
    'MAZDA',
    'MERCEDES BENZ',
    'MERCURY',
    'MINI',
    'MITSUBISHI',
    'NISSAN',
    'PEUGEOT',
    'PONTIAC',
    'PORSCHE',
    'RAM',
    'RENAULT',
    'SEAT',
    'SMART',
    'SUBARU',
    'SUZUKI',
    'TOYOTA',
    'VOLKSWAGEN',
    'VOLVO',
]

# Competitor brand headings recognised in the cross-reference section.
XREF_BRANDS = [
    'AC DELCO',
    'FRAM',
    'INTERFIL',
    'MANN',
    'MOTORCRAFT',
]
def get_db():
    """Open the SQLite database at DB_PATH and return the connection.

    The connection's row factory is set to ``sqlite3.Row`` so that fetched
    rows can be indexed by column name (e.g. ``row['id']``).
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
def ensure_manufacturer(cursor, name, type_='aftermarket', quality='standard', country=None):
    """Return the id of the manufacturer called *name*, inserting it if absent.

    The lookup compares names case-insensitively. When no row matches, a new
    one is inserted with *type_*, *quality* (stored as ``quality_tier``) and
    *country*; an existing row is returned untouched.
    """
    cursor.execute("SELECT id FROM manufacturers WHERE UPPER(name) = UPPER(?)", (name,))
    found = cursor.fetchone()
    if found is None:
        new_row = (name, type_, quality, country)
        cursor.execute("""
            INSERT INTO manufacturers (name, type, quality_tier, country)
            VALUES (?, ?, ?, ?)
        """, new_row)
        return cursor.lastrowid
    return found['id']
def ensure_brand(cursor, name):
    """Return the id of the brand called *name*, inserting it if absent.

    Names are matched case-insensitively; a new row stores *name* as given.
    """
    cursor.execute("SELECT id FROM brands WHERE UPPER(name) = UPPER(?)", (name,))
    found = cursor.fetchone()
    if found is None:
        cursor.execute("INSERT INTO brands (name) VALUES (?)", (name,))
        return cursor.lastrowid
    return found['id']
def ensure_model(cursor, brand_id, name):
    """Return the id of model *name* under *brand_id*, inserting it if absent.

    The model name is matched case-insensitively within the given brand only,
    so the same name may exist under several brands.
    """
    lookup_args = (brand_id, name)
    cursor.execute("""
        SELECT id FROM models
        WHERE brand_id = ? AND UPPER(name) = UPPER(?)
    """, lookup_args)
    found = cursor.fetchone()
    if found is None:
        cursor.execute("INSERT INTO models (brand_id, name) VALUES (?, ?)", (brand_id, name))
        return cursor.lastrowid
    return found['id']
def ensure_year(cursor, year):
    """Return the id of the ``years`` row for *year*, inserting it if absent."""
    cursor.execute("SELECT id FROM years WHERE year = ?", (year,))
    found = cursor.fetchone()
    if found is None:
        cursor.execute("INSERT INTO years (year) VALUES (?)", (year,))
        return cursor.lastrowid
    return found['id']
def ensure_engine(cursor, name):
    """Return the id of the engine called *name*, inserting it if absent.

    Existing engines are matched on the exact name. For a new engine the
    cylinder count, displacement and fuel type are derived from the name:

    * a leading ``L4-2.0L`` / ``V6-3.5L`` style token (any letter case) gives
      the cylinder count and the displacement in cubic centimetres;
    * the words DIESEL/DIÉSEL, ELECTRIC or HYBRID (any case) select the fuel
      type; anything else, including turbo variants, is stored as gasoline.

    Values that cannot be parsed are stored as NULL.
    """
    cursor.execute("SELECT id FROM engines WHERE name = ?", (name,))
    row = cursor.fetchone()
    if row:
        return row['id']

    displacement = None
    cylinders = None
    # Case-insensitive so that names accepted by the catalog extractor (which
    # matches motors with re.IGNORECASE) are parsed here as well.
    match = re.match(r'([LV])(\d+)[-]?([\d.]+)L?', name, re.IGNORECASE)
    if match:
        cylinders = int(match.group(2))
        try:
            litres = float(match.group(3))
        except ValueError:
            # The displacement group also accepts text such as "2..0"; leave
            # the displacement unknown instead of aborting the import.
            litres = None
        if litres is not None:
            # round(), not int(): float error must not truncate a value such
            # as 1000.9999999999999 down to 1000.
            displacement = round(litres * 1000)

    upper_name = name.upper()
    fuel_type = 'gasoline'  # lowercase to match DB constraint
    if 'DIESEL' in upper_name or 'DIÉSEL' in upper_name:
        fuel_type = 'diesel'
    elif 'ELECTRIC' in upper_name:
        fuel_type = 'electric'
    elif 'HYBRID' in upper_name:
        fuel_type = 'hybrid'
    # 'TURBO' is a modifier, not a fuel type, so it keeps the gasoline default.

    cursor.execute("""
        INSERT INTO engines (name, displacement_cc, cylinders, fuel_type)
        VALUES (?, ?, ?, ?)
    """, (name, displacement, cylinders, fuel_type))
    return cursor.lastrowid
def ensure_mye(cursor, model_id, year_id, engine_id):
    """Return the id of the model/year/engine combination, inserting it if absent.

    A ``model_year_engine`` row is looked up by the exact triple of ids and
    created only when no such row exists.
    """
    triple = (model_id, year_id, engine_id)
    cursor.execute("""
        SELECT id FROM model_year_engine
        WHERE model_id = ? AND year_id = ? AND engine_id = ?
    """, triple)
    found = cursor.fetchone()
    if found is None:
        cursor.execute("""
            INSERT INTO model_year_engine (model_id, year_id, engine_id)
            VALUES (?, ?, ?)
        """, triple)
        return cursor.lastrowid
    return found['id']
def get_or_create_group(cursor, category_name, group_name):
    """Return the id of part group *group_name* inside category *category_name*.

    Categories are never created here: when *category_name* does not exist the
    function returns ``None``. A missing group is inserted under the category,
    with *group_name* stored in both the ``name`` and ``name_es`` columns.
    """
    cursor.execute("SELECT id FROM part_categories WHERE name = ?", (category_name,))
    category = cursor.fetchone()
    if not category:
        return None
    category_id = category['id']

    cursor.execute("""
        SELECT id FROM part_groups WHERE category_id = ? AND name = ?
    """, (category_id, group_name))
    group = cursor.fetchone()
    if group is None:
        cursor.execute("""
            INSERT INTO part_groups (category_id, name, name_es)
            VALUES (?, ?, ?)
        """, (category_id, group_name, group_name))
        return cursor.lastrowid
    return group['id']
def parse_year_range(year_str):
    """Expand a catalog year cell into an ascending list of years.

    Accepted forms (surrounding whitespace is ignored):

    * a single four-digit year, e.g. ``"2015"`` -> ``[2015]``;
    * two four-digit years separated by a hyphen, en dash or em dash, in
      either order, e.g. ``"2012 - 2010"`` -> ``[2010, 2011, 2012]``.

    Anything else yields an empty list.
    """
    year_str = year_str.strip()
    if re.match(r'^\d{4}$', year_str):
        return [int(year_str)]

    # PDF text extraction may produce a typographic dash instead of "-".
    match = re.match(r'(\d{4})\s*[-\u2013\u2014]\s*(\d{4})', year_str)
    if not match:
        return []

    # The two bounds may appear in either order; the result is inclusive of
    # both ends.
    first, last = sorted((int(match.group(1)), int(match.group(2))))
    return list(range(first, last + 1))
def classify_filter(part_number):
    """Return the filter type implied by a part number's prefix.

    The comparison is case-insensitive. The result is one of the filter type
    labels 'ACEITE', 'SINTÉTICO', 'AIRE', 'COMB.' or 'CABINA', or ``None``
    when the prefix is not recognised (this includes ``GAVW-`` numbers and
    numbers written without a hyphen).
    """
    code = part_number.upper()
    # Every prefix ends in "-", so none of them is a prefix of another and at
    # most one entry can match.
    prefix_to_type = (
        ('GPS-', 'SINTÉTICO'),
        ('GP-', 'ACEITE'),
        ('GA-', 'AIRE'),
        ('GG-', 'COMB.'),
        ('GAC-', 'CABINA'),
        ('G-', 'ACEITE'),  # Generic oil filter
    )
    for prefix, filter_type in prefix_to_type:
        if code.startswith(prefix):
            return filter_type
    return None
def extract_vehicle_entries(pdf):
    """Scan the catalog text and return one entry per vehicle, year and engine.

    *pdf* must expose ``pages``, each page providing ``extract_text()``. The
    text is read line by line while tracking the current brand and model
    heading. A data line (a year or year range, then an engine such as
    ``L4-2.0L``, then part numbers) produces, for every year in the range, a
    dict with the keys ``brand``, ``model``, ``year``, ``motor`` and
    ``filters``. ``filters`` maps filter type to part number; when a line
    lists several part numbers of the same type the last one wins. Lines
    without any classifiable part number produce nothing.
    """
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation had been lost -- confirm the placement of the `continue`
    # after the "(Continúa)" handling against the original file.
    continued_re = re.compile(r'^([A-Z][A-Z0-9\s\-]+)\s*\(Continúa\)')
    heading_re = re.compile(r'^[A-Z][A-Z0-9\s\-/]+$')
    leading_year_re = re.compile(r'^\d{4}')
    part_like_re = re.compile(r'^G[APCS]?[-]?\d')
    data_line_re = re.compile(r'^(\d{4}(?:\s*[-]\s*\d{4})?)\s+(.+)$')
    motor_re = re.compile(
        r'^([LV]\d+[-][\d.]+L(?:\s+(?:Turbo|TURBOCHARGED|diésel|ELECTRIC|HYBRID))?)\s*(.*)$',
        re.IGNORECASE,
    )
    part_number_re = re.compile(r'G[A-Z]*[-]?[\dA-Z]+(?:\(\d+\))?')
    quantity_suffix_re = re.compile(r'\(\d+\)')
    column_labels = ['ACEITE', 'AIRE', 'COMB', 'CABINA', 'SINTÉTICO', 'AÑO', 'MOTOR']

    entries = []
    brand = None
    model = None

    for page in pdf.pages:
        text = page.extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Table header and page footer carry no data.
            if ('AÑO' in line and 'MOTOR' in line) or 'Los filtros Gonher' in line:
                continue

            # A bare brand name starts a new brand section.
            if line in CATALOG_BRANDS:
                brand, model = line, None
                continue

            # "<heading> (Continúa)" repeats a brand or model on a new page.
            if '(Continúa)' in line:
                continued = continued_re.match(line)
                if continued:
                    heading = continued.group(1).strip()
                    if heading in CATALOG_BRANDS:
                        brand = heading
                    elif brand:
                        model = heading
                continue

            # An upper-case heading that is neither a column label nor shaped
            # like a part number is taken as the model name.
            is_model_heading = (
                brand
                and heading_re.match(line)
                and not leading_year_re.match(line)
                and line not in column_labels
                and not part_like_re.match(line)
            )
            if is_model_heading:
                model = line
                continue

            # Data rows are only meaningful once brand and model are known.
            if not (brand and model):
                continue
            data = data_line_re.match(line)
            if not data:
                continue
            motor_match = motor_re.match(data.group(2))
            if not motor_match:
                continue

            motor = motor_match.group(1).strip()
            filters = {}
            for token in part_number_re.findall(motor_match.group(2).strip()):
                # Drop a trailing "(n)" marker before classifying.
                part_number = quantity_suffix_re.sub('', token)
                filter_type = classify_filter(part_number)
                if filter_type:
                    filters[filter_type] = part_number
            if not filters:
                continue

            for year in parse_year_range(data.group(1)):
                entries.append({
                    'brand': brand,
                    'model': model,
                    'year': year,
                    'motor': motor,
                    'filters': dict(filters),
                })

    return entries
def extract_cross_references(pdf, start_page=117, brands=None):
    """Collect competitor-to-Gonher part number pairs from the catalog.

    Args:
        pdf: object exposing ``pages``; each page provides ``extract_text()``.
        start_page: zero-based index of the first page to scan. The default
            of 117 is the value this importer has always used for the
            cross-reference section; negative values are treated as 0.
        brands: brand heading lines to recognise. Defaults to XREF_BRANDS.

    Returns:
        A list of dicts with the keys ``brand`` (the most recent brand
        heading), ``part_number`` (the competitor's number) and
        ``gonher_part`` (the Gonher number), in reading order. The current
        brand carries over from one page to the next.
    """
    brand_headings = XREF_BRANDS if brands is None else brands
    row_re = re.compile(r'^([A-Z0-9\-/]+)\s+(G[A-Z]*[-]?\d+[A-Z]*)$')

    xrefs = []
    current_brand = None
    for page_index in range(max(start_page, 0), len(pdf.pages)):
        text = pdf.pages[page_index].extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()

            if line in brand_headings:
                current_brand = line
                continue

            # Rows seen before any brand heading cannot be attributed.
            if not current_brand:
                continue

            row = row_re.match(line)
            if row:
                xrefs.append({
                    'brand': current_brand,
                    'part_number': row.group(1),
                    'gonher_part': row.group(2),
                })

    return xrefs
def _create_missing_vehicles(cursor, vehicle_entries):
    """Ensure a model_year_engine row exists for each distinct catalog vehicle.

    Returns ``(mye_cache, created)``: ``mye_cache`` maps
    ``(brand, model, year, motor)`` to the model_year_engine id and ``created``
    counts the combinations the lookup query did not find.
    """
    mye_cache = {}
    created = 0
    for entry in vehicle_entries:
        cache_key = (entry['brand'], entry['model'], entry['year'], entry['motor'])
        if cache_key in mye_cache:
            continue

        # Brand and model are compared case-insensitively, the engine name
        # exactly.
        cursor.execute("""
            SELECT mye.id FROM model_year_engine mye
            JOIN models m ON mye.model_id = m.id
            JOIN brands b ON m.brand_id = b.id
            JOIN years y ON mye.year_id = y.id
            JOIN engines e ON mye.engine_id = e.id
            WHERE UPPER(b.name) = UPPER(?) AND UPPER(m.name) = UPPER(?)
            AND y.year = ? AND e.name = ?
            LIMIT 1
        """, cache_key)
        existing = cursor.fetchone()
        if existing:
            mye_cache[cache_key] = existing['id']
            continue

        brand_id = ensure_brand(cursor, entry['brand'])
        model_id = ensure_model(cursor, brand_id, entry['model'])
        year_id = ensure_year(cursor, entry['year'])
        engine_id = ensure_engine(cursor, entry['motor'])
        mye_cache[cache_key] = ensure_mye(cursor, model_id, year_id, engine_id)
        created += 1
    return mye_cache, created


def _create_filter_parts(cursor, unique_filters):
    """Ensure a parts row exists for each Gonher part number.

    *unique_filters* maps part number to filter type. Part numbers whose
    filter type has no FILTER_TYPES entry, or whose category is missing from
    the database, are skipped. Returns ``(filter_parts, created)`` where
    ``filter_parts`` maps part number to part id.
    """
    filter_parts = {}
    created = 0
    for part_num, filter_type in unique_filters.items():
        config = FILTER_TYPES.get(filter_type)
        if not config:
            continue
        group_id = get_or_create_group(cursor, config['category'], config['group'])
        if not group_id:
            continue

        cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (part_num,))
        existing = cursor.fetchone()
        if existing:
            filter_parts[part_num] = existing['id']
            continue

        name = f"{config['prefix']} {part_num}"
        name_es = f"{config['prefix_es']} {part_num}"
        cursor.execute("""
            INSERT INTO parts (oem_part_number, name, name_es, group_id, description)
            VALUES (?, ?, ?, ?, ?)
        """, (part_num, name, name_es, group_id, f"Gonher {config['prefix']}"))
        filter_parts[part_num] = cursor.lastrowid
        created += 1
    return filter_parts, created


def _create_fitments(cursor, vehicle_entries, mye_cache, filter_parts):
    """Link each vehicle to its filters in vehicle_parts; return rows inserted."""
    created = 0
    for entry in vehicle_entries:
        cache_key = (entry['brand'], entry['model'], entry['year'], entry['motor'])
        mye_id = mye_cache.get(cache_key)
        if not mye_id:
            continue
        for filter_type, part_num in entry['filters'].items():
            part_id = filter_parts.get(part_num)
            if not part_id:
                continue
            # Insert only when this vehicle/part pair is not linked yet.
            cursor.execute("""
                SELECT id FROM vehicle_parts
                WHERE model_year_engine_id = ? AND part_id = ?
            """, (mye_id, part_id))
            if cursor.fetchone():
                continue
            cursor.execute("""
                INSERT INTO vehicle_parts (model_year_engine_id, part_id, quantity_required, fitment_notes)
                VALUES (?, ?, 1, ?)
            """, (mye_id, part_id, f"Catálogo Gonher 2022 - {filter_type}"))
            created += 1
    return created


def _create_cross_references(cursor, cross_refs, filter_parts):
    """Store competitor part numbers against Gonher parts; return rows inserted."""
    created = 0
    for xref in cross_refs:
        gonher_part_id = filter_parts.get(xref['gonher_part'])
        if not gonher_part_id:
            # Not created in this run; it may already be in the database.
            cursor.execute("SELECT id FROM parts WHERE oem_part_number = ?", (xref['gonher_part'],))
            row = cursor.fetchone()
            if not row:
                continue
            gonher_part_id = row['id']

        cursor.execute("""
            SELECT id FROM part_cross_references
            WHERE part_id = ? AND cross_reference_number = ?
        """, (gonher_part_id, xref['part_number']))
        if cursor.fetchone():
            continue

        # AC Delco and Motorcraft numbers are recorded as OEM alternates,
        # every other brand as a plain interchange.
        ref_type = 'interchange'
        if xref['brand'] in ['AC DELCO', 'MOTORCRAFT']:
            ref_type = 'oem_alternate'
        cursor.execute("""
            INSERT INTO part_cross_references (part_id, cross_reference_number, reference_type)
            VALUES (?, ?, ?)
        """, (gonher_part_id, xref['part_number'], ref_type))
        created += 1
    return created


def main():
    """Run the complete import: read the PDF, then populate the database.

    Steps: read the catalog PDF, extract vehicle entries and cross-references,
    then create manufacturers, missing vehicles, filter parts, fitments and
    cross-references, printing progress along the way. All database changes
    are committed together at the end; if any step raises, the changes are
    rolled back, the connection is closed and the exception propagates.
    """
    print("=" * 70)
    print("IMPORTADOR COMPLETO - CATÁLOGO GONHER 2022")
    print("=" * 70)

    print(f"\n[1/7] Leyendo PDF: {PDF_PATH}")
    pdf = pypdf.PdfReader(PDF_PATH)
    print(f" Total páginas: {len(pdf.pages)}")

    print("\n[2/7] Extrayendo datos del catálogo...")
    vehicle_entries = extract_vehicle_entries(pdf)
    cross_refs = extract_cross_references(pdf)
    print(f" Entradas de vehículos: {len(vehicle_entries)}")
    print(f" Referencias cruzadas: {len(cross_refs)}")

    # First filter type seen for each part number wins.
    unique_filters = {}
    for entry in vehicle_entries:
        for filter_type, part_num in entry['filters'].items():
            unique_filters.setdefault(part_num, filter_type)
    print(f" Filtros únicos: {len(unique_filters)}")

    conn = get_db()
    try:
        cursor = conn.cursor()

        print("\n[3/7] Creando fabricantes...")
        manufacturers = {
            'Gonher': ensure_manufacturer(cursor, 'Gonher', 'aftermarket', 'standard', 'Mexico'),
            'AC Delco': ensure_manufacturer(cursor, 'AC Delco', 'oem', 'oem', 'USA'),
            'Fram': ensure_manufacturer(cursor, 'Fram', 'aftermarket', 'standard', 'USA'),
            'Interfil': ensure_manufacturer(cursor, 'Interfil', 'aftermarket', 'economy', 'Mexico'),
            'Mann': ensure_manufacturer(cursor, 'Mann', 'aftermarket', 'premium', 'Germany'),
            'Motorcraft': ensure_manufacturer(cursor, 'Motorcraft', 'oem', 'oem', 'USA'),
        }
        print(f" Fabricantes: {list(manufacturers.keys())}")

        print("\n[4/7] Creando vehículos faltantes...")
        mye_cache, vehicles_created = _create_missing_vehicles(cursor, vehicle_entries)
        print(f" Vehículos creados: {vehicles_created}")

        print("\n[5/7] Creando partes de filtros...")
        filter_parts, parts_created = _create_filter_parts(cursor, unique_filters)
        print(f" Partes creadas: {parts_created}")

        print("\n[6/7] Creando fitments (vehículo-parte)...")
        fitments_created = _create_fitments(cursor, vehicle_entries, mye_cache, filter_parts)
        print(f" Fitments creados: {fitments_created}")

        print("\n[7/7] Creando referencias cruzadas...")
        xrefs_created = _create_cross_references(cursor, cross_refs, filter_parts)
        print(f" Referencias cruzadas creadas: {xrefs_created}")

        conn.commit()
    except Exception:
        # Leave the database as it was rather than half-imported.
        conn.rollback()
        raise
    finally:
        conn.close()

    print("\n" + "=" * 70)
    print("IMPORTACIÓN COMPLETADA")
    print("=" * 70)
    print(
        "\nRESUMEN:\n"
        f"- Vehículos creados: {vehicles_created:,}\n"
        f"- Partes creadas: {parts_created:,}\n"
        f"- Fitments creados: {fitments_created:,}\n"
        f"- Cross-refs creadas: {xrefs_created:,}\n"
        f"- Fabricantes: {len(manufacturers)}\n"
    )
# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()