fix: performance improvements, shared UI, and cross-reference data quality

Backend (server.py):
- Fix N+1 query in /api/diagrams/<id>/parts with batch cross-ref query
- Add LIMIT safety nets to 15 endpoints (50-5000 per data type)
- Add pagination to /api/vehicles, /api/model-year-engine, /api/vehicles/<id>/parts, /api/admin/export
- Optimize search_vehicles() EXISTS subquery to JOIN
- Restrict static route to /static/* subdir (security fix)
- Add detailed=true support to /api/brands and /api/models

Frontend:
- Extract shared CSS into shared.css (variables, reset, buttons, forms, scrollbar)
- Create shared nav.js component (logo + navigation links, auto-highlights)
- Update all 4 HTML pages to use shared CSS and nav
- Update JS to handle paginated API responses

Data quality:
- Fix cross-reference source field: map 72K records from catalog names to actual brands
- Fix aftermarket_parts manufacturer_id: correct 8K records with wrong brand attribution
- Delete 98MB backup file, orphan records, and garbage cross-references
- Add import scripts for DAR, FRAM, WIX, MOOG, Cartek catalogs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-17 03:09:22 +00:00
parent 3ea2de61e2
commit 7ecf1295a5
17 changed files with 6605 additions and 848 deletions

View File

@@ -0,0 +1,179 @@
#!/usr/bin/env python3
"""
EXTRACTOR DE IMÁGENES DE DIAGRAMAS MOOG
Extrae las ilustraciones de suspensión/dirección de los PDFs MOOG
y las guarda como archivos de imagen mapeados a sus figure codes.
"""
import re
import sys
import io
import hashlib
from pathlib import Path
import pypdf
# Directory that receives the extracted diagram images:
# three directory levels above this file, then dashboard/static/diagrams/moog.
OUTPUT_DIR = (
    Path(__file__).parent.parent.parent
    / 'dashboard' / 'static' / 'diagrams' / 'moog'
)

# One entry per MOOG catalog volume, keyed by the volume number as a string
# (the same keys are accepted on the command line).
#   path       -- location of the volume's PDF
#   start_page -- first page index to scan (inclusive, index into pdf.pages)
#   end_page   -- last page index to scan (inclusive; clamped to the PDF length
#                 by extract_volume)
#   label      -- human-readable name used in progress output
VOLUMES = {
    '1': dict(
        path='/tmp/catalogs/suspension/moog_vol1_1989back.pdf',
        start_page=3,
        end_page=1037,
        label='Vol 1 (≤1989)',
    ),
    '2': dict(
        path='/tmp/catalogs/suspension/moog_vol2_1990_2005.pdf',
        start_page=6,
        end_page=1641,
        label='Vol 2 (1990-2005)',
    ),
    '3': dict(
        path='/tmp/catalogs/suspension/moog_vol3_2006up.pdf',
        start_page=7,
        end_page=1089,
        label='Vol 3 (2006+)',
    ),
}

# A figure code is a standalone word made of one of the letters F, S or R
# followed by exactly three digits (e.g. "F123").
FIGURE_RE = re.compile(r'\b([FSR]\d{3})\b')
def extract_figure_codes(text):
    """Return the distinct figure codes in ``text``, ordered by first appearance.

    A figure code is whatever ``FIGURE_RE`` captures in its first group.
    Repeated occurrences of the same code are reported only once.
    """
    matches = (found.group(1) for found in FIGURE_RE.finditer(text))
    # dict keys are unique and keep insertion order, so this drops repeats
    # without disturbing the order in which codes were first seen.
    return list(dict.fromkeys(matches))
# Encoded images of this many bytes or fewer are ignored; only larger images
# are considered candidate diagrams.
_MIN_IMAGE_BYTES = 5000


def _guess_image_extension(img_data):
    """Choose a file extension from the first four bytes of ``img_data``."""
    header = img_data[:4]
    if header == b'\x89PNG':
        return 'png'
    if header == b'\x00\x00\x00\x0c':
        return 'jp2'
    # NOTE(review): no JPEG signature check is made; every other payload is
    # saved with a .jpg extension.
    return 'jpg'


def _images_from_xobjects(page):
    """Collect large image streams by walking the page's /XObject resources.

    Best effort: any error while walking the resources stops the scan and
    whatever was collected up to that point is returned.
    """
    images = []
    try:
        resources = page['/Resources']
        if '/XObject' not in resources:
            return images
        xobjects = resources['/XObject'].get_object()
        for obj_name in sorted(xobjects.keys()):
            xobj = xobjects[obj_name].get_object()
            if xobj.get('/Subtype') != '/Image':
                continue
            width = int(xobj.get('/Width', 0))
            height = int(xobj.get('/Height', 0))
            # Only images wider than 200 and taller than 100 are kept.
            if width <= 200 or height <= 100:
                continue
            try:
                img_data = xobj.get_data()
            except Exception:
                # A single unreadable stream must not abort the whole page.
                continue
            if len(img_data) > _MIN_IMAGE_BYTES:
                images.append(img_data)
    except Exception:
        # Malformed resource dictionary: keep what we have.
        pass
    return images


def _collect_page_images(page):
    """Return the bytes of every large image on ``page``.

    ``page.images`` is tried first. If it raises, the partial result is
    discarded and the page's XObjects are rescanned from scratch; mixing the
    two lists would contain the same image twice and shift the positional
    code-to-image pairing done by the caller.
    """
    images = []
    try:
        for image in page.images:
            img_data = image.data
            if len(img_data) > _MIN_IMAGE_BYTES:
                images.append(img_data)
    except Exception:
        return _images_from_xobjects(page)
    return images


def extract_volume(vol_key, already_extracted):
    """Extract diagram images from one MOOG volume.

    Every page in the volume's configured range is scanned for figure codes.
    Codes not yet in ``already_extracted`` are paired with the page's large
    images by position (first code with first image, and so on) and each
    image is written to ``OUTPUT_DIR`` as ``<code>.<ext>``.

    Args:
        vol_key: Key into ``VOLUMES`` selecting the volume to process.
        already_extracted: Set of figure codes that already have an image.
            It is updated in place with every code saved by this call.

    Returns:
        The number of images written by this call.

    Raises:
        KeyError: If ``vol_key`` is not in ``VOLUMES``.
        Exception: Whatever ``pypdf.PdfReader`` raises when the PDF cannot be
            opened. Errors on individual pages are counted and reported, not
            raised.
    """
    vol = VOLUMES[vol_key]
    print(f"\n--- Procesando {vol['label']} ---")
    print(f" PDF: {vol['path']}")
    # Idempotent; lets this function be called without main() having run.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    pdf = pypdf.PdfReader(vol['path'])
    total_pages = len(pdf.pages)
    # Never index past the last page, even if the configured range is longer.
    end_page = min(vol['end_page'], total_pages - 1)
    extracted = 0
    skipped = 0
    errors = 0
    for page_idx in range(vol['start_page'], end_page + 1):
        if page_idx % 100 == 0:
            print(f" Página {page_idx}/{end_page}... (extraídas: {extracted})")
        try:
            page = pdf.pages[page_idx]
            text = page.extract_text() or ''
            fig_codes = extract_figure_codes(text)
            if not fig_codes:
                continue
            needed_codes = [c for c in fig_codes if c not in already_extracted]
            if not needed_codes:
                # Counted only when every code on the page was already saved.
                skipped += len(fig_codes)
                continue
            images = _collect_page_images(page)
            if not images:
                continue
            # Positional pairing: the i-th needed code gets the i-th image.
            # With fewer images than codes the surplus codes stay
            # unextracted; with more images than codes the surplus images
            # are ignored.
            for code, img_data in zip(needed_codes, images):
                ext = _guess_image_extension(img_data)
                out_path = OUTPUT_DIR / f"{code}.{ext}"
                out_path.write_bytes(img_data)
                already_extracted.add(code)
                extracted += 1
        except Exception as e:
            errors += 1
            # Report only the first few failures to keep the output readable.
            if errors <= 5:
                print(f" Error en página {page_idx}: {e}")
    print(f" Resultado: {extracted} extraídas, {skipped} ya existentes, {errors} errores")
    return extracted
def main():
    """Command-line entry point: extract diagrams from the requested volumes.

    Volume keys are read from the command-line arguments; when none are
    given, volumes '3', '2' and '1' are processed in that order. Codes whose
    image file already exists in ``OUTPUT_DIR`` are collected first and the
    same set is passed to every volume, so a code saved while processing one
    volume is skipped in the volumes processed after it.
    """
    banner = "=" * 70
    requested = sys.argv[1:] or ['3', '2', '1']

    print(banner)
    print("EXTRACTOR DE DIAGRAMAS MOOG")
    print(banner)

    # Make sure the destination exists before listing or writing into it.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Directorio de salida: {OUTPUT_DIR}")

    # Figure codes are recovered from the file names of existing images.
    already_extracted = {
        entry.stem
        for entry in OUTPUT_DIR.iterdir()
        if entry.suffix in ('.jpg', '.png', '.jp2')
    }
    print(f"Ya extraídas: {len(already_extracted)}")

    total = 0
    for vol_key in requested:
        if vol_key in VOLUMES:
            total += extract_volume(vol_key, already_extracted)
        else:
            print(f"Volumen {vol_key} no reconocido, saltando...")

    # The final count covers every entry in the directory, not only images.
    entries_in_dir = sum(1 for _ in OUTPUT_DIR.iterdir())
    print(f"\n{banner}")
    print(f"EXTRACCIÓN COMPLETADA: {total} nuevas imágenes")
    print(f"Total en directorio: {entries_in_dir}")
    print(banner)


if __name__ == '__main__':
    main()