Update multibrand scraper v3 with all RockAuto brands

Added 137 brands total:
- 119 main brands (international)
- 18 regional brands (Mexico, China, etc.)

Features:
- Interactive menu with multiple options
- Search brand by name
- Select by initial letter
- Process range of brands (e.g., 1-10)
- View all brands status with pagination
- Skip wait time with ENTER
- Years: 1975-2026, 5 years per batch
This commit is contained in:
2026-01-19 09:17:54 +00:00
parent 997f777514
commit 61474f7abe

View File

@@ -1,10 +1,10 @@
#!/usr/bin/env python3
"""
Scraper Multimarca v2
- Marcas: Dodge, Honda, Mitsubishi, Jeep, BMW, Fiat, Hyundai, Infiniti, Kia, Land Rover, Lexus
Scraper Multimarca v3 - TODAS LAS MARCAS DE ROCKAUTO
- Incluye todas las marcas con vehiculos en rango 1975-2026
- Procesa de 5 en 5 años
- Espera 3 minutos entre lotes (saltable con cualquier tecla)
- Menú interactivo para seleccionar marca y lote
- Espera 3 minutos entre lotes (saltable con ENTER)
- Menu interactivo para seleccionar marca y lote
- Años: 1975-2026
"""
@@ -28,25 +28,180 @@ DB_PATH = os.path.join(BASE_DIR, "vehicle_database", "vehicle_database.db")
BASE_URL = "https://www.rockauto.com/en/catalog"
# Every RockAuto make that has vehicles in the 1975-2026 range.
# Grouped by initial letter; each make appears exactly once.
BRANDS = [
    # A
    "ABARTH",
    "AC",
    "ACURA",
    "ALFA ROMEO",
    "ALPINE",
    "AM GENERAL",
    "AMERICAN MOTORS",
    "ASTON MARTIN",
    "AUDI",
    "AUSTIN",
    "AUSTIN-HEALEY",
    "AVANTI",
    # B
    "BENTLEY",
    "BERTONE",
    "BMW",
    "BRICKLIN",
    "BRISTOL",
    "BUGATTI",
    "BUICK",
    "BYD",
    # C
    "CADILLAC",
    "CHECKER",
    "CHEVROLET",
    "CHRYSLER",
    "CITROEN",
    "CUPRA",
    # D
    "DAEWOO",
    "DAIHATSU",
    "DATSUN",
    "DELOREAN",
    "DODGE",
    # E
    "EAGLE",
    "EDSEL",
    "EXCALIBUR",
    # F
    "FACEL VEGA",
    "FERRARI",
    "FIAT",
    "FISKER",
    "FORD",
    "FREIGHTLINER",
    # G
    "GENESIS",
    "GEO",
    "GMC",
    # H
    "HILLMAN",
    "HONDA",
    "HUMMER",
    "HYUNDAI",
    # I
    "INEOS",
    "INFINITI",
    "INTERNATIONAL",
    "ISUZU",
    # J
    "JAGUAR",
    "JEEP",
    "JENSEN",
    # K
    "KARMA",
    "KENWORTH",
    "KIA",
    # L
    "LAFORZA",
    "LAMBORGHINI",
    "LANCIA",
    "LAND ROVER",
    "LEXUS",
    "LINCOLN",
    "LOTUS",
    "LUCID",
    # M
    "MACK",
    "MASERATI",
    "MAYBACH",
    "MAZDA",
    "MCLAREN",
    "MERCEDES-BENZ",
    "MERCURY",
    "MERKUR",
    "MG",
    "MINI",
    "MITSUBISHI",
    "MITSUBISHI FUSO",
    "MOBILITY VENTURES",
    "MORGAN",
    # N
    "NISSAN",
    # O
    "OLDSMOBILE",
    "OPEL",
    # P
    "PANOZ",
    "PEUGEOT",
    "PLYMOUTH",
    "POLESTAR",
    "PONTIAC",
    "PORSCHE",
    # Q
    "QVALE",
    # R
    "RAM",
    "RENAULT",
    "RIVIAN",
    "ROLLS-ROYCE",
    "ROVER",
    # S
    "SAAB",
    "SALEEN",
    "SATURN",
    "SCION",
    "SEAT",
    "SHELBY",
    "SMART",
    "SPYKER",
    "SRT",
    "SSANGYONG",
    "STERLING",
    "STUDEBAKER",
    "SUBARU",
    "SUNBEAM",
    "SUZUKI",
    # T
    "TESLA",
    "TOYOTA",
    "TRIUMPH",
    "TVR",
    # U
    "UD",
    # V
    "VOLKSWAGEN",
    "VOLVO",
    "VPG",
    # W
    "WORKHORSE",
    # Y
    "YUGO",
]
# Additional makes from specific markets (Mexico, China, etc.)
BRANDS_REGIONAL = [
"BAIC",
"BESTUNE",
"CHANGAN",
"CHIREY",
"DFSK",
"FAW",
"FOTON",
"GAC",
"GEELY",
"GIANT MOTORS",
"JAC",
"JAECOO",
"JETOUR",
"JMC",
"OMODA",
"SERES",
"VAM",
"VINFAST",
]
# Model years 1975 through 2026, newest first
ALL_YEARS = list(range(2026, 1974, -1))
# Batch configuration
BATCH_SIZE = 5 # years per batch
WAIT_TIME = 180 # 3 minutes between batches
@@ -59,7 +214,7 @@ session.headers.update({
def check_key_press():
"""Verifica si se presionó alguna tecla (non-blocking)"""
"""Verifica si se presiono alguna tecla (non-blocking)"""
if sys.platform == 'win32':
import msvcrt
if msvcrt.kbhit():
@@ -246,12 +401,12 @@ def get_brand_batches(conn, brand):
def process_brand(conn, brand, start_batch=1):
"""Procesa una marca completa desde un lote específico"""
"""Procesa una marca completa desde un lote especifico"""
print(f"\n{'#'*60}")
print(f" PROCESANDO MARCA: {brand}")
print(f"{'#'*60}")
# Verificar qué años ya existen
# Verificar que años ya existen
existing = get_existing_years(conn, brand)
print(f"Años existentes de {brand}: {len(existing)} años")
if existing:
@@ -261,7 +416,7 @@ def process_brand(conn, brand, start_batch=1):
years_to_process = [y for y in ALL_YEARS if y not in existing]
if not years_to_process:
print(f"\n[OK] {brand}: Todos los años ya están en la base de datos!")
print(f"\n[OK] {brand}: Todos los años ya estan en la base de datos!")
return 0, 0
print(f"\nAños por procesar para {brand}: {len(years_to_process)}")
@@ -288,7 +443,7 @@ def process_brand(conn, brand, start_batch=1):
total_saved += saved
total_found += found
# Si no es el último lote, esperar para cambiar VPN
# Si no es el ultimo lote, esperar para cambiar VPN
if i < total_batches:
wait_with_skip(WAIT_TIME, f"PAUSA DE {WAIT_TIME//60} MINUTOS - [{brand}] Lotes restantes: {total_batches - i}")
@@ -296,139 +451,259 @@ def process_brand(conn, brand, start_batch=1):
def show_main_menu(conn):
    """Run the interactive main menu until the user picks something to process.

    Args:
        conn: Database connection, passed through unchanged to the sub-menus.

    Returns:
        A 3-tuple ``(selected, start_batch, brands_list)``:

        * ``(None, None, None)`` when the user chooses option 0 (exit);
        * ``('ALL', 1, all_brands)`` when the user chooses option 5;
        * otherwise the tuple returned by the chosen sub-menu (options 2-4),
          but only when its first element is not ``None``; a ``None`` first
          element means "go back" and the menu is shown again.
    """
    all_brands = BRANDS + BRANDS_REGIONAL

    # Options 2-4 share one calling convention and one "first element is not
    # None" acceptance rule, so they are dispatched through a table.
    submenus = {
        '2': search_brand_menu,
        '3': select_by_letter_menu,
        '4': select_range_menu,
    }

    while True:
        print("\n" + "="*60)
        print(" SCRAPER MULTIMARCA v3 - MENU PRINCIPAL")
        print("="*60)
        print(f"\n Total de marcas disponibles: {len(all_brands)}")
        print("\n Opciones:")
        print(" 1. Ver estado de todas las marcas")
        print(" 2. Buscar marca por nombre")
        print(" 3. Seleccionar marca por letra inicial")
        print(" 4. Procesar multiples marcas (rango)")
        print(" 5. Procesar TODAS las marcas pendientes")
        print(" 0. Salir")
        print("="*60)

        choice = input("\nSelecciona opcion: ").strip()

        if choice == '0':
            return None, None, None
        if choice == '1':
            # Purely informational; always falls back to this menu.
            show_all_brands_status(conn, all_brands)
        elif choice in submenus:
            result = submenus[choice](conn, all_brands)
            if result[0] is not None:
                return result
        elif choice == '5':
            return 'ALL', 1, all_brands
        else:
            print("Opcion invalida")
def show_all_brands_status(conn, all_brands):
    """Print a paginated status table for every brand.

    For each brand on the current page, ``get_brand_batches(conn, brand)`` is
    queried and the number of existing years plus the pending years/batches
    are printed. The user navigates with N (next), P (previous) and Q or an
    empty line (back).

    Args:
        conn: Database connection handed to ``get_brand_batches``.
        all_brands: Sequence of brand names to list.

    Returns:
        None. The function only prints and reads navigation input.
    """
    print("\n" + "-"*70)
    print(" ESTADO DE TODAS LAS MARCAS")
    print("-"*70)

    page_size = 20
    # Ceiling division; clamp to 1 so an empty list shows "1/1", not "1/0".
    total_pages = max(1, (len(all_brands) + page_size - 1) // page_size)
    current_page = 0

    while True:
        start_idx = current_page * page_size
        end_idx = min(start_idx + page_size, len(all_brands))

        print(f"\n Pagina {current_page + 1}/{total_pages}")
        print("-"*70)
        print(f" {'#':<4} {'MARCA':<20} {'EXISTENTES':<12} {'ESTADO':<30}")
        print("-"*70)

        # Row numbers are global (1-based across all pages), not per page.
        for i, brand in enumerate(all_brands[start_idx:end_idx], start_idx + 1):
            batches, existing = get_brand_batches(conn, brand)
            years_pending = sum(len(b) for b in batches)
            if not batches:
                status = "COMPLETO"
            else:
                status = f"{years_pending} años pend. ({len(batches)} lotes)"
            print(f" {i:<4} {brand:<20} {len(existing):<12} {status:<30}")

        print("-"*70)
        print(" [N] Siguiente | [P] Anterior | [Q] Volver")

        nav = input("\nNavegacion: ").strip().upper()
        if nav == 'N' and current_page < total_pages - 1:
            current_page += 1
        elif nav == 'P' and current_page > 0:
            current_page -= 1
        elif nav in ('Q', ''):
            break
        # Any other key (or N/P at the edges) just redraws the current page.
def search_brand_menu(conn, all_brands):
    """Let the user find a brand by (partial) name and pick a starting batch.

    The search term is upper-cased and matched as a substring against
    ``all_brands``. Matching brands are listed with their pending-batch
    status, and the chosen one is handed to ``select_batch_for_brand``.

    Args:
        conn: Database connection for ``get_brand_batches`` and
            ``select_batch_for_brand``.
        all_brands: Sequence of brand names to search.

    Returns:
        ``(None, None, None)`` when the search is empty, nothing matches, or
        the user backs out; otherwise whatever ``select_batch_for_brand``
        returns for the chosen brand.
    """
    print("\n" + "="*60)
    print(" BUSCAR MARCA")
    print("="*60)

    search = input("\nIngresa nombre de marca (o parte): ").strip().upper()
    if not search:
        return None, None, None

    matches = [b for b in all_brands if search in b]
    if not matches:
        print(f"\n No se encontraron marcas con '{search}'")
        input("\nPresiona ENTER para continuar...")
        return None, None, None

    print(f"\n Marcas encontradas ({len(matches)}):")
    for i, brand in enumerate(matches, 1):
        batches, _existing = get_brand_batches(conn, brand)
        status = "COMPLETO" if not batches else f"{len(batches)} lotes pendientes"
        print(f" {i}. {brand} - {status}")
    print("\n 0. Volver")

    while True:
        choice = input("\nSelecciona marca: ").strip()
        if choice in ('0', ''):
            return None, None, None
        # Only the int() conversion is guarded, so a ValueError raised inside
        # select_batch_for_brand is not mistaken for bad menu input.
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Ingresa un numero valido")
            continue
        if 0 <= idx < len(matches):
            return select_batch_for_brand(conn, matches[idx])
        print("Opcion invalida")
def select_by_letter_menu(conn, all_brands):
    """Let the user pick one brand, or all brands, by initial letter.

    Args:
        conn: Database connection for ``get_brand_batches`` and
            ``select_batch_for_brand``.
        all_brands: Sequence of brand names.

    Returns:
        * ``(None, None, None)`` when the letter is empty or unknown, or the
          user backs out;
        * ``('MULTIPLE', 1, matches)`` when the user enters ``A`` to process
          every brand starting with the chosen letter;
        * otherwise whatever ``select_batch_for_brand`` returns for the single
          chosen brand.
    """
    print("\n" + "="*60)
    print(" SELECCIONAR POR LETRA")
    print("="*60)

    # Initial letters actually present; empty names are skipped so b[0]
    # cannot raise IndexError.
    letters = sorted({b[0] for b in all_brands if b})
    print("\n Letras disponibles:")
    print(f" {' '.join(letters)}")

    letter = input("\nIngresa letra: ").strip().upper()
    # An empty string is never in `letters`, so this also covers blank input.
    if letter not in letters:
        return None, None, None

    matches = [b for b in all_brands if b.startswith(letter)]
    print(f"\n Marcas con '{letter}' ({len(matches)}):")
    for i, brand in enumerate(matches, 1):
        batches, _existing = get_brand_batches(conn, brand)
        status = "COMPLETO" if not batches else f"{len(batches)} lotes pendientes"
        print(f" {i}. {brand} - {status}")
    print("\n 0. Volver")
    print(f" A. Procesar TODAS las marcas con '{letter}'")

    while True:
        choice = input("\nSelecciona: ").strip().upper()
        if choice in ('0', ''):
            return None, None, None
        if choice == 'A':
            return 'MULTIPLE', 1, matches
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Ingresa un numero valido o 'A'")
            continue
        if 0 <= idx < len(matches):
            return select_batch_for_brand(conn, matches[idx])
        print("Opcion invalida")
def select_range_menu(conn, all_brands):
    """Let the user choose a contiguous, 1-based range of brands to process.

    All brands are printed four per row with their index. The user then
    enters either ``start-end`` or a single index and confirms with ``S``.

    Args:
        conn: Unused here; accepted so all sub-menus share one signature.
        all_brands: Sequence of brand names, indexed from 1 in the UI.

    Returns:
        ``('MULTIPLE', 1, selected)`` when a valid range is confirmed with
        ``S``; ``(None, None, None)`` for empty input, a malformed or
        out-of-bounds range, or a declined confirmation.
    """
    print("\n" + "="*60)
    print(" SELECCIONAR RANGO DE MARCAS")
    print("="*60)

    # Four-column listing: every fourth entry ends the row.
    print("\n Marcas disponibles:")
    for i, brand in enumerate(all_brands, 1):
        if i % 4 == 0:
            print(f" {i:3}. {brand}")
        else:
            print(f" {i:3}. {brand:<18}", end="")
    print()

    print(f"\n Total: {len(all_brands)} marcas")
    print("\n Ingresa rango (ej: 1-10, 5-20, etc.)")

    range_input = input("\nRango: ").strip()
    if not range_input:
        return None, None, None

    try:
        if '-' in range_input:
            # More than two parts (e.g. "1-2-3") fails the unpacking with
            # ValueError and is reported as a format error below.
            start, end = map(int, range_input.split('-'))
        else:
            start = end = int(range_input)
    except ValueError:
        print("Formato invalido. Usa: inicio-fin")
        return None, None, None

    if not 1 <= start <= end <= len(all_brands):
        print("Rango invalido")
        return None, None, None

    selected = all_brands[start - 1:end]
    print(f"\n Marcas seleccionadas ({len(selected)}):")
    for b in selected:
        print(f" - {b}")

    confirm = input("\nProcesar estas marcas? (S/N): ").strip().upper()
    if confirm == 'S':
        return 'MULTIPLE', 1, selected
    return None, None, None
def select_batch_for_brand(conn, brand):
    """Ask which pending batch to start from for a single brand.

    Args:
        conn: Database connection handed to ``get_brand_batches``.
        brand: Brand name to inspect.

    Returns:
        ``(brand, batch_num, None)`` with a 1-based ``batch_num`` when the user
        picks a valid batch; ``(None, None, None)`` when the brand has no
        pending batches or the user enters ``0`` or an empty line.
    """
    batches, existing = get_brand_batches(conn, brand)

    if not batches:
        print(f"\n {brand} ya esta completo!")
        input("\nPresiona ENTER para continuar...")
        return None, None, None

    print(f"\n{'='*60}")
    print(f" LOTES PARA {brand}")
    print(f"{'='*60}")
    print(f"\n Años existentes: {len(existing)}")
    print(f" Lotes pendientes: {len(batches)}")

    # Each batch is shown by its first and last year and its size.
    print("\n Lotes disponibles:")
    for j, batch in enumerate(batches, 1):
        print(f" {j}. Años {batch[0]} - {batch[-1]} ({len(batch)} años)")
    print("\n 0. Volver")

    while True:
        batch_choice = input(f"\nComenzar desde lote (1-{len(batches)}): ").strip()
        if batch_choice in ('0', ''):
            return None, None, None
        try:
            batch_num = int(batch_choice)
        except ValueError:
            print("Ingresa un numero valido")
            continue
        if 1 <= batch_num <= len(batches):
            # Always a 3-tuple, the same shape the other sub-menus return.
            return brand, batch_num, None
        print(f"Ingresa un numero entre 1 y {len(batches)}")
def main():
all_brands = BRANDS + BRANDS_REGIONAL
print("="*60)
print(" SCRAPER MULTIMARCA v2")
print(" SCRAPER MULTIMARCA v3 - ROCKAUTO")
print("="*60)
print(f" Marcas: {len(BRANDS)}")
for brand in BRANDS:
print(f" - {brand}")
print(f" Total marcas: {len(all_brands)}")
print(f" Marcas principales: {len(BRANDS)}")
print(f" Marcas regionales: {len(BRANDS_REGIONAL)}")
print(f" Años: 1975-2026 | Lotes de {BATCH_SIZE} años")
print(f" Pausa entre lotes: {WAIT_TIME//60} minutos")
print(" >>> Presiona ENTER para saltar esperas <<<")
@@ -444,10 +719,10 @@ def main():
conn = sqlite3.connect(DB_PATH)
# Menú principal
selected_brand, start_batch = show_main_menu(conn)
# Menu principal
selected, start_batch, brands_list = show_main_menu(conn)
if selected_brand is None and start_batch is None:
if selected is None:
print("\nSaliendo...")
conn.close()
return
@@ -456,28 +731,35 @@ def main():
grand_total_found = 0
brand_stats = {}
# Determinar qué marcas procesar
if selected_brand == 'ALL':
# Procesar todas las marcas pendientes
brands_to_process = BRANDS
start_batches = {brand: 1 for brand in BRANDS}
elif selected_brand:
# Solo procesar la marca seleccionada desde el lote indicado
brands_to_process = [selected_brand]
start_batches = {selected_brand: start_batch}
# Determinar que marcas procesar
if selected == 'ALL':
brands_to_process = brands_list or all_brands
start_batches = {brand: 1 for brand in brands_to_process}
elif selected == 'MULTIPLE':
brands_to_process = brands_list
start_batches = {brand: 1 for brand in brands_to_process}
elif selected:
brands_to_process = [selected]
start_batches = {selected: start_batch}
else:
conn.close()
return
for brand in brands_to_process:
print(f"\n{'='*60}")
print(f" INICIANDO PROCESAMIENTO")
print(f" Marcas a procesar: {len(brands_to_process)}")
print(f"{'='*60}")
for idx, brand in enumerate(brands_to_process, 1):
print(f"\n[{idx}/{len(brands_to_process)}] ", end="")
saved, found = process_brand(conn, brand, start_batches.get(brand, 1))
brand_stats[brand] = {'saved': saved, 'found': found}
grand_total_saved += saved
grand_total_found += found
# Pausa entre marcas (si hay otra marca por procesar)
if brand != brands_to_process[-1]:
next_brand = brands_to_process[brands_to_process.index(brand)+1]
if idx < len(brands_to_process):
next_brand = brands_to_process[idx]
wait_with_skip(WAIT_TIME, f"PAUSA ENTRE MARCAS - Siguiente: {next_brand}")
conn.close()
@@ -485,14 +767,22 @@ def main():
print("\n" + "="*60)
print(" RESUMEN FINAL")
print("="*60)
for brand, stats in brand_stats.items():
if stats['found'] > 0 or stats['saved'] > 0:
# Solo mostrar marcas con datos
brands_with_data = {k: v for k, v in brand_stats.items() if v['found'] > 0 or v['saved'] > 0}
if brands_with_data:
for brand, stats in brands_with_data.items():
print(f" {brand}:")
print(f" Encontrados: {stats['found']}")
print(f" Nuevos guardados: {stats['saved']}")
else:
print(" No se encontraron nuevos datos")
print("-"*60)
print(f" TOTAL:")
print(f" Vehículos encontrados: {grand_total_found}")
print(f" Marcas procesadas: {len(brands_to_process)}")
print(f" Vehiculos encontrados: {grand_total_found}")
print(f" Nuevos guardados: {grand_total_saved}")
print("="*60)