Update multibrand scraper v3 with all RockAuto brands

Added 137 brands total:
- 119 main brands (international)
- 18 regional brands (Mexico, China, etc.)

Features:
- Interactive menu with multiple options
- Search brand by name
- Select by initial letter
- Process range of brands (e.g., 1-10)
- View all brands status with pagination
- Skip wait time with ENTER
- Years: 1975-2026, 5 years per batch
This commit is contained in:
2026-01-19 09:17:54 +00:00
parent 997f777514
commit 61474f7abe

View File

@@ -1,10 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Scraper Multimarca v2 Scraper Multimarca v3 - TODAS LAS MARCAS DE ROCKAUTO
- Marcas: Dodge, Honda, Mitsubishi, Jeep, BMW, Fiat, Hyundai, Infiniti, Kia, Land Rover, Lexus - Incluye todas las marcas con vehiculos en rango 1975-2026
- Procesa de 5 en 5 años - Procesa de 5 en 5 años
- Espera 3 minutos entre lotes (saltable con cualquier tecla) - Espera 3 minutos entre lotes (saltable con ENTER)
- Menú interactivo para seleccionar marca y lote - Menu interactivo para seleccionar marca y lote
- Años: 1975-2026 - Años: 1975-2026
""" """
@@ -28,25 +28,180 @@ DB_PATH = os.path.join(BASE_DIR, "vehicle_database", "vehicle_database.db")
BASE_URL = "https://www.rockauto.com/en/catalog" BASE_URL = "https://www.rockauto.com/en/catalog"
# Marcas a scrapear # TODAS LAS MARCAS DE ROCKAUTO (con vehiculos 1975-2026)
# Organizadas alfabeticamente
BRANDS = [ BRANDS = [
"DODGE", # A
"HONDA", "ABARTH",
"MITSUBISHI", "AC",
"JEEP", "ACURA",
"ALFA ROMEO",
"ALPINE",
"AM GENERAL",
"AMERICAN MOTORS",
"ASTON MARTIN",
"AUDI",
"AUSTIN",
"AUSTIN-HEALEY",
"AVANTI",
# B
"BENTLEY",
"BERTONE",
"BMW", "BMW",
"BRICKLIN",
"BRISTOL",
"BUGATTI",
"BUICK",
"BYD",
# C
"CADILLAC",
"CHECKER",
"CHEVROLET",
"CHRYSLER",
"CITROEN",
"CUPRA",
# D
"DAEWOO",
"DAIHATSU",
"DATSUN",
"DELOREAN",
"DODGE",
# E
"EAGLE",
"EDSEL",
"EXCALIBUR",
# F
"FACEL VEGA",
"FERRARI",
"FIAT", "FIAT",
"FISKER",
"FORD",
"FREIGHTLINER",
# G
"GENESIS",
"GEO",
"GMC",
# H
"HILLMAN",
"HONDA",
"HUMMER",
"HYUNDAI", "HYUNDAI",
# I
"INEOS",
"INFINITI", "INFINITI",
"INTERNATIONAL",
"ISUZU",
# J
"JAGUAR",
"JEEP",
"JENSEN",
# K
"KARMA",
"KENWORTH",
"KIA", "KIA",
# L
"LAFORZA",
"LAMBORGHINI",
"LANCIA",
"LAND ROVER", "LAND ROVER",
"LEXUS" "LEXUS",
"LINCOLN",
"LOTUS",
"LUCID",
# M
"MACK",
"MASERATI",
"MAYBACH",
"MAZDA",
"MCLAREN",
"MERCEDES-BENZ",
"MERCURY",
"MERKUR",
"MG",
"MINI",
"MITSUBISHI",
"MITSUBISHI FUSO",
"MOBILITY VENTURES",
"MORGAN",
# N
"NISSAN",
# O
"OLDSMOBILE",
"OPEL",
# P
"PANOZ",
"PEUGEOT",
"PLYMOUTH",
"POLESTAR",
"PONTIAC",
"PORSCHE",
# Q
"QVALE",
# R
"RAM",
"RENAULT",
"RIVIAN",
"ROLLS-ROYCE",
"ROVER",
# S
"SAAB",
"SALEEN",
"SATURN",
"SCION",
"SEAT",
"SHELBY",
"SMART",
"SPYKER",
"SRT",
"SSANGYONG",
"STERLING",
"STUDEBAKER",
"SUBARU",
"SUNBEAM",
"SUZUKI",
# T
"TESLA",
"TOYOTA",
"TRIUMPH",
"TVR",
# U
"UD",
# V
"VOLKSWAGEN",
"VOLVO",
"VPG",
# W
"WORKHORSE",
# Y
"YUGO",
]
# Marcas adicionales de mercados especificos (Mexico, China, etc.)
BRANDS_REGIONAL = [
"BAIC",
"BESTUNE",
"CHANGAN",
"CHIREY",
"DFSK",
"FAW",
"FOTON",
"GAC",
"GEELY",
"GIANT MOTORS",
"JAC",
"JAECOO",
"JETOUR",
"JMC",
"OMODA",
"SERES",
"VAM",
"VINFAST",
] ]
# Años de 1975 a 2026 (orden descendente) # Años de 1975 a 2026 (orden descendente)
ALL_YEARS = list(range(2026, 1974, -1)) ALL_YEARS = list(range(2026, 1974, -1))
# Configuración de lotes # Configuracion de lotes
BATCH_SIZE = 5 # años por lote BATCH_SIZE = 5 # años por lote
WAIT_TIME = 180 # 3 minutos entre lotes WAIT_TIME = 180 # 3 minutos entre lotes
@@ -59,7 +214,7 @@ session.headers.update({
def check_key_press(): def check_key_press():
"""Verifica si se presionó alguna tecla (non-blocking)""" """Verifica si se presiono alguna tecla (non-blocking)"""
if sys.platform == 'win32': if sys.platform == 'win32':
import msvcrt import msvcrt
if msvcrt.kbhit(): if msvcrt.kbhit():
@@ -246,12 +401,12 @@ def get_brand_batches(conn, brand):
def process_brand(conn, brand, start_batch=1): def process_brand(conn, brand, start_batch=1):
"""Procesa una marca completa desde un lote específico""" """Procesa una marca completa desde un lote especifico"""
print(f"\n{'#'*60}") print(f"\n{'#'*60}")
print(f" PROCESANDO MARCA: {brand}") print(f" PROCESANDO MARCA: {brand}")
print(f"{'#'*60}") print(f"{'#'*60}")
# Verificar qué años ya existen # Verificar que años ya existen
existing = get_existing_years(conn, brand) existing = get_existing_years(conn, brand)
print(f"Años existentes de {brand}: {len(existing)} años") print(f"Años existentes de {brand}: {len(existing)} años")
if existing: if existing:
@@ -261,7 +416,7 @@ def process_brand(conn, brand, start_batch=1):
years_to_process = [y for y in ALL_YEARS if y not in existing] years_to_process = [y for y in ALL_YEARS if y not in existing]
if not years_to_process: if not years_to_process:
print(f"\n[OK] {brand}: Todos los años ya están en la base de datos!") print(f"\n[OK] {brand}: Todos los años ya estan en la base de datos!")
return 0, 0 return 0, 0
print(f"\nAños por procesar para {brand}: {len(years_to_process)}") print(f"\nAños por procesar para {brand}: {len(years_to_process)}")
@@ -288,7 +443,7 @@ def process_brand(conn, brand, start_batch=1):
total_saved += saved total_saved += saved
total_found += found total_found += found
# Si no es el último lote, esperar para cambiar VPN # Si no es el ultimo lote, esperar para cambiar VPN
if i < total_batches: if i < total_batches:
wait_with_skip(WAIT_TIME, f"PAUSA DE {WAIT_TIME//60} MINUTOS - [{brand}] Lotes restantes: {total_batches - i}") wait_with_skip(WAIT_TIME, f"PAUSA DE {WAIT_TIME//60} MINUTOS - [{brand}] Lotes restantes: {total_batches - i}")
@@ -296,139 +451,259 @@ def process_brand(conn, brand, start_batch=1):
def show_main_menu(conn):
    """Run the interactive main menu until the user makes a final choice.

    Args:
        conn: Database connection handed through to the sub-menus.

    Returns:
        A ``(selection, start_batch, brands)`` triple:
        ``(None, None, None)`` when the user picks 0 (exit),
        ``('ALL', 1, all_brands)`` when the user picks 5, or the triple
        returned by a sub-menu (options 2-4) when its first element is
        not ``None``.
    """
    all_brands = BRANDS + BRANDS_REGIONAL
    rule = "="*60

    # Options 2, 3 and 4 share one shape: open a sub-menu and hand its
    # result back only when the user actually selected something.
    submenus = {
        '2': search_brand_menu,
        '3': select_by_letter_menu,
        '4': select_range_menu,
    }

    menu_lines = (
        "\n" + rule,
        " SCRAPER MULTIMARCA v3 - MENU PRINCIPAL",
        rule,
        f"\n Total de marcas disponibles: {len(all_brands)}",
        "\n Opciones:",
        " 1. Ver estado de todas las marcas",
        " 2. Buscar marca por nombre",
        " 3. Seleccionar marca por letra inicial",
        " 4. Procesar multiples marcas (rango)",
        " 5. Procesar TODAS las marcas pendientes",
        " 0. Salir",
        rule,
    )

    while True:
        for line in menu_lines:
            print(line)

        choice = input("\nSelecciona opcion: ").strip()

        if choice == '0':
            return None, None, None
        if choice == '5':
            return 'ALL', 1, all_brands
        if choice == '1':
            # Informational screen only; come back to the menu afterwards.
            show_all_brands_status(conn, all_brands)
            continue

        submenu = submenus.get(choice)
        if submenu is None:
            print("Opcion invalida")
            continue

        result = submenu(conn, all_brands)
        if result[0] is not None:
            return result
def show_all_brands_status(conn, all_brands):
    """Print a paginated status table for every brand in ``all_brands``.

    Each row shows the brand's 1-based position, its name, how many years
    ``get_brand_batches`` reports as existing, and either ``COMPLETO`` or
    the count of pending years and batches. The user pages with N / P and
    leaves with Q or an empty line.

    Args:
        conn: Database connection passed to ``get_brand_batches``.
        all_brands: Sequence of brand names to list.
    """
    rule = "-"*70
    print("\n" + rule)
    print(" ESTADO DE TODAS LAS MARCAS")
    print(rule)

    page_size = 20
    brand_count = len(all_brands)
    total_pages = -(-brand_count // page_size)  # ceiling division
    page = 0

    while True:
        first = page * page_size
        last = min(first + page_size, brand_count)

        print(f"\n Pagina {page + 1}/{total_pages}")
        print(rule)
        print(f" {'#':<4} {'MARCA':<20} {'EXISTENTES':<12} {'ESTADO':<30}")
        print(rule)

        # Row numbers continue across pages, hence the enumerate offset.
        for number, brand in enumerate(all_brands[first:last], first + 1):
            batches, existing = get_brand_batches(conn, brand)
            if batches:
                years_pending = sum(map(len, batches))
                status = f"{years_pending} años pend. ({len(batches)} lotes)"
            else:
                status = "COMPLETO"
            print(f" {number:<4} {brand:<20} {len(existing):<12} {status:<30}")

        print(rule)
        print(" [N] Siguiente | [P] Anterior | [Q] Volver")

        nav = input("\nNavegacion: ").strip().upper()
        if nav in ('Q', ''):
            break
        if nav == 'N' and page < total_pages - 1:
            page += 1
        elif nav == 'P' and page > 0:
            page -= 1
        # Anything else (or N/P at an edge) just redraws the current page.
def search_brand_menu(conn, all_brands):
    """Let the user find a brand by (partial) name and pick one match.

    The search text is upper-cased and matched as a substring against each
    entry of ``all_brands``.

    Args:
        conn: Database connection passed to the batch helpers.
        all_brands: Sequence of brand names to search.

    Returns:
        ``(None, None, None)`` when the search is empty, nothing matches,
        or the user backs out with 0 / ENTER; otherwise whatever
        ``select_batch_for_brand`` returns for the chosen brand.
    """
    print("\n" + "="*60)
    print(" BUSCAR MARCA")
    print("="*60)

    search = input("\nIngresa nombre de marca (o parte): ").strip().upper()
    if not search:
        return None, None, None

    matches = [b for b in all_brands if search in b]
    if not matches:
        print(f"\n No se encontraron marcas con '{search}'")
        input("\nPresiona ENTER para continuar...")
        return None, None, None

    print(f"\n Marcas encontradas ({len(matches)}):")
    for i, brand in enumerate(matches, 1):
        batches, _existing = get_brand_batches(conn, brand)
        status = "COMPLETO" if not batches else f"{len(batches)} lotes pendientes"
        print(f" {i}. {brand} - {status}")

    print("\n 0. Volver")

    while True:
        choice = input("\nSelecciona marca: ").strip()
        if choice in ('0', ''):
            return None, None, None

        # Only the numeric parse is guarded: a ValueError raised further
        # down (inside the batch helpers) must not be reported to the user
        # as a typing mistake and silently retried.
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Ingresa un numero valido")
            continue

        if 0 <= idx < len(matches):
            return select_batch_for_brand(conn, matches[idx])
        print("Opcion invalida")
def select_by_letter_menu(conn, all_brands):
    """Let the user pick one brand, or all brands, sharing an initial letter.

    Args:
        conn: Database connection passed to the batch helpers.
        all_brands: Sequence of brand names; the first character of each
            entry defines the set of selectable letters.

    Returns:
        ``(None, None, None)`` when the letter is empty or not available,
        or when the user backs out with 0 / ENTER;
        ``('MULTIPLE', 1, matches)`` when the user answers ``A``;
        otherwise whatever ``select_batch_for_brand`` returns for the
        chosen brand.
    """
    print("\n" + "="*60)
    print(" SELECCIONAR POR LETRA")
    print("="*60)

    # Initial letters that actually occur in the brand list.
    letters = sorted({b[0] for b in all_brands})
    print("\n Letras disponibles:")
    print(f" {' '.join(letters)}")

    letter = input("\nIngresa letra: ").strip().upper()
    if not letter or letter not in letters:
        return None, None, None

    matches = [b for b in all_brands if b.startswith(letter)]

    print(f"\n Marcas con '{letter}' ({len(matches)}):")
    for i, brand in enumerate(matches, 1):
        batches, _existing = get_brand_batches(conn, brand)
        status = "COMPLETO" if not batches else f"{len(batches)} lotes pendientes"
        print(f" {i}. {brand} - {status}")

    print("\n 0. Volver")
    print(f" A. Procesar TODAS las marcas con '{letter}'")

    while True:
        choice = input("\nSelecciona: ").strip().upper()
        if choice in ('0', ''):
            return None, None, None
        if choice == 'A':
            return 'MULTIPLE', 1, matches

        # Guard only the numeric parse so that a ValueError coming from
        # the batch helpers is not mistaken for bad user input.
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Ingresa un numero valido o 'A'")
            continue

        if 0 <= idx < len(matches):
            return select_batch_for_brand(conn, matches[idx])
        print("Opcion invalida")
def select_range_menu(conn, all_brands):
    """Let the user choose a contiguous, 1-based range of brands to process.

    The brands are listed in a four-column grid, then the user enters
    either ``start-end`` or a single number and confirms with ``S``.

    Args:
        conn: Unused here; kept so every sub-menu shares one signature.
        all_brands: Sequence of brand names, shown numbered from 1.

    Returns:
        ``('MULTIPLE', 1, selected)`` when a valid range is confirmed,
        otherwise ``(None, None, None)`` (empty input, bad format, range
        out of bounds, or the user did not confirm).
    """
    print("\n" + "="*60)
    print(" SELECCIONAR RANGO DE MARCAS")
    print("="*60)

    # Four brands per row; every fourth entry ends the row itself.
    print("\n Marcas disponibles:")
    for i, brand in enumerate(all_brands, 1):
        if i % 4 == 0:
            print(f" {i:3}. {brand}")
        else:
            print(f" {i:3}. {brand:<18}", end="")
    # Terminate the last row only if it was left partial; an unconditional
    # print() would add a stray blank line for multiples of four.
    if len(all_brands) % 4:
        print()

    print(f"\n Total: {len(all_brands)} marcas")
    print("\n Ingresa rango (ej: 1-10, 5-20, etc.)")

    range_input = input("\nRango: ").strip()
    if not range_input:
        return None, None, None

    # int() failures and a wrong number of '-' separated parts both
    # surface as ValueError.
    try:
        if '-' in range_input:
            start, end = map(int, range_input.split('-'))
        else:
            start = end = int(range_input)
    except ValueError:
        print("Formato invalido. Usa: inicio-fin")
        return None, None, None

    if not 1 <= start <= end <= len(all_brands):
        print("Rango invalido")
        return None, None, None

    selected = all_brands[start-1:end]
    print(f"\n Marcas seleccionadas ({len(selected)}):")
    for b in selected:
        print(f" - {b}")

    confirm = input("\nProcesar estas marcas? (S/N): ").strip().upper()
    if confirm == 'S':
        return 'MULTIPLE', 1, selected
    return None, None, None
def select_batch_for_brand(conn, brand):
    """Ask which pending batch of ``brand`` processing should start from.

    Args:
        conn: Database connection passed to ``get_brand_batches``.
        brand: Brand name to look up.

    Returns:
        ``(brand, batch_num, None)`` for a valid 1-based batch number, or
        ``(None, None, None)`` when the brand has no pending batches or
        the user answers 0 / ENTER.
    """
    batches, existing = get_brand_batches(conn, brand)

    if not batches:
        print(f"\n {brand} ya esta completo!")
        input("\nPresiona ENTER para continuar...")
        return None, None, None

    rule = '='*60
    total = len(batches)

    print(f"\n{rule}")
    print(f" LOTES PARA {brand}")
    print(rule)
    print(f"\n Años existentes: {len(existing)}")
    print(f" Lotes pendientes: {total}")

    # One line per pending batch: first and last year plus its size.
    print("\n Lotes disponibles:")
    for number, years in enumerate(batches, 1):
        print(f" {number}. Años {years[0]} - {years[-1]} ({len(years)} años)")

    print("\n 0. Volver")

    while True:
        answer = input(f"\nComenzar desde lote (1-{total}): ").strip()
        if answer in ('0', ''):
            return None, None, None

        try:
            batch_num = int(answer)
        except ValueError:
            print("Ingresa un numero valido")
            continue

        if 1 <= batch_num <= total:
            return brand, batch_num, None
        print(f"Ingresa un numero entre 1 y {total}")
def main(): def main():
all_brands = BRANDS + BRANDS_REGIONAL
print("="*60) print("="*60)
print(" SCRAPER MULTIMARCA v2") print(" SCRAPER MULTIMARCA v3 - ROCKAUTO")
print("="*60) print("="*60)
print(f" Marcas: {len(BRANDS)}") print(f" Total marcas: {len(all_brands)}")
for brand in BRANDS: print(f" Marcas principales: {len(BRANDS)}")
print(f" - {brand}") print(f" Marcas regionales: {len(BRANDS_REGIONAL)}")
print(f" Años: 1975-2026 | Lotes de {BATCH_SIZE} años") print(f" Años: 1975-2026 | Lotes de {BATCH_SIZE} años")
print(f" Pausa entre lotes: {WAIT_TIME//60} minutos") print(f" Pausa entre lotes: {WAIT_TIME//60} minutos")
print(" >>> Presiona ENTER para saltar esperas <<<") print(" >>> Presiona ENTER para saltar esperas <<<")
@@ -444,10 +719,10 @@ def main():
conn = sqlite3.connect(DB_PATH) conn = sqlite3.connect(DB_PATH)
# Menú principal # Menu principal
selected_brand, start_batch = show_main_menu(conn) selected, start_batch, brands_list = show_main_menu(conn)
if selected_brand is None and start_batch is None: if selected is None:
print("\nSaliendo...") print("\nSaliendo...")
conn.close() conn.close()
return return
@@ -456,28 +731,35 @@ def main():
grand_total_found = 0 grand_total_found = 0
brand_stats = {} brand_stats = {}
# Determinar qué marcas procesar # Determinar que marcas procesar
if selected_brand == 'ALL': if selected == 'ALL':
# Procesar todas las marcas pendientes brands_to_process = brands_list or all_brands
brands_to_process = BRANDS start_batches = {brand: 1 for brand in brands_to_process}
start_batches = {brand: 1 for brand in BRANDS} elif selected == 'MULTIPLE':
elif selected_brand: brands_to_process = brands_list
# Solo procesar la marca seleccionada desde el lote indicado start_batches = {brand: 1 for brand in brands_to_process}
brands_to_process = [selected_brand] elif selected:
start_batches = {selected_brand: start_batch} brands_to_process = [selected]
start_batches = {selected: start_batch}
else: else:
conn.close() conn.close()
return return
for brand in brands_to_process: print(f"\n{'='*60}")
print(f" INICIANDO PROCESAMIENTO")
print(f" Marcas a procesar: {len(brands_to_process)}")
print(f"{'='*60}")
for idx, brand in enumerate(brands_to_process, 1):
print(f"\n[{idx}/{len(brands_to_process)}] ", end="")
saved, found = process_brand(conn, brand, start_batches.get(brand, 1)) saved, found = process_brand(conn, brand, start_batches.get(brand, 1))
brand_stats[brand] = {'saved': saved, 'found': found} brand_stats[brand] = {'saved': saved, 'found': found}
grand_total_saved += saved grand_total_saved += saved
grand_total_found += found grand_total_found += found
# Pausa entre marcas (si hay otra marca por procesar) # Pausa entre marcas (si hay otra marca por procesar)
if brand != brands_to_process[-1]: if idx < len(brands_to_process):
next_brand = brands_to_process[brands_to_process.index(brand)+1] next_brand = brands_to_process[idx]
wait_with_skip(WAIT_TIME, f"PAUSA ENTRE MARCAS - Siguiente: {next_brand}") wait_with_skip(WAIT_TIME, f"PAUSA ENTRE MARCAS - Siguiente: {next_brand}")
conn.close() conn.close()
@@ -485,14 +767,22 @@ def main():
print("\n" + "="*60) print("\n" + "="*60)
print(" RESUMEN FINAL") print(" RESUMEN FINAL")
print("="*60) print("="*60)
for brand, stats in brand_stats.items():
if stats['found'] > 0 or stats['saved'] > 0: # Solo mostrar marcas con datos
brands_with_data = {k: v for k, v in brand_stats.items() if v['found'] > 0 or v['saved'] > 0}
if brands_with_data:
for brand, stats in brands_with_data.items():
print(f" {brand}:") print(f" {brand}:")
print(f" Encontrados: {stats['found']}") print(f" Encontrados: {stats['found']}")
print(f" Nuevos guardados: {stats['saved']}") print(f" Nuevos guardados: {stats['saved']}")
else:
print(" No se encontraron nuevos datos")
print("-"*60) print("-"*60)
print(f" TOTAL:") print(f" TOTAL:")
print(f" Vehículos encontrados: {grand_total_found}") print(f" Marcas procesadas: {len(brands_to_process)}")
print(f" Vehiculos encontrados: {grand_total_found}")
print(f" Nuevos guardados: {grand_total_saved}") print(f" Nuevos guardados: {grand_total_saved}")
print("="*60) print("="*60)