#!/usr/bin/env python3
"""
FASE 2: Populate cross-references and aftermarket parts

This script creates FASE 2 tables and populates them with manufacturers,
aftermarket part alternatives, and cross-references.

All generated part numbers, prices, warranties and cross-references are
synthetic: they are produced with the ``random`` module, not looked up from
any real catalogue.
"""

import sqlite3
import os
import random
import string
from typing import List, Dict, Tuple, Optional

# Database path configuration (both paths are relative to this script's parent directory)
DB_PATH = os.path.join(os.path.dirname(__file__), '..', 'vehicle_database.db')
SCHEMA_PATH = os.path.join(os.path.dirname(__file__), '..', 'sql', 'schema.sql')


class Fase2Manager:
    """Manager for FASE 2 tables: manufacturers, aftermarket_parts, and cross-references.

    Thin wrapper around a single ``sqlite3`` connection. Call ``connect()``
    before any query/insert method; those methods use ``self.connection``
    directly and will fail if it is still ``None``.
    """

    def __init__(self, db_path: str = DB_PATH):
        self.db_path = db_path
        # Set by connect(); reset to None by disconnect().
        self.connection: Optional[sqlite3.Connection] = None

    def connect(self):
        """Connect to the SQLite database and enable name-based row access."""
        self.connection = sqlite3.connect(self.db_path)
        # sqlite3.Row lets the get_* helpers convert rows with dict(row).
        self.connection.row_factory = sqlite3.Row
        print(f"Connected to database: {self.db_path}")

    def disconnect(self):
        """Close the database connection (no-op when not connected)."""
        if self.connection:
            self.connection.close()
            # Drop the reference so a closed connection is never reused.
            self.connection = None
            print("Disconnected from database")

    def create_fase2_tables(self):
        """Create FASE 2 tables by executing the schema file at SCHEMA_PATH.

        Raises:
            FileNotFoundError: if the schema file does not exist.

        The schema is only executed when a connection is open; otherwise the
        method reads the file and returns without touching the database.
        """
        if not os.path.exists(SCHEMA_PATH):
            raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")

        with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
            schema = f.read()

        if self.connection:
            cursor = self.connection.cursor()
            cursor.executescript(schema)
            self.connection.commit()
            print("FASE 2 tables created successfully")

    def get_manufacturer_by_name(self, name: str) -> Optional[int]:
        """Get manufacturer ID by name, returns None if not found."""
        cursor = self.connection.cursor()
        cursor.execute("SELECT id FROM manufacturers WHERE name = ?", (name,))
        result = cursor.fetchone()
        return result[0] if result else None

    def insert_manufacturer(self, name: str, type_: str, quality_tier: str,
                            country: Optional[str] = None,
                            logo_url: Optional[str] = None,
                            website: Optional[str] = None) -> int:
        """Insert a manufacturer if it doesn't exist, return its ID.

        An existing row is matched by ``name`` only; its other columns are
        left untouched. A new row is committed immediately.
        """
        existing_id = self.get_manufacturer_by_name(name)
        if existing_id is not None:
            print(f" Manufacturer '{name}' already exists (ID: {existing_id})")
            return existing_id

        cursor = self.connection.cursor()
        cursor.execute(
            """INSERT INTO manufacturers
               (name, type, quality_tier, country, logo_url, website)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (name, type_, quality_tier, country, logo_url, website)
        )
        self.connection.commit()
        manufacturer_id = cursor.lastrowid
        print(f" Inserted manufacturer: {name} (ID: {manufacturer_id})")
        return manufacturer_id

    def get_all_parts(self) -> List[Dict]:
        """Get all parts from the parts table, with group and category names.

        Group and category are LEFT JOINed, so ``group_name`` and
        ``category_name`` are None for parts without a matching row.
        """
        cursor = self.connection.cursor()
        cursor.execute("""
            SELECT p.id, p.oem_part_number, p.name, p.name_es, p.group_id,
                   pg.name as group_name, pc.name as category_name
            FROM parts p
            LEFT JOIN part_groups pg ON p.group_id = pg.id
            LEFT JOIN part_categories pc ON pg.category_id = pc.id
        """)
        return [dict(row) for row in cursor.fetchall()]

    def get_aftermarket_part(self, oem_part_id: int, manufacturer_id: int) -> Optional[int]:
        """Return the ID of the aftermarket part for this OEM part/manufacturer pair, or None."""
        cursor = self.connection.cursor()
        cursor.execute(
            """SELECT id FROM aftermarket_parts
               WHERE oem_part_id = ? AND manufacturer_id = ?""",
            (oem_part_id, manufacturer_id)
        )
        result = cursor.fetchone()
        return result[0] if result else None

    def insert_aftermarket_part(self, oem_part_id: int, manufacturer_id: int,
                                part_number: str, name: Optional[str] = None,
                                name_es: Optional[str] = None,
                                quality_tier: str = 'standard',
                                price_usd: Optional[float] = None,
                                warranty_months: int = 12,
                                in_stock: bool = True) -> int:
        """Insert an aftermarket part if it doesn't exist, return its ID.

        Uniqueness is decided on (oem_part_id, manufacturer_id); when a row
        already exists its ID is returned and nothing is written.
        """
        existing_id = self.get_aftermarket_part(oem_part_id, manufacturer_id)
        if existing_id is not None:
            return existing_id

        cursor = self.connection.cursor()
        cursor.execute(
            """INSERT INTO aftermarket_parts
               (oem_part_id, manufacturer_id, part_number, name, name_es,
                quality_tier, price_usd, warranty_months, in_stock)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (oem_part_id, manufacturer_id, part_number, name, name_es,
             quality_tier, price_usd, warranty_months, in_stock)
        )
        self.connection.commit()
        return cursor.lastrowid

    def get_cross_reference(self, part_id: int, cross_reference_number: str) -> Optional[int]:
        """Return the ID of an existing cross-reference for this part/number pair, or None."""
        cursor = self.connection.cursor()
        cursor.execute(
            """SELECT id FROM part_cross_references
               WHERE part_id = ? AND cross_reference_number = ?""",
            (part_id, cross_reference_number)
        )
        result = cursor.fetchone()
        return result[0] if result else None

    def insert_cross_reference(self, part_id: int, cross_reference_number: str,
                               reference_type: str, source: Optional[str] = None,
                               notes: Optional[str] = None) -> int:
        """Insert a cross-reference if it doesn't exist, return its ID.

        Uniqueness is decided on (part_id, cross_reference_number).
        """
        existing_id = self.get_cross_reference(part_id, cross_reference_number)
        if existing_id is not None:
            return existing_id

        cursor = self.connection.cursor()
        cursor.execute(
            """INSERT INTO part_cross_references
               (part_id, cross_reference_number, reference_type, source, notes)
               VALUES (?, ?, ?, ?, ?)""",
            (part_id, cross_reference_number, reference_type, source, notes)
        )
        self.connection.commit()
        return cursor.lastrowid

    def get_manufacturers_by_tier(self, quality_tier: str) -> List[Dict]:
        """Get all manufacturers of a specific quality tier as dicts."""
        cursor = self.connection.cursor()
        cursor.execute(
            "SELECT * FROM manufacturers WHERE quality_tier = ?",
            (quality_tier,)
        )
        return [dict(row) for row in cursor.fetchall()]


# Manufacturer data, keyed by quality tier
MANUFACTURERS_DATA = {
    # OEM manufacturers
    'oem': [
        {'name': 'Toyota', 'country': 'Japan', 'website': 'https://www.toyota.com'},
        {'name': 'Honda', 'country': 'Japan', 'website': 'https://www.honda.com'},
        {'name': 'Ford', 'country': 'USA', 'website': 'https://www.ford.com'},
        {'name': 'GM/ACDelco', 'country': 'USA', 'website': 'https://www.acdelco.com'},
        {'name': 'Volkswagen', 'country': 'Germany', 'website': 'https://www.vw.com'},
        {'name': 'Nissan', 'country': 'Japan', 'website': 'https://www.nissan.com'},
        {'name': 'Hyundai/Kia', 'country': 'South Korea', 'website': 'https://www.hyundai.com'},
    ],
    # Premium aftermarket
    'premium': [
        {'name': 'Bosch', 'country': 'Germany', 'website': 'https://www.bosch.com'},
        {'name': 'Denso', 'country': 'Japan', 'website': 'https://www.denso.com'},
        {'name': 'NGK', 'country': 'Japan', 'website': 'https://www.ngk.com'},
        {'name': 'Akebono', 'country': 'Japan', 'website': 'https://www.akebono.com'},
        {'name': 'Brembo', 'country': 'Italy', 'website': 'https://www.brembo.com'},
        {'name': 'KYB', 'country': 'Japan', 'website': 'https://www.kyb.com'},
        {'name': 'Moog', 'country': 'USA', 'website': 'https://www.moogparts.com'},
        {'name': 'Continental', 'country': 'Germany', 'website': 'https://www.continental.com'},
    ],
    # Standard aftermarket
    'standard': [
        {'name': 'Monroe', 'country': 'USA', 'website': 'https://www.monroe.com'},
        {'name': 'Raybestos', 'country': 'USA', 'website': 'https://www.raybestos.com'},
        {'name': 'Wagner', 'country': 'USA', 'website': 'https://www.wagnerbrake.com'},
        {'name': 'Cardone', 'country': 'USA', 'website': 'https://www.cardone.com'},
        {'name': 'Standard Motor Products', 'country': 'USA', 'website': 'https://www.smpcorp.com'},
    ],
    # Economy aftermarket
    'economy': [
        {'name': 'Fram', 'country': 'USA', 'website': 'https://www.fram.com'},
        {'name': 'WIX', 'country': 'USA', 'website': 'https://www.wixfilters.com'},
        {'name': 'Duralast', 'country': 'USA', 'website': 'https://www.autozone.com'},
        {'name': 'AutoZone Valucraft', 'country': 'USA', 'website': 'https://www.autozone.com'},
    ],
}

# Part number prefixes by manufacturer for realistic generation
MANUFACTURER_PREFIXES = {
    'Bosch': ['0 280', '0 986', '1 457', 'F 00M'],
    'Denso': ['234-', '471-', '210-', '950-'],
    'NGK': ['ZFR', 'BKR', 'LFR', 'TR'],
    'Akebono': ['ACT', 'ASP', 'EUR', 'PRO'],
    'Brembo': ['P 85', 'P 06', 'P 23', 'P 50'],
    'KYB': ['332', '334', '343', '344'],
    'Moog': ['K', 'ES', 'RK', 'CK'],
    'Continental': ['49', '50', '51', 'A1'],
    'Monroe': ['32', '33', '34', '37'],
    'Raybestos': ['FRC', 'SGD', 'ATD', 'PGD'],
    'Wagner': ['QC', 'OEX', 'TQ', 'ZD'],
    'Cardone': ['18-', '19-', '20-', '21-'],
    'Standard Motor Products': ['FD', 'TM', 'AC', 'JH'],
    'Fram': ['PH', 'CA', 'TG', 'XG'],
    'WIX': ['51', '57', '46', '33'],
    'Duralast': ['DL', 'BP', 'AF', 'OF'],
    'AutoZone Valucraft': ['VC', 'VB', 'VA', 'VP'],
}

# Price multipliers by quality tier (relative to a base OEM price)
PRICE_MULTIPLIERS = {
    'premium': (0.75, 1.10),   # 75-110% of OEM price
    'standard': (0.50, 0.75),  # 50-75% of OEM price
    'economy': (0.25, 0.50),   # 25-50% of OEM price
}

# Warranty months by quality tier
WARRANTY_MONTHS = {
    'premium': [24, 36, 48],
    'standard': [12, 18, 24],
    'economy': [6, 12],
}

# Base price ranges (min, max) in USD by keyword.
# ORDER MATTERS: generate_base_price() takes the first keyword that matches,
# so a specific keyword ('oil filter') must come before any generic keyword
# it contains ('filter'), otherwise the specific range is unreachable.
_PRICE_RANGES: Tuple[Tuple[str, Tuple[float, float]], ...] = (
    ('spark plug', (5, 25)),
    ('oil filter', (5, 20)),
    ('air filter', (12, 35)),
    ('filter', (8, 45)),
    ('brake pad', (25, 80)),
    ('brake rotor', (40, 150)),
    ('shock', (50, 200)),
    ('strut', (80, 250)),
    ('sensor', (20, 120)),
    ('alternator', (100, 350)),
    ('starter', (80, 300)),
    ('water pump', (30, 120)),
    ('radiator', (100, 400)),
    ('thermostat', (10, 40)),
    ('belt', (15, 60)),
    ('hose', (10, 50)),
    ('gasket', (5, 80)),
    ('bearing', (15, 100)),
    ('cv joint', (40, 150)),
    ('tie rod', (25, 80)),
    ('ball joint', (30, 100)),
    ('control arm', (60, 200)),
)

# Range used when no keyword matches the part name or category.
_DEFAULT_PRICE_RANGE: Tuple[float, float] = (20, 100)

# (tier key in MANUFACTURERS_DATA, manufacturer type, section heading).
# For every tier the key doubles as the quality_tier value that gets stored.
_MANUFACTURER_SECTIONS = (
    ('oem', 'oem', "\nOEM Manufacturers:"),
    ('premium', 'aftermarket', "\nPremium Aftermarket Manufacturers:"),
    ('standard', 'aftermarket', "\nStandard Aftermarket Manufacturers:"),
    ('economy', 'aftermarket', "\nEconomy Aftermarket Manufacturers:"),
)


def generate_part_number(manufacturer_name: str, oem_number: str) -> str:
    """Generate a realistic-looking aftermarket part number.

    The result is a random prefix for the manufacturer (``'XX'`` when the
    manufacturer has no entry in MANUFACTURER_PREFIXES) followed by digits:
    the first four digits of the OEM number plus two random digits, or five
    random digits when the OEM number contains fewer than four digits.
    A ``None`` OEM number is treated as empty.
    """
    prefixes = MANUFACTURER_PREFIXES.get(manufacturer_name, ['XX'])
    prefix = random.choice(prefixes)

    # Extract numeric portion from OEM number or generate random
    numeric_part = ''.join(filter(str.isdigit, oem_number or ''))
    if len(numeric_part) < 4:
        numeric_part = ''.join(random.choices(string.digits, k=5))
    else:
        # Modify slightly to make it different
        numeric_part = numeric_part[:4] + str(random.randint(0, 99)).zfill(2)

    return f"{prefix}{numeric_part}"


def generate_base_price(part_name: str, category_name: str = None) -> float:
    """Generate a random base price for a part, rounded to 2 decimals.

    The price range is chosen by the first keyword in ``_PRICE_RANGES`` that
    appears (case-insensitively) in the part name or the category name; if
    none matches, ``_DEFAULT_PRICE_RANGE`` is used. ``None`` or empty
    arguments are treated as empty strings.
    """
    part_name_lower = part_name.lower() if part_name else ''
    category_lower = (category_name or '').lower()

    for keyword, (min_price, max_price) in _PRICE_RANGES:
        if keyword in part_name_lower or keyword in category_lower:
            return round(random.uniform(min_price, max_price), 2)

    return round(random.uniform(*_DEFAULT_PRICE_RANGE), 2)


def generate_cross_reference_number(oem_number: str, ref_type: str) -> str:
    """Generate a cross-reference number based on type.

    * ``'oem_alternate'`` - the OEM number with one ASCII digit or letter
      changed. Numbers of two characters or fewer, or with no such
      character, are returned unchanged.
    * ``'supersession'`` - ``SUP-`` plus the last six characters.
    * ``'interchange'`` - ``INT-`` plus up to the first six digits.
    * ``'competitor'`` - ``CMP-`` plus a random letter and four-digit number.
    * anything else - the OEM number itself.

    A ``None`` OEM number is treated as empty.
    """
    oem_number = oem_number or ''

    if ref_type == 'oem_alternate':
        # Slight variation of OEM number
        chars = list(oem_number)
        # Only ASCII digits and letters can be varied; landing on punctuation
        # would hand back the OEM number itself as its own "alternate".
        mutable = [
            i for i, ch in enumerate(chars)
            if ch in string.digits or ch.isalpha()
        ]
        if len(chars) > 2 and mutable:
            idx = random.choice(mutable)
            current = chars[idx]
            if current in string.digits:
                chars[idx] = str((int(current) + 1) % 10)
            else:
                # Exclude the current letter so the result always differs.
                chars[idx] = random.choice(
                    [c for c in string.ascii_uppercase if c != current.upper()]
                )
        return ''.join(chars)

    if ref_type == 'supersession':
        # New part number format
        return f"SUP-{oem_number[-6:]}" if len(oem_number) > 6 else f"SUP-{oem_number}"

    if ref_type == 'interchange':
        # Generic interchange format
        numeric = ''.join(filter(str.isdigit, oem_number))
        return f"INT-{numeric[:6] if len(numeric) > 6 else numeric}"

    if ref_type == 'competitor':
        # Competitor format
        return f"CMP-{random.choice(string.ascii_uppercase)}{random.randint(1000, 9999)}"

    return oem_number


def populate_manufacturers(manager: Fase2Manager) -> Dict[str, int]:
    """Populate the manufacturers table and return a mapping of name to ID.

    Every entry of MANUFACTURERS_DATA is passed to
    ``manager.insert_manufacturer``, which returns the existing ID for
    manufacturers that are already present.
    """
    print("\n=== Populating Manufacturers ===")
    manufacturer_ids = {}

    for tier, mfr_type, heading in _MANUFACTURER_SECTIONS:
        print(heading)
        for mfr in MANUFACTURERS_DATA[tier]:
            manufacturer_ids[mfr['name']] = manager.insert_manufacturer(
                name=mfr['name'],
                type_=mfr_type,
                quality_tier=tier,
                country=mfr['country'],
                website=mfr['website']
            )

    print(f"\nTotal manufacturers: {len(manufacturer_ids)}")
    return manufacturer_ids


def populate_aftermarket_parts(manager: Fase2Manager, manufacturer_ids: Dict[str, int]):
    """Generate aftermarket parts for each OEM part in the database.

    For every part a random base price is generated, then 2-4 alternatives
    are requested from randomly ordered tiers (at most two manufacturers per
    tier). Price and warranty are drawn from the tier's ranges.

    NOTE: ``insert_aftermarket_part`` returns the existing ID when the
    (part, manufacturer) pair is already stored, so on a re-run the
    "created" counters below also include rows that already existed.
    """
    print("\n=== Generating Aftermarket Parts ===")

    parts = manager.get_all_parts()
    if not parts:
        print("No parts found in the database. Aftermarket parts will be generated when parts are added.")
        return

    total_aftermarket = 0

    for part in parts:
        oem_part_id = part['id']
        oem_number = part['oem_part_number']
        part_name = part['name']
        category_name = part.get('category_name', '')

        # Generate base price for this part
        base_price = generate_base_price(part_name, category_name)

        # Determine how many aftermarket alternatives (2-4)
        num_alternatives = random.randint(2, 4)

        # Select manufacturers from different tiers
        tiers_to_use = ['premium', 'standard', 'economy']
        random.shuffle(tiers_to_use)

        alternatives_created = 0

        for tier in tiers_to_use:
            if alternatives_created >= num_alternatives:
                break

            # Get manufacturer names for this tier
            tier_manufacturers = [m['name'] for m in MANUFACTURERS_DATA.get(tier, [])]
            if not tier_manufacturers:
                continue

            # Pick 1-2 manufacturers from this tier, never more than still needed
            selected = random.sample(
                tier_manufacturers,
                min(2, len(tier_manufacturers), num_alternatives - alternatives_created)
            )

            for mfr_name in selected:
                if alternatives_created >= num_alternatives:
                    break

                mfr_id = manufacturer_ids.get(mfr_name)
                if not mfr_id:
                    # Manufacturer missing from the supplied mapping; skip it.
                    continue

                # Generate aftermarket part number
                am_part_number = generate_part_number(mfr_name, oem_number)

                # Calculate price based on tier
                price_range = PRICE_MULTIPLIERS.get(tier, (0.5, 0.8))
                price_multiplier = random.uniform(*price_range)
                am_price = round(base_price * price_multiplier, 2)

                # Get warranty for tier
                warranty = random.choice(WARRANTY_MONTHS.get(tier, [12]))

                # Insert aftermarket part (the part's quality tier is the manufacturer's tier)
                am_id = manager.insert_aftermarket_part(
                    oem_part_id=oem_part_id,
                    manufacturer_id=mfr_id,
                    part_number=am_part_number,
                    name=f"{mfr_name} {part_name}",
                    name_es=part.get('name_es'),
                    quality_tier=tier,
                    price_usd=am_price,
                    warranty_months=warranty,
                    in_stock=random.random() > 0.1  # 90% in stock
                )

                if am_id:
                    alternatives_created += 1
                    total_aftermarket += 1

        print(f" Part {oem_number}: {alternatives_created} aftermarket alternatives created")

    print(f"\nTotal aftermarket parts created: {total_aftermarket}")


def populate_cross_references(manager: Fase2Manager):
    """Generate cross-references for OEM parts.

    Each part gets 1-3 cross-references of distinct, randomly chosen types,
    each attributed to a randomly chosen source.
    """
    print("\n=== Generating Cross-References ===")

    parts = manager.get_all_parts()
    if not parts:
        print("No parts found in the database. Cross-references will be generated when parts are added.")
        return

    total_refs = 0
    reference_types = ['oem_alternate', 'supersession', 'interchange', 'competitor']
    sources = ['RockAuto', 'PartsGeek', 'AutoZone', 'OReilly', 'NAPA', 'Manufacturer']
    # Only these reference types carry an explanatory note; the rest store NULL.
    notes_by_type = {
        'supersession': "New part number supersedes original",
        'interchange': "Interchangeable with original",
    }

    for part in parts:
        part_id = part['id']
        oem_number = part['oem_part_number']

        # Generate 1-3 cross-references per part
        num_refs = random.randint(1, 3)
        used_types = random.sample(reference_types, min(num_refs, len(reference_types)))

        for ref_type in used_types:
            cross_ref_number = generate_cross_reference_number(oem_number, ref_type)
            source = random.choice(sources)

            ref_id = manager.insert_cross_reference(
                part_id=part_id,
                cross_reference_number=cross_ref_number,
                reference_type=ref_type,
                source=source,
                notes=notes_by_type.get(ref_type)
            )

            if ref_id:
                total_refs += 1

        print(f" Part {oem_number}: {len(used_types)} cross-references created")

    print(f"\nTotal cross-references created: {total_refs}")


def main():
    """Main entry point for FASE 2 population.

    Connects, creates the tables, populates manufacturers, aftermarket parts
    and cross-references, then prints row counts. Any exception is printed
    and re-raised; the connection is always closed.
    """
    print("=" * 60)
    print("FASE 2: Cross-References and Aftermarket Parts Population")
    print("=" * 60)

    manager = Fase2Manager()

    try:
        # Connect to database
        manager.connect()

        # Create FASE 2 tables (safe to repeat only if the schema script
        # itself is written to be re-runnable -- not verifiable from here)
        manager.create_fase2_tables()

        # Populate manufacturers
        manufacturer_ids = populate_manufacturers(manager)

        # Generate aftermarket parts
        populate_aftermarket_parts(manager, manufacturer_ids)

        # Generate cross-references
        populate_cross_references(manager)

        print("\n" + "=" * 60)
        print("FASE 2 population completed successfully!")
        print("=" * 60)

        # Print summary
        cursor = manager.connection.cursor()

        cursor.execute("SELECT COUNT(*) FROM manufacturers")
        mfr_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM aftermarket_parts")
        am_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM part_cross_references")
        xref_count = cursor.fetchone()[0]

        print("\nSummary:")
        print(f" Manufacturers: {mfr_count}")
        print(f" Aftermarket Parts: {am_count}")
        print(f" Cross-References: {xref_count}")

    except Exception as e:
        print(f"\nError: {e}")
        raise
    finally:
        manager.disconnect()


if __name__ == "__main__":
    main()