diff --git a/scripts/migrate_aftermarket.py b/scripts/migrate_aftermarket.py new file mode 100755 index 0000000..f04d6da --- /dev/null +++ b/scripts/migrate_aftermarket.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +""" +Migrate AFT- prefixed parts from `parts` table to `aftermarket_parts` table. + +Parts with oem_part_number like 'AFT-{partNumber}-{manufacturerName}' are +aftermarket parts stored incorrectly in the parts table. This script parses +them, links to OEM parts via cross-references, inserts into aftermarket_parts, +and deletes the originals from parts (CASCADE cleans vehicle_parts & cross_refs). + +Usage: + python3 scripts/migrate_aftermarket.py +""" + +import sys +import time +import psycopg2 +from psycopg2.extras import execute_values + +DB_DSN = "postgresql://nexus:nexus_autoparts_2026@localhost/nexus_autoparts" +BATCH_SIZE = 5000 + + +def load_manufacturers(cur): + """Load all manufacturer names and ids into a dict {lowercase_name: id}.""" + cur.execute("SELECT id_manufacture, name_manufacture FROM manufacturers") + mfrs = {} + for row in cur.fetchall(): + mfrs[row[1].lower()] = row[0] + print(f"Loaded {len(mfrs)} known manufacturers", flush=True) + return mfrs + + +def parse_aft_part(oem_part_number, known_manufacturers): + """ + Parse 'AFT-{partNumber}-{manufacturerName}' into (part_number, manufacturer_name). + + The manufacturer is the longest right-side suffix (after a '-') that matches + a known manufacturer. Fallback: last segment is the manufacturer. + + Examples: + AFT-AC191-PARTQUIP -> ('AC191', 'PARTQUIP') + AFT-10-0058-Airstal -> ('10-0058', 'Airstal') + AFT-A-123-Some-Brand -> ('A-123', 'Some-Brand') if 'some-brand' is known + """ + without_prefix = oem_part_number[4:] # remove 'AFT-' + segments = without_prefix.split('-') + + if len(segments) < 2: + # Can't split — treat entire thing as part number, no manufacturer + return without_prefix, None + + # Try increasingly longer suffixes from the right to find a known manufacturer + # e.g. for segments [A, B, C, D], try "D", "C-D", "B-C-D" + # (don't try the full string — need at least one segment for part_number) + for i in range(len(segments) - 1, 0, -1): + candidate = '-'.join(segments[i:]) + if candidate.lower() in known_manufacturers: + part_number = '-'.join(segments[:i]) + return part_number, candidate + + # Fallback: last segment is manufacturer + manufacturer_name = segments[-1] + part_number = '-'.join(segments[:-1]) + return part_number, manufacturer_name + + +def main(): + conn = psycopg2.connect(DB_DSN) + conn.autocommit = False + cur = conn.cursor() + + # Step 1: Load known manufacturers + known_manufacturers = load_manufacturers(cur) + + # Step 2: Count AFT- parts + cur.execute("SELECT COUNT(*) FROM parts WHERE oem_part_number LIKE 'AFT-%%'") + total = cur.fetchone()[0] + print(f"Found {total:,} AFT- parts to migrate", flush=True) + + if total == 0: + print("Nothing to do.") + cur.close() + conn.close() + return + + # Load all AFT- parts into memory (357K rows is manageable) + print("Loading all AFT- parts into memory...", flush=True) + cur.execute( + "SELECT id_part, oem_part_number, name_part, description, group_id " + "FROM parts WHERE oem_part_number LIKE 'AFT-%%' " + "ORDER BY id_part" + ) + all_aft_parts = cur.fetchall() + print(f" Loaded {len(all_aft_parts):,} rows", flush=True) + + migrated = 0 + skipped_no_mfr = 0 + skipped_no_oem = 0 + inserted = 0 + batch_num = 0 + new_manufacturers = 0 + + # Process in batches + for i in range(0, len(all_aft_parts), BATCH_SIZE): + batch = all_aft_parts[i:i + BATCH_SIZE] + stats = process_batch(conn, cur, batch, known_manufacturers) + inserted += stats['inserted'] + skipped_no_mfr += stats['skipped_no_mfr'] + skipped_no_oem += stats['skipped_no_oem'] + new_manufacturers += stats['new_manufacturers'] + migrated += len(batch) + batch_num += 1 + print(f" Batch {batch_num}: processed {migrated:,}/{total:,} " + f"(inserted={inserted:,}, no_oem={skipped_no_oem:,}, " + f"no_mfr={skipped_no_mfr:,})", flush=True) + cur.close() + conn.close() + + print(f"\n=== Migration Complete ===") + print(f" Total AFT- parts processed: {migrated:,}") + print(f" Inserted into aftermarket_parts: {inserted:,}") + print(f" Skipped (no manufacturer parsed): {skipped_no_mfr:,}") + print(f" Skipped (no OEM part found): {skipped_no_oem:,}") + print(f" New manufacturers created: {new_manufacturers:,}") + + +def process_batch(conn, cur, batch, known_manufacturers): + """Process a batch of AFT- parts. Returns stats dict.""" + stats = {'inserted': 0, 'skipped_no_mfr': 0, 'skipped_no_oem': 0, 'new_manufacturers': 0} + + parts_to_delete = [] + aftermarket_rows = [] # (oem_part_id, manufacturer_id, part_number, name_aftermarket_parts) + + for id_part, oem_part_number, name_part, description, group_id in batch: + part_number, manufacturer_name = parse_aft_part(oem_part_number, known_manufacturers) + + if not manufacturer_name: + stats['skipped_no_mfr'] += 1 + # Still delete the malformed AFT- part from parts table + parts_to_delete.append(id_part) + continue + + # Ensure manufacturer exists + mfr_key = manufacturer_name.lower() + if mfr_key not in known_manufacturers: + cur.execute( + "INSERT INTO manufacturers (name_manufacture) VALUES (%s) " + "ON CONFLICT (name_manufacture) DO NOTHING " + "RETURNING id_manufacture", + (manufacturer_name,) + ) + row = cur.fetchone() + if row: + known_manufacturers[mfr_key] = row[0] + stats['new_manufacturers'] += 1 + else: + # Was inserted by a concurrent process; fetch it + cur.execute( + "SELECT id_manufacture FROM manufacturers WHERE name_manufacture = %s", + (manufacturer_name,) + ) + known_manufacturers[mfr_key] = cur.fetchone()[0] + + manufacturer_id = known_manufacturers[mfr_key] + + # Find the OEM part via cross-references + cur.execute( + "SELECT part_id FROM part_cross_references " + "WHERE cross_reference_number = %s AND source_ref = %s " + "LIMIT 1", + (part_number, manufacturer_name) + ) + xref_row = cur.fetchone() + + if not xref_row: + stats['skipped_no_oem'] += 1 + # Still delete — it doesn't belong in parts table + parts_to_delete.append(id_part) + continue + + oem_part_id = xref_row[0] + aftermarket_rows.append((oem_part_id, manufacturer_id, part_number, name_part)) + parts_to_delete.append(id_part) + + # Batch insert into aftermarket_parts + if aftermarket_rows: + execute_values( + cur, + "INSERT INTO aftermarket_parts " + "(oem_part_id, manufacturer_id, part_number, name_aftermarket_parts) " + "VALUES %s ON CONFLICT DO NOTHING", + aftermarket_rows, + page_size=1000 + ) + stats['inserted'] = len(aftermarket_rows) + + # Delete dependent rows first (FK without CASCADE) + if parts_to_delete: + cur.execute( + "DELETE FROM vehicle_parts WHERE part_id = ANY(%s)", + (parts_to_delete,) + ) + cur.execute( + "DELETE FROM part_cross_references WHERE part_id = ANY(%s)", + (parts_to_delete,) + ) + cur.execute( + "DELETE FROM parts WHERE id_part = ANY(%s)", + (parts_to_delete,) + ) + + conn.commit() + return stats + + +if __name__ == '__main__': + t0 = time.time() + main() + elapsed = time.time() - t0 + print(f"Elapsed: {elapsed:.1f}s")