feat(catalog): supplier catalog cleanup, fuzzy matching, and navigation fixes

- Cleaned 137+ fake engine-displacement models from supplier imports
  (v3/v4 scripts: Chevrolet, Ford, Chrysler, Dodge, Jeep, Nissan, etc.)
- Removed 1,251+ corrupted models (INT. prefixes, year-suffix, torque specs,
  empty names, trailing-year variants)
- Migrated supplier tables to master DB (supplier_catalog,
  supplier_catalog_compat, supplier_catalog_interchange)
- Fixed _get_mye_ids_with_parts() to query supplier_catalog_compat from
  master DB so supplier-only vehicles appear for all tenants
- Added fuzzy model matcher with parenthesis stripping, noise suffix removal,
  compact matching, prefix/substring fallback, model aliases, and ±3 year
  proximity
- Matched compat rows: KEEP GREEN +14,152, KNADIAN +3,021, VAZLO +127,500,
  LUK +477, RAYBESTOS +1,743
- Added KNADIAN catalog importer with year-range expansion and future-year
  filtering
- Added VAZLO catalog importer with position parsing and SKU-in-model cleanup
- Added Keep Green, LUK, Yokomitsu, Raybestos catalog importers
- Added cache clearing after cleanups (_classify_cache_*, nexus:mye_ids:*,
  nexus:brand_mye_counts:*)

Final match rates:
- KEEP GREEN: 90.3%
- VAZLO: 93.6%
- YOKOMITSU: 100.0%
- KNADIAN: 57.4%
- LUK: 51.0%
- RAYBESTOS: 55.9%
This commit is contained in:
2026-06-09 07:47:42 +00:00
parent 5ea667b80e
commit ea29cc31c0
53 changed files with 7727 additions and 548 deletions

View File

@@ -9,6 +9,7 @@ Depends on:
import json
import logging
import urllib.parse
from typing import Optional
from decimal import Decimal
@@ -19,6 +20,19 @@ from services.inventory_engine import get_stock, get_stock_bulk
logger = logging.getLogger(__name__)
def _resolve_image_urls(images: list[str], base_url: str | None = None) -> list[str]:
"""Convert relative image paths to absolute URLs."""
resolved = []
for url in images:
if not url:
continue
if base_url and url.startswith("/"):
resolved.append(urllib.parse.urljoin(base_url.rstrip("/") + "/", url.lstrip("/")))
else:
resolved.append(url)
return resolved
# ═══════════════════════════════════════════════════════════════════════════
# CONFIG HELPERS
# ═══════════════════════════════════════════════════════════════════════════
@@ -145,6 +159,7 @@ def build_item_payload(
custom_title: str = None,
extra_attributes: list = None,
shipping_cost: float = None,
base_url: str = None,
) -> dict:
"""Convert a Nexus inventory row into a MercadoLibre item payload."""
title = custom_title or f"{inventory_row['name']} {inventory_row['brand'] or ''} {inventory_row['part_number'] or ''}".strip()
@@ -167,17 +182,20 @@ def build_item_payload(
"buying_mode": "buy_it_now",
"listing_type_id": listing_type_id,
"condition": "new",
"pictures": [{"source": url} for url in images if url],
"pictures": [{"source": url} for url in _resolve_image_urls(images, base_url) if url],
"shipping": shipping_payload,
"attributes": [],
}
if inventory_row.get("brand"):
# Collect extra attribute IDs to avoid duplicates
extra_attr_ids = {a.get("id") for a in (extra_attributes or []) if a.get("id")}
if inventory_row.get("brand") and "BRAND" not in extra_attr_ids:
payload["attributes"].append(
{"id": "BRAND", "value_name": inventory_row["brand"]}
)
if inventory_row.get("part_number"):
if inventory_row.get("part_number") and "PART_NUMBER" not in extra_attr_ids:
payload["attributes"].append(
{"id": "PART_NUMBER", "value_name": inventory_row["part_number"]}
)
@@ -193,11 +211,11 @@ def build_item_payload(
if isinstance(vehicle_compat, list) and vehicle_compat:
first = vehicle_compat[0]
if isinstance(first, dict):
if first.get("brand"):
if first.get("brand") and "VEHICLE_MODEL" not in extra_attr_ids:
payload["attributes"].append(
{"id": "VEHICLE_MODEL", "value_name": first["brand"]}
)
if first.get("model"):
if first.get("model") and "VEHICLE_MODEL_NAME" not in extra_attr_ids:
payload["attributes"].append(
{"id": "VEHICLE_MODEL_NAME", "value_name": first["model"]}
)
@@ -243,7 +261,7 @@ def check_meli_shipping_config(svc: MeliService, cfg: dict) -> dict:
# LISTINGS CRUD
# ═══════════════════════════════════════════════════════════════════════════
def check_inventory_ml_status(tenant_conn, inventory_ids: list[int]) -> dict:
def check_inventory_ml_status(tenant_conn, inventory_ids: list[int], base_url: str = None) -> dict:
"""Check local pre-flight status for ML publishing.
Returns per-item dict with checks: has_image, has_stock, has_price,
@@ -298,7 +316,7 @@ def check_inventory_ml_status(tenant_conn, inventory_ids: list[int]) -> dict:
"has_price": price > 0,
"price": price,
"stock": stock,
"image_url": image_url,
"image_url": _resolve_image_urls([image_url], base_url)[0] if image_url else None,
"already_published": existing is not None,
"existing_listing": existing,
})
@@ -312,6 +330,7 @@ def validate_items(
listing_type_id: str = "gold_special",
shipping_mode: str = "me2",
custom_data: dict = None,
base_url: str = None,
) -> dict:
"""Validate items against ML /items/validate without creating them.
@@ -370,6 +389,7 @@ def validate_items(
images = []
if inv.get("image_url"):
images.append(inv["image_url"])
images = _resolve_image_urls(images, base_url)
if not images:
results["invalid"].append({"inventory_id": inv_id, "error": "El producto no tiene imagen"})
continue
@@ -385,6 +405,7 @@ def validate_items(
shipping_mode=shipping_mode, listing_type_id=listing_type_id,
custom_title=title, extra_attributes=extra_attrs,
shipping_cost=shipping_cost,
base_url=base_url,
)
try:
@@ -412,6 +433,7 @@ def publish_items(
listing_type_id: str = "gold_special",
shipping_mode: str = "me2",
custom_data: dict = None,
base_url: str = None,
) -> dict:
"""Publish one or more inventory items to MercadoLibre.
@@ -476,6 +498,7 @@ def publish_items(
images = []
if inv.get("image_url"):
images.append(inv["image_url"])
images = _resolve_image_urls(images, base_url)
if not images:
results["failed"].append({"inventory_id": inv_id, "error": "El producto no tiene imagen. ML requiere imagen para publicar."})
@@ -492,6 +515,7 @@ def publish_items(
shipping_mode=shipping_mode, listing_type_id=listing_type_id,
custom_title=title, extra_attributes=extra_attrs,
shipping_cost=shipping_cost,
base_url=base_url,
)
try:
@@ -740,6 +764,239 @@ def close_listing(tenant_conn, listing_id: int) -> dict:
return {"ok": True, "status": "closed"}
def delete_listing_permanently(tenant_conn, listing_id: int) -> dict:
    """Hard-delete a listing row from the local DB.

    Sets ``listing_id = NULL`` on ``marketplace_order_items`` to avoid FK
    errors, then deletes the ``marketplace_listings`` row and commits.

    NOTE(review): ``external_status`` is selected but not checked, so this
    function does not itself enforce that the listing is closed — confirm the
    caller does. ``marketplace_questions`` also carries a ``listing_id``;
    verify whether it needs the same FK clearing.

    Args:
        tenant_conn: DB-API connection for the tenant database.
        listing_id: Primary key of the ``marketplace_listings`` row.

    Returns:
        ``{"ok": True, "deleted": True}`` on success.

    Raises:
        ValueError: If no listing with ``listing_id`` exists.
    """
    cur = tenant_conn.cursor()
    try:
        cur.execute(
            "SELECT id, external_status FROM marketplace_listings WHERE id = %s",
            (listing_id,),
        )
        row = cur.fetchone()
        if not row:
            raise ValueError("Listing not found")

        try:
            # Clear FK references so we can delete safely
            cur.execute(
                "UPDATE marketplace_order_items SET listing_id = NULL WHERE listing_id = %s",
                (listing_id,),
            )
            cur.execute(
                "DELETE FROM marketplace_listings WHERE id = %s",
                (listing_id,),
            )
            tenant_conn.commit()
        except Exception:
            # Never leave the FK update applied without the delete.
            tenant_conn.rollback()
            raise
    finally:
        cur.close()
    return {"ok": True, "deleted": True}
# ═══════════════════════════════════════════════════════════════════════════
# QUESTIONS & ANSWERS
# ═══════════════════════════════════════════════════════════════════════════
def _upsert_question(cur, q_data: dict, listing_id_map: dict):
"""Upsert a single question from ML API into marketplace_questions."""
external_qid = str(q_data.get("id"))
external_item_id = str(q_data.get("item_id"))
text = q_data.get("text", "")
status = q_data.get("status", "unanswered")
answer = q_data.get("answer", {})
answer_text = answer.get("text") if answer else None
answer_date = None
if answer and answer.get("date_created"):
answer_date = answer["date_created"]
from_user = q_data.get("from", {})
buyer_id = str(from_user.get("id")) if from_user else None
buyer_nickname = from_user.get("nickname") if from_user else None
question_date = q_data.get("date_created")
listing_id = listing_id_map.get(external_item_id)
cur.execute(
"""
INSERT INTO marketplace_questions
(listing_id, external_question_id, external_item_id, question_text,
answer_text, status, buyer_id, buyer_nickname, question_date,
answer_date, raw_json, updated_at)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
ON CONFLICT (external_question_id)
DO UPDATE SET
question_text = EXCLUDED.question_text,
answer_text = EXCLUDED.answer_text,
status = EXCLUDED.status,
buyer_id = EXCLUDED.buyer_id,
buyer_nickname = EXCLUDED.buyer_nickname,
question_date = EXCLUDED.question_date,
answer_date = EXCLUDED.answer_date,
raw_json = EXCLUDED.raw_json,
updated_at = NOW()
""",
(
listing_id,
external_qid,
external_item_id,
text,
answer_text,
status,
buyer_id,
buyer_nickname,
question_date,
answer_date,
json.dumps(q_data),
),
)
def sync_questions(tenant_conn) -> dict:
    """Fetch questions from ML for all active listings and upsert locally.

    Each listing is synced in its own transaction. A failure on one listing
    is logged and rolled back, and the remaining listings are still processed.

    NOTE(review): only one request with ``limit=50`` is made per listing; no
    pagination is visible here, so further questions may be skipped.

    Args:
        tenant_conn: DB-API connection for the tenant database.

    Returns:
        ``{"synced": N, "items": [...]}`` where ``N`` is the number of
        questions upserted and ``items`` lists the external item ids that had
        at least one question synced.

    Raises:
        ValueError: If MercadoLibre is not configured for the tenant.
    """
    cfg = get_meli_config(tenant_conn)
    svc = _get_meli_service(cfg)
    if not svc:
        raise ValueError("MercadoLibre not configured")

    cur = tenant_conn.cursor()
    try:
        cur.execute(
            """
            SELECT id, external_item_id
            FROM marketplace_listings
            WHERE channel = 'mercadolibre' AND is_active = true
            """
        )
        # external item id -> local listing id
        listings = {r[1]: r[0] for r in cur.fetchall()}
    finally:
        cur.close()

    if not listings:
        return {"synced": 0, "items": []}

    total_synced = 0
    synced_items = []
    for item_id in listings:
        try:
            resp = svc.get_questions(item_id, limit=50)
            questions = resp.get("questions", [])
            if not questions:
                continue

            cur = tenant_conn.cursor()
            try:
                for q in questions:
                    _upsert_question(cur, q, listings)
                tenant_conn.commit()
            except Exception:
                # A failed statement must not leave the connection unusable
                # for the listings that follow.
                tenant_conn.rollback()
                raise
            finally:
                cur.close()

            total_synced += len(questions)
            synced_items.append(item_id)
        except Exception as e:
            # Best-effort sync: log and move on to the next listing.
            logger.warning("Failed to sync questions for item %s: %s", item_id, e)

    return {"synced": total_synced, "items": synced_items}
def fetch_question_from_ml(tenant_conn, external_question_id: str) -> dict:
    """Fetch a single question from ML API and upsert locally.

    Args:
        tenant_conn: DB-API connection for the tenant database.
        external_question_id: Question id as known to MercadoLibre.

    Returns:
        The question payload as returned by the ML service.

    Raises:
        ValueError: If MercadoLibre is not configured for the tenant.
    """
    cfg = get_meli_config(tenant_conn)
    svc = _get_meli_service(cfg)
    if not svc:
        raise ValueError("MercadoLibre not configured")

    q_data = svc.get_question(external_question_id)

    cur = tenant_conn.cursor()
    try:
        cur.execute(
            "SELECT id, external_item_id FROM marketplace_listings WHERE external_item_id = %s",
            (str(q_data.get("item_id")),),
        )
        row = cur.fetchone()
        # Single-entry map so the question links to its listing if we have it.
        listing_id_map = {row[1]: row[0]} if row else {}
        _upsert_question(cur, q_data, listing_id_map)
        tenant_conn.commit()
    except Exception:
        tenant_conn.rollback()
        raise
    finally:
        cur.close()
    return q_data
def answer_question(tenant_conn, local_question_id: int, text: str) -> dict:
    """Answer a question via ML API and update local status.

    The answer is sent to ML first. The local row is marked ``answered`` only
    after that call returns.

    Args:
        tenant_conn: DB-API connection for the tenant database.
        local_question_id: Primary key in ``marketplace_questions``.
        text: Answer text to post.

    Returns:
        ``{"ok": True, "ml_response": <service response>}``.

    Raises:
        ValueError: If MercadoLibre is not configured, or the local question
            does not exist.
    """
    cfg = get_meli_config(tenant_conn)
    svc = _get_meli_service(cfg)
    if not svc:
        raise ValueError("MercadoLibre not configured")

    cur = tenant_conn.cursor()
    try:
        cur.execute(
            "SELECT external_question_id FROM marketplace_questions WHERE id = %s",
            (local_question_id,),
        )
        row = cur.fetchone()
    finally:
        cur.close()
    if not row:
        raise ValueError("Question not found")
    external_qid = row[0]

    resp = svc.answer_question(external_qid, text)

    cur = tenant_conn.cursor()
    try:
        cur.execute(
            """
            UPDATE marketplace_questions
            SET answer_text = %s, status = 'answered', answer_date = NOW(), updated_at = NOW()
            WHERE id = %s
            """,
            (text, local_question_id),
        )
        tenant_conn.commit()
    except Exception:
        # The remote answer is already posted; only the local write is undone.
        tenant_conn.rollback()
        raise
    finally:
        cur.close()
    return {"ok": True, "ml_response": resp}
def list_local_questions(tenant_conn, status: str = None) -> list:
    """Return questions from local DB, optionally filtered by status.

    Args:
        tenant_conn: DB-API connection for the tenant database.
        status: When truthy, only questions with this exact status are
            returned; otherwise all questions are returned.

    Returns:
        A list of dicts, newest ``question_date`` first, with the keys
        ``id``, ``external_question_id``, ``external_item_id``,
        ``question_text``, ``answer_text``, ``status``, ``buyer_nickname``,
        ``question_date``, ``answer_date``, ``created_at``, ``listing_title``
        and ``listing_permalink``. The listing fields come from a LEFT JOIN
        and are ``None`` when the question has no linked listing.
    """
    # Output keys, in the same order as the SELECT column list below.
    keys = (
        "id",
        "external_question_id",
        "external_item_id",
        "question_text",
        "answer_text",
        "status",
        "buyer_nickname",
        "question_date",
        "answer_date",
        "created_at",
        "listing_title",
        "listing_permalink",
    )

    query = """
        SELECT q.id, q.external_question_id, q.external_item_id, q.question_text,
        q.answer_text, q.status, q.buyer_nickname, q.question_date,
        q.answer_date, q.created_at, l.title, l.external_permalink
        FROM marketplace_questions q
        LEFT JOIN marketplace_listings l ON l.id = q.listing_id
    """
    params = None
    if status:
        query += " WHERE q.status = %s"
        params = (status,)
    query += " ORDER BY q.question_date DESC"

    cur = tenant_conn.cursor()
    try:
        cur.execute(query, params)
        rows = cur.fetchall()
    finally:
        cur.close()

    return [dict(zip(keys, r)) for r in rows]
# ═══════════════════════════════════════════════════════════════════════════
# ORDERS
# ═══════════════════════════════════════════════════════════════════════════