feat(whatsapp): QWEN primary AI backend, Hermes fallback, conversation history, vehicle persistence, demo prompts

- Add QWEN (qwen3.6) as primary AI backend with short system prompt
- Hermes remains as fallback with 45s timeout
- Increase QWEN timeout to 35s, max_tokens to 4000
- Add conversation history loading from whatsapp_messages (last 4 msgs)
- Persist detected vehicle in whatsapp_sessions table
- Add 'limpiar chat' / 'nuevo chat' / 'reset' commands to clear history
- Fix CSS conflict: rename whatsapp chat-panel classes to wa-chat-panel
- Fix JS ID conflicts with chat.js widget (waChatPanel, waChatMessages, etc.)
- Improve no-stock response: conversational with alternatives
- Split search_query by | for multi-part lookups
- Add DEMO_PROMPTS.md and DEMO_PROMPTS_V2.md
2026-05-06 20:27:14 +00:00
parent 371d72887e
commit ff45905b49
33 changed files with 3040 additions and 445 deletions
--- a/pos/services/ai_chat.py
+++ b/pos/services/ai_chat.py
@@ -3,9 +3,15 @@

 import requests
 import json
-from config import OPENROUTER_API_KEY
+from config import OPENROUTER_API_KEY, HERMES_API_URL, HERMES_API_KEY
+from config import QWEN_API_URL, QWEN_API_KEY, QWEN_MODEL

 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+HERMES_ENABLED = bool(HERMES_API_KEY and HERMES_API_URL)
+HERMES_CHAT_URL = (HERMES_API_URL.rstrip('/') + '/chat/completions') if HERMES_API_URL else None
+
+QWEN_ENABLED = bool(QWEN_API_KEY and QWEN_API_URL)
+QWEN_CHAT_URL = (QWEN_API_URL.rstrip('/') + '/chat/completions') if QWEN_API_URL else None

 # ⚠️ SOLO MODELOS GRATUITOS — No cambiar a modelos de pago.
 # El modelo DEBE terminar en ":free" para garantizar costo $0.
@@ -24,11 +30,69 @@ FALLBACK_MODELS = [
    "meta-llama/llama-3.3-70b-instruct:free", # Meta — último fallback
 ]

+# Hermes Agent model (OpenAI-compatible API server)
+HERMES_MODEL = "hermes-agent"
+
 def _validate_model(model_id):
-    """Ensure only free models are used. Raises if model is not free."""
+    """Ensure only free models are used. Raises if model is not free.
+    
+    Skips validation for Hermes Agent and QWEN models (self-hosted / private API).
+    """
+    if model_id == HERMES_MODEL:
+        return
+    if model_id == QWEN_MODEL:
+        return
    if not model_id.endswith(':free'):
        raise ValueError(f"BLOQUEADO: Solo se permiten modelos gratuitos (:free). Modelo '{model_id}' no es gratuito.")

+
+def _post_chat_completion(url, api_key, model_id, messages, max_tokens=800, temperature=0.3, timeout=25):
+    """Generic OpenAI-compatible chat completion POST.
+    
+    Returns the parsed response dict on success, None on failure.
+    """
+    try:
+        resp = requests.post(
+            url,
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": model_id,
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+            },
+            timeout=timeout,
+        )
+        if resp.status_code == 429:
+            print(f"[AI] Rate limited on {model_id} ({url})")
+            return None
+        if resp.status_code >= 400:
+            print(f"[AI] HTTP {resp.status_code} on {model_id} ({url}): {resp.text[:200]}")
+            return None
+        data = resp.json()
+        choice = data.get("choices", [{}])[0]
+        content = choice.get("message", {}).get("content") or ""
+        content = content.strip()
+        finish = choice.get("finish_reason", "")
+        if not content:
+            print(f"[AI] Empty response from {model_id} (finish={finish})")
+            return None
+        return {"content": content, "finish_reason": finish, "model": model_id}
+    except Exception as e:
+        print(f"[AI] Error with {model_id} ({url}): {e}")
+        return None
+
+
+SYSTEM_PROMPT_SHORT = """Eres un asistente de refaccionaria automotriz mexicana. Ayuda a encontrar autopartes.
+Responde SIEMPRE en formato JSON: {"message":"...","search_query":"...","vehicle":{"brand":"...","model":"...","year":...}}
+search_query va EN INGLES cuando el usuario pide una parte. Traducciones: Balatas=Brake Pad, Disco de freno=Brake Disc, Amortiguador=Shock Absorber, Filtro de aceite=Oil Filter, Filtro de aire=Air Filter, Bujias=Spark Plug, Banda=V-Belt, Bomba de agua=Water Pump, Alternador=Alternator, Radiador=Radiator, Sensor de oxigeno=Oxygen Sensor, Terminal de direccion=Tie Rod End, Bomba de gasolina=Fuel Pump, Clutch=Clutch Kit, Mofle=Exhaust, Inyector=Injector.
+No preguntes mas si ya puedes buscar. Si el usuario describe un sintoma, diagnostica y sugiere partes.
+Cuando pida cotizacion o multiples partes, search_query DEBE usar | para separar cada parte: "Brake Pad|Air Filter|Oil Filter|Spark Plug".
+"""
+
 SYSTEM_PROMPT = """Eres un asistente de refaccionaria automotriz mexicana. Tu trabajo es ayudar a encontrar autopartes.

 IMPORTANTE: Responde SIEMPRE en formato JSON valido con esta estructura:
@@ -161,6 +225,7 @@ def get_inventory_context(tenant_conn, branch_id=None):


 VISION_MODEL = "google/gemma-3-27b-it:free"
+HERMES_VISION_MODEL = "hermes-agent"

 VISION_SYSTEM_PROMPT = """Eres un experto en identificación de autopartes. El usuario te envía una foto de una parte automotriz.
 Tu trabajo es:
@@ -219,54 +284,41 @@ def chat_with_image(user_message, image_base64, conversation_history=None, inven
    ]
    messages.append({"role": "user", "content": user_content})

-    import time
-    max_retries = 3
-
-    for attempt in range(max_retries):
+    # Try Hermes first for vision (if enabled), fallback to OpenRouter
+    backends = []
+    if HERMES_ENABLED:
+        backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_VISION_MODEL))
+    if OPENROUTER_API_KEY:
+        backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, VISION_MODEL))
+    
+    last_error = None
+    for url, key, model_id in backends:
+        _validate_model(model_id)
+        result = _post_chat_completion(url, key, model_id, messages, max_tokens=500, temperature=0.3, timeout=30)
+        if result is None:
+            last_error = "api_error"
+            continue
+        content = result["content"]
        try:
-            resp = requests.post(
-                OPENROUTER_URL,
-                headers={
-                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": VISION_MODEL,
-                    "messages": messages,
-                    "max_tokens": 500,
-                    "temperature": 0.3,
-                },
-                timeout=30,
-            )
-            if resp.status_code == 429:
-                wait = (attempt + 1) * 5
-                if attempt < max_retries - 1:
-                    time.sleep(wait)
-                    continue
-                return {"message": "El asistente esta ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
-            resp.raise_for_status()
-            data = resp.json()
-            content = data["choices"][0]["message"]["content"]
-
-            try:
-                stripped = content.strip()
-                if stripped.startswith("```"):
-                    lines = stripped.split("\n")
-                    json_str = "\n".join(lines[1:-1])
-                    parsed = json.loads(json_str)
-                else:
-                    parsed = json.loads(stripped)
+            stripped = content.strip()
+            if stripped.startswith("```"):
+                lines = stripped.split("\n")
+                json_str = "\n".join(lines[1:-1])
+                parsed = json.loads(json_str)
                return parsed
-            except (json.JSONDecodeError, IndexError):
-                return {"message": content, "search_query": None, "vehicle": None}
-        except Exception as e:
-            if attempt < max_retries - 1:
-                continue
-            return {
-                "message": f"Error al analizar imagen: {str(e)}",
-                "search_query": None,
-                "vehicle": None,
-            }
+            else:
+                parsed = json.loads(stripped)
+                return parsed
+        except (json.JSONDecodeError, IndexError):
+            return {"message": content, "search_query": None, "vehicle": None}
+    
+    if last_error == "api_error":
+        return {"message": "El asistente esta ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
+    return {
+        "message": f"Error al analizar imagen: {last_error}",
+        "search_query": None,
+        "vehicle": None,
+    }


 def classify_part(part_number):
@@ -287,47 +339,32 @@ def classify_part(part_number):
        {"role": "user", "content": prompt}
    ]

-    import time
-    max_retries = 3
-
-    for attempt in range(max_retries):
+    # Try Hermes first (if enabled), fallback to OpenRouter
+    backends = []
+    if HERMES_ENABLED:
+        backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_MODEL))
+    if OPENROUTER_API_KEY:
+        backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, MODEL))
+    
+    for url, key, model_id in backends:
+        _validate_model(model_id)
+        result = _post_chat_completion(url, key, model_id, messages, max_tokens=300, temperature=0.2, timeout=15)
+        if result is None:
+            continue
+        content = result["content"]
        try:
-            resp = requests.post(
-                OPENROUTER_URL,
-                headers={
-                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": MODEL,
-                    "messages": messages,
-                    "max_tokens": 300,
-                    "temperature": 0.2,
-                },
-                timeout=15,
-            )
-            if resp.status_code == 429:
-                wait = (attempt + 1) * 5
-                if attempt < max_retries - 1:
-                    time.sleep(wait)
-                    continue
-                return {"name": None, "brand": None, "vehicle": None, "category": None}
-            resp.raise_for_status()
-            data = resp.json()
-            content = data["choices"][0]["message"]["content"]
-
            stripped = content.strip()
            if stripped.startswith("```"):
                lines = stripped.split("\n")
                json_str = "\n".join(lines[1:-1])
                parsed = json.loads(json_str)
+                return parsed
            else:
                parsed = json.loads(stripped)
-            return parsed
+                return parsed
        except Exception:
-            if attempt < max_retries - 1:
-                continue
-            return {"name": None, "brand": None, "vehicle": None, "category": None}
+            continue
+    return {"name": None, "brand": None, "vehicle": None, "category": None}


 # ═══════════════════════════════════════════════════════════════════════════
@@ -491,74 +528,71 @@ def chat(user_message, conversation_history=None, inventory_context=None):

    last_error = None

-    # Try each model in the fallback chain on 429 (rate limit)
-    for model_id in FALLBACK_MODELS:
-        _validate_model(model_id)  # Block paid models
-        try:
-            resp = requests.post(
-                OPENROUTER_URL,
-                headers={
-                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": model_id,
-                    "messages": messages,
-                    "max_tokens": 800,
-                    "temperature": 0.3,
-                },
-                timeout=25,
-            )
-            if resp.status_code == 429:
+    # Build backend list: QWEN first (fast, ~1s), then Hermes (specialized, ~30s), then OpenRouter
+    backends = []
+    if QWEN_ENABLED:
+        backends.append((QWEN_CHAT_URL, QWEN_API_KEY, QWEN_MODEL, 35, SYSTEM_PROMPT_SHORT, 4000))
+    if HERMES_ENABLED:
+        backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_MODEL, 45, SYSTEM_PROMPT, 800))
+    if OPENROUTER_API_KEY:
+        for m in FALLBACK_MODELS:
+            backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, m, 25, SYSTEM_PROMPT, 800))
+
+    for url, key, model_id, timeout_sec, sys_prompt, max_tok in backends:
+        _validate_model(model_id)
+        # Use backend-specific system prompt and max_tokens
+        sys_content = sys_prompt
+        if inventory_context:
+            sys_content = sys_prompt + "\n\n" + inventory_context
+        msgs = [{"role": "system", "content": sys_content}]
+        if conversation_history:
+            msgs.extend(conversation_history)
+        msgs.append({"role": "user", "content": user_message})
+        result = _post_chat_completion(url, key, model_id, msgs, max_tokens=max_tok, temperature=0.3, timeout=timeout_sec)
+        if result is None:
+            if url == QWEN_CHAT_URL:
+                print(f"[AI] QWEN failed, trying Hermes fallback...")
+                last_error = "qwen_failed"
+            elif url == HERMES_CHAT_URL:
+                print(f"[AI] Hermes failed, trying OpenRouter fallback...")
+                last_error = "hermes_timeout"
+            else:
                print(f"[AI] Rate limited on {model_id}, trying next model...")
                last_error = "rate_limit"
-                continue
-            if resp.status_code >= 400:
-                print(f"[AI] HTTP {resp.status_code} on {model_id}: {resp.text[:200]}")
-                last_error = f"http_{resp.status_code}"
-                continue
-            data = resp.json()
-            choice = data.get("choices", [{}])[0]
-            content = choice.get("message", {}).get("content", "").strip()
-            finish = choice.get("finish_reason", "")
-
-            if not content:
-                print(f"[AI] Empty response from {model_id} (finish={finish})")
-                last_error = "empty_response"
-                continue
-
-            print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
-
-            # Try to parse JSON response
-            try:
-                stripped = content.strip()
-                if stripped.startswith("```"):
-                    lines = stripped.split("\n")
-                    json_str = "\n".join(lines[1:-1])
-                    parsed = json.loads(json_str)
-                else:
-                    parsed = json.loads(stripped)
-                # Successful JSON response — cache it
-                if cache_key:
-                    _cache_set(cache_key, parsed)
-                return parsed
-            except (json.JSONDecodeError, IndexError):
-                fallback = {"message": content, "search_query": None, "vehicle": None}
-                # Cache the fallback too — the model gave us a real answer,
-                # it just wasn't JSON. Next hit saves the API call.
-                if cache_key:
-                    _cache_set(cache_key, fallback)
-                return fallback
-        except Exception as e:
-            print(f"[AI] Error with {model_id}: {e}")
-            last_error = str(e)
            continue
+        
+        content = result["content"]
+        finish = result["finish_reason"]
+        print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
+
+        # Try to parse JSON response
+        try:
+            stripped = content.strip()
+            if stripped.startswith("```"):
+                lines = stripped.split("\n")
+                json_str = "\n".join(lines[1:-1])
+                parsed = json.loads(json_str)
+            else:
+                parsed = json.loads(stripped)
+            # Successful JSON response — cache it
+            if cache_key:
+                _cache_set(cache_key, parsed)
+            return parsed
+        except (json.JSONDecodeError, IndexError):
+            fallback = {"message": content, "search_query": None, "vehicle": None}
+            # Cache the fallback too — the model gave us a real answer,
+            # it just wasn't JSON. Next hit saves the API call.
+            if cache_key:
+                _cache_set(cache_key, fallback)
+            return fallback

    # All models exhausted — DON'T cache errors, we want retries next time
    if last_error == "rate_limit":
        return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
+    if last_error == "hermes_timeout":
+        return {"message": "El asistente tardó mucho en responder. Intenta de nuevo en un momento.", "search_query": None, "vehicle": None}
    return {
-        "message": f"Error de conexion: {last_error}",
+        "message": "El asistente no está disponible en este momento. Intenta de nuevo en unos segundos.",
        "search_query": None,
        "vehicle": None,
    }
--- a/pos/services/catalog_service.py
+++ b/pos/services/catalog_service.py
@@ -17,7 +17,8 @@ import re
 import redis

 from services.na_models import is_na_model
-from services.translations import translate_part_name, translate_category
+from services.translations import translate_part_name, translate_category, PART_TRANSLATIONS
+from services.nexpart_taxonomy import translate_taxonomy_node

 # Lazy Redis client for catalog caches
 _redis_client = None
@@ -632,6 +633,120 @@ def get_shop_supplies_parts(master_conn, group_slug, subgroup_slug, part_type_sl
    )


+def _normalize_es(text):
+    """Lowercase and strip accents for Spanish text matching."""
+    if not text:
+        return ''
+    text = text.lower()
+    for a, b in [('á', 'a'), ('é', 'e'), ('í', 'i'), ('ó', 'o'), ('ú', 'u'),
+                 ('ü', 'u'), ('ñ', 'n')]:
+        text = text.replace(a, b)
+    return text
+
+
+def _local_name_matches_part_type(name, part_type_slug):
+    """Check if a local inventory item name matches a Nexpart part_type.
+
+    Uses translation layers:
+      1. Direct substring (original slug in name) — legacy
+      2. Full Spanish translation via translate_taxonomy_node
+      3. Sub-phrase translations via PART_TRANSLATIONS
+      4. Word-level matching (handles plurals and partial matches)
+      5. Extra synonym mappings for Mexican aftermarket terminology
+    """
+    if not name or not part_type_slug:
+        return True
+
+    name_norm = _normalize_es(name)
+    slug_lower = part_type_slug.lower()
+
+    # 1. Legacy direct match
+    if slug_lower in name_norm:
+        return True
+
+    candidates = []
+
+    # 2. Full translation of the part_type slug
+    translated = translate_taxonomy_node(part_type_slug)
+    if translated and translated != part_type_slug:
+        candidates.append(_normalize_es(translated))
+
+    # 3. Sub-phrase translation: find the longest PART_TRANSLATIONS key
+    #    that is contained in the part_type_slug.
+    best_key = None
+    best_len = 0
+    for en_key, es_val in PART_TRANSLATIONS.items():
+        if en_key.lower() in slug_lower and len(en_key) > best_len:
+            best_key = en_key
+            best_len = len(en_key)
+    if best_key:
+        candidates.append(_normalize_es(PART_TRANSLATIONS[best_key]))
+
+    # 4. Word-level matching: any significant word (4+ chars) from the
+    #    candidate translations must appear in the local name.
+    #    Also strip trailing 's' to handle plurals (balatas -> balata).
+    for cand in candidates:
+        if cand in name_norm:
+            return True
+        words = [w for w in cand.split() if len(w) >= 4]
+        for w in words:
+            if w in name_norm:
+                return True
+            # plural fallback
+            if w.endswith('s') and w[:-1] in name_norm:
+                return True
+            if w.endswith('es') and w[:-2] in name_norm:
+                return True
+
+    # 5. Extra synonyms for common Mexican aftermarket terms
+    #    Map English sub-phrases to additional Spanish keywords.
+    EXTRA_SYNONYMS = {
+        'brake pad': ['balata', 'pastilla'],
+        'brake shoe': ['zapata', 'balata'],
+        'brake disc': ['disco', 'rotor'],
+        'brake rotor': ['disco', 'rotor'],
+        'shock absorber': ['amortiguador', 'amortiguadores'],
+        'strut': ['amortiguador', 'torre', 'estrut'],
+        'spark plug': ['bujia', 'bujía', 'bujias'],
+        'air filter': ['filtro de aire', 'filtro aire'],
+        'oil filter': ['filtro de aceite', 'filtro aceite'],
+        'fuel filter': ['filtro de gasolina', 'filtro gasolina'],
+        'cabin filter': ['filtro de cabina', 'filtro cabina', 'filtro de polen'],
+        'timing belt': ['banda de tiempo', 'banda distribucion', 'correa de distribucion'],
+        'drive belt': ['banda de accesorios', 'banda alternador'],
+        'water pump': ['bomba de agua'],
+        'alternator': ['alternador'],
+        'starter': ['marcha', 'motor de arranque'],
+        'radiator': ['radiador'],
+        'thermostat': ['termostato'],
+        'wheel bearing': ['balero', 'rodamiento'],
+        'hub assembly': ['maza', 'cubo'],
+        'control arm': ['horquilla', 'brazo'],
+        'tie rod': ['terminal', 'rotula'],
+        'ball joint': ['rotula', 'rotula'],
+        'clutch kit': ['kit de clutch', 'kit de embrague'],
+        'clutch disc': ['disco de clutch', 'disco de embrague'],
+        'axle': ['flecha', 'punta de eje', 'homocinetica'],
+        'cv joint': ['homocinetica', 'punta de eje'],
+        'oxygen sensor': ['sensor de oxigeno', 'sensor o2'],
+        'ignition coil': ['bobina', 'bobina de encendido'],
+        'wiper': ['pluma', 'limpiaparabrisas', 'escobilla'],
+        'headlight': ['faro', 'faro delantero'],
+        'taillight': ['calavera', 'faro trasero'],
+        'turn signal': ['direccional', 'cuarto'],
+        'fog light': ['faro de niebla'],
+        'battery': ['bateria', 'acumulador'],
+        'horn': ['claxon', 'bocina'],
+    }
+    for en_phrase, es_keywords in EXTRA_SYNONYMS.items():
+        if en_phrase in slug_lower:
+            for kw in es_keywords:
+                if _normalize_es(kw) in name_norm:
+                    return True
+
+    return False
+
+
 def get_parts_for_nexpart_triple(master_conn, mye_id, group_slug, subgroup_slug,
                                   part_type_slug, tenant_conn, branch_id,
                                   page=1, per_page=30):
@@ -659,13 +774,14 @@ def get_parts_for_nexpart_triple(master_conn, mye_id, group_slug, subgroup_slug,
    )
    # Inject local inventory items linked to this vehicle
    # (get_parts_local with oem_part_ids skips mye_id, so we call it separately)
+    local_injected = 0
    if tenant_conn and mye_id:
        from services.inventory_vehicle_compat import get_inventory_by_vehicle
        local_rows = get_inventory_by_vehicle(tenant_conn, master_conn, mye_id, branch_id)
        for lr in local_rows:
            inv_id, pn, name, brand, p1, p2, p3, img, desc, stock = lr
            # Only include if name roughly matches the Nexpart part_type
-            if part_type_slug and part_type_slug.lower() not in (name or '').lower():
+            if part_type_slug and not _local_name_matches_part_type(name, part_type_slug):
                continue
            result['data'].append({
                'id_part': f'inv:{inv_id}',
@@ -686,6 +802,13 @@ def get_parts_for_nexpart_triple(master_conn, mye_id, group_slug, subgroup_slug,
                'price_usd': None,
                'source': 'local_inventory',
            })
+            local_injected += 1
+    # Update pagination total to include injected local items
+    if local_injected:
+        result['pagination']['total'] = result['pagination'].get('total', 0) + local_injected
+        result['pagination']['total_pages'] = (
+            (result['pagination']['total'] + per_page - 1) // per_page
+        )
    return result


@@ -1299,13 +1422,14 @@ def _search_meili_fallback(master_conn, q, limit):
        return None


-def smart_search(master_conn, q, tenant_conn, branch_id, limit=50):
+def smart_search(master_conn, q, tenant_conn, branch_id, limit=50, mye_id=None):
    """Search parts by part number or text. Enriches with local stock.

    Strategy:
    1. Try Meilisearch first (sub-100ms full-text + typo tolerance)
    2. Fallback to PostgreSQL tsvector / ILIKE if Meilisearch is down
-    3. Always enriches results with local stock from tenant DB
+    3. Search local inventory items by part_number or name
+    4. Always enriches results with local stock from tenant DB
    """
    q = q.strip()
    if not q or len(q) < 2:
@@ -1349,10 +1473,6 @@ def smart_search(master_conn, q, tenant_conn, branch_id, limit=50):
            """, (tsquery, f'%{q}%', f'%{q}%', tsquery, limit))
        rows = cur.fetchall()

-    if not rows:
-        cur.close()
-        return []
-
    part_ids = [r[0] for r in rows]
    oem_numbers = [r[1] for r in rows]

@@ -1390,6 +1510,7 @@ def smart_search(master_conn, q, tenant_conn, branch_id, limit=50):
    local_map = _get_local_stock_bulk(tenant_conn, branch_id, oem_numbers, part_ids)

    results = []
+    seen_local_ids = set()
    for r in rows:
        part_id = r[0]
        oem = r[1]
@@ -1403,10 +1524,133 @@ def smart_search(master_conn, q, tenant_conn, branch_id, limit=50):
            'local_price': local['price_1'] if local else None,
            'vehicle_info': vehicle_info_map.get(part_id, ''),
        })
+        # Track which local inventory items are already shown via OEM link
+        if local:
+            seen_local_ids.add(local.get('inventory_id'))
+
+    # ── Inject local inventory items that match the query directly ──────────
+    if tenant_conn:
+        local_items = _search_local_inventory(tenant_conn, q, mye_id, branch_id, limit)
+        for li in local_items:
+            if li['inventory_id'] in seen_local_ids:
+                continue
+            results.append({
+                'id_part': f"inv:{li['inventory_id']}",
+                'oem_part_number': li['part_number'],
+                'name': li['name'],
+                'image_url': li['image_url'],
+                'local_stock': li['stock'],
+                'local_price': li['price_1'],
+                'vehicle_info': '',
+                'source': 'local_inventory',
+            })
+            if len(results) >= limit:
+                break

    return results


+def _search_local_inventory(tenant_conn, q, mye_id, branch_id, limit):
+    """Search tenant inventory items by part_number or name.
+
+    If mye_id is provided, only returns items compatible with that vehicle.
+    """
+    if tenant_conn is None:
+        return []
+    cur = tenant_conn.cursor()
+    clean_q = q.replace(' ', '').upper()
+
+    # Helper to strip accents in SQL for case-insensitive matching
+    _SQL_UNACCENT = """
+        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
+        REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
+        UPPER(i.name)
+        , 'Á', 'A'), 'É', 'E'), 'Í', 'I'), 'Ó', 'O'), 'Ú', 'U')
+        , 'À', 'A'), 'È', 'E'), 'Ì', 'I'), 'Ò', 'O'), 'Ù', 'U')
+    """
+    _q_unaccent = q.upper()
+    for a, b in [('Á', 'A'), ('É', 'E'), ('Í', 'I'), ('Ó', 'O'), ('Ú', 'U'),
+                 ('À', 'A'), ('È', 'E'), ('Ì', 'I'), ('Ò', 'O'), ('Ù', 'U'),
+                 ('Ä', 'A'), ('Ë', 'E'), ('Ï', 'I'), ('Ö', 'O'), ('Ü', 'U'),
+                 ('Ñ', 'N')]:
+        _q_unaccent = _q_unaccent.replace(a, b)
+
+    if mye_id:
+        # Search only items linked to the given vehicle
+        if branch_id:
+            cur.execute(f"""
+                SELECT i.id, i.part_number, i.name, i.image_url,
+                       i.price_1, COALESCE(s.stock, 0) as stock
+                FROM inventory i
+                JOIN inventory_vehicle_compat ivc ON ivc.inventory_id = i.id
+                LEFT JOIN inventory_stock_summary s
+                    ON s.inventory_id = i.id AND s.branch_id = %s
+                WHERE ivc.model_year_engine_id = %s
+                  AND i.is_active = true
+                  AND (REPLACE(UPPER(i.part_number), ' ', '') LIKE %s
+                       OR {_SQL_UNACCENT} LIKE %s)
+                ORDER BY i.name
+                LIMIT %s
+            """, (branch_id, mye_id, f'%{clean_q}%', f'%{_q_unaccent}%', limit))
+        else:
+            cur.execute(f"""
+                SELECT i.id, i.part_number, i.name, i.image_url,
+                       i.price_1, COALESCE(SUM(s.stock), 0) as stock
+                FROM inventory i
+                JOIN inventory_vehicle_compat ivc ON ivc.inventory_id = i.id
+                LEFT JOIN inventory_stock_summary s ON s.inventory_id = i.id
+                WHERE ivc.model_year_engine_id = %s
+                  AND i.is_active = true
+                  AND (REPLACE(UPPER(i.part_number), ' ', '') LIKE %s
+                       OR {_SQL_UNACCENT} LIKE %s)
+                GROUP BY i.id, i.part_number, i.name, i.image_url, i.price_1
+                ORDER BY i.name
+                LIMIT %s
+            """, (mye_id, f'%{clean_q}%', f'%{_q_unaccent}%', limit))
+    else:
+        # Search all active inventory items
+        if branch_id:
+            cur.execute(f"""
+                SELECT i.id, i.part_number, i.name, i.image_url,
+                       i.price_1, COALESCE(s.stock, 0) as stock
+                FROM inventory i
+                LEFT JOIN inventory_stock_summary s
+                    ON s.inventory_id = i.id AND s.branch_id = %s
+                WHERE i.is_active = true
+                  AND (REPLACE(UPPER(i.part_number), ' ', '') LIKE %s
+                       OR {_SQL_UNACCENT} LIKE %s)
+                ORDER BY i.name
+                LIMIT %s
+            """, (branch_id, f'%{clean_q}%', f'%{_q_unaccent}%', limit))
+        else:
+            cur.execute(f"""
+                SELECT i.id, i.part_number, i.name, i.image_url,
+                       i.price_1, COALESCE(SUM(s.stock), 0) as stock
+                FROM inventory i
+                LEFT JOIN inventory_stock_summary s ON s.inventory_id = i.id
+                WHERE i.is_active = true
+                  AND (REPLACE(UPPER(i.part_number), ' ', '') LIKE %s
+                       OR {_SQL_UNACCENT} LIKE %s)
+                GROUP BY i.id, i.part_number, i.name, i.image_url, i.price_1
+                ORDER BY i.name
+                LIMIT %s
+            """, (f'%{clean_q}%', f'%{_q_unaccent}%', limit))
+
+    rows = cur.fetchall()
+    cur.close()
+    return [
+        {
+            'inventory_id': r[0],
+            'part_number': r[1],
+            'name': r[2],
+            'image_url': r[3],
+            'price_1': float(r[4]) if r[4] is not None else None,
+            'stock': int(r[5]) if r[5] is not None else 0,
+        }
+        for r in rows
+    ]
+
+
 # ─────────────────────────────────────────────────────────────────────────────
 # LOCAL STOCK HELPERS
 # ─────────────────────────────────────────────────────────────────────────────
--- a/pos/services/inventory_engine.py
+++ b/pos/services/inventory_engine.py
@@ -96,12 +96,13 @@ def get_stock_bulk(conn, branch_id=None):


 def record_operation(conn, inventory_id, branch_id, operation_type, quantity,
-                     reference_id=None, reference_type=None, cost_at_time=None, notes=None):
+                     reference_id=None, reference_type=None, cost_at_time=None,
+                     notes=None, employee_id=None):
    """Record a single inventory operation. Does NOT commit — caller controls transaction.

    Args:
        quantity: positive for entries (PURCHASE, RETURN, INITIAL), negative for exits (SALE)
-        operation_type: SALE, PURCHASE, RETURN, ADJUST, TRANSFER, INITIAL
+        operation_type: SALE, PURCHASE, RETURN, ADJUST, TRANSFER, INITIAL, QUOTE_RESERVE, QUOTE_RELEASE
    """
    cur = conn.cursor()
    cur.execute("""
@@ -113,7 +114,7 @@ def record_operation(conn, inventory_id, branch_id, operation_type, quantity,
    """, (
        inventory_id, branch_id, operation_type, quantity,
        reference_id, reference_type, cost_at_time,
-        _safe_g('employee_id'),
+        employee_id if employee_id is not None else _safe_g('employee_id'),
        _safe_g('device_id'),
        notes
    ))
--- a/pos/services/inventory_vehicle_compat.py
+++ b/pos/services/inventory_vehicle_compat.py
@@ -403,3 +403,56 @@ def save_qwen_fitment(tenant_conn, inventory_id, fitment_result):
    tenant_conn.commit()
    cur.close()
    return inserted
+
+
+def get_inventory_by_vehicle(tenant_conn, master_conn, mye_id, branch_id=None):
+    """Return active local inventory items compatible with a given vehicle (MYE).
+
+    Items are selected through inventory_vehicle_compat rows whose
+    model_year_engine_id equals ``mye_id``. Inactive items are excluded and
+    the result is ordered by item name.
+
+    Args:
+        tenant_conn: Connection to tenant DB; every query here runs on it.
+        master_conn: Connection to master DB. Not used by this function
+            (kept for API consistency).
+        mye_id: model_year_engine_id.
+        branch_id: Optional branch filter for stock. Any falsy value
+            (None, 0) selects the total across all branches instead.
+
+    Returns:
+        List of rows as returned by ``cursor.fetchall()``, each holding
+        (id, part_number, name, brand, price_1, price_2, price_3,
+         image_url, description, stock). ``stock`` is 0 when no stock
+        summary row matches.
+    """
+    cur = tenant_conn.cursor()
+
+    if branch_id:
+        # Stock for the requested branch only. The branch filter sits in the
+        # LEFT JOIN condition, so items without a summary row for that branch
+        # are still returned, with stock 0.
+        cur.execute("""
+            SELECT i.id, i.part_number, i.name, i.brand,
+                   i.price_1, i.price_2, i.price_3,
+                   i.image_url, i.description,
+                   COALESCE(s.stock, 0) as stock
+            FROM inventory i
+            JOIN inventory_vehicle_compat ivc ON ivc.inventory_id = i.id
+            LEFT JOIN inventory_stock_summary s
+                ON s.inventory_id = i.id AND s.branch_id = %s
+            WHERE ivc.model_year_engine_id = %s
+              AND i.is_active = true
+            ORDER BY i.name
+        """, (branch_id, mye_id))
+    else:
+        # Total stock summed across all branches; grouping by every selected
+        # item column yields one row per inventory item.
+        cur.execute("""
+            SELECT i.id, i.part_number, i.name, i.brand,
+                   i.price_1, i.price_2, i.price_3,
+                   i.image_url, i.description,
+                   COALESCE(SUM(s.stock), 0) as stock
+            FROM inventory i
+            JOIN inventory_vehicle_compat ivc ON ivc.inventory_id = i.id
+            LEFT JOIN inventory_stock_summary s ON s.inventory_id = i.id
+            WHERE ivc.model_year_engine_id = %s
+              AND i.is_active = true
+            GROUP BY i.id, i.part_number, i.name, i.brand,
+                     i.price_1, i.price_2, i.price_3,
+                     i.image_url, i.description
+            ORDER BY i.name
+        """, (mye_id,))
+
+    rows = cur.fetchall()
+    cur.close()
+    return rows
--- a/pos/services/quote_reservation.py
+++ b/pos/services/quote_reservation.py
@@ -0,0 +1,123 @@
+"""Quotation stock reservation engine.
+
+Uses inventory_operations with operation types:
+  QUOTE_RESERVE  — negative quantity, reserves stock when quote is created
+  QUOTE_RELEASE  — positive quantity, restores stock when quote is cancelled/expired
+
+Converting a quotation into a sale writes no dedicated marker operation in
+this module: convert_quotation_reservation() records a QUOTE_RELEASE
+followed by a regular SALE operation.
+
+The trigger update_stock_summary() recalculates inventory_stock_summary
+by summing ALL operations, so reservations automatically affect visible stock.
+"""
+from services.inventory_engine import record_operation
+
+
+def reserve_for_quotation(conn, quotation_id, items, employee_id=None):
+    """Reserve stock for each item in a new quotation.
+
+    Records one QUOTE_RESERVE operation with a negative quantity per valid
+    item. Items without an inventory_id, or whose quantity is missing, None,
+    zero or negative, are skipped.
+
+    Args:
+        conn: tenant DB connection (not committed by this function).
+        quotation_id: the quotations.id.
+        items: list of dicts with inventory_id, quantity, branch_id (optional).
+        employee_id: optional, passed explicitly when g.employee_id is
+            unavailable; forwarded to record_operation.
+    Returns:
+        list with the value record_operation returned for each recorded
+        item (operation IDs).
+    """
+    op_ids = []
+    for item in items:
+        inv_id = item.get('inventory_id')
+        # `or 0` also covers an explicit None, which cannot be compared with 0.
+        qty = item.get('quantity') or 0
+        branch_id = item.get('branch_id')
+        if not inv_id or qty <= 0:
+            continue
+        op_id = record_operation(
+            conn, inv_id, branch_id, 'QUOTE_RESERVE',
+            quantity=-qty,
+            reference_id=quotation_id,
+            reference_type='quotation',
+            notes=f'Reserva cotizacion #{quotation_id}',
+            # Forward the caller's employee so the operation is attributed
+            # even when no request context provides one.
+            employee_id=employee_id,
+        )
+        op_ids.append(op_id)
+    return op_ids
+
+
+def release_quotation_reservation(conn, quotation_id, items, employee_id=None):
+    """Release previously reserved stock (cancel, expire, or convert).
+
+    Records one QUOTE_RELEASE operation with a positive quantity per valid
+    item. Items without an inventory_id, or whose quantity is missing, None,
+    zero or negative, are skipped.
+
+    Args:
+        conn: tenant DB connection (not committed by this function).
+        quotation_id: the quotations.id.
+        items: list of dicts with inventory_id, quantity, branch_id.
+        employee_id: optional; forwarded to record_operation.
+    Returns:
+        list with the value record_operation returned for each recorded
+        item (operation IDs).
+    """
+    op_ids = []
+    for item in items:
+        inv_id = item.get('inventory_id')
+        # `or 0` also covers an explicit None, which cannot be compared with 0.
+        qty = item.get('quantity') or 0
+        branch_id = item.get('branch_id')
+        if not inv_id or qty <= 0:
+            continue
+        op_id = record_operation(
+            conn, inv_id, branch_id, 'QUOTE_RELEASE',
+            quantity=qty,
+            reference_id=quotation_id,
+            reference_type='quotation',
+            notes=f'Liberacion cotizacion #{quotation_id}',
+            # Forward the caller's employee so the operation is attributed
+            # even when no request context provides one.
+            employee_id=employee_id,
+        )
+        op_ids.append(op_id)
+    return op_ids
+
+
+def convert_quotation_reservation(conn, quotation_id, items, sale_id=None, employee_id=None):
+    """Convert reservation to actual sale.
+
+    Flow:
+      1. Release the reservation (QUOTE_RELEASE +qty)
+      2. Record the actual sale (SALE -qty)
+
+    Items without an inventory_id, or whose quantity is missing, None, zero
+    or negative, are skipped in both steps.
+
+    Args:
+        conn: tenant DB connection (not committed by this function).
+        quotation_id: the quotations.id.
+        items: list of dicts with inventory_id, quantity, branch_id.
+        sale_id: the resulting sales.id (for reference). When falsy, the SALE
+            operations reference the quotation instead.
+        employee_id: optional; forwarded to every recorded operation.
+    Returns:
+        list of operation IDs: the release operations first, then the sales.
+    """
+    op_ids = release_quotation_reservation(conn, quotation_id, items, employee_id)
+    for item in items:
+        inv_id = item.get('inventory_id')
+        # `or 0` also covers an explicit None, which cannot be compared with 0.
+        qty = item.get('quantity') or 0
+        branch_id = item.get('branch_id')
+        if not inv_id or qty <= 0:
+            continue
+        op_id = record_operation(
+            conn, inv_id, branch_id, 'SALE',
+            quantity=-qty,
+            reference_id=sale_id or quotation_id,
+            reference_type='sale' if sale_id else 'quotation',
+            notes=f'Venta convertida de cotizacion #{quotation_id}',
+            # Attribute the sale to the same employee as the release above.
+            employee_id=employee_id,
+        )
+        op_ids.append(op_id)
+    return op_ids
+
+
+def get_quotation_items_for_reservation(conn, quotation_id):
+    """Load the lines of a quotation in the shape the reservation helpers take.
+
+    Each quotation_items row is joined with its inventory record so the
+    item's branch_id comes along.
+
+    Returns list of dicts: {inventory_id, quantity, branch_id}
+    """
+    cursor = conn.cursor()
+    cursor.execute("""
+        SELECT qi.inventory_id, qi.quantity, i.branch_id
+        FROM quotation_items qi
+        JOIN inventory i ON i.id = qi.inventory_id
+        WHERE qi.quotation_id = %s
+    """, (quotation_id,))
+    fetched = cursor.fetchall()
+    cursor.close()
+
+    # Columns arrive in SELECT order: inventory_id, quantity, branch_id.
+    reservation_items = []
+    for record in fetched:
+        reservation_items.append({
+            'inventory_id': record[0],
+            'quantity': record[1],
+            'branch_id': record[2],
+        })
+    return reservation_items
--- a/pos/services/qwen_fitment.py
+++ b/pos/services/qwen_fitment.py
@@ -38,11 +38,11 @@ def get_vehicle_fitment(part_number, name, brand):
                json={
                    'model': QWEN_MODEL,
                    'messages': [
-                        {'role': 'system', 'content': 'Eres un experto en autopartes mexicanas. Devuelve SIEMPRE JSON valido sin markdown.'},
+                        {'role': 'system', 'content': 'Eres un experto en autopartes mexicanas y del mercado aftermarket norteamericano. Devuelve SIEMPRE JSON valido sin markdown.'},
                        {'role': 'user', 'content': prompt}
                    ],
                    'temperature': 0.2,
-                    'max_tokens': 2048,
+                    'max_tokens': 4096,
                },
                timeout=45,
            )
@@ -86,29 +86,37 @@ def get_vehicle_fitment(part_number, name, brand):

 def _build_prompt(part_number, name, brand):
+    """Build the Spanish-language QWEN prompt asking which vehicles fit a part.
+
+    part_number, name and brand are interpolated into the text as given;
+    a falsy brand is rendered as 'desconocida'. The prompt requests a single
+    JSON object with "vehicles", "confidence" and "notes".
+    """
    brand_str = brand or 'desconocida'
-    return f"""Dado el siguiente repuesto automotriz:
+    return f"""Dado el siguiente repuesto automotriz para el mercado mexicano y aftermarket norteamericano:
 - Numero de parte: {part_number}
- Nombre: {name}
- Marca del vehiculo: {brand_str}
+- Nombre/descripcion: {name}
+- Marca del fabricante: {brand_str}

-Devuelve UNICAMENTE un JSON valido (sin markdown, sin backticks) con esta estructura exacta:
+Devuelve UNICAMENTE un JSON valido (sin markdown, sin backticks, sin texto adicional) con esta estructura exacta:
 {{
  "vehicles": [
-    {{"make": "Toyota", "model": "Corolla", "year": 2015, "engine": "1.8L 16V"}},
-    {{"make": "Toyota", "model": "Matrix", "year": 2014, "engine": "1.8L"}}
+    {{
+      "make": "Toyota",
+      "model": "Corolla",
+      "year": 2015,
+      "engine": "1.8L 16V",
+      "engine_code": "2ZR-FE",
+      "notes": "Sedan y hatchback"
+    }}
  ],
  "confidence": 0.92,
-  "notes": "Compatible con motor 2ZR-FE"
+  "notes": "Compatible con plataforma E170. Verificar traccion delantera."
 }}

-Reglas:
-1. "make" es la marca del vehiculo (ej: Toyota, Nissan, Ford, Volkswagen).
-2. "model" es el modelo exacto.
-3. "year" es el ano numerico (int). Si hay rango de anos, usa el ano inicial.
-4. "engine" es la descripcion del motor (ej: "1.8L", "2.0L TDI", "V6 3.5L").
-5. Devuelve TODOS los vehiculos compatibles que conozcas. Minimo 1, maximo 30.
-6. Si no conoces el motor exacto, usa "desconocido".
-7. confidence entre 0.0 y 1.0.
+Reglas obligatorias:
+1. "make" = marca del vehiculo (ej: Toyota, Nissan, Ford, Volkswagen, Chevrolet, Honda, Hyundai, Kia, Mazda, Subaru).
+2. "model" = modelo exacto. Si hay variantes (ej: Civic Sedan vs Civic Coupe), incluye la variante.
+3. "year" = ano numerico (int). Si hay rango de anos (ej: 2003-2008), genera una entrada POR CADA ANO del rango. NO uses rangos.
+4. "engine" = descripcion del motor (ej: "1.8L", "2.0L TDI", "V6 3.5L", "1.6L Turbo"). Si no conoces el motor, usa "desconocido".
+5. "engine_code" = codigo exacto del motor SI LO CONOCES (ej: "2ZR-FE", "K24Z7", "EA888"). Si no lo conoces, usa "" (string vacio).
+6. Devuelve TODOS los vehiculos compatibles que conozcas. Minimo 1, maximo 100. Para piezas genericas (bujias, filtros, balatas, amortiguadores) incluye TODOS los modelos aplicables.
+7. "confidence" entre 0.0 y 1.0. Usa valores altos (>0.85) solo si estas muy seguro.
+8. Incluye marcas y modelos populares en Mexico (Nissan Tsuru, VW Sedan/Vocho, Chevy Monza, Ford Ka, etc.) cuando apliquen.
+9. Si la pieza es universal o de alta compatibilidad, indicalo en "notes".
 """


@@ -150,6 +158,7 @@ def _normalize_vehicle(v):
    model = v.get('model') or v.get('modelo') or ''
    year_raw = v.get('year') or v.get('ano') or v.get('año') or v.get('years') or v.get('anos') or ''
    engine = v.get('engine') or v.get('motor') or ''
+    engine_code = v.get('engine_code') or v.get('codigo_motor') or v.get('motor_code') or ''

    # Parse year (may be int, string, or range like "2003-2008")
    years = []
@@ -167,11 +176,31 @@ def _normalize_vehicle(v):
            if m2:
                years = [int(m2.group(1))]

-    return make, model, years, engine
+    return make, model, years, engine, engine_code
+
+
+def _extract_displacement(engine):
+    """Extract numeric displacement (L) from engine string, e.g. '1.8L 16V' -> 1.8.
+
+    Args:
+        engine: engine description as produced by the model. Non-string
+            values (for example a bare JSON number) are treated as unknown
+            instead of raising.
+    Returns:
+        The displacement as a float, or None when the value is empty,
+        'desconocido', not a string, or carries no number followed by an
+        'L'/'l' (a bare '1.6' without the unit does not match).
+    """
+    if not isinstance(engine, str):
+        return None
+    if engine.strip().lower() in ('', 'desconocido'):
+        return None
+    # First number (optional decimal part) followed by optional spaces and L/l.
+    match = re.search(r'(\d+\.?\d*)\s*[Ll]', engine)
+    # The group only admits digits and one dot, so float() cannot fail here.
+    return float(match.group(1)) if match else None


 def _validate_vehicles(vehicles):
-    """Look up each vehicle in master DB and enrich with mye_id."""
+    """Look up each vehicle in master DB and enrich with mye_id.
+
+    Validation strategy (tried in order; the first one that returns rows wins):
+    1. engine_code match (most precise)
+    2. Displacement-based match (e.g. all 1.8L engines for that make/model/year)
+    3. Engine description match against the engine name (legacy)
+    4. Broad make/model/year match (all engines for that make/model/year)
+    """
    from tenant_db import get_master_conn
    try:
        master = get_master_conn()
@@ -183,30 +212,66 @@ def _validate_vehicles(vehicles):
    seen_mye = set()

    for v in vehicles:
-        make, model, years, engine = _normalize_vehicle(v)
+        make, model, years, engine, engine_code = _normalize_vehicle(v)
        if not make or not model or not years:
            continue

        for year in years:
-            # First try with exact engine match; if no result, fall back to
-            # make/model/year only.  Engine descriptions rarely line up between
-            # QWEN and the master DB, so the fallback is the common path.
-            cur.execute("""
-                SELECT mye.id_mye
-                FROM model_year_engine mye
-                JOIN models m ON mye.model_id = m.id_model
-                JOIN brands b ON m.brand_id = b.id_brand
-                JOIN years y ON mye.year_id = y.id_year
-                JOIN engines e ON mye.engine_id = e.id_engine
-                WHERE b.name_brand ILIKE %s
-                  AND m.name_model ILIKE %s
-                  AND y.year_car = %s
-                  AND e.name_engine ILIKE %s
-                LIMIT 1
-            """, (make, f'%{model}%', year, engine or '%'))
-            row = cur.fetchone()
+            matched_myes = []

-            if not row:
+            # Strategy 1: engine_code match (most precise)
+            if engine_code:
+                cur.execute("""
+                    SELECT mye.id_mye
+                    FROM model_year_engine mye
+                    JOIN models m ON mye.model_id = m.id_model
+                    JOIN brands b ON m.brand_id = b.id_brand
+                    JOIN years y ON mye.year_id = y.id_year
+                    JOIN engines e ON mye.engine_id = e.id_engine
+                    WHERE b.name_brand ILIKE %s
+                      AND m.name_model ILIKE %s
+                      AND y.year_car = %s
+                      AND e.engine_code ILIKE %s
+                """, (make, f'%{model}%', year, f'%{engine_code}%'))
+                matched_myes = [r[0] for r in cur.fetchall()]
+
+            # Strategy 2: displacement-based match
+            if not matched_myes:
+                disp = _extract_displacement(engine)
+                if disp is not None:
+                    disp_pattern = f'{disp:.1f}L'
+                    cur.execute("""
+                        SELECT mye.id_mye
+                        FROM model_year_engine mye
+                        JOIN models m ON mye.model_id = m.id_model
+                        JOIN brands b ON m.brand_id = b.id_brand
+                        JOIN years y ON mye.year_id = y.id_year
+                        JOIN engines e ON mye.engine_id = e.id_engine
+                        WHERE b.name_brand ILIKE %s
+                          AND m.name_model ILIKE %s
+                          AND y.year_car = %s
+                          AND e.name_engine ILIKE %s
+                    """, (make, f'%{model}%', year, f'%{disp_pattern}%'))
+                    matched_myes = [r[0] for r in cur.fetchall()]
+
+            # Strategy 3: exact engine string match (legacy)
+            if not matched_myes and engine and engine.lower() != 'desconocido':
+                cur.execute("""
+                    SELECT mye.id_mye
+                    FROM model_year_engine mye
+                    JOIN models m ON mye.model_id = m.id_model
+                    JOIN brands b ON m.brand_id = b.id_brand
+                    JOIN years y ON mye.year_id = y.id_year
+                    JOIN engines e ON mye.engine_id = e.id_engine
+                    WHERE b.name_brand ILIKE %s
+                      AND m.name_model ILIKE %s
+                      AND y.year_car = %s
+                      AND e.name_engine ILIKE %s
+                """, (make, f'%{model}%', year, engine))
+                matched_myes = [r[0] for r in cur.fetchall()]
+
+            # Strategy 4: broad make/model/year fallback (all engines)
+            if not matched_myes:
                cur.execute("""
                    SELECT mye.id_mye
                    FROM model_year_engine mye
@@ -216,19 +281,21 @@ def _validate_vehicles(vehicles):
                    WHERE b.name_brand ILIKE %s
                      AND m.name_model ILIKE %s
                      AND y.year_car = %s
-                    LIMIT 1
                """, (make, f'%{model}%', year))
-                row = cur.fetchone()
+                matched_myes = [r[0] for r in cur.fetchall()]

-            if row and row[0] not in seen_mye:
-                seen_mye.add(row[0])
-                validated.append({
-                    'make': make,
-                    'model': model,
-                    'year': year,
-                    'engine': engine,
-                    'mye_id': row[0],
-                })
+            # Deduplicate and add to results
+            for mye_id in matched_myes:
+                if mye_id not in seen_mye:
+                    seen_mye.add(mye_id)
+                    validated.append({
+                        'make': make,
+                        'model': model,
+                        'year': year,
+                        'engine': engine,
+                        'engine_code': engine_code,
+                        'mye_id': mye_id,
+                    })

    cur.close()
    master.close()
--- a/pos/services/wa_quotation.py
+++ b/pos/services/wa_quotation.py
@@ -109,11 +109,30 @@ def confirm_quotation(tenant_conn, phone):
    return qid


-# ─── In-memory last-shown-part per phone ─────────────────────────────
+# ─── Persistent last-shown-part per phone ────────────────────────────
 # Tracks what part the bot last showed so "cotizar" knows what to add.
-# Key: phone (clean, no @lid). Value: dict with inventory item info.
+# Stored in tenant DB table whatsapp_sessions so it survives restarts.

-_last_shown = {}
+# DDL for the session table: one row per phone (primary key) holding the
+# last shown part and the detected vehicle as JSONB.
+_WHATSAPP_SESSIONS_SQL = """
+CREATE TABLE IF NOT EXISTS whatsapp_sessions (
+    phone VARCHAR(50) PRIMARY KEY,
+    last_shown JSONB,
+    vehicle JSONB,
+    updated_at TIMESTAMP DEFAULT NOW()
+);
+"""
+
+
+def _ensure_sessions_table(tenant_conn):
+    """Create whatsapp_sessions (and its vehicle column) if missing, then commit.
+
+    Both statements use IF NOT EXISTS, so the function can be called before
+    every session read or write. It commits on the given connection and
+    leaves the connection open.
+    """
+    cur = tenant_conn.cursor()
+    cur.execute(_WHATSAPP_SESSIONS_SQL)
+    # Migrate: add vehicle column if table already existed without it
+    cur.execute("""
+        ALTER TABLE whatsapp_sessions
+        ADD COLUMN IF NOT EXISTS vehicle JSONB
+    """)
+    tenant_conn.commit()
+    cur.close()


 def set_last_shown_part(phone, part_info):
@@ -122,15 +141,110 @@ def set_last_shown_part(phone, part_info):
    part_info: dict with keys inventory_id, part_number, name, brand,
    price, stock, unit
    """
-    _last_shown[phone] = part_info
+    # Persisted in the tenant DB; there is no in-memory fallback, a failure is only logged
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        _ensure_sessions_table(conn)
+        cur = conn.cursor()
+        import json
+        cur.execute("""
+            INSERT INTO whatsapp_sessions (phone, last_shown, updated_at)
+            VALUES (%s, %s, NOW())
+            ON CONFLICT (phone) DO UPDATE SET last_shown = EXCLUDED.last_shown, updated_at = NOW()
+        """, (phone, json.dumps(part_info)))
+        conn.commit()
+        cur.close()
+        conn.close()
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to persist last_shown for {phone}: {e}")


 def get_last_shown_part(phone):
-    return _last_shown.get(phone)
+    """Return the part last shown to this phone number, or None.
+
+    Reads whatsapp_sessions.last_shown from the tenant DB. A missing row, an
+    empty value, or any error (logged, not raised) all yield None.
+    """
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        try:
+            _ensure_sessions_table(conn)
+            cur = conn.cursor()
+            cur.execute("SELECT last_shown FROM whatsapp_sessions WHERE phone = %s", (phone,))
+            row = cur.fetchone()
+            cur.close()
+        finally:
+            # Close on every path so a failed query does not leak the connection.
+            conn.close()
+        if row and row[0]:
+            return row[0]
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to read last_shown for {phone}: {e}")
+    return None


 def clear_last_shown(phone):
-    _last_shown.pop(phone, None)
+    """Forget the part last shown to this phone number.
+
+    Only the last_shown column is reset; the stored vehicle stays, so the
+    conversation keeps its vehicle context. Use clear_session() to drop the
+    whole row. Errors are logged, not raised.
+    """
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        try:
+            _ensure_sessions_table(conn)
+            cur = conn.cursor()
+            # UPDATE rather than DELETE: the same row also carries the vehicle.
+            cur.execute("""
+                UPDATE whatsapp_sessions
+                SET last_shown = NULL, updated_at = NOW()
+                WHERE phone = %s
+            """, (phone,))
+            conn.commit()
+            cur.close()
+        finally:
+            # Close on every path so a failed statement does not leak the connection.
+            conn.close()
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to clear last_shown for {phone}: {e}")
+
+
+def set_vehicle(phone, vehicle):
+    """Store the detected vehicle for this phone number.
+
+    Upserts the whatsapp_sessions row for the phone, replacing only the
+    vehicle column and refreshing updated_at. Errors are logged, not raised.
+
+    vehicle: dict with keys brand, model, year
+    """
+    import json
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        try:
+            _ensure_sessions_table(conn)
+            cur = conn.cursor()
+            cur.execute("""
+                INSERT INTO whatsapp_sessions (phone, vehicle, updated_at)
+                VALUES (%s, %s, NOW())
+                ON CONFLICT (phone) DO UPDATE SET vehicle = EXCLUDED.vehicle, updated_at = NOW()
+            """, (phone, json.dumps(vehicle)))
+            conn.commit()
+            cur.close()
+        finally:
+            # Close on every path so a failed statement does not leak the connection.
+            conn.close()
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to persist vehicle for {phone}: {e}")
+
+
+def get_vehicle(phone):
+    """Retrieve the stored vehicle for this phone number.
+
+    Returns the value of whatsapp_sessions.vehicle, or None when there is no
+    row, the column is empty, or an error occurred (logged, not raised).
+    """
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        try:
+            _ensure_sessions_table(conn)
+            cur = conn.cursor()
+            cur.execute("SELECT vehicle FROM whatsapp_sessions WHERE phone = %s", (phone,))
+            row = cur.fetchone()
+            cur.close()
+        finally:
+            # Close on every path so a failed query does not leak the connection.
+            conn.close()
+        if row and row[0]:
+            return row[0]
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to read vehicle for {phone}: {e}")
+    return None
+
+
+def clear_session(phone):
+    """Clear all session data (last_shown + vehicle) for this phone.
+
+    Deletes the phone's whatsapp_sessions row. Errors are logged, not raised.
+    """
+    from tenant_db import get_tenant_conn
+    try:
+        conn = get_tenant_conn(11)
+        try:
+            _ensure_sessions_table(conn)
+            cur = conn.cursor()
+            cur.execute("DELETE FROM whatsapp_sessions WHERE phone = %s", (phone,))
+            conn.commit()
+            cur.close()
+        finally:
+            # Close on every path so a failed statement does not leak the connection.
+            conn.close()
+    except Exception as e:
+        print(f"[WA-SESSION] Failed to clear session for {phone}: {e}")


 # ─── Quotation CRUD ─────────────────────────────────────────────────