feat(whatsapp): QWEN primary AI backend, Hermes fallback, conversation history, vehicle persistence, demo prompts

- Add QWEN (qwen3.6) as primary AI backend with short system prompt
- Hermes remains as fallback with 45s timeout
- Increase QWEN timeout to 35s, max_tokens to 4000
- Add conversation history loading from whatsapp_messages (helper defaults to the last 4 msgs; the webhook currently requests the last 2)
- Persist detected vehicle in whatsapp_sessions table
- Add 'limpiar chat' / 'nuevo chat' / 'reset' commands to clear history
- Fix CSS conflict: rename whatsapp chat-panel classes to wa-chat-panel
- Fix JS ID conflicts with chat.js widget (waChatPanel, waChatMessages, etc.)
- Improve no-stock response: conversational with alternatives
- Split search_query on '|' for multi-part lookups
- Add DEMO_PROMPTS.md and DEMO_PROMPTS_V2.md
2026-05-06 20:27:14 +00:00
parent 371d72887e
commit ff45905b49
33 changed files with 3040 additions and 445 deletions
--- a/pos/blueprints/whatsapp_bp.py
+++ b/pos/blueprints/whatsapp_bp.py
@@ -13,15 +13,92 @@ Endpoints:

 from flask import Blueprint, request, jsonify, g
 from middleware import require_auth
-from tenant_db import get_tenant_conn
+from tenant_db import get_tenant_conn, get_master_conn
 from services import whatsapp_service

 whatsapp_bp = Blueprint('whatsapp', __name__, url_prefix='/pos/api/whatsapp')


-def _enrich_wa_reply_with_part(search_query, vehicle, tenant_conn):
+def _resolve_mye_ids(vehicle, master_conn):
+    """Return list of MYE ids matching vehicle brand/model/year text."""
+    if not master_conn or not vehicle:
+        return []
+    brand = vehicle.get('brand', '').strip()
+    model = vehicle.get('model', '').strip()
+    year = str(vehicle.get('year', '')).strip()
+    if not brand and not model:
+        return []
+    cur = master_conn.cursor()
+    clauses = []
+    params = []
+    if brand:
+        clauses.append("b.name_brand ILIKE %s")
+        params.append(f'%{brand}%')
+    if model:
+        clauses.append("m.name_model ILIKE %s")
+        params.append(f'%{model}%')
+    if year and year.isdigit():
+        clauses.append("y.year_car = %s")
+        params.append(int(year))
+    if not clauses:
+        cur.close()
+        return []
+    cur.execute(f"""
+        SELECT mye.id_mye
+        FROM model_year_engine mye
+        JOIN models m ON m.id_model = mye.model_id
+        JOIN brands b ON b.id_brand = m.brand_id
+        JOIN years y ON y.id_year = mye.year_id
+        WHERE {' AND '.join(clauses)}
+        LIMIT 50
+    """, tuple(params))
+    rows = cur.fetchall()
+    cur.close()
+    return [r[0] for r in rows]
+
+
+def _get_conversation_history(phone, tenant_conn, limit=4):
+    """Fetch recent messages for *phone* to give the AI conversation context.
+
+    Includes both user and assistant messages, truncated to keep token count low.
+    The most recent message (the one currently being processed) is excluded.
+    """
+    if not tenant_conn or not phone:
+        return []
+    try:
+        cur = tenant_conn.cursor()
+        cur.execute("""
+            SELECT direction, message_text
+            FROM whatsapp_messages
+            WHERE phone = %s
+            ORDER BY created_at DESC
+            LIMIT %s OFFSET 1
+        """, (phone, limit))
+        rows = cur.fetchall()
+        cur.close()
+        # Reverse so oldest-first (chronological) for the LLM
+        history = []
+        for direction, text in reversed(rows):
+            if not text:
+                continue
+            role = "assistant" if direction == "outgoing" else "user"
+            # Truncate assistant replies more aggressively (they contain JSON/tables)
+            max_len = 200 if role == "assistant" else 300
+            truncated = text[:max_len] + ('...' if len(text) > max_len else '')
+            history.append({"role": role, "content": truncated})
+        return history
+    except Exception as e:
+        print(f"[WA-AI] Failed to load conversation history: {e}")
+        return []
+
+
+def _enrich_wa_reply_with_part(search_query, vehicle, tenant_conn, master_conn=None):
    """Search the refaccionaria's LOCAL inventory and build a WhatsApp reply.

+    If *vehicle* is provided and we have a master_conn, we first look up the
+    MYE ids for that vehicle and JOIN through inventory_vehicle_compat so we
+    only show parts that are known to fit the user's car.
+
    Returns:
        (formatted_text, first_part_dict) — first_part_dict is used by the
        quotation system to know what to add when the user says "cotizar".
@@ -31,101 +108,143 @@ def _enrich_wa_reply_with_part(search_query, vehicle, tenant_conn):
        return None, None

    try:
-        # Translate common English search terms to Spanish for local inventory
-        # (the AI sends search_query in English, but local inventory names
-        # are often in Spanish)
        from services.translations import PART_TRANSLATIONS
-        search_terms = [search_query]
-        # Add the Spanish translation if we have one
-        for en, es in PART_TRANSLATIONS.items():
-            if en.upper() in search_query.upper():
-                search_terms.append(es)
-                break

-        # Build ILIKE conditions for all search terms
-        conditions = []
-        params = []
-        for term in search_terms:
-            conditions.append("(i.name ILIKE %s OR i.part_number ILIKE %s OR i.brand ILIKE %s)")
-            like = f'%{term}%'
-            params.extend([like, like, like])
+        # Split search_query by '|' into individual terms
+        raw_terms = [t.strip() for t in (search_query or '').split('|') if t.strip()]
+        if not raw_terms:
+            raw_terms = [search_query] if search_query else []

-        where_search = ' OR '.join(conditions)
+        # Translate each term to Spanish if possible
+        search_terms = set()
+        for term in raw_terms:
+            search_terms.add(term)
+            # Check if any English translation matches
+            for en, es in PART_TRANSLATIONS.items():
+                if en.upper() == term.upper():
+                    search_terms.add(es)
+                    break
+                # Also check if the term contains an English word
+                if en.upper() in term.upper():
+                    search_terms.add(term.upper().replace(en.upper(), es))

-        cur = tenant_conn.cursor()
-        cur.execute(f"""
-            SELECT i.part_number, i.name, i.brand, i.price_1, i.price_2, i.price_3,
-                   COALESCE(s.stock, 0) AS stock,
-                   i.unit, i.location
-            FROM inventory i
-            LEFT JOIN (
-                SELECT inventory_id, SUM(quantity) AS stock
-                FROM inventory_operations
-                GROUP BY inventory_id
-            ) s ON s.inventory_id = i.id
-            WHERE i.is_active = TRUE
-              AND ({where_search})
-            ORDER BY
-                COALESCE(s.stock, 0) > 0 DESC,
-                i.name
-            LIMIT 10
-        """, params)
+        search_terms = list(search_terms)
+        if not search_terms:
+            return None, None

-        rows = cur.fetchall()
-        cur.close()
+        # Vehicle-aware filtering
+        mye_ids = _resolve_mye_ids(vehicle, master_conn)

-        if not rows:
-            return ('❌ No tenemos esa parte en inventario actualmente.\n'
-                    '_Puedes preguntar por otra parte o visitarnos en tienda._'), None
+        def _do_search(use_compat=True):
+            """Run inventory search. Returns list of rows."""
+            conditions = []
+            params = []
+            for term in search_terms:
+                conditions.append("(i.name ILIKE %s OR i.part_number ILIKE %s OR i.brand ILIKE %s)")
+                like = f'%{term}%'
+                params.extend([like, like, like])

-        # Split into in-stock and out-of-stock
-        in_stock = [r for r in rows if r[6] > 0]
-        out_stock = [r for r in rows if r[6] <= 0]
+            where_search = ' OR '.join(conditions)
+            compat_clause = ""
+            if use_compat and mye_ids:
+                compat_clause = f"AND i.id IN (SELECT inventory_id FROM inventory_vehicle_compat WHERE model_year_engine_id IN ({','.join(['%s']*len(mye_ids))}))"
+                params.extend(mye_ids)
+
+            cur = tenant_conn.cursor()
+            cur.execute(f"""
+                SELECT i.id, i.part_number, i.name, i.brand, i.price_1, i.price_2, i.price_3,
+                       COALESCE(s.stock, 0) AS stock,
+                       i.unit, i.location
+                FROM inventory i
+                LEFT JOIN inventory_stock_summary s ON s.inventory_id = i.id
+                WHERE i.is_active = TRUE
+                  AND ({where_search})
+                  {compat_clause}
+                ORDER BY
+                    COALESCE(s.stock, 0) > 0 DESC,
+                    i.name
+                LIMIT 10
+            """, params)
+            rows = cur.fetchall()
+            cur.close()
+            return rows
+
+        # 1. Try with vehicle compatibility filter
+        rows = _do_search(use_compat=True)
+        compat_filter_applied = bool(mye_ids)
+
+        # 2. If no results with compatibility, try WITHOUT filter
+        fallback_rows = []
+        if not rows and mye_ids:
+            fallback_rows = _do_search(use_compat=False)
+
+        if not rows and not fallback_rows:
+            # Truly nothing found — return a conversational message that doesn't kill the chat
+            v_str = ""
+            if vehicle and vehicle.get('brand'):
+                v_str = f"{vehicle.get('brand','')} {vehicle.get('model','')} {vehicle.get('year','')}".strip()
+
+            msg_parts = [
+                "🔍 Revisé nuestro inventario y no encontré esas partes en este momento."
+            ]
+            if v_str:
+                msg_parts.append(f"Para tu {v_str}, puedo:")
+            else:
+                msg_parts.append("Te puedo ayudar de estas formas:")
+            msg_parts.extend([
+                "",
+                "• *Pedirlas por encargo* — te doy tiempo y precio estimado",
+                "• *Buscar alternativas* — equivalentes de otra marca que sí tengamos",
+                "• *Sugerir refaccionarias cercanas* — si es urgente",
+                "",
+                "¿Qué prefieres? O dime si quieres buscar otra parte."
+            ])
+            return '\n'.join(msg_parts), None
+
+        # Use fallback rows if primary search returned nothing
+        using_fallback = False
+        if not rows and fallback_rows:
+            rows = fallback_rows
+            using_fallback = True
+
+        in_stock = [r for r in rows if r[7] > 0]
+        out_stock = [r for r in rows if r[7] <= 0]

-        # Build the first-part dict for quotation tracking
-        # Use the first in-stock part, or first out-of-stock if none available
        best = in_stock[0] if in_stock else (out_stock[0] if out_stock else None)
        first_part = None
        if best:
            first_part = {
-                'inventory_id': None,  # we'd need the id — fetch it
-                'part_number': best[0],
-                'name': best[1],
-                'brand': best[2] or '',
-                'price': float(best[3]) if best[3] else 0,
+                'inventory_id': best[0],
+                'part_number': best[1],
+                'name': best[2],
+                'brand': best[3] or '',
+                'price': float(best[4]) if best[4] else 0,
                'tax_rate': 0.16,
-                'stock': best[6],
-                'unit': best[7] or 'PZA',
+                'stock': best[7],
+                'unit': best[8] or 'PZA',
            }
-            # Fetch the inventory ID for the quotation item FK
-            try:
-                cur2 = tenant_conn.cursor()
-                cur2.execute("SELECT id FROM inventory WHERE part_number = %s AND is_active = TRUE LIMIT 1",
-                             (best[0],))
-                inv_row = cur2.fetchone()
-                if inv_row:
-                    first_part['inventory_id'] = inv_row[0]
-                cur2.close()
-            except Exception:
-                pass

        lines = []

+        if using_fallback:
+            lines.append("⚠️ *No encontré partes verificadas para tu vehículo, pero sí tengo estas opciones generales:*")
+            lines.append("")
+
        if in_stock:
            lines.append('✅ *Tenemos en stock:*')
            lines.append('')
            for r in in_stock:
-                part_num, name, brand, p1, p2, p3, stock, unit, location = r
+                inv_id, part_num, name, brand, p1, p2, p3, stock, unit, location = r
                brand_str = f'*{brand}*' if brand else ''
                price_str = f'${float(p1):,.2f}' if p1 else 'Consultar precio'
                lines.append(f'  • {brand_str} {name}')
                lines.append(f'    #{part_num} — {price_str} ({stock} {unit or "pzas"} disponibles)')
                lines.append('')
-        else:
+        elif out_stock:
            lines.append('⚠️ *Tenemos estas opciones pero sin stock actualmente:*')
            lines.append('')
            for r in out_stock[:5]:
-                part_num, name, brand, p1, p2, p3, stock, unit, location = r
+                inv_id, part_num, name, brand, p1, p2, p3, stock, unit, location = r
                brand_str = f'*{brand}*' if brand else ''
                price_str = f'${float(p1):,.2f}' if p1 else ''
                lines.append(f'  • {brand_str} {name} #{part_num} {price_str}')
@@ -143,6 +262,9 @@ def _enrich_wa_reply_with_part(search_query, vehicle, tenant_conn):

    except Exception as e:
        print(f"[WA-AI] Enrichment error: {e}")
+        import traceback
+        traceback.print_exc()
+        return None, None
        return None, None


@@ -194,9 +316,11 @@ def webhook():
    # TODO: resolve tenant from phone number when multi-tenant WhatsApp arrives.
    tenant_id = 11
    tenant_conn = None
+    master_conn = None
    inventory_context = None
    try:
        tenant_conn = get_tenant_conn(tenant_id)
+        master_conn = get_master_conn()

        # 1. Log the incoming message (with contact display name)
        cur = tenant_conn.cursor()
@@ -216,6 +340,22 @@ def webhook():
        except Exception as e:
            print(f"[WA-AI] inventory_context failed: {e}")
            inventory_context = None
+
+        # 2b. Append previously-detected vehicle so the AI keeps context
+        # even when we don't send full conversation history (Hermes is slow with it)
+        try:
+            from services.wa_quotation import get_vehicle
+            saved_vehicle = get_vehicle(clean_phone)
+            if saved_vehicle and inventory_context:
+                v_str = f"{saved_vehicle.get('brand','')} {saved_vehicle.get('model','')} {saved_vehicle.get('year','')}".strip()
+                if v_str:
+                    inventory_context += f"\n\nVEHICULO DEL CLIENTE: {v_str}"
+            elif saved_vehicle:
+                v_str = f"{saved_vehicle.get('brand','')} {saved_vehicle.get('model','')} {saved_vehicle.get('year','')}".strip()
+                if v_str:
+                    inventory_context = f"VEHICULO DEL CLIENTE: {v_str}"
+        except Exception as e:
+            print(f"[WA-AI] vehicle_context failed: {e}")
    except Exception as e:
        print(f"[WA-AI] tenant connection failed: {e}")

@@ -281,6 +421,33 @@ def webhook():
                else:
                    reply = '⚠️ No tienes una cotización abierta para confirmar.'

+        # ── Check for conversation reset commands ──
+        if media_kind == 'text' and msg.get('text'):
+            txt_lower = msg['text'].lower().strip()
+            if txt_lower in ('limpiar chat', 'nuevo chat', 'borrar conversacion', 'borrar conversación', 'reset', 'reiniciar'):
+                if tenant_conn:
+                    try:
+                        cur_del = tenant_conn.cursor()
+                        cur_del.execute("DELETE FROM whatsapp_messages WHERE phone = %s", (clean_phone,))
+                        tenant_conn.commit()
+                        cur_del.close()
+                    except Exception as del_err:
+                        print(f"[WA-AI] Failed to clear conversation history: {del_err}")
+                reply = '🗑️ *Conversación reiniciada.*\n\n¡Hola de nuevo! ¿En qué puedo ayudarte?'
+                result = whatsapp_service.send_message(reply_to, reply)
+                if tenant_conn:
+                    try:
+                        cur_save = tenant_conn.cursor()
+                        cur_save.execute("INSERT INTO whatsapp_messages (phone, direction, message_text) VALUES (%s, 'outgoing', %s)", (clean_phone, reply))
+                        tenant_conn.commit()
+                        cur_save.close()
+                    except Exception:
+                        pass
+                if tenant_conn:
+                    try: tenant_conn.close()
+                    except Exception: pass
+                return jsonify({'ok': True})
+
        if intent is not None:
            # It was a quote command — send reply and skip the AI
            if reply:
@@ -299,6 +466,13 @@ def webhook():
                except Exception: pass
            return jsonify({'ok': True})

+    # Load conversation history so the AI remembers context (vehicle, parts, etc.)
+    conversation_history = []
+    if tenant_conn:
+        conversation_history = _get_conversation_history(clean_phone, tenant_conn, limit=2)
+        if conversation_history:
+            print(f"[WA-AI] Loaded {len(conversation_history)} history messages for {clean_phone}")
+
    try:
        if media_kind == 'image' and msg.get('media_base64'):
            from services.ai_chat import chat_with_image
@@ -308,6 +482,7 @@ def webhook():
            ai_resp = chat_with_image(
                user_message=prompt,
                image_base64=msg['media_base64'],
+                conversation_history=conversation_history,
                inventory_context=inventory_context,
            )
            reply = ai_resp.get('message', '') or ''
@@ -332,7 +507,7 @@ def webhook():
            if transcript:
                print(f"[WA-AI] Voice note transcribed: {transcript[:100]}")
                from services.ai_chat import chat
-                ai_resp = chat(transcript, inventory_context=inventory_context)
+                ai_resp = chat(transcript, conversation_history=conversation_history, inventory_context=inventory_context)
                reply = ai_resp.get('message', '') or ''
                # Prefix the reply so the sender knows we understood the voice note
                if reply:
@@ -344,16 +519,25 @@ def webhook():
        elif msg.get('text'):
            # Plain text message — standard chatbot flow
            from services.ai_chat import chat
-            ai_resp = chat(msg['text'], inventory_context=inventory_context)
+            ai_resp = chat(msg['text'], conversation_history=conversation_history, inventory_context=inventory_context)
            reply = ai_resp.get('message', '') or ''

            # Enrich: if the AI returned a search_query, look up real parts
            # from the catalog and append them to the WhatsApp reply.
            search_q = ai_resp.get('search_query')
            vehicle = ai_resp.get('vehicle')
+
+            # Persist detected vehicle so we don't lose context between messages
+            if vehicle and isinstance(vehicle, dict) and vehicle.get('brand'):
+                try:
+                    from services.wa_quotation import set_vehicle
+                    set_vehicle(clean_phone, vehicle)
+                except Exception as veh_err:
+                    print(f"[WA-AI] Failed to save vehicle: {veh_err}")
+
            if search_q and reply:
                try:
-                    enrichment, found_part = _enrich_wa_reply_with_part(search_q, vehicle, tenant_conn)
+                    enrichment, found_part = _enrich_wa_reply_with_part(search_q, vehicle, tenant_conn, master_conn)
                    if enrichment:
                        reply = reply + '\n\n' + enrichment
                    # Track the found part so "cotizar" can add it
@@ -384,12 +568,17 @@ def webhook():
    except Exception as e:
        print(f"[WA-AI] Error handling {media_kind} from {reply_to}: {e}")

-    # 4. Clean up the connection
+    # 4. Clean up connections
    if tenant_conn is not None:
        try:
            tenant_conn.close()
        except Exception:
            pass
+    if master_conn is not None:
+        try:
+            master_conn.close()
+        except Exception:
+            pass

    return jsonify({'ok': True})