feat(whatsapp): QWEN primary AI backend, Hermes fallback, conversation history, vehicle persistence, demo prompts
- Add QWEN (qwen3.6) as primary AI backend with short system prompt
- Hermes remains as fallback with 45s timeout
- Increase QWEN timeout to 35s, max_tokens to 4000
- Add conversation history loading from whatsapp_messages (last 4 messages)
- Persist detected vehicle in whatsapp_sessions table
- Add 'limpiar chat' / 'nuevo chat' / 'reset' commands to clear history
- Fix CSS conflict: rename WhatsApp chat-panel classes to wa-chat-panel
- Fix JS ID conflicts with chat.js widget (waChatPanel, waChatMessages, etc.)
- Improve no-stock response: conversational with alternatives
- Split search_query by | for multi-part lookups
- Add DEMO_PROMPTS.md and DEMO_PROMPTS_V2.md
This commit is contained in:
@@ -3,9 +3,15 @@
|
||||
|
||||
import requests
|
||||
import json
|
||||
from config import OPENROUTER_API_KEY
|
||||
from config import OPENROUTER_API_KEY, HERMES_API_URL, HERMES_API_KEY
|
||||
from config import QWEN_API_URL, QWEN_API_KEY, QWEN_MODEL
|
||||
|
||||
# Chat-completions endpoints. The OpenRouter URL is fixed; the Hermes and QWEN
# endpoints are derived from the base URLs imported from config.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"


def _chat_completions_url(base_url):
    """Return ``<base_url>/chat/completions``, or None when *base_url* is unset.

    A trailing slash on *base_url* is dropped so the result never contains
    a double slash before ``chat/completions``.
    """
    if not base_url:
        return None
    return base_url.rstrip('/') + '/chat/completions'


# A backend counts as enabled only when both its API key and its base URL
# are configured (truthy).
HERMES_ENABLED = bool(HERMES_API_KEY and HERMES_API_URL)
HERMES_CHAT_URL = _chat_completions_url(HERMES_API_URL)

QWEN_ENABLED = bool(QWEN_API_KEY and QWEN_API_URL)
QWEN_CHAT_URL = _chat_completions_url(QWEN_API_URL)
|
||||
|
||||
# ⚠️ SOLO MODELOS GRATUITOS — No cambiar a modelos de pago.
|
||||
# El modelo DEBE terminar en ":free" para garantizar costo $0.
|
||||
@@ -24,11 +30,69 @@ FALLBACK_MODELS = [
|
||||
"meta-llama/llama-3.3-70b-instruct:free", # Meta — último fallback
|
||||
]
|
||||
|
||||
# Hermes Agent model (OpenAI-compatible API server)
HERMES_MODEL = "hermes-agent"


def _validate_model(model_id):
    """Ensure only free models are used.

    Skips validation for the Hermes Agent and QWEN models (self-hosted /
    private API). Every other model id must end in ``:free``.

    Args:
        model_id: Model identifier about to be sent to a backend.

    Raises:
        ValueError: If *model_id* is not one of the exempt models and is not
            a string ending in ``:free``.
    """
    if model_id in (HERMES_MODEL, QWEN_MODEL):
        return
    # A non-string id can never carry the ":free" suffix; reject it with the
    # documented ValueError rather than failing on a missing .endswith().
    if not isinstance(model_id, str) or not model_id.endswith(':free'):
        raise ValueError(f"BLOQUEADO: Solo se permiten modelos gratuitos (:free). Modelo '{model_id}' no es gratuito.")
|
||||
|
||||
|
||||
def _post_chat_completion(url, api_key, model_id, messages, max_tokens=800, temperature=0.3, timeout=25):
    """Generic OpenAI-compatible chat completion POST.

    Args:
        url: Full chat-completions endpoint to POST to.
        api_key: Sent as ``Authorization: Bearer <api_key>``.
        model_id: Value of the ``model`` field in the request body.
        messages: Value of the ``messages`` field in the request body.
        max_tokens: Value of the ``max_tokens`` field.
        temperature: Value of the ``temperature`` field.
        timeout: Timeout passed to ``requests.post``.

    Returns:
        ``{"content": str, "finish_reason": ..., "model": model_id}`` when the
        first choice carries non-empty text. None on every failure: HTTP 429,
        any other status >= 400, a request/parsing exception, or a payload
        with no usable content. Failures are logged with ``print`` and never
        raised, so callers can simply move on to the next backend.
    """
    try:
        resp = requests.post(
            url,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": model_id,
                "messages": messages,
                "max_tokens": max_tokens,
                "temperature": temperature,
            },
            timeout=timeout,
        )
        if resp.status_code == 429:
            print(f"[AI] Rate limited on {model_id} ({url})")
            return None
        if resp.status_code >= 400:
            print(f"[AI] HTTP {resp.status_code} on {model_id} ({url}): {resp.text[:200]}")
            return None

        data = resp.json()

        # Walk the payload defensively: a 2xx body may still have an empty
        # "choices" list, a null "message", or non-text content. Those are
        # reported as an empty response instead of tripping the generic
        # exception handler with an unrelated-looking error.
        choices = data.get("choices") if isinstance(data, dict) else None
        choice = choices[0] if isinstance(choices, list) and choices else {}
        if not isinstance(choice, dict):
            choice = {}
        message = choice.get("message")
        raw_content = message.get("content") if isinstance(message, dict) else None
        content = raw_content.strip() if isinstance(raw_content, str) else ""
        finish = choice.get("finish_reason", "")

        if not content:
            print(f"[AI] Empty response from {model_id} (finish={finish})")
            return None
        return {"content": content, "finish_reason": finish, "model": model_id}
    except Exception as e:
        # Deliberate best-effort boundary: any transport or decoding failure
        # is logged and turned into None.
        print(f"[AI] Error with {model_id} ({url}): {e}")
        return None
|
||||
|
||||
|
||||
# Compact system prompt (runtime text, intentionally left in Spanish). It asks
# for a JSON reply with "message", "search_query" and "vehicle", an English
# search_query, and "|" as the separator when several parts are requested.
SYSTEM_PROMPT_SHORT = """Eres un asistente de refaccionaria automotriz mexicana. Ayuda a encontrar autopartes.
Responde SIEMPRE en formato JSON: {"message":"...","search_query":"...","vehicle":{"brand":"...","model":"...","year":...}}
search_query va EN INGLES cuando el usuario pide una parte. Traducciones: Balatas=Brake Pad, Disco de freno=Brake Disc, Amortiguador=Shock Absorber, Filtro de aceite=Oil Filter, Filtro de aire=Air Filter, Bujias=Spark Plug, Banda=V-Belt, Bomba de agua=Water Pump, Alternador=Alternator, Radiador=Radiator, Sensor de oxigeno=Oxygen Sensor, Terminal de direccion=Tie Rod End, Bomba de gasolina=Fuel Pump, Clutch=Clutch Kit, Mofle=Exhaust, Inyector=Injector.
No preguntes mas si ya puedes buscar. Si el usuario describe un sintoma, diagnostica y sugiere partes.
Cuando pida cotizacion o multiples partes, search_query DEBE usar | para separar cada parte: "Brake Pad|Air Filter|Oil Filter|Spark Plug".
"""
|
||||
|
||||
SYSTEM_PROMPT = """Eres un asistente de refaccionaria automotriz mexicana. Tu trabajo es ayudar a encontrar autopartes.
|
||||
|
||||
IMPORTANTE: Responde SIEMPRE en formato JSON valido con esta estructura:
|
||||
@@ -161,6 +225,7 @@ def get_inventory_context(tenant_conn, branch_id=None):
|
||||
|
||||
|
||||
# Model ids for image requests: VISION_MODEL is paired with the OpenRouter
# endpoint and HERMES_VISION_MODEL with the Hermes endpoint.
VISION_MODEL = "google/gemma-3-27b-it:free"
HERMES_VISION_MODEL = "hermes-agent"
|
||||
|
||||
VISION_SYSTEM_PROMPT = """Eres un experto en identificación de autopartes. El usuario te envía una foto de una parte automotriz.
|
||||
Tu trabajo es:
|
||||
@@ -219,54 +284,41 @@ def chat_with_image(user_message, image_base64, conversation_history=None, inven
|
||||
]
|
||||
messages.append({"role": "user", "content": user_content})
|
||||
|
||||
import time
|
||||
max_retries = 3
|
||||
|
||||
for attempt in range(max_retries):
|
||||
# Try Hermes first for vision (if enabled), fallback to OpenRouter
|
||||
backends = []
|
||||
if HERMES_ENABLED:
|
||||
backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_VISION_MODEL))
|
||||
if OPENROUTER_API_KEY:
|
||||
backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, VISION_MODEL))
|
||||
|
||||
last_error = None
|
||||
for url, key, model_id in backends:
|
||||
_validate_model(model_id)
|
||||
result = _post_chat_completion(url, key, model_id, messages, max_tokens=500, temperature=0.3, timeout=30)
|
||||
if result is None:
|
||||
last_error = "api_error"
|
||||
continue
|
||||
content = result["content"]
|
||||
try:
|
||||
resp = requests.post(
|
||||
OPENROUTER_URL,
|
||||
headers={
|
||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": VISION_MODEL,
|
||||
"messages": messages,
|
||||
"max_tokens": 500,
|
||||
"temperature": 0.3,
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
if resp.status_code == 429:
|
||||
wait = (attempt + 1) * 5
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(wait)
|
||||
continue
|
||||
return {"message": "El asistente esta ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
|
||||
try:
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
lines = stripped.split("\n")
|
||||
json_str = "\n".join(lines[1:-1])
|
||||
parsed = json.loads(json_str)
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
lines = stripped.split("\n")
|
||||
json_str = "\n".join(lines[1:-1])
|
||||
parsed = json.loads(json_str)
|
||||
return parsed
|
||||
except (json.JSONDecodeError, IndexError):
|
||||
return {"message": content, "search_query": None, "vehicle": None}
|
||||
except Exception as e:
|
||||
if attempt < max_retries - 1:
|
||||
continue
|
||||
return {
|
||||
"message": f"Error al analizar imagen: {str(e)}",
|
||||
"search_query": None,
|
||||
"vehicle": None,
|
||||
}
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
return parsed
|
||||
except (json.JSONDecodeError, IndexError):
|
||||
return {"message": content, "search_query": None, "vehicle": None}
|
||||
|
||||
if last_error == "api_error":
|
||||
return {"message": "El asistente esta ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
|
||||
return {
|
||||
"message": f"Error al analizar imagen: {last_error}",
|
||||
"search_query": None,
|
||||
"vehicle": None,
|
||||
}
|
||||
|
||||
|
||||
def classify_part(part_number):
|
||||
@@ -287,47 +339,32 @@ def classify_part(part_number):
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
import time
|
||||
max_retries = 3
|
||||
|
||||
for attempt in range(max_retries):
|
||||
# Try Hermes first (if enabled), fallback to OpenRouter
|
||||
backends = []
|
||||
if HERMES_ENABLED:
|
||||
backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_MODEL))
|
||||
if OPENROUTER_API_KEY:
|
||||
backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, MODEL))
|
||||
|
||||
for url, key, model_id in backends:
|
||||
_validate_model(model_id)
|
||||
result = _post_chat_completion(url, key, model_id, messages, max_tokens=300, temperature=0.2, timeout=15)
|
||||
if result is None:
|
||||
continue
|
||||
content = result["content"]
|
||||
try:
|
||||
resp = requests.post(
|
||||
OPENROUTER_URL,
|
||||
headers={
|
||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": MODEL,
|
||||
"messages": messages,
|
||||
"max_tokens": 300,
|
||||
"temperature": 0.2,
|
||||
},
|
||||
timeout=15,
|
||||
)
|
||||
if resp.status_code == 429:
|
||||
wait = (attempt + 1) * 5
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(wait)
|
||||
continue
|
||||
return {"name": None, "brand": None, "vehicle": None, "category": None}
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
lines = stripped.split("\n")
|
||||
json_str = "\n".join(lines[1:-1])
|
||||
parsed = json.loads(json_str)
|
||||
return parsed
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
return parsed
|
||||
return parsed
|
||||
except Exception:
|
||||
if attempt < max_retries - 1:
|
||||
continue
|
||||
return {"name": None, "brand": None, "vehicle": None, "category": None}
|
||||
continue
|
||||
return {"name": None, "brand": None, "vehicle": None, "category": None}
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -491,74 +528,71 @@ def chat(user_message, conversation_history=None, inventory_context=None):
|
||||
|
||||
last_error = None
|
||||
|
||||
# Try each model in the fallback chain on 429 (rate limit)
|
||||
for model_id in FALLBACK_MODELS:
|
||||
_validate_model(model_id) # Block paid models
|
||||
try:
|
||||
resp = requests.post(
|
||||
OPENROUTER_URL,
|
||||
headers={
|
||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": model_id,
|
||||
"messages": messages,
|
||||
"max_tokens": 800,
|
||||
"temperature": 0.3,
|
||||
},
|
||||
timeout=25,
|
||||
)
|
||||
if resp.status_code == 429:
|
||||
# Build backend list: QWEN first (fast, ~1s), then Hermes (specialized, ~30s), then OpenRouter
|
||||
backends = []
|
||||
if QWEN_ENABLED:
|
||||
backends.append((QWEN_CHAT_URL, QWEN_API_KEY, QWEN_MODEL, 35, SYSTEM_PROMPT_SHORT, 4000))
|
||||
if HERMES_ENABLED:
|
||||
backends.append((HERMES_CHAT_URL, HERMES_API_KEY, HERMES_MODEL, 45, SYSTEM_PROMPT, 800))
|
||||
if OPENROUTER_API_KEY:
|
||||
for m in FALLBACK_MODELS:
|
||||
backends.append((OPENROUTER_URL, OPENROUTER_API_KEY, m, 25, SYSTEM_PROMPT, 800))
|
||||
|
||||
for url, key, model_id, timeout_sec, sys_prompt, max_tok in backends:
|
||||
_validate_model(model_id)
|
||||
# Use backend-specific system prompt and max_tokens
|
||||
sys_content = sys_prompt
|
||||
if inventory_context:
|
||||
sys_content = sys_prompt + "\n\n" + inventory_context
|
||||
msgs = [{"role": "system", "content": sys_content}]
|
||||
if conversation_history:
|
||||
msgs.extend(conversation_history)
|
||||
msgs.append({"role": "user", "content": user_message})
|
||||
result = _post_chat_completion(url, key, model_id, msgs, max_tokens=max_tok, temperature=0.3, timeout=timeout_sec)
|
||||
if result is None:
|
||||
if url == QWEN_CHAT_URL:
|
||||
print(f"[AI] QWEN failed, trying Hermes fallback...")
|
||||
last_error = "qwen_failed"
|
||||
elif url == HERMES_CHAT_URL:
|
||||
print(f"[AI] Hermes failed, trying OpenRouter fallback...")
|
||||
last_error = "hermes_timeout"
|
||||
else:
|
||||
print(f"[AI] Rate limited on {model_id}, trying next model...")
|
||||
last_error = "rate_limit"
|
||||
continue
|
||||
if resp.status_code >= 400:
|
||||
print(f"[AI] HTTP {resp.status_code} on {model_id}: {resp.text[:200]}")
|
||||
last_error = f"http_{resp.status_code}"
|
||||
continue
|
||||
data = resp.json()
|
||||
choice = data.get("choices", [{}])[0]
|
||||
content = choice.get("message", {}).get("content", "").strip()
|
||||
finish = choice.get("finish_reason", "")
|
||||
|
||||
if not content:
|
||||
print(f"[AI] Empty response from {model_id} (finish={finish})")
|
||||
last_error = "empty_response"
|
||||
continue
|
||||
|
||||
print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
|
||||
|
||||
# Try to parse JSON response
|
||||
try:
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
lines = stripped.split("\n")
|
||||
json_str = "\n".join(lines[1:-1])
|
||||
parsed = json.loads(json_str)
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
# Successful JSON response — cache it
|
||||
if cache_key:
|
||||
_cache_set(cache_key, parsed)
|
||||
return parsed
|
||||
except (json.JSONDecodeError, IndexError):
|
||||
fallback = {"message": content, "search_query": None, "vehicle": None}
|
||||
# Cache the fallback too — the model gave us a real answer,
|
||||
# it just wasn't JSON. Next hit saves the API call.
|
||||
if cache_key:
|
||||
_cache_set(cache_key, fallback)
|
||||
return fallback
|
||||
except Exception as e:
|
||||
print(f"[AI] Error with {model_id}: {e}")
|
||||
last_error = str(e)
|
||||
continue
|
||||
|
||||
content = result["content"]
|
||||
finish = result["finish_reason"]
|
||||
print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
|
||||
|
||||
# Try to parse JSON response
|
||||
try:
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
lines = stripped.split("\n")
|
||||
json_str = "\n".join(lines[1:-1])
|
||||
parsed = json.loads(json_str)
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
# Successful JSON response — cache it
|
||||
if cache_key:
|
||||
_cache_set(cache_key, parsed)
|
||||
return parsed
|
||||
except (json.JSONDecodeError, IndexError):
|
||||
fallback = {"message": content, "search_query": None, "vehicle": None}
|
||||
# Cache the fallback too — the model gave us a real answer,
|
||||
# it just wasn't JSON. Next hit saves the API call.
|
||||
if cache_key:
|
||||
_cache_set(cache_key, fallback)
|
||||
return fallback
|
||||
|
||||
# All models exhausted — DON'T cache errors, we want retries next time
|
||||
if last_error == "rate_limit":
|
||||
return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
|
||||
if last_error == "hermes_timeout":
|
||||
return {"message": "El asistente tardó mucho en responder. Intenta de nuevo en un momento.", "search_query": None, "vehicle": None}
|
||||
return {
|
||||
"message": f"Error de conexion: {last_error}",
|
||||
"message": "El asistente no está disponible en este momento. Intenta de nuevo en unos segundos.",
|
||||
"search_query": None,
|
||||
"vehicle": None,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user