feat: complete session — catalog, marketplace, WhatsApp, peer-to-peer, install scripts
Major features: - Pixel-Perfect glassmorphism design (landing + POS + public catalog) - OEM/Local catalog toggle with Nexpart taxonomy (14 groups, 108 subgroups, 558 part types) - Marketplace B2B Phase 1 (bodegas, POs, status machine, WA+email notifications) - Peer-to-peer inventory (multi-instance, LAN discovery) - WhatsApp: photo→Vision AI, voice→Whisper, conversational quotations - Smart unified search (VIN/plate/part_number/keyword auto-detect) - Shop Supplies tab (vehicle-independent parts) - Chatbot AI fallback chain (5 models) + response cache - CSV inventory import tool + setup_instance.sh installer - Tablet-responsive CSS + sidebar toggle - Filters, export CSV, employee edit, business data save - Quotation system (WA→POS) with auto-print on confirmation - Live stats on landing page Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,8 +9,20 @@ OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||
|
||||
# ⚠️ SOLO MODELOS GRATUITOS — No cambiar a modelos de pago.
|
||||
# El modelo DEBE terminar en ":free" para garantizar costo $0.
|
||||
# Alternativas gratuitas: "meta-llama/llama-4-scout:free", "google/gemma-3-27b-it:free"
|
||||
MODEL = "qwen/qwen3.6-plus-preview:free"
|
||||
MODEL = "qwen/qwen3.6-plus:free"
|
||||
|
||||
# Fallback chain: si el modelo principal tiene rate limit (429) o 404
|
||||
# (deprecated), intenta los siguientes. Todos :free. Mezclamos proveedores
|
||||
# distintos porque los rate limits aplican por-proveedor.
|
||||
# Lista actualizada 2026-04-09 después de que qwen3.6-plus fue deprecated.
|
||||
FALLBACK_MODELS = [
|
||||
"openai/gpt-oss-120b:free", # OpenInference — gran cobertura
|
||||
"google/gemma-4-31b-it:free", # Google — nuevo, 262K ctx
|
||||
"qwen/qwen3-next-80b-a3b-instruct:free", # Alibaba — 262K ctx
|
||||
"z-ai/glm-4.5-air:free", # Z.AI
|
||||
"google/gemma-3-27b-it:free", # Google — backup vision
|
||||
"meta-llama/llama-3.3-70b-instruct:free", # Meta — último fallback
|
||||
]
|
||||
|
||||
def _validate_model(model_id):
|
||||
"""Ensure only free models are used. Raises if model is not free."""
|
||||
@@ -318,15 +330,155 @@ def classify_part(part_number):
|
||||
return {"name": None, "brand": None, "vehicle": None, "category": None}
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# RESPONSE CACHE — reduces OpenRouter calls for repeated questions
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# Keyed by a normalized form of the user message. TTL 1 hour. Bypasses
|
||||
# caching for messages containing VINs or specific part numbers (where the
|
||||
# answer depends on the exact string).
|
||||
|
||||
import hashlib as _hashlib
|
||||
import re as _re
|
||||
import time as _time_chat
|
||||
|
||||
_RESPONSE_CACHE = {} # key → (expires_at, response_dict)
|
||||
_CACHE_TTL_SECONDS = 3600 # 1 hour
|
||||
_CACHE_MAX_SIZE = 1000
|
||||
_CACHE_HITS = 0
|
||||
_CACHE_MISSES = 0
|
||||
|
||||
# Stopwords that add noise but no meaning — stripped from cache keys.
|
||||
_CACHE_STOPWORDS = {
|
||||
'necesito', 'necesitas', 'me', 'das', 'dame', 'tienes', 'tiene', 'hay',
|
||||
'quiero', 'quisiera', 'puedes', 'puede', 'favor', 'por', 'porfavor',
|
||||
'hola', 'buenos', 'dias', 'tardes', 'noches', 'holaa',
|
||||
'i', 'need', 'want', 'do', 'you', 'have', 'please',
|
||||
}
|
||||
|
||||
# Patterns that disable caching — if the message contains any of these, we
|
||||
# never cache the response because the answer is specific to that exact input.
|
||||
# Rules designed to minimize false positives against normal Spanish queries
|
||||
# like "necesito balatas para corolla 2018".
|
||||
_CACHE_BYPASS_PATTERNS = [
|
||||
# 17-char VIN (strict, no spaces, alphanumeric except I/O/Q)
|
||||
_re.compile(r'\b[A-HJ-NPR-Z0-9]{17}\b'),
|
||||
# Long numeric (12+ digits — too long to be a year/model code)
|
||||
_re.compile(r'\b\d{12,}\b'),
|
||||
# Mexican license plate: 3 letters + 3-4 digits
|
||||
_re.compile(r'\b[A-Z]{3}[-\s]?\d{3,4}\b'),
|
||||
# OEM with REQUIRED dash/slash separator(s), letters+digits on both sides,
|
||||
# and a total length that makes it unlikely to be a brand+year collision.
|
||||
# Example matches: "4G0-857-951-A", "0 986 4B7 013" (after normalizing).
|
||||
_re.compile(r'\b[A-Z0-9]{2,}[-/][A-Z0-9]{2,}([-/][A-Z0-9]+)+\b'),
|
||||
]
|
||||
|
||||
|
||||
def _should_bypass_cache(message: str) -> bool:
    """Decide whether a message must skip the response cache.

    Args:
        message: Raw user message.

    Returns:
        True when the message is empty/None, or when its upper-cased form
        matches any pattern in ``_CACHE_BYPASS_PATTERNS``; False otherwise.
    """
    if not message:
        return True
    # The bypass patterns are written against upper-case letters only.
    candidate = message.upper()
    return any(rx.search(candidate) for rx in _CACHE_BYPASS_PATTERNS)
|
||||
|
||||
|
||||
def _normalize_for_cache(message: str) -> str:
    """Reduce a user message to a canonical form for use in a cache key.

    Steps: lowercase, fold Spanish accent marks, replace punctuation with
    spaces, collapse whitespace, and drop tokens in ``_CACHE_STOPWORDS``.

    Accent folding matters because the stopword list is written without
    accents ('dias'): without it, "buenos días" would keep "días" and
    produce a different key than "buenos dias".

    Args:
        message: Raw user message.

    Returns:
        The surviving tokens joined by single spaces; '' when the message is
        empty/None or consists only of punctuation and stopwords.
    """
    if not message:
        return ''

    # Function-scope import keeps this block self-contained.
    import unicodedata

    # Decompose, drop only acute / grave / diaeresis marks, then recompose.
    # The tilde (U+0303) is deliberately kept so "ñ" stays a distinct letter.
    accent_marks = ('\u0300', '\u0301', '\u0308')
    decomposed = unicodedata.normalize('NFD', message.lower())
    folded = unicodedata.normalize(
        'NFC', ''.join(ch for ch in decomposed if ch not in accent_marks)
    )

    cleaned = _re.sub(r'[¿?¡!.,;:()\[\]{}\'"]+', ' ', folded)
    # str.split() with no argument both trims and collapses whitespace runs.
    return ' '.join(tok for tok in cleaned.split() if tok not in _CACHE_STOPWORDS)
|
||||
|
||||
|
||||
def _cache_key(user_message: str, inventory_context: str | None) -> str | None:
    """Derive the cache key for a (message, inventory context) pair.

    Args:
        user_message: Raw user message.
        inventory_context: Inventory summary sent with the request, or None.

    Returns:
        ``"<normalized message>::<12 hex chars>"``, or None when the message
        must bypass the cache or normalizes to an empty string.
    """
    if _should_bypass_cache(user_message):
        normalized = ''
    else:
        normalized = _normalize_for_cache(user_message)
    if not normalized:
        return None

    # The inventory context is folded into the key, so the same question
    # asked against a different inventory context gets its own entry.
    context_bytes = (inventory_context or '').encode()
    digest = _hashlib.md5(context_bytes).hexdigest()
    return normalized + '::' + digest[:12]
|
||||
|
||||
|
||||
def _cache_get(key: str | None):
    """Look up a cached response and update the hit/miss counters.

    Args:
        key: Cache key from ``_cache_key``; None or '' counts as a miss.

    Returns:
        A deep copy of the cached response when a live entry exists,
        otherwise None. An expired entry is removed as a side effect.
    """
    global _CACHE_HITS, _CACHE_MISSES

    # Function-scope import keeps this block self-contained.
    import copy

    entry = _RESPONSE_CACHE.get(key) if key else None
    if entry is not None:
        expires_at, data = entry
        if _time_chat.time() <= expires_at:
            _CACHE_HITS += 1
            # Hand out a copy: the caller may mutate the response, and that
            # must not change what later cache hits receive.
            return copy.deepcopy(data)
        # Expired — drop it so it stops occupying a slot.
        _RESPONSE_CACHE.pop(key, None)

    _CACHE_MISSES += 1
    return None
|
||||
|
||||
|
||||
def _cache_set(key: str, data: dict):
    """Store a response under ``key`` with a TTL of ``_CACHE_TTL_SECONDS``.

    Does nothing when ``key`` or ``data`` is empty/None. When the cache grows
    past ``_CACHE_MAX_SIZE`` entries, the 200 entries closest to expiry are
    evicted in one batch.

    Args:
        key: Cache key from ``_cache_key``.
        data: Response dict to cache.
    """
    if not key or not data:
        return

    # Function-scope import keeps this block self-contained.
    import copy

    # Store a private copy: the caller keeps using (and may mutate) the
    # object it passed in, and that must not alter the cached entry.
    expires_at = _time_chat.time() + _CACHE_TTL_SECONDS
    _RESPONSE_CACHE[key] = (expires_at, copy.deepcopy(data))

    if len(_RESPONSE_CACHE) <= _CACHE_MAX_SIZE:
        return

    # Evict in a batch so the sort does not run on every insert once full.
    evict_batch = 200
    by_expiry = sorted(_RESPONSE_CACHE, key=lambda k: _RESPONSE_CACHE[k][0])
    for stale_key in by_expiry[:evict_batch]:
        _RESPONSE_CACHE.pop(stale_key, None)
|
||||
|
||||
|
||||
def chat_cache_stats() -> dict:
    """Report cache size and hit/miss counters for diagnostics.

    Returns:
        Dict with keys ``entries``, ``hits``, ``misses``, ``hit_rate_pct``
        (percentage rounded to one decimal; 0 before any lookup) and
        ``ttl_seconds``.
    """
    hits, misses = _CACHE_HITS, _CACHE_MISSES
    lookups = hits + misses
    if lookups:
        hit_rate = hits * 100 / lookups
    else:
        hit_rate = 0  # avoid dividing by zero before the first lookup
    return {
        'entries': len(_RESPONSE_CACHE),
        'hits': hits,
        'misses': misses,
        'hit_rate_pct': round(hit_rate, 1),
        'ttl_seconds': _CACHE_TTL_SECONDS,
    }
|
||||
|
||||
|
||||
def chat_cache_clear():
    """Drop every cached response (e.g. after bulk inventory changes).

    Only the entries are removed; the hit/miss counters are left untouched.
    """
    _RESPONSE_CACHE.clear()
|
||||
|
||||
|
||||
def chat(user_message, conversation_history=None, inventory_context=None):
|
||||
"""Send a message to the AI and get a response with search suggestions.
|
||||
|
||||
Caches responses for repeated identical questions (subject to bypass
|
||||
rules — messages with VINs / part numbers / plates are never cached).
|
||||
|
||||
Args:
|
||||
user_message: The user's chat message.
|
||||
conversation_history: Previous messages in the conversation.
|
||||
inventory_context: Optional inventory summary string to inject into the system prompt.
|
||||
"""
|
||||
_validate_model(MODEL) # Block paid models
|
||||
# Cache lookup — only when there's no conversation history (stateless)
|
||||
cache_key = None
|
||||
if not conversation_history:
|
||||
cache_key = _cache_key(user_message, inventory_context)
|
||||
cached = _cache_get(cache_key)
|
||||
if cached is not None:
|
||||
print(f"[AI] Cache HIT for '{user_message[:40]}...'")
|
||||
return cached
|
||||
|
||||
system_content = SYSTEM_PROMPT
|
||||
if inventory_context:
|
||||
@@ -337,10 +489,11 @@ def chat(user_message, conversation_history=None, inventory_context=None):
|
||||
messages.extend(conversation_history)
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
|
||||
import time
|
||||
max_retries = 3
|
||||
last_error = None
|
||||
|
||||
for attempt in range(max_retries):
|
||||
# Try each model in the fallback chain on 429 (rate limit)
|
||||
for model_id in FALLBACK_MODELS:
|
||||
_validate_model(model_id) # Block paid models
|
||||
try:
|
||||
resp = requests.post(
|
||||
OPENROUTER_URL,
|
||||
@@ -349,23 +502,32 @@ def chat(user_message, conversation_history=None, inventory_context=None):
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": MODEL,
|
||||
"model": model_id,
|
||||
"messages": messages,
|
||||
"max_tokens": 500,
|
||||
"max_tokens": 800,
|
||||
"temperature": 0.3,
|
||||
},
|
||||
timeout=20,
|
||||
timeout=25,
|
||||
)
|
||||
if resp.status_code == 429:
|
||||
# Rate limited — wait and retry
|
||||
wait = (attempt + 1) * 5 # 5s, 10s, 15s
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(wait)
|
||||
continue
|
||||
return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
|
||||
resp.raise_for_status()
|
||||
print(f"[AI] Rate limited on {model_id}, trying next model...")
|
||||
last_error = "rate_limit"
|
||||
continue
|
||||
if resp.status_code >= 400:
|
||||
print(f"[AI] HTTP {resp.status_code} on {model_id}: {resp.text[:200]}")
|
||||
last_error = f"http_{resp.status_code}"
|
||||
continue
|
||||
data = resp.json()
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
choice = data.get("choices", [{}])[0]
|
||||
content = choice.get("message", {}).get("content", "").strip()
|
||||
finish = choice.get("finish_reason", "")
|
||||
|
||||
if not content:
|
||||
print(f"[AI] Empty response from {model_id} (finish={finish})")
|
||||
last_error = "empty_response"
|
||||
continue
|
||||
|
||||
print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
|
||||
|
||||
# Try to parse JSON response
|
||||
try:
|
||||
@@ -376,14 +538,27 @@ def chat(user_message, conversation_history=None, inventory_context=None):
|
||||
parsed = json.loads(json_str)
|
||||
else:
|
||||
parsed = json.loads(stripped)
|
||||
# Successful JSON response — cache it
|
||||
if cache_key:
|
||||
_cache_set(cache_key, parsed)
|
||||
return parsed
|
||||
except (json.JSONDecodeError, IndexError):
|
||||
return {"message": content, "search_query": None, "vehicle": None}
|
||||
fallback = {"message": content, "search_query": None, "vehicle": None}
|
||||
# Cache the fallback too — the model gave us a real answer,
|
||||
# it just wasn't JSON. Next hit saves the API call.
|
||||
if cache_key:
|
||||
_cache_set(cache_key, fallback)
|
||||
return fallback
|
||||
except Exception as e:
|
||||
if attempt < max_retries - 1:
|
||||
continue
|
||||
return {
|
||||
"message": f"Error de conexion: {str(e)}",
|
||||
"search_query": None,
|
||||
"vehicle": None,
|
||||
}
|
||||
print(f"[AI] Error with {model_id}: {e}")
|
||||
last_error = str(e)
|
||||
continue
|
||||
|
||||
# All models exhausted — DON'T cache errors, we want retries next time
|
||||
if last_error == "rate_limit":
|
||||
return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
|
||||
return {
|
||||
"message": f"Error de conexion: {last_error}",
|
||||
"search_query": None,
|
||||
"vehicle": None,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user