feat: complete session — catalog, marketplace, WhatsApp, peer-to-peer, install scripts

Major features:
- Pixel-Perfect glassmorphism design (landing + POS + public catalog)
- OEM/Local catalog toggle with Nexpart taxonomy (14 groups, 108 subgroups, 558 part types)
- Marketplace B2B Phase 1 (bodegas, POs, status machine, WA+email notifications)
- Peer-to-peer inventory (multi-instance, LAN discovery)
- WhatsApp: photo→Vision AI, voice→Whisper, conversational quotations
- Smart unified search (VIN/plate/part_number/keyword auto-detect)
- Shop Supplies tab (vehicle-independent parts)
- Chatbot AI fallback chain (5 models) + response cache
- CSV inventory import tool + setup_instance.sh installer
- Tablet-responsive CSS + sidebar toggle
- Filters, export CSV, employee edit, business data save
- Quotation system (WA→POS) with auto-print on confirmation
- Live stats on landing page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-18 05:35:53 +00:00
parent 6b097614a0
commit e95f7cf684
54 changed files with 11226 additions and 1422 deletions

View File

@@ -9,8 +9,20 @@ OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# ⚠️ FREE MODELS ONLY — do not switch to paid models.
# The model id MUST end in ":free" to guarantee $0 cost.
# Free alternatives: "meta-llama/llama-4-scout:free", "google/gemma-3-27b-it:free"
# NOTE(review): MODEL is assigned twice in a row; the second assignment is the
# one that takes effect.
MODEL = "qwen/qwen3.6-plus-preview:free"
MODEL = "qwen/qwen3.6-plus:free"
# Fallback chain: if the primary model is rate limited (429) or returns 404
# (deprecated), the following models are tried. All of them are :free.
# Providers are mixed on purpose because rate limits apply per provider.
# List updated 2026-04-09 after qwen3.6-plus was deprecated.
# NOTE(review): MODEL above still names qwen3.6-plus even though this comment
# says it was deprecated — confirm the intended primary model.
FALLBACK_MODELS = [
"openai/gpt-oss-120b:free", # OpenInference — broad coverage
"google/gemma-4-31b-it:free", # Google — new, 262K ctx
"qwen/qwen3-next-80b-a3b-instruct:free", # Alibaba — 262K ctx
"z-ai/glm-4.5-air:free", # Z.AI
"google/gemma-3-27b-it:free", # Google — vision backup
"meta-llama/llama-3.3-70b-instruct:free", # Meta — last-resort fallback
]
def _validate_model(model_id):
"""Ensure only free models are used. Raises if model is not free."""
@@ -318,15 +330,155 @@ def classify_part(part_number):
return {"name": None, "brand": None, "vehicle": None, "category": None}
# ═══════════════════════════════════════════════════════════════════════════
# RESPONSE CACHE — reduces OpenRouter calls for repeated questions
# ═══════════════════════════════════════════════════════════════════════════
# Keyed by a normalized form of the user message plus a short hash of the
# inventory context. TTL 1 hour. Caching is bypassed for messages containing
# VINs, license plates, long numeric codes or dash/slash-separated part
# numbers (where the answer depends on the exact string).
import hashlib as _hashlib
import re as _re
import time as _time_chat
# In-memory response cache. Each value is a 2-tuple: the expiry time
# (a time.time() timestamp) and the response dict that was stored.
_RESPONSE_CACHE = {} # key → (expires_at, response_dict)
# Lifetime of a cache entry, in seconds.
_CACHE_TTL_SECONDS = 3600 # 1 hour
# Size limit: once the cache grows past this many entries, _cache_set evicts
# a batch of the entries that expire soonest.
_CACHE_MAX_SIZE = 1000
# Lookup counters, incremented by _cache_get and reported by chat_cache_stats.
_CACHE_HITS = 0
_CACHE_MISSES = 0
# Stopwords that add noise but no meaning — stripped from cache keys.
# Compared against lowercased tokens after punctuation has been removed.
# NOTE(review): the normalizer does not fold accents, so 'días' is not
# covered by the unaccented 'dias' entry — confirm whether that is intended.
_CACHE_STOPWORDS = {
'necesito', 'necesitas', 'me', 'das', 'dame', 'tienes', 'tiene', 'hay',
'quiero', 'quisiera', 'puedes', 'puede', 'favor', 'por', 'porfavor',
'hola', 'buenos', 'dias', 'tardes', 'noches', 'holaa',
'i', 'need', 'want', 'do', 'you', 'have', 'please',
}
# Bypass rules: a message matching ANY of these patterns is never cached,
# because its answer is tied to that exact identifier. The patterns are
# applied to the upper-cased message (see _should_bypass_cache), which is why
# they only list upper-case letters. They aim to stay quiet on ordinary
# queries such as "necesito balatas para corolla 2018".
_CACHE_BYPASS_PATTERNS = [
    # VIN: a standalone run of exactly 17 letters/digits, excluding I, O, Q.
    _re.compile(r'\b[A-HJ-NPR-Z0-9]{17}\b'),
    # Standalone run of 12 or more digits (too long for a year/model code).
    _re.compile(r'\b\d{12,}\b'),
    # Mexican license plate: 3 letters, optional dash/whitespace, 3-4 digits.
    # Note this also fires on a three-letter word followed by a number,
    # e.g. "KIA 2020".
    _re.compile(r'\b[A-Z]{3}[-\s]?\d{3,4}\b'),
    # OEM part number: at least three alphanumeric groups joined by "-" or
    # "/" (the first two groups need 2+ characters), e.g. "4G0-857-951-A".
    # Space-separated numbers such as "0 986 4B7 013" are NOT matched: the
    # message is only upper-cased, spaces are never turned into separators.
    _re.compile(r'\b[A-Z0-9]{2,}[-/][A-Z0-9]{2,}([-/][A-Z0-9]+)+\b'),
]


def _should_bypass_cache(message: str) -> bool:
    """Tell whether a message must skip the response cache.

    Args:
        message: Raw user message.

    Returns:
        True when the message is empty/None, or when its upper-cased form
        matches any entry of ``_CACHE_BYPASS_PATTERNS`` (VIN, long number,
        plate, dash/slash-separated part number). False otherwise.
    """
    if not message:
        # Nothing meaningful to key on — treat as uncacheable.
        return True
    candidate = message.upper()
    return any(
        rule.search(candidate) is not None
        for rule in _CACHE_BYPASS_PATTERNS
    )
def _normalize_for_cache(message: str) -> str:
    """Reduce a message to the canonical token string used in cache keys.

    The message is lower-cased and trimmed, common punctuation is replaced by
    spaces, whitespace runs are collapsed, and every token found in
    ``_CACHE_STOPWORDS`` is dropped. Token order is preserved.

    Args:
        message: Raw user message.

    Returns:
        The remaining tokens joined by single spaces; ``''`` when the message
        is empty/None or consists only of stopwords and punctuation.
    """
    if not message:
        return ''
    text = _re.sub(r'[¿?¡!.,;:()\[\]{}\'"]+', ' ', message.lower().strip())
    text = _re.sub(r'\s+', ' ', text).strip()
    kept = []
    for word in text.split():
        if word in _CACHE_STOPWORDS:
            continue
        kept.append(word)
    return ' '.join(kept)
def _cache_key(user_message: str, inventory_context: str | None) -> str | None:
    """Build a stable cache key for (message, inventory_context).

    Args:
        user_message: Raw user message.
        inventory_context: Inventory summary that will be injected into the
            prompt, or None. It is hashed into the key so that the same
            question asked against a different inventory context does not
            share a cache entry.

    Returns:
        ``"<normalized message>::<12 hex chars>"``, or None when the message
        must bypass the cache (see ``_should_bypass_cache``) or normalizes to
        an empty string.
    """
    if _should_bypass_cache(user_message):
        return None
    normalized = _normalize_for_cache(user_message)
    if not normalized:
        return None
    # MD5 is used purely as a cheap discriminator, not for security.
    # usedforsecurity=False keeps this working on FIPS-restricted builds,
    # where a plain hashlib.md5() call raises; the digest itself is unchanged.
    ctx_digest = _hashlib.md5(
        (inventory_context or '').encode(),
        usedforsecurity=False,
    ).hexdigest()[:12]
    return f"{normalized}::{ctx_digest}"
def _cache_get(key: str):
    """Look up a cached response and update the hit/miss counters.

    Args:
        key: Cache key, or None/'' when the message bypasses the cache.

    Returns:
        An independent deep copy of the cached response, or None when the key
        is falsy, absent, or its entry has expired. Expired entries are
        removed on access. Every None result — including a falsy key — is
        counted as a miss.
    """
    import copy

    global _CACHE_HITS, _CACHE_MISSES

    entry = _RESPONSE_CACHE.get(key) if key else None
    if entry is not None:
        expires_at, data = entry
        if _time_chat.time() <= expires_at:
            _CACHE_HITS += 1
            # Hand out a copy: the response is returned to callers, and a
            # caller mutating it must not alter what later hits receive.
            return copy.deepcopy(data)
        # Entry outlived its TTL — drop it and fall through to a miss.
        _RESPONSE_CACHE.pop(key, None)
    _CACHE_MISSES += 1
    return None
def _cache_set(key: str, data: dict):
    """Store a response under ``key`` with the configured TTL.

    Does nothing when ``key`` or ``data`` is falsy. A deep copy of ``data``
    is stored, so the caller may keep using (and mutating) its own object
    without changing the cached entry.

    When the cache grows past ``_CACHE_MAX_SIZE``, the entries that expire
    soonest are evicted in one batch of ``_CACHE_MAX_SIZE // 5`` (at least 1).

    Args:
        key: Cache key, or None/'' to skip caching.
        data: Response to cache.
    """
    if not key or not data:
        return
    import copy

    expires_at = _time_chat.time() + _CACHE_TTL_SECONDS
    _RESPONSE_CACHE[key] = (expires_at, copy.deepcopy(data))
    if len(_RESPONSE_CACHE) <= _CACHE_MAX_SIZE:
        return
    # Batch size is derived from the limit (200 for the default 1000) so that
    # lowering _CACHE_MAX_SIZE cannot make one eviction wipe the whole cache.
    evict_count = max(1, _CACHE_MAX_SIZE // 5)
    by_expiry = sorted(_RESPONSE_CACHE, key=lambda k: _RESPONSE_CACHE[k][0])
    for stale_key in by_expiry[:evict_count]:
        _RESPONSE_CACHE.pop(stale_key, None)
def chat_cache_stats() -> dict:
    """Report response-cache diagnostics.

    Returns:
        A dict with the current number of entries, the hit and miss counters,
        the hit rate as a percentage rounded to one decimal (0 when no lookup
        has been recorded yet), and the configured TTL in seconds.
    """
    lookups = _CACHE_HITS + _CACHE_MISSES
    if lookups:
        pct = _CACHE_HITS * 100 / lookups
    else:
        pct = 0
    stats = {}
    stats['entries'] = len(_RESPONSE_CACHE)
    stats['hits'] = _CACHE_HITS
    stats['misses'] = _CACHE_MISSES
    stats['hit_rate_pct'] = round(pct, 1)
    stats['ttl_seconds'] = _CACHE_TTL_SECONDS
    return stats
def chat_cache_clear() -> None:
    """Empty the in-memory response cache in place.

    Meant for manual invalidation, e.g. after inventory bulk changes. Only
    the cached entries are removed; the hit/miss counters are not reset here.
    """
    _RESPONSE_CACHE.clear()
def chat(user_message, conversation_history=None, inventory_context=None):
"""Send a message to the AI and get a response with search suggestions.
Caches responses for repeated identical questions (subject to bypass
rules — messages with VINs / part numbers / plates are never cached).
Args:
user_message: The user's chat message.
conversation_history: Previous messages in the conversation.
inventory_context: Optional inventory summary string to inject into the system prompt.
"""
_validate_model(MODEL) # Block paid models
# Cache lookup — only when there's no conversation history (stateless)
cache_key = None
if not conversation_history:
cache_key = _cache_key(user_message, inventory_context)
cached = _cache_get(cache_key)
if cached is not None:
print(f"[AI] Cache HIT for '{user_message[:40]}...'")
return cached
system_content = SYSTEM_PROMPT
if inventory_context:
@@ -337,10 +489,11 @@ def chat(user_message, conversation_history=None, inventory_context=None):
messages.extend(conversation_history)
messages.append({"role": "user", "content": user_message})
import time
max_retries = 3
last_error = None
for attempt in range(max_retries):
# Try each model in the fallback chain on 429 (rate limit)
for model_id in FALLBACK_MODELS:
_validate_model(model_id) # Block paid models
try:
resp = requests.post(
OPENROUTER_URL,
@@ -349,23 +502,32 @@ def chat(user_message, conversation_history=None, inventory_context=None):
"Content-Type": "application/json",
},
json={
"model": MODEL,
"model": model_id,
"messages": messages,
"max_tokens": 500,
"max_tokens": 800,
"temperature": 0.3,
},
timeout=20,
timeout=25,
)
if resp.status_code == 429:
# Rate limited — wait and retry
wait = (attempt + 1) * 5 # 5s, 10s, 15s
if attempt < max_retries - 1:
time.sleep(wait)
continue
return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
resp.raise_for_status()
print(f"[AI] Rate limited on {model_id}, trying next model...")
last_error = "rate_limit"
continue
if resp.status_code >= 400:
print(f"[AI] HTTP {resp.status_code} on {model_id}: {resp.text[:200]}")
last_error = f"http_{resp.status_code}"
continue
data = resp.json()
content = data["choices"][0]["message"]["content"]
choice = data.get("choices", [{}])[0]
content = choice.get("message", {}).get("content", "").strip()
finish = choice.get("finish_reason", "")
if not content:
print(f"[AI] Empty response from {model_id} (finish={finish})")
last_error = "empty_response"
continue
print(f"[AI] Response from {model_id} (finish={finish}, {len(content)} chars)")
# Try to parse JSON response
try:
@@ -376,14 +538,27 @@ def chat(user_message, conversation_history=None, inventory_context=None):
parsed = json.loads(json_str)
else:
parsed = json.loads(stripped)
# Successful JSON response — cache it
if cache_key:
_cache_set(cache_key, parsed)
return parsed
except (json.JSONDecodeError, IndexError):
return {"message": content, "search_query": None, "vehicle": None}
fallback = {"message": content, "search_query": None, "vehicle": None}
# Cache the fallback too — the model gave us a real answer,
# it just wasn't JSON. Next hit saves the API call.
if cache_key:
_cache_set(cache_key, fallback)
return fallback
except Exception as e:
if attempt < max_retries - 1:
continue
return {
"message": f"Error de conexion: {str(e)}",
"search_query": None,
"vehicle": None,
}
print(f"[AI] Error with {model_id}: {e}")
last_error = str(e)
continue
# All models exhausted — DON'T cache errors, we want retries next time
if last_error == "rate_limit":
return {"message": "El asistente está ocupado. Intenta de nuevo en unos segundos.", "search_query": None, "vehicle": None}
return {
"message": f"Error de conexion: {last_error}",
"search_query": None,
"vehicle": None,
}