feat: Implementar PWA, Analytics, Reportes PDF y mejoras OCR

FASE 1 - PWA y Frontend:
- Crear templates/base.html, dashboard.html, analytics.html, executive.html
- Crear static/css/main.css con diseño responsivo
- Agregar static/js/app.js, pwa.js, camera.js, charts.js
- Implementar manifest.json y service-worker.js para PWA
- Soporte para captura de tickets desde cámara móvil

FASE 2 - Analytics:
- Crear módulo analytics/ con predictions.py, trends.py, comparisons.py
- Implementar predicción básica con promedio móvil + tendencia lineal
- Agregar endpoints /api/analytics/trends, predictions, comparisons
- Integrar Chart.js para gráficas interactivas

FASE 3 - Reportes PDF:
- Crear módulo reports/ con pdf_generator.py
- Implementar SalesReportPDF con generar_reporte_diario y ejecutivo
- Agregar comando /reporte [diario|semanal|ejecutivo]
- Agregar endpoints /api/reports/generate y /api/reports/download

FASE 4 - Mejoras OCR:
- Crear módulo ocr/ con processor.py, preprocessor.py, patterns.py
- Implementar AmountDetector con patrones múltiples de montos
- Agregar preprocesador adaptativo con pipelines para diferentes condiciones
- Soporte para corrección de rotación (deskew) y threshold Otsu

Dependencias agregadas:
- reportlab, matplotlib (PDF)
- scipy, pandas (analytics)
- imutils, deskew, cachetools (OCR)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 03:26:16 +00:00
parent ed1658eb2b
commit 9936deaa90
25 changed files with 5501 additions and 282 deletions
--- a/sales-bot/ocr/amount_detector.py
+++ b/sales-bot/ocr/amount_detector.py
@@ -0,0 +1,258 @@
+"""
+Amount detection for Sales Bot OCR
+Improved detection of total amounts from ticket text
+"""
+
+import re
+import logging
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+# Regex fragment that captures the numeric part of an amount: digits with
+# optional comma grouping, one optional separator (space, tab or dot) and up
+# to two trailing digits.  The separator deliberately excludes newlines so a
+# capture can never run on into the digits that start the next ticket line
+# (e.g. "TOTAL: 150" followed by a date line would otherwise capture
+# "150\n12", which is not a parseable number).
+_AMOUNT = r'([\d,]+[ \t\.]?\d{0,2})'
+
+# Amount patterns as (regex, type label, priority) tuples, in priority order.
+# A lower priority number means a more trustworthy pattern.  Every regex
+# exposes the amount text as capturing group 1.
+PATTERNS = [
+    # Explicit total patterns (highest priority)
+    (r'total\s*a\s*pagar\s*:?\s*\$?\s*' + _AMOUNT, 'total_a_pagar', 1),
+    (r'gran\s*total\s*:?\s*\$?\s*' + _AMOUNT, 'gran_total', 2),
+    (r'total\s+final\s*:?\s*\$?\s*' + _AMOUNT, 'total_final', 3),
+    # Anchored to the start of a line so that "subtotal" is not picked up here
+    (r'(?:^|\n)\s*total\s*:?\s*\$?\s*' + _AMOUNT, 'total', 4),
+
+    # Payment related
+    (r'a\s*cobrar\s*:?\s*\$?\s*' + _AMOUNT, 'a_cobrar', 5),
+    (r'importe\s*(?:total)?\s*:?\s*\$?\s*' + _AMOUNT, 'importe', 6),
+    (r'monto\s*(?:total)?\s*:?\s*\$?\s*' + _AMOUNT, 'monto', 7),
+    (r'suma\s*:?\s*\$?\s*' + _AMOUNT, 'suma', 8),
+    (r'pago\s*:?\s*\$?\s*' + _AMOUNT, 'pago', 9),
+
+    # Subtotal (lower priority - may need to add tax)
+    (r'subtotal\s*:?\s*\$?\s*' + _AMOUNT, 'subtotal', 10),
+
+    # Generic currency patterns (lowest priority): a "$" amount with exactly
+    # two decimals that ends its line
+    (r'\$\s*([\d,]+\.\d{2})\s*(?:\n|$)', 'monto_linea', 11),
+]
+
+# Words that indicate a line is NOT a total (negative patterns)
+EXCLUSION_WORDS = [
+    'cambio', 'efectivo', 'pago con', 'tarjeta', 'recibido',
+    'iva', 'impuesto', 'descuento', 'ahorro', 'puntos'
+]
+
+
+class AmountDetector:
+    """
+    Detects and extracts monetary amounts from ticket text.
+
+    Every configured pattern is tried against the lower-cased text.
+    Candidates located on a line containing an exclusion word, or whose value
+    falls outside the accepted range, are dropped; of the remaining ones the
+    candidate with the best (lowest) priority and then the highest confidence
+    wins.  When no pattern yields a candidate, the largest plausible number in
+    the text is returned as a low-confidence fallback.
+
+    Args:
+        min_amount: Smallest value accepted as a valid amount (inclusive).
+        max_amount: Largest value accepted as a valid amount (inclusive).
+        patterns: ``(regex, type label, priority)`` tuples; each regex must
+            expose the amount text as capturing group 1.  Defaults to the
+            module-level ``PATTERNS``.
+        exclusion_words: Lower-case words that mark a line as not being a
+            total.  Defaults to the module-level ``EXCLUSION_WORDS``.
+    """
+
+    def __init__(
+        self,
+        min_amount: float = 1,
+        max_amount: float = 1000000,
+        patterns: Optional[List[Tuple[str, str, int]]] = None,
+        exclusion_words: Optional[List[str]] = None,
+    ):
+        self.patterns = PATTERNS if patterns is None else patterns
+        self.exclusion_words = (
+            EXCLUSION_WORDS if exclusion_words is None else exclusion_words
+        )
+        self.min_amount = min_amount  # Minimum valid amount
+        self.max_amount = max_amount  # Maximum valid amount
+
+    def detectar_monto(self, texto: str) -> Optional[Dict]:
+        """
+        Detect the total amount of the ticket.
+
+        Args:
+            texto: Text extracted from the ticket.
+
+        Returns:
+            Dict with ``monto``, ``tipo``, ``patron`` and ``confianza``, or
+            None when the text is empty or no amount can be found.
+        """
+        if not texto:
+            return None
+
+        texto_lower = texto.lower()
+        resultados = []
+
+        for patron, tipo, prioridad in self.patterns:
+            # finditer (instead of findall) keeps the position of every match,
+            # so context checks look at the line the match really came from
+            # rather than at the first place the same digits happen to appear.
+            for m in re.finditer(patron, texto_lower, re.IGNORECASE | re.MULTILINE):
+                # An amount never spans lines; drop anything a permissive
+                # pattern captured past a line break.
+                crudo = m.group(1).partition('\n')[0]
+                posicion = m.start(1)
+
+                # Skip if match is in an exclusion context
+                if self._is_excluded(texto_lower, crudo, posicion):
+                    continue
+
+                monto = self._normalizar_monto(crudo)
+                if not self.min_amount <= monto <= self.max_amount:
+                    continue
+
+                resultados.append({
+                    'monto': monto,
+                    'tipo': tipo,
+                    'patron': patron,
+                    'prioridad': prioridad,
+                    'confianza': self._calcular_confianza(
+                        texto_lower, crudo, tipo, posicion
+                    ),
+                })
+
+        if not resultados:
+            # Try to find the largest amount as fallback
+            return self._fallback_detection(texto)
+
+        # Best priority first (lower is better), then highest confidence;
+        # min() keeps the earliest candidate on ties, like a stable sort.
+        best = min(resultados, key=lambda r: (r['prioridad'], -r['confianza']))
+        return {
+            'monto': best['monto'],
+            'tipo': best['tipo'],
+            'patron': best['patron'],
+            'confianza': best['confianza'],
+        }
+
+    def _normalizar_monto(self, monto_str: str) -> float:
+        """
+        Normalize an amount string to a float.
+
+        Handles various formats:
+        - 1,234.56 (US/Mexico format)
+        - 1234.56
+        - 1 234.56 (space separator)
+        - 1234,56 (European format)
+        - 1.234,56 (European format with thousands separator)
+
+        Returns:
+            The parsed amount, or 0.0 when the string is empty or cannot be
+            parsed as a number.
+        """
+        if not monto_str:
+            return 0.0
+
+        # Remove currency symbols and whitespace
+        monto = monto_str.strip().replace('$', '').replace(' ', '')
+
+        ultima_coma = monto.rfind(',')
+        ultimo_punto = monto.rfind('.')
+
+        if ultima_coma != -1 and ultimo_punto != -1:
+            # Both separators present: whichever comes last is the decimal
+            # mark, the other one only groups thousands.
+            if ultimo_punto > ultima_coma:
+                # US/Mexico format: 1,234.56
+                monto = monto.replace(',', '')
+            else:
+                # European format: 1.234,56
+                monto = monto.replace('.', '').replace(',', '.')
+        elif ultima_coma != -1:
+            # Could be European (1234,56) or thousand separator (1,234)
+            entero, _, decimales = monto.rpartition(',')
+            if monto.count(',') == 1 and len(decimales) == 2:
+                # European format
+                monto = entero + '.' + decimales
+            else:
+                # Thousand separator
+                monto = monto.replace(',', '')
+
+        try:
+            return float(monto)
+        except ValueError:
+            return 0.0
+
+    def _is_excluded(self, texto: str, match: str, posicion: Optional[int] = None) -> bool:
+        """
+        Check whether the match appears in an exclusion context.
+
+        Args:
+            texto: Full ticket text.
+            match: Amount text that was matched.
+            posicion: Index of the match inside ``texto``.  When given, only
+                the line containing that index is inspected.  When omitted,
+                every line containing the ``match`` string is inspected
+                (legacy behaviour, which cannot tell apart equal amounts
+                printed on different lines).
+
+        Returns:
+            True if an inspected line contains any exclusion word.
+        """
+        if posicion is None:
+            lineas = [linea for linea in texto.split('\n') if match in linea]
+        else:
+            # rfind returns -1 when there is no earlier newline, which makes
+            # the line start at index 0.
+            inicio = texto.rfind('\n', 0, posicion) + 1
+            fin = texto.find('\n', posicion)
+            if fin == -1:
+                fin = len(texto)
+            lineas = [texto[inicio:fin]]
+
+        return any(
+            exclusion in linea.lower()
+            for linea in lineas
+            for exclusion in self.exclusion_words
+        )
+
+    def _calcular_confianza(self, texto: str, match: str, tipo: str,
+                            posicion: Optional[int] = None) -> float:
+        """
+        Calculate the confidence score for a match.
+
+        Args:
+            texto: Full ticket text.
+            match: Amount text that was matched.
+            tipo: Type label of the pattern that produced the match.
+            posicion: Index of the match inside ``texto``; when omitted the
+                first occurrence of ``match`` in ``texto`` is used.
+
+        Returns:
+            Value between 0.0 and 1.0
+        """
+        confianza = 0.5  # Base confidence
+
+        # Higher confidence for explicit total patterns
+        if tipo in ('total_a_pagar', 'gran_total', 'total_final'):
+            confianza += 0.3
+        elif tipo == 'total':
+            confianza += 0.2
+
+        if posicion is None:
+            posicion = texto.find(match)
+
+        # Higher confidence if near end of text
+        if posicion > len(texto) * 0.6:  # In last 40% of text
+            confianza += 0.1
+
+        # Higher confidence if followed by payment info (next 50 characters)
+        fin = posicion + len(match)
+        despues = texto[fin:fin + 50].lower()
+        if any(word in despues for word in ('efectivo', 'tarjeta', 'cambio', 'gracias')):
+            confianza += 0.1
+
+        return min(confianza, 1.0)
+
+    def _fallback_detection(self, texto: str) -> Optional[Dict]:
+        """
+        Fallback detection when standard patterns fail.
+
+        Returns the largest number in the text that lies within the accepted
+        range.  Any digit run qualifies, with or without a currency symbol,
+        which is why the result carries a low fixed confidence.
+
+        Returns:
+            Dict with ``monto``, ``tipo``, ``patron`` and ``confianza``, or
+            None when no number in range is found.
+        """
+        if not texto:
+            return None
+
+        # Find all currency-like numbers
+        candidatos = (
+            self._normalizar_monto(crudo)
+            for crudo in re.findall(r'\$?\s*([\d,]+\.?\d{0,2})', texto)
+        )
+        validos = [
+            monto for monto in candidatos
+            if self.min_amount <= monto <= self.max_amount
+        ]
+        if not validos:
+            return None
+
+        # Return the largest amount (likely the total)
+        return {
+            'monto': max(validos),
+            'tipo': 'fallback_max',
+            'patron': 'heuristic',
+            'confianza': 0.3,
+        }
+
+    def detectar_multiples_montos(self, texto: str) -> List[Dict]:
+        """
+        Detect every amount in the text.
+
+        Useful for itemized receipts.
+
+        Returns:
+            List of dicts with ``monto`` (float), ``contexto`` (the stripped
+            line the amount was found on) and ``es_total`` (True when that
+            line mentions "total" other than as part of "subtotal").
+        """
+        if not texto:
+            return []
+
+        resultados = []
+
+        # Find all lines with amounts
+        for linea in texto.split('\n'):
+            contexto = linea.strip()
+            # The lookbehind keeps "subtotal" lines from being flagged as totals
+            es_total = re.search(r'(?<!sub)total', linea.lower()) is not None
+
+            for crudo in re.findall(r'\$?\s*([\d,]+\.?\d{0,2})', linea):
+                monto = self._normalizar_monto(crudo)
+                if self.min_amount <= monto <= self.max_amount:
+                    resultados.append({
+                        'monto': monto,
+                        'contexto': contexto,
+                        'es_total': es_total,
+                    })
+
+        return resultados
+
+
+def detectar_monto(texto: str) -> Optional[Dict]:
+    """
+    Detect the ticket amount using a freshly created AmountDetector.
+
+    Module-level shortcut for ``AmountDetector().detectar_monto(texto)``.
+
+    Args:
+        texto: Ticket text
+
+    Returns:
+        Dict with monto, tipo, patron, confianza or None
+    """
+    return AmountDetector().detectar_monto(texto)
+
+
+def normalizar_monto(monto_str: str) -> float:
+    """
+    Normalize an amount string using a freshly created AmountDetector.
+
+    Module-level shortcut for the detector's ``_normalizar_monto`` helper.
+
+    Args:
+        monto_str: Amount as string
+
+    Returns:
+        Amount as float
+    """
+    return AmountDetector()._normalizar_monto(monto_str)