Files
sales-bot-stacks/sales-bot/ocr/processor.py
consultoria-as 9936deaa90 feat: Implementar PWA, Analytics, Reportes PDF y mejoras OCR
FASE 1 - PWA y Frontend:
- Crear templates/base.html, dashboard.html, analytics.html, executive.html
- Crear static/css/main.css con diseño responsivo
- Agregar static/js/app.js, pwa.js, camera.js, charts.js
- Implementar manifest.json y service-worker.js para PWA
- Soporte para captura de tickets desde cámara móvil

FASE 2 - Analytics:
- Crear módulo analytics/ con predictions.py, trends.py, comparisons.py
- Implementar predicción básica con promedio móvil + tendencia lineal
- Agregar endpoints /api/analytics/trends, predictions, comparisons
- Integrar Chart.js para gráficas interactivas

FASE 3 - Reportes PDF:
- Crear módulo reports/ con pdf_generator.py
- Implementar SalesReportPDF con generar_reporte_diario y ejecutivo
- Agregar comando /reporte [diario|semanal|ejecutivo]
- Agregar endpoints /api/reports/generate y /api/reports/download

FASE 4 - Mejoras OCR:
- Crear módulo ocr/ con processor.py, preprocessor.py, patterns.py
- Implementar AmountDetector con patrones múltiples de montos
- Agregar preprocesador adaptativo con pipelines para diferentes condiciones
- Soporte para corrección de rotación (deskew) y threshold Otsu

Dependencias agregadas:
- reportlab, matplotlib (PDF)
- scipy, pandas (analytics)
- imutils, deskew, cachetools (OCR)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 03:26:16 +00:00

295 lines
8.1 KiB
Python

"""
Main OCR processor for Sales Bot
Combines preprocessing, text extraction, and amount detection
"""
import logging
import os
from typing import Dict, Optional
from io import BytesIO
logger = logging.getLogger(__name__)
# Try to import OCR engine
try:
import pytesseract
from PIL import Image
TESSERACT_AVAILABLE = True
except ImportError:
TESSERACT_AVAILABLE = False
logger.warning("pytesseract not available. OCR will not work.")
# Import local modules
from .preprocessor import ImagePreprocessor, preprocess_image
from .amount_detector import AmountDetector, detectar_monto
from .patterns import (
detectar_formato_ticket,
extraer_fecha_ticket,
extraer_cliente_ticket,
contar_tubos_texto,
get_patron_total
)
class OCRProcessor:
    """
    Main OCR processor that coordinates image preprocessing,
    text extraction, and data parsing for ticket images.

    Attributes:
        preprocessor: ImagePreprocessor applied to the image before OCR.
        amount_detector: AmountDetector used to find the total amount.
        confidence_threshold: Read from the OCR_CONFIDENCE_THRESHOLD
            environment variable (default '0.6'). No method of this class
            consults it.
        tesseract_config: Command-line flags passed to Tesseract.
    """

    def __init__(self):
        self.preprocessor = ImagePreprocessor()
        self.amount_detector = AmountDetector()
        self.confidence_threshold = float(os.getenv('OCR_CONFIDENCE_THRESHOLD', '0.6'))
        # Tesseract configuration for Spanish: default engine mode (--oem 3),
        # page treated as a single uniform block of text (--psm 6).
        self.tesseract_config = '--oem 3 --psm 6 -l spa'

    @staticmethod
    def _error_result(message: str) -> Dict:
        """
        Build the dict returned by process() when extraction fails.

        Args:
            message: Human-readable description of the failure

        Returns:
            Dict with 'error', empty 'texto', and zeroed 'monto'/'confianza'
        """
        return {
            'error': message,
            'texto': '',
            'monto': 0,
            'confianza': 0
        }

    def process(self, image_bytes: bytes) -> Dict:
        """
        Process a ticket image and extract relevant data.

        OCR is run on the preprocessed image first. If that yields fewer
        than 10 characters, OCR is retried on the original image and the
        longer of the two texts is kept.

        Args:
            image_bytes: Raw image bytes (JPEG, PNG, etc.)

        Returns:
            On success, a dict with:
            - texto: Full extracted text
            - monto: Detected total amount (0 if none was detected)
            - monto_tipo: Kind of amount match reported by the detector
            - cliente: Client name if found
            - fecha: Date if found
            - tubos: Number of tubes/items
            - formato: Detected ticket format
            - confianza: Confidence score
            On failure, a dict with 'error', 'texto' (''), 'monto' (0) and
            'confianza' (0). Exceptions are logged and reported this way
            instead of being raised.
        """
        # Module-level flag set by the guarded pytesseract/PIL import.
        if not TESSERACT_AVAILABLE:
            return self._error_result('Tesseract OCR not available')

        try:
            # Preprocess image, then extract text from the result
            processed_bytes = self.preprocessor.preprocess(image_bytes)
            texto = self._extract_text(processed_bytes) or ''

            if len(texto.strip()) < 10:
                # Try again with the original image, but only adopt that
                # result when it actually recovered more text; otherwise a
                # short-but-valid first pass would be thrown away.
                fallback = self._extract_text(image_bytes) or ''
                if len(fallback.strip()) > len(texto.strip()):
                    texto = fallback

            if not texto:
                return self._error_result('No text could be extracted')

            # Detect ticket format
            formato = detectar_formato_ticket(texto)

            # Extract amount; a missing result or None fields fall back to
            # neutral defaults so later arithmetic cannot fail on None.
            monto_result = self.amount_detector.detectar_monto(texto) or {}
            monto = monto_result.get('monto') or 0
            monto_confianza = monto_result.get('confianza') or 0
            monto_tipo = monto_result.get('tipo', 'unknown')

            # Extract other data
            cliente = extraer_cliente_ticket(texto)
            fecha = extraer_fecha_ticket(texto)
            tubos = contar_tubos_texto(texto)

            # Calculate overall confidence
            confianza = self._calculate_overall_confidence(
                texto, monto, monto_confianza, cliente, fecha
            )

            return {
                'texto': texto,
                'monto': monto,
                'monto_tipo': monto_tipo,
                'cliente': cliente,
                'fecha': fecha,
                'tubos': tubos,
                'formato': formato,
                'confianza': confianza
            }
        except Exception as e:
            # Boundary of the OCR pipeline: report the failure to the caller
            # as data rather than propagating it.
            logger.error(f"Error processing image: {e}", exc_info=True)
            return self._error_result(str(e))

    def _extract_text(self, image_bytes: bytes) -> str:
        """
        Extract text from image bytes using Tesseract.

        Args:
            image_bytes: Encoded image data readable by PIL

        Returns:
            Cleaned OCR text, or '' if the image could not be read or OCR
            failed (the error is logged).
        """
        try:
            # The context manager releases the decoder's resources once OCR
            # has finished.
            with Image.open(BytesIO(image_bytes)) as img:
                # Convert to RGB unless already RGB or grayscale
                if img.mode not in ('RGB', 'L'):
                    img = img.convert('RGB')
                raw_text = pytesseract.image_to_string(
                    img, config=self.tesseract_config
                )
            return self._clean_text(raw_text)
        except Exception as e:
            logger.error(f"Error extracting text: {e}", exc_info=True)
            return ''

    def _clean_text(self, texto: str) -> str:
        """
        Clean up OCR output text.

        Whitespace runs inside each line are collapsed to a single space,
        lines are trimmed, and empty lines are dropped. Line breaks between
        non-empty lines are preserved so the ticket keeps its line layout.

        No character substitutions (e.g. '0' -> 'O', 'S' -> '$') are
        applied: done blindly they would corrupt real digits and amounts.

        Args:
            texto: Raw text returned by the OCR engine

        Returns:
            Normalized text; '' for empty or whitespace-only input
        """
        if not texto:
            return ''
        # str.split() with no argument splits on any whitespace run and
        # discards leading/trailing whitespace, so re-joining with a single
        # space normalizes each line.
        normalized = (' '.join(line.split()) for line in texto.splitlines())
        return '\n'.join(line for line in normalized if line)

    def _calculate_overall_confidence(
        self,
        texto: str,
        monto: float,
        monto_confianza: float,
        cliente: Optional[str],
        fecha: Optional[str]
    ) -> float:
        """
        Calculate overall extraction confidence.

        The score is the sum of:
        - 0.2 if the text is longer than 50 characters, plus 0.1 more if
          longer than 200
        - monto_confianza * 0.4
        - 0.1 if a client was found and 0.1 if a date was found
        - 0.05 per receipt keyword present in the text, at most 0.2
        capped at 1.0.

        Args:
            texto: Extracted text
            monto: Detected amount (accepted but not used in the score)
            monto_confianza: Confidence reported for the amount
            cliente: Client name, if any
            fecha: Date, if any

        Returns:
            Confidence score, never above 1.0
        """
        confidence = 0.0

        # Text quality (based on length)
        if len(texto) > 50:
            confidence += 0.2
        if len(texto) > 200:
            confidence += 0.1

        # Amount detection confidence
        confidence += monto_confianza * 0.4

        # Bonus for finding additional data
        if cliente:
            confidence += 0.1
        if fecha:
            confidence += 0.1

        # Check for typical receipt keywords (case-insensitive); lowercase
        # the text once rather than once per keyword.
        keywords = ['total', 'cliente', 'fecha', 'ticket', 'venta', 'pago']
        texto_lower = texto.lower()
        found_keywords = sum(1 for kw in keywords if kw in texto_lower)
        confidence += min(found_keywords * 0.05, 0.2)

        return min(confidence, 1.0)

    @staticmethod
    def _merge_results(results: list, paginas: int) -> Dict:
        """
        Merge per-image result dicts into a single combined result.

        Args:
            results: Dicts shaped like the return value of process()
            paginas: Number of images that were submitted

        Returns:
            Dict with the page texts joined by '\\n---\\n', the largest
            'monto', the first non-empty 'cliente', 'fecha' and 'formato',
            the sum of 'tubos', the highest 'confianza', and 'paginas'.
        """
        all_texto = []
        max_monto = 0
        cliente = None
        fecha = None
        tubos = 0
        formato = None
        max_confianza = 0

        for result in results:
            if result.get('texto'):
                all_texto.append(result['texto'])

            # "or 0" also covers keys that are present but hold None, which
            # would otherwise break the comparison/addition below.
            page_monto = result.get('monto') or 0
            if page_monto > max_monto:
                max_monto = page_monto

            if not cliente and result.get('cliente'):
                cliente = result['cliente']
            if not fecha and result.get('fecha'):
                fecha = result['fecha']

            tubos += result.get('tubos') or 0

            if not formato and result.get('formato'):
                formato = result['formato']

            page_confianza = result.get('confianza') or 0
            if page_confianza > max_confianza:
                max_confianza = page_confianza

        return {
            'texto': '\n---\n'.join(all_texto),
            'monto': max_monto,
            'cliente': cliente,
            'fecha': fecha,
            'tubos': tubos,
            'formato': formato,
            'confianza': max_confianza,
            'paginas': paginas
        }

    def process_multiple(self, images: list) -> Dict:
        """
        Process multiple images (e.g., multi-page receipt).
        Combines results from all images.

        Pages that fail OCR contribute nothing to the combined result;
        their errors are not reported in the returned dict.

        Args:
            images: List of image bytes

        Returns:
            Combined results (see _merge_results for the combination rules)
        """
        results = [self.process(img_bytes) for img_bytes in images]
        return self._merge_results(results, len(images))
def procesar_ticket_imagen(image_bytes: bytes) -> Dict:
    """
    Run OCR on a single ticket image using a newly created OCRProcessor.

    Args:
        image_bytes: Raw image bytes

    Returns:
        The dict produced by OCRProcessor.process for this image
    """
    return OCRProcessor().process(image_bytes)
def procesar_multiples_imagenes(images: list) -> Dict:
    """
    Run OCR on several images using a newly created OCRProcessor and
    return the combined result.

    Args:
        images: List of image bytes

    Returns:
        The dict produced by OCRProcessor.process_multiple for these images
    """
    return OCRProcessor().process_multiple(images)