Files
Consultoría AS e32885afc5 fix: Fix YAML syntax errors and validator prompt formatting
- Fix YAML files with unquoted strings containing quotes
- Export singleton instances in ai/__init__.py
- Fix validator scoring prompt to use replace() instead of format()
  to avoid conflicts with JSON curly braces

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-28 21:13:58 +00:00

477 lines
15 KiB
Python

"""
ContentValidator - Validación y scoring de contenido con IA.
Este módulo maneja:
- Validaciones obligatorias (pass/fail)
- Scoring de calidad con IA
- Decisiones de regeneración
- Marcado de top performers
"""
import json
import re
from pathlib import Path
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import dataclass
from openai import OpenAI
import yaml
from app.core.config import settings
from app.services.ai.platform_adapter import platform_adapter
@dataclass
class ValidationResult:
    """Outcome of the mandatory (pass/fail) validations."""
    passed: bool                  # True when no error-severity issue was found
    issues: List[Dict[str, Any]]  # one dict per detected issue (may include warnings)
    content: str                  # the content that was validated
@dataclass
class ScoringResult:
    """Outcome of the AI-based quality scoring."""
    total_score: int           # aggregate score, 0-100
    breakdown: Dict[str, int]  # per-criterion scores (hook, clarity, ...)
    feedback: str              # free-text improvement suggestion from the model
    is_top_performer: bool     # True when total_score reaches the "excellent" threshold
    action: str                # "accept", "regenerate", "reject"
@dataclass
class ContentQualityResult:
    """Combined outcome of validation plus (optional) AI scoring."""
    validation: ValidationResult      # mandatory pass/fail checks
    scoring: Optional[ScoringResult]  # None when scoring was skipped or validation failed
    final_decision: str               # "accept", "regenerate", "reject"
    content: str                      # the content that was evaluated
class ContentValidator:
    """
    Validator for generated content.

    Combines rule-based validations (fast, zero token cost) with
    AI scoring (more precise, costs tokens).
    """

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize the validator.

        Args:
            config_path: Path to the quality.yaml file. Defaults to
                <package root>/config/quality.yaml when omitted.
        """
        # Client is created lazily so the module can be imported without
        # credentials being configured.
        self._client = None
        self.model = "deepseek-chat"
        # Load configuration
        if config_path:
            self.config_path = Path(config_path)
        else:
            base_dir = Path(__file__).parent.parent.parent
            self.config_path = base_dir / "config" / "quality.yaml"
        self.config = self._load_config()

    def _load_config(self) -> Dict:
        """Load quality.yaml, falling back to built-in defaults."""
        if self.config_path.exists():
            with open(self.config_path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f)
        # Default config when the file does not exist
        return {
            "thresholds": {
                "minimum_score": 60,
                "excellent_score": 85,
            },
            "regeneration": {
                "max_attempts": 2,
            },
            "validations": {
                "prohibited_content": {
                    "prohibited_words": [],
                    "prohibited_patterns": [],
                }
            }
        }

    @property
    def client(self) -> OpenAI:
        """Lazily initialized DeepSeek client (OpenAI-compatible API).

        Raises:
            ValueError: If DEEPSEEK_API_KEY is not configured.
        """
        if self._client is None:
            if not settings.DEEPSEEK_API_KEY:
                raise ValueError("DEEPSEEK_API_KEY no configurada")
            self._client = OpenAI(
                api_key=settings.DEEPSEEK_API_KEY,
                base_url=settings.DEEPSEEK_BASE_URL
            )
        return self._client

    # === Validations (Pass/Fail) ===

    def validate(
        self,
        content: str,
        platform: str,
        expected_language: str = "es"
    ) -> ValidationResult:
        """
        Run the mandatory validations.

        Args:
            content: Content to validate.
            platform: Target platform (used for length limits).
            expected_language: Expected language. NOTE(review): currently
                unused — kept for interface compatibility; no language
                check is implemented in this module.

        Returns:
            ValidationResult. `passed` is False only when at least one
            error-severity issue was found; warning-severity issues are
            reported in `issues` but do not fail validation.
        """
        issues = []
        # 1. Length against platform limits
        length_result = self._validate_length(content, platform)
        if not length_result["passed"]:
            issues.append(length_result)
        # 2. Prohibited words / patterns
        prohibited_result = self._validate_prohibited_content(content)
        if not prohibited_result["passed"]:
            issues.append(prohibited_result)
        # 3. Format heuristics (truncation, encoding) — warnings only
        format_result = self._validate_format(content)
        if not format_result["passed"]:
            issues.append(format_result)
        # 4. Reject empty or near-empty content
        if len(content.strip()) < 20:
            issues.append({
                "type": "empty_content",
                "message": "Contenido demasiado corto",
                "severity": "error",
                "passed": False
            })
        # Only error-severity issues fail the validation as a whole.
        passed = all(i.get("severity") != "error" for i in issues)
        return ValidationResult(
            passed=passed,
            issues=issues,
            content=content
        )

    def _validate_length(self, content: str, platform: str) -> Dict:
        """Check content length against the platform's character limit."""
        limits = platform_adapter.get_limits(platform)
        max_chars = limits.get("max_characters", 2000)
        if len(content) > max_chars:
            return {
                "type": "length",
                "message": f"Contenido excede límite: {len(content)}/{max_chars}",
                "severity": "error",
                "passed": False,
                "current": len(content),
                "max": max_chars
            }
        return {"type": "length", "passed": True}

    def _validate_prohibited_content(self, content: str) -> Dict:
        """Check that the content contains no prohibited words or regex patterns."""
        validations = self.config.get("validations", {})
        prohibited = validations.get("prohibited_content", {})
        content_lower = content.lower()
        # Prohibited words: case-insensitive substring match
        prohibited_words = prohibited.get("prohibited_words", [])
        for word in prohibited_words:
            if word.lower() in content_lower:
                return {
                    "type": "prohibited_content",
                    "message": f"Contenido contiene palabra prohibida: {word}",
                    "severity": "error",
                    "passed": False,
                    "word": word
                }
        # Prohibited regex patterns, matched against the lowercased content
        prohibited_patterns = prohibited.get("prohibited_patterns", [])
        for pattern in prohibited_patterns:
            if re.search(pattern, content_lower):
                return {
                    "type": "prohibited_pattern",
                    "message": "Contenido coincide con patrón prohibido",
                    "severity": "error",
                    "passed": False,
                    "pattern": pattern
                }
        return {"type": "prohibited_content", "passed": True}

    def _validate_format(self, content: str) -> Dict:
        """Heuristic format checks (truncation, encoding). Warning severity only."""
        issues = []
        # Truncation heuristic: content not ending in a terminator whose last
        # "word" is very long was probably cut off mid-word.
        if content and content[-1] not in ".!?\"')#\n":
            words = content.split()
            last_word = words[-1] if words else ""
            if len(last_word) > 15:  # very long final word => likely truncated
                issues.append("Posiblemente truncado")
        # Encoding sanity check (e.g. lone surrogates fail to encode)
        try:
            content.encode("utf-8").decode("utf-8")
        except Exception:
            issues.append("Problemas de encoding")
        if issues:
            return {
                "type": "format",
                "message": "; ".join(issues),
                "severity": "warning",
                # passed=False so validate() surfaces the warning in
                # ValidationResult.issues; the "warning" severity keeps the
                # overall validation from failing.
                "passed": False
            }
        return {"type": "format", "passed": True}

    # === AI Scoring ===

    async def score(
        self,
        content: str,
        platform: str
    ) -> ScoringResult:
        """
        Evaluate content quality using the AI model.

        Args:
            content: Content to evaluate.
            platform: Target platform.

        Returns:
            ScoringResult with total score (clamped to 0-100), per-criterion
            breakdown, feedback, and the resulting action.
        """
        # Scoring prompt from config, with a built-in fallback
        scoring_prompt = self.config.get("scoring_prompt", "")
        if not scoring_prompt:
            scoring_prompt = self._default_scoring_prompt()
        # Render with replace() instead of str.format() so the literal JSON
        # braces in the template are left untouched.
        prompt = scoring_prompt.replace("{content}", content).replace("{platform}", platform)
        # Call DeepSeek.
        # NOTE(review): this method is async but the SDK call below is
        # blocking — consider an async client or a thread executor.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "Eres un evaluador de contenido para redes sociales. "
                    "Evalúa de forma objetiva y estricta. "
                    "Responde SOLO en JSON válido."
                },
                {"role": "user", "content": prompt}
            ],
            max_tokens=300,
            temperature=0.3  # low temperature for consistency
        )
        response_text = response.choices[0].message.content.strip()
        # Parse the JSON reply
        try:
            # Strip markdown code fences if present
            if "```json" in response_text:
                response_text = response_text.split("```json")[1].split("```")[0]
            elif "```" in response_text:
                response_text = response_text.split("```")[1].split("```")[0]
            result = json.loads(response_text)
        except json.JSONDecodeError:
            # Parsing failed: fall back to extracting numbers from free text
            result = self._extract_score_from_text(response_text)
        if not isinstance(result, dict):
            # Valid JSON, but not an object (e.g. a bare number or list)
            result = self._extract_score_from_text(response_text)
        # Coerce and clamp: the model may return a float, a numeric string,
        # a non-numeric value, or an out-of-range number.
        try:
            total_score = int(result.get("total", 50))
        except (TypeError, ValueError):
            total_score = 50
        total_score = min(100, max(0, total_score))
        breakdown = result.get("breakdown", {})
        feedback = result.get("feedback", "")
        # Decide the action from the configured thresholds
        thresholds = self.config.get("thresholds", {})
        min_score = thresholds.get("minimum_score", 60)
        excellent_score = thresholds.get("excellent_score", 85)
        if total_score < 40:
            action = "reject"
        elif total_score < min_score:
            action = "regenerate"
        else:
            action = "accept"
        is_top = total_score >= excellent_score
        return ScoringResult(
            total_score=total_score,
            breakdown=breakdown,
            feedback=feedback,
            is_top_performer=is_top,
            action=action
        )

    def _default_scoring_prompt(self) -> str:
        """Default scoring prompt used when quality.yaml provides none."""
        return """Evalúa este post para {platform} en escala 0-100.
POST:
{content}
CRITERIOS (suma = 100):
- Hook (0-25): ¿La primera línea captura atención?
- Claridad (0-20): ¿Se entiende fácilmente?
- Accionabilidad (0-20): ¿Qué puede hacer el lector?
- Originalidad (0-15): ¿Evita clichés?
- Voz de marca (0-10): ¿Profesional pero cercano?
- CTA (0-10): ¿CTA claro si aplica?
RESPONDE EN JSON:
{"total": N, "breakdown": {"hook_strength": N, "clarity": N, "actionability": N, "originality": N, "brand_voice": N, "cta_effectiveness": N}, "feedback": "sugerencia"}"""

    def _extract_score_from_text(self, text: str) -> Dict:
        """Best-effort score extraction when JSON parsing fails."""
        # Look for patterns like "total: 75" or "total 75" (module-level `re`
        # is already imported; no local import needed)
        total_match = re.search(r"total[:\s]+(\d+)", text.lower())
        total = int(total_match.group(1)) if total_match else 50
        return {
            "total": min(100, max(0, total)),
            "breakdown": {},
            "feedback": "No se pudo parsear respuesta completa"
        }

    # === Full Evaluation ===

    async def evaluate(
        self,
        content: str,
        platform: str,
        skip_scoring: bool = False
    ) -> ContentQualityResult:
        """
        Full evaluation: validation + scoring.

        Args:
            content: Content to evaluate.
            platform: Target platform.
            skip_scoring: Skip AI scoring (validation only).

        Returns:
            ContentQualityResult with the combined outcome.
        """
        # 1. Mandatory validations
        validation = self.validate(content, platform)
        # If validation fails there is no point in paying for scoring
        if not validation.passed:
            return ContentQualityResult(
                validation=validation,
                scoring=None,
                final_decision="reject",
                content=content
            )
        # 2. AI scoring (unless skipped)
        scoring = None
        if not skip_scoring:
            scoring = await self.score(content, platform)
        # 3. Final decision
        if scoring:
            final_decision = scoring.action
        else:
            final_decision = "accept"  # no scoring: accept since validation passed
        return ContentQualityResult(
            validation=validation,
            scoring=scoring,
            final_decision=final_decision,
            content=content
        )

    # === Utilities ===

    def should_regenerate(
        self,
        quality_result: ContentQualityResult,
        attempt: int = 1
    ) -> bool:
        """
        Decide whether the content should be regenerated.

        Args:
            quality_result: Evaluation result.
            attempt: Current attempt number (1-based).

        Returns:
            True when the decision is "regenerate" and the attempt budget
            (regeneration.max_attempts, default 2) is not exhausted.
        """
        max_attempts = self.config.get("regeneration", {}).get("max_attempts", 2)
        if attempt >= max_attempts:
            return False
        return quality_result.final_decision == "regenerate"

    def get_regeneration_hints(
        self,
        quality_result: ContentQualityResult
    ) -> str:
        """
        Build improvement hints to append to a regeneration prompt.

        Args:
            quality_result: Evaluation result.

        Returns:
            A "PARA MEJORAR" block with one hint per line, or "" when
            there is nothing to suggest.
        """
        hints = []
        # Hints derived from validation issues
        for issue in quality_result.validation.issues:
            if issue.get("type") == "length":
                hints.append(f"Reducir longitud a máximo {issue.get('max')} caracteres")
            elif issue.get("type") == "prohibited_content":
                hints.append(f"Evitar: {issue.get('word', 'contenido prohibido')}")
        # Hints derived from scoring
        if quality_result.scoring:
            if quality_result.scoring.feedback:
                hints.append(quality_result.scoring.feedback)
            # Flag the weakest-scoring criteria
            breakdown = quality_result.scoring.breakdown
            if breakdown:
                weak_areas = []
                if breakdown.get("hook_strength", 25) < 15:
                    weak_areas.append("mejorar el hook inicial")
                if breakdown.get("clarity", 20) < 12:
                    weak_areas.append("hacer el mensaje más claro")
                if breakdown.get("actionability", 20) < 12:
                    weak_areas.append("hacerlo más accionable")
                if weak_areas:
                    hints.append("Enfocarse en: " + ", ".join(weak_areas))
        if hints:
            return "\n\nPARA MEJORAR:\n- " + "\n- ".join(hints)
        return ""
# Global singleton instance used by the rest of the application
content_validator = ContentValidator()