Major improvements to AI content generation: ## New Components (app/services/ai/) - PromptLibrary: YAML-based prompt templates with inheritance - ContextEngine: Anti-repetition and best performers tracking - ContentGeneratorV2: Enhanced generation with dynamic parameters - PlatformAdapter: Platform-specific content adaptation - ContentValidator: AI-powered quality scoring (0-100) ## Prompt Library (app/prompts/) - 3 personalities: default, educational, promotional - 5 templates: tip_tech, product_post, service_post, thread, response - 4 platform configs: x, threads, instagram, facebook - Few-shot examples by category: ia, productividad, seguridad ## Database Changes - New table: content_memory (tracks generated content) - New columns in posts: quality_score, score_breakdown, generation_attempts ## New API Endpoints (/api/v2/generate/) - POST /generate - Generation with quality check - POST /generate/batch - Batch generation - POST /quality/evaluate - Evaluate content quality - GET /templates, /personalities, /platforms - List configs ## Celery Tasks - update_engagement_scores (every 6h) - cleanup_old_memory (monthly) - refresh_best_posts_yaml (weekly) ## Tests - Comprehensive tests for all AI engine components Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
480 lines
15 KiB
Python
480 lines
15 KiB
Python
"""
|
|
ContentValidator - Validación y scoring de contenido con IA.
|
|
|
|
Este módulo maneja:
|
|
- Validaciones obligatorias (pass/fail)
|
|
- Scoring de calidad con IA
|
|
- Decisiones de regeneración
|
|
- Marcado de top performers
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, Tuple, List
|
|
from dataclasses import dataclass
|
|
from openai import OpenAI
|
|
import yaml
|
|
|
|
from app.core.config import settings
|
|
from app.services.ai.platform_adapter import platform_adapter
|
|
|
|
|
|
@dataclass
class ValidationResult:
    """Result of the rule-based (pass/fail) validation stage."""
    # True when no issue with severity "error" was recorded.
    passed: bool
    # One dict per flagged check: keys like "type", "message", "severity".
    issues: List[Dict[str, Any]]
    # The content that was validated, carried through unchanged.
    content: str
|
|
|
|
|
|
@dataclass
class ScoringResult:
    """Result of the AI quality-scoring stage."""
    # Overall quality score on a 0-100 scale.
    total_score: int
    # Per-criterion scores (e.g. hook_strength, clarity, actionability).
    breakdown: Dict[str, int]
    # Free-text improvement suggestion returned by the evaluator model.
    feedback: str
    # True when total_score reaches the configured "excellent" threshold.
    is_top_performer: bool
    action: str  # "accept", "regenerate", "reject"
|
|
|
|
|
|
@dataclass
class ContentQualityResult:
    """Combined result of validation and scoring."""
    # Outcome of the rule-based validations.
    validation: ValidationResult
    # Outcome of AI scoring; None when scoring was skipped or validation failed.
    scoring: Optional[ScoringResult]
    final_decision: str  # "accept", "regenerate", "reject"
    # The evaluated content, carried through unchanged.
    content: str
|
|
|
|
|
|
class ContentValidator:
|
|
"""
|
|
Validador de contenido generado.
|
|
|
|
Combina validaciones basadas en reglas (rápidas, sin costo)
|
|
con scoring usando IA (más preciso, con costo de tokens).
|
|
"""
|
|
|
|
def __init__(self, config_path: Optional[str] = None):
|
|
"""
|
|
Inicializar el validador.
|
|
|
|
Args:
|
|
config_path: Ruta al archivo quality.yaml
|
|
"""
|
|
self._client = None
|
|
self.model = "deepseek-chat"
|
|
|
|
# Cargar configuración
|
|
if config_path:
|
|
self.config_path = Path(config_path)
|
|
else:
|
|
base_dir = Path(__file__).parent.parent.parent
|
|
self.config_path = base_dir / "config" / "quality.yaml"
|
|
|
|
self.config = self._load_config()
|
|
|
|
def _load_config(self) -> Dict:
|
|
"""Cargar configuración de quality.yaml."""
|
|
if self.config_path.exists():
|
|
with open(self.config_path, "r", encoding="utf-8") as f:
|
|
return yaml.safe_load(f)
|
|
|
|
# Config por defecto si no existe el archivo
|
|
return {
|
|
"thresholds": {
|
|
"minimum_score": 60,
|
|
"excellent_score": 85,
|
|
},
|
|
"regeneration": {
|
|
"max_attempts": 2,
|
|
},
|
|
"validations": {
|
|
"prohibited_content": {
|
|
"prohibited_words": [],
|
|
"prohibited_patterns": [],
|
|
}
|
|
}
|
|
}
|
|
|
|
@property
|
|
def client(self) -> OpenAI:
|
|
"""Lazy initialization del cliente."""
|
|
if self._client is None:
|
|
if not settings.DEEPSEEK_API_KEY:
|
|
raise ValueError("DEEPSEEK_API_KEY no configurada")
|
|
self._client = OpenAI(
|
|
api_key=settings.DEEPSEEK_API_KEY,
|
|
base_url=settings.DEEPSEEK_BASE_URL
|
|
)
|
|
return self._client
|
|
|
|
# === Validaciones (Pass/Fail) ===
|
|
|
|
def validate(
|
|
self,
|
|
content: str,
|
|
platform: str,
|
|
expected_language: str = "es"
|
|
) -> ValidationResult:
|
|
"""
|
|
Ejecutar validaciones obligatorias.
|
|
|
|
Args:
|
|
content: Contenido a validar
|
|
platform: Plataforma destino
|
|
expected_language: Idioma esperado
|
|
|
|
Returns:
|
|
ValidationResult con resultado de validaciones
|
|
"""
|
|
issues = []
|
|
|
|
# 1. Validar longitud
|
|
length_result = self._validate_length(content, platform)
|
|
if not length_result["passed"]:
|
|
issues.append(length_result)
|
|
|
|
# 2. Validar contenido prohibido
|
|
prohibited_result = self._validate_prohibited_content(content)
|
|
if not prohibited_result["passed"]:
|
|
issues.append(prohibited_result)
|
|
|
|
# 3. Validar formato
|
|
format_result = self._validate_format(content)
|
|
if not format_result["passed"]:
|
|
issues.append(format_result)
|
|
|
|
# 4. Validar que no esté vacío o muy corto
|
|
if len(content.strip()) < 20:
|
|
issues.append({
|
|
"type": "empty_content",
|
|
"message": "Contenido demasiado corto",
|
|
"severity": "error",
|
|
"passed": False
|
|
})
|
|
|
|
passed = all(i.get("severity") != "error" for i in issues)
|
|
|
|
return ValidationResult(
|
|
passed=passed,
|
|
issues=issues,
|
|
content=content
|
|
)
|
|
|
|
def _validate_length(self, content: str, platform: str) -> Dict:
|
|
"""Validar longitud contra límites de plataforma."""
|
|
limits = platform_adapter.get_limits(platform)
|
|
max_chars = limits.get("max_characters", 2000)
|
|
|
|
if len(content) > max_chars:
|
|
return {
|
|
"type": "length",
|
|
"message": f"Contenido excede límite: {len(content)}/{max_chars}",
|
|
"severity": "error",
|
|
"passed": False,
|
|
"current": len(content),
|
|
"max": max_chars
|
|
}
|
|
|
|
return {"type": "length", "passed": True}
|
|
|
|
def _validate_prohibited_content(self, content: str) -> Dict:
|
|
"""Validar que no contenga palabras/patrones prohibidos."""
|
|
validations = self.config.get("validations", {})
|
|
prohibited = validations.get("prohibited_content", {})
|
|
|
|
content_lower = content.lower()
|
|
|
|
# Verificar palabras prohibidas
|
|
prohibited_words = prohibited.get("prohibited_words", [])
|
|
for word in prohibited_words:
|
|
if word.lower() in content_lower:
|
|
return {
|
|
"type": "prohibited_content",
|
|
"message": f"Contenido contiene palabra prohibida: {word}",
|
|
"severity": "error",
|
|
"passed": False,
|
|
"word": word
|
|
}
|
|
|
|
# Verificar patrones prohibidos
|
|
prohibited_patterns = prohibited.get("prohibited_patterns", [])
|
|
for pattern in prohibited_patterns:
|
|
if re.search(pattern, content_lower):
|
|
return {
|
|
"type": "prohibited_pattern",
|
|
"message": f"Contenido coincide con patrón prohibido",
|
|
"severity": "error",
|
|
"passed": False,
|
|
"pattern": pattern
|
|
}
|
|
|
|
return {"type": "prohibited_content", "passed": True}
|
|
|
|
def _validate_format(self, content: str) -> Dict:
|
|
"""Validar formato del contenido."""
|
|
issues = []
|
|
|
|
# Verificar que no esté truncado (terminando en medio de palabra)
|
|
if content and not content[-1] in ".!?\"')#\n":
|
|
# Podría estar truncado
|
|
last_word = content.split()[-1] if content.split() else ""
|
|
if len(last_word) > 15: # Palabra muy larga al final = truncado
|
|
issues.append("Posiblemente truncado")
|
|
|
|
# Verificar encoding (caracteres extraños)
|
|
try:
|
|
content.encode("utf-8").decode("utf-8")
|
|
except Exception:
|
|
issues.append("Problemas de encoding")
|
|
|
|
if issues:
|
|
return {
|
|
"type": "format",
|
|
"message": "; ".join(issues),
|
|
"severity": "warning",
|
|
"passed": True # Warning, no error
|
|
}
|
|
|
|
return {"type": "format", "passed": True}
|
|
|
|
# === Scoring con IA ===
|
|
|
|
async def score(
|
|
self,
|
|
content: str,
|
|
platform: str
|
|
) -> ScoringResult:
|
|
"""
|
|
Evaluar calidad del contenido usando IA.
|
|
|
|
Args:
|
|
content: Contenido a evaluar
|
|
platform: Plataforma
|
|
|
|
Returns:
|
|
ScoringResult con score y breakdown
|
|
"""
|
|
# Obtener prompt de scoring del config
|
|
scoring_prompt = self.config.get("scoring_prompt", "")
|
|
if not scoring_prompt:
|
|
scoring_prompt = self._default_scoring_prompt()
|
|
|
|
# Renderizar prompt
|
|
prompt = scoring_prompt.format(
|
|
content=content,
|
|
platform=platform
|
|
)
|
|
|
|
# Llamar a DeepSeek
|
|
response = self.client.chat.completions.create(
|
|
model=self.model,
|
|
messages=[
|
|
{
|
|
"role": "system",
|
|
"content": "Eres un evaluador de contenido para redes sociales. "
|
|
"Evalúa de forma objetiva y estricta. "
|
|
"Responde SOLO en JSON válido."
|
|
},
|
|
{"role": "user", "content": prompt}
|
|
],
|
|
max_tokens=300,
|
|
temperature=0.3 # Bajo para consistencia
|
|
)
|
|
|
|
response_text = response.choices[0].message.content.strip()
|
|
|
|
# Parsear respuesta JSON
|
|
try:
|
|
# Limpiar respuesta si tiene markdown
|
|
if "```json" in response_text:
|
|
response_text = response_text.split("```json")[1].split("```")[0]
|
|
elif "```" in response_text:
|
|
response_text = response_text.split("```")[1].split("```")[0]
|
|
|
|
result = json.loads(response_text)
|
|
except json.JSONDecodeError:
|
|
# Si falla el parsing, intentar extraer números
|
|
result = self._extract_score_from_text(response_text)
|
|
|
|
total_score = result.get("total", 50)
|
|
breakdown = result.get("breakdown", {})
|
|
feedback = result.get("feedback", "")
|
|
|
|
# Determinar acción
|
|
thresholds = self.config.get("thresholds", {})
|
|
min_score = thresholds.get("minimum_score", 60)
|
|
excellent_score = thresholds.get("excellent_score", 85)
|
|
|
|
if total_score < 40:
|
|
action = "reject"
|
|
elif total_score < min_score:
|
|
action = "regenerate"
|
|
else:
|
|
action = "accept"
|
|
|
|
is_top = total_score >= excellent_score
|
|
|
|
return ScoringResult(
|
|
total_score=total_score,
|
|
breakdown=breakdown,
|
|
feedback=feedback,
|
|
is_top_performer=is_top,
|
|
action=action
|
|
)
|
|
|
|
def _default_scoring_prompt(self) -> str:
|
|
"""Prompt por defecto para scoring."""
|
|
return """Evalúa este post para {platform} en escala 0-100.
|
|
|
|
POST:
|
|
{content}
|
|
|
|
CRITERIOS (suma = 100):
|
|
- Hook (0-25): ¿La primera línea captura atención?
|
|
- Claridad (0-20): ¿Se entiende fácilmente?
|
|
- Accionabilidad (0-20): ¿Qué puede hacer el lector?
|
|
- Originalidad (0-15): ¿Evita clichés?
|
|
- Voz de marca (0-10): ¿Profesional pero cercano?
|
|
- CTA (0-10): ¿CTA claro si aplica?
|
|
|
|
RESPONDE EN JSON:
|
|
{{"total": N, "breakdown": {{"hook_strength": N, "clarity": N, "actionability": N, "originality": N, "brand_voice": N, "cta_effectiveness": N}}, "feedback": "sugerencia"}}"""
|
|
|
|
def _extract_score_from_text(self, text: str) -> Dict:
|
|
"""Extraer score de texto si falla JSON parsing."""
|
|
# Buscar patrones como "total: 75" o "score: 75"
|
|
import re
|
|
|
|
total_match = re.search(r"total[:\s]+(\d+)", text.lower())
|
|
total = int(total_match.group(1)) if total_match else 50
|
|
|
|
return {
|
|
"total": min(100, max(0, total)),
|
|
"breakdown": {},
|
|
"feedback": "No se pudo parsear respuesta completa"
|
|
}
|
|
|
|
# === Evaluación Completa ===
|
|
|
|
async def evaluate(
|
|
self,
|
|
content: str,
|
|
platform: str,
|
|
skip_scoring: bool = False
|
|
) -> ContentQualityResult:
|
|
"""
|
|
Evaluación completa: validación + scoring.
|
|
|
|
Args:
|
|
content: Contenido a evaluar
|
|
platform: Plataforma
|
|
skip_scoring: Si omitir scoring (solo validación)
|
|
|
|
Returns:
|
|
ContentQualityResult con resultado completo
|
|
"""
|
|
# 1. Validaciones obligatorias
|
|
validation = self.validate(content, platform)
|
|
|
|
# Si falla validación, no hace falta scoring
|
|
if not validation.passed:
|
|
return ContentQualityResult(
|
|
validation=validation,
|
|
scoring=None,
|
|
final_decision="reject",
|
|
content=content
|
|
)
|
|
|
|
# 2. Scoring con IA (si no se omite)
|
|
scoring = None
|
|
if not skip_scoring:
|
|
scoring = await self.score(content, platform)
|
|
|
|
# 3. Decisión final
|
|
if scoring:
|
|
final_decision = scoring.action
|
|
else:
|
|
final_decision = "accept" # Sin scoring, aceptar si pasó validación
|
|
|
|
return ContentQualityResult(
|
|
validation=validation,
|
|
scoring=scoring,
|
|
final_decision=final_decision,
|
|
content=content
|
|
)
|
|
|
|
# === Utilidades ===
|
|
|
|
def should_regenerate(
|
|
self,
|
|
quality_result: ContentQualityResult,
|
|
attempt: int = 1
|
|
) -> bool:
|
|
"""
|
|
Determinar si se debe regenerar el contenido.
|
|
|
|
Args:
|
|
quality_result: Resultado de evaluación
|
|
attempt: Número de intento actual
|
|
|
|
Returns:
|
|
True si se debe regenerar
|
|
"""
|
|
max_attempts = self.config.get("regeneration", {}).get("max_attempts", 2)
|
|
|
|
if attempt >= max_attempts:
|
|
return False
|
|
|
|
return quality_result.final_decision == "regenerate"
|
|
|
|
def get_regeneration_hints(
|
|
self,
|
|
quality_result: ContentQualityResult
|
|
) -> str:
|
|
"""
|
|
Obtener hints para mejorar en la regeneración.
|
|
|
|
Args:
|
|
quality_result: Resultado de evaluación
|
|
|
|
Returns:
|
|
String con instrucciones para mejorar
|
|
"""
|
|
hints = []
|
|
|
|
# Hints de validación
|
|
for issue in quality_result.validation.issues:
|
|
if issue.get("type") == "length":
|
|
hints.append(f"Reducir longitud a máximo {issue.get('max')} caracteres")
|
|
elif issue.get("type") == "prohibited_content":
|
|
hints.append(f"Evitar: {issue.get('word', 'contenido prohibido')}")
|
|
|
|
# Hints de scoring
|
|
if quality_result.scoring:
|
|
if quality_result.scoring.feedback:
|
|
hints.append(quality_result.scoring.feedback)
|
|
|
|
# Identificar áreas débiles
|
|
breakdown = quality_result.scoring.breakdown
|
|
if breakdown:
|
|
weak_areas = []
|
|
if breakdown.get("hook_strength", 25) < 15:
|
|
weak_areas.append("mejorar el hook inicial")
|
|
if breakdown.get("clarity", 20) < 12:
|
|
weak_areas.append("hacer el mensaje más claro")
|
|
if breakdown.get("actionability", 20) < 12:
|
|
weak_areas.append("hacerlo más accionable")
|
|
|
|
if weak_areas:
|
|
hints.append("Enfocarse en: " + ", ".join(weak_areas))
|
|
|
|
if hints:
|
|
return "\n\nPARA MEJORAR:\n- " + "\n- ".join(hints)
|
|
return ""
|
|
|
|
|
|
# Module-level singleton instance shared by importers of this module.
content_validator = ContentValidator()
|