feat: Add Content Generation Engine v2 with quality scoring

Major improvements to AI content generation:

## New Components (app/services/ai/)
- PromptLibrary: YAML-based prompt templates with inheritance
- ContextEngine: Anti-repetition and best performers tracking
- ContentGeneratorV2: Enhanced generation with dynamic parameters
- PlatformAdapter: Platform-specific content adaptation
- ContentValidator: AI-powered quality scoring (0-100)

## Prompt Library (app/prompts/)
- 3 personalities: default, educational, promotional
- 5 templates: tip_tech, product_post, service_post, thread, response
- 4 platform configs: x, threads, instagram, facebook
- Few-shot examples by category: ia, productividad, seguridad

## Database Changes
- New table: content_memory (tracks generated content)
- New columns in posts: quality_score, score_breakdown, generation_attempts

## New API Endpoints (/api/v2/generate/)
- POST /generate - Generation with quality check
- POST /generate/batch - Batch generation
- POST /quality/evaluate - Evaluate content quality
- GET /templates, /personalities, /platforms - List configs

## Celery Tasks
- update_engagement_scores (every 6h)
- cleanup_old_memory (monthly)
- refresh_best_posts_yaml (weekly)

## Tests
- Comprehensive tests for all AI engine components

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-28 20:55:28 +00:00
parent f458f809ca
commit 11b0ba46fa
36 changed files with 6266 additions and 55 deletions

View File

@@ -16,7 +16,8 @@ celery_app = Celery(
"worker.tasks.generate_content",
"worker.tasks.publish_post",
"worker.tasks.fetch_interactions",
"worker.tasks.cleanup"
"worker.tasks.cleanup",
"worker.tasks.content_memory"
]
)
@@ -58,4 +59,22 @@ celery_app.conf.beat_schedule = {
"task": "worker.tasks.cleanup.daily_cleanup",
"schedule": crontab(hour=3, minute=0),
},
# Actualizar engagement scores cada 6 horas
"update-engagement-scores": {
"task": "worker.tasks.content_memory.update_engagement_scores",
"schedule": crontab(hour="*/6", minute=15),
},
# Limpiar memoria antigua mensualmente
"cleanup-old-memory": {
"task": "worker.tasks.content_memory.cleanup_old_memory",
"schedule": crontab(day_of_month=1, hour=4, minute=0),
},
# Actualizar best_posts.yaml semanalmente
"refresh-best-posts": {
"task": "worker.tasks.content_memory.refresh_best_posts_yaml",
"schedule": crontab(day_of_week=0, hour=5, minute=0), # Domingos 5 AM
},
}

View File

@@ -0,0 +1,342 @@
"""
Tareas de gestión de memoria de contenido.
Incluye:
- Actualización periódica de engagement scores
- Marcado de top performers
- Sincronización de métricas desde posts
- Limpieza de memoria antigua
"""
import logging
from datetime import datetime, timedelta
from typing import Optional
from worker.celery_app import celery_app
from app.core.database import SessionLocal
from app.models.post import Post
from app.models.post_metrics import PostMetrics
from app.models.content_memory import ContentMemory
logger = logging.getLogger(__name__)
@celery_app.task(name="worker.tasks.content_memory.update_engagement_scores")
def update_engagement_scores():
    """
    Update engagement scores based on real post metrics.

    This task:
    1. Fetches ContentMemory rows created in the last 90 days
    2. Recomputes each normalized engagement score from its post's metrics
    3. Commits the updates and re-flags top performers

    Runs every 6 hours (see the celery beat schedule).

    Returns:
        dict with the number of updated rows and newly flagged top performers.
    """
    db = SessionLocal()
    try:
        updated_count = 0
        new_top_performers = 0
        # Only refresh recent memories; older rows are handled by cleanup_old_memory.
        memories = db.query(ContentMemory).filter(
            ContentMemory.created_at >= datetime.utcnow() - timedelta(days=90)
        ).all()
        for memory in memories:
            # Fetch the associated post; skip memories with no metrics yet.
            post = db.query(Post).filter(Post.id == memory.post_id).first()
            if not post or not post.metrics:
                continue
            metrics = post.metrics
            old_score = memory.engagement_score
            memory.update_engagement(metrics)
            updated_count += 1
            # Log significant swings. Both scores must be checked against None
            # explicitly: a legitimate score of 0 is falsy (so `if old_score`
            # would skip it), and formatting None with :.1f would raise.
            new_score = memory.engagement_score
            if (
                old_score is not None
                and new_score is not None
                and abs(new_score - old_score) > 10
            ):
                logger.info(
                    f"Post {post.id} engagement cambió: {old_score:.1f} -> {new_score:.1f}"
                )
        db.commit()
        # Re-flag top performers now that scores are fresh.
        new_top_performers = _recalculate_top_performers(db)
        logger.info(
            f"Engagement actualizado: {updated_count} posts, "
            f"{new_top_performers} nuevos top performers"
        )
        return {
            "updated": updated_count,
            "new_top_performers": new_top_performers
        }
    except Exception as e:
        logger.error(f"Error actualizando engagement: {e}")
        db.rollback()
        raise
    finally:
        db.close()
def _recalculate_top_performers(db, top_percentile: int = 20) -> int:
    """
    Re-flag which ContentMemory rows count as top performers.

    The cutoff is the engagement score at the given top percentile of all
    scored rows; rows at or above it are flagged, the rest are unflagged.

    Args:
        db: Active DB session (committed here after the bulk updates).
        top_percentile: Share (in percent) of rows considered "top".

    Returns:
        Number of rows newly flagged as top performers.
    """
    score_rows = db.query(ContentMemory.engagement_score).filter(
        ContentMemory.engagement_score.isnot(None)
    ).all()
    if not score_rows:
        return 0
    ranked = sorted((row[0] for row in score_rows), reverse=True)
    # Index of the last row still inside the top percentile (clamped to 0
    # so at least one row always qualifies).
    cutoff_index = max(0, int(len(ranked) * top_percentile / 100) - 1)
    cutoff = ranked[cutoff_index]
    # Flag rows that newly qualify; update() reports how many rows changed.
    newly_flagged = db.query(ContentMemory).filter(
        ContentMemory.engagement_score >= cutoff,
        ContentMemory.is_top_performer == False
    ).update({"is_top_performer": True})
    # Unflag rows that dropped below the cutoff.
    db.query(ContentMemory).filter(
        ContentMemory.engagement_score < cutoff,
        ContentMemory.is_top_performer == True
    ).update({"is_top_performer": False})
    db.commit()
    return newly_flagged
@celery_app.task(name="worker.tasks.content_memory.sync_post_metrics")
def sync_post_metrics(post_id: int):
    """
    Sync one post's metrics into its ContentMemory row.

    Called after fresh metrics for the post have been fetched.

    Args:
        post_id: ID of the post to sync.

    Returns:
        Status dict: "updated" with the new score, or "skipped" with a reason.
    """
    db = SessionLocal()
    try:
        post = db.query(Post).filter(Post.id == post_id).first()
        if not post or not post.metrics:
            return {"status": "skipped", "reason": "no_metrics"}
        memory = db.query(ContentMemory).filter(
            ContentMemory.post_id == post_id
        ).first()
        if not memory:
            return {"status": "skipped", "reason": "no_memory"}
        memory.update_engagement(post.metrics)
        db.commit()
        return {
            "status": "updated",
            "post_id": post_id,
            "engagement_score": memory.engagement_score
        }
    except Exception as e:
        # Consistent with the other tasks in this module: log and roll back
        # on failure so the session is returned to the pool in a clean state.
        logger.error(f"Error sincronizando métricas del post {post_id}: {e}")
        db.rollback()
        raise
    finally:
        db.close()
@celery_app.task(name="worker.tasks.content_memory.analyze_and_save_content")
def analyze_and_save_content(
    post_id: int,
    content: str,
    content_type: str,
    platform: str,
    quality_score: Optional[int] = None,
    quality_breakdown: Optional[dict] = None,
    template_used: Optional[str] = None,
    personality_used: Optional[str] = None
):
    """
    Analyze freshly generated content and persist it to ContentMemory.

    Called right after a new post is generated. Idempotent: if a memory
    row already exists for the post, nothing is written.

    Args:
        post_id: ID of the created post.
        content: Generated content text.
        content_type: Content category.
        platform: Primary target platform.
        quality_score: Quality score, if it was evaluated.
        quality_breakdown: Per-dimension breakdown of the score.
        template_used: Prompt template that produced the content.
        personality_used: Personality profile that produced the content.
    """
    db = SessionLocal()
    try:
        # Skip if this post was already recorded.
        already_saved = db.query(ContentMemory).filter(
            ContentMemory.post_id == post_id
        ).first()
        if already_saved is not None:
            return {"status": "skipped", "reason": "already_exists"}
        # Imported lazily so the worker can load this module without pulling
        # in the whole AI service stack at import time.
        from app.services.ai import context_engine
        memory = context_engine.save_to_memory(
            db=db,
            post_id=post_id,
            content=content,
            content_type=content_type,
            platform=platform,
            quality_score=quality_score,
            quality_breakdown=quality_breakdown,
            template_used=template_used,
            personality_used=personality_used
        )
        return {
            "status": "created",
            "memory_id": memory.id,
            "topics": memory.topics,
            "hook_type": memory.hook_type
        }
    except Exception as e:
        logger.error(f"Error guardando en memoria: {e}")
        db.rollback()
        raise
    finally:
        db.close()
@celery_app.task(name="worker.tasks.content_memory.cleanup_old_memory")
def cleanup_old_memory(days_to_keep: int = 180):
    """
    Delete old ContentMemory rows.

    Top performers are kept indefinitely regardless of age, since they
    feed the curated few-shot examples.

    Args:
        days_to_keep: Retention window in days for non-top-performer rows.

    Returns:
        dict with the number of deleted rows.
    """
    db = SessionLocal()
    try:
        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
        # Bulk-delete rows older than the cutoff that are NOT top performers.
        deleted = db.query(ContentMemory).filter(
            ContentMemory.created_at < cutoff_date,
            ContentMemory.is_top_performer == False
        ).delete()
        db.commit()
        logger.info(f"Limpieza de memoria: {deleted} registros eliminados")
        return {"deleted": deleted}
    except Exception as e:
        # Consistent with the other tasks in this module: log and roll back
        # on failure instead of leaving the session in a broken state.
        logger.error(f"Error limpiando memoria antigua: {e}")
        db.rollback()
        raise
    finally:
        db.close()
@celery_app.task(name="worker.tasks.content_memory.refresh_best_posts_yaml")
def refresh_best_posts_yaml():
    """
    Rewrite best_posts.yaml with the current real top performers.

    Runs weekly so the few-shot examples used by the prompt library stay
    in sync with actual engagement data.

    Returns:
        Status dict: "updated" with per-type example counts, or "skipped"
        when no top performers exist yet.
    """
    import yaml
    from pathlib import Path
    db = SessionLocal()
    try:
        # Collect up to 5 top performers per content type, best first.
        content_types = ["tip_tech", "producto", "servicio"]
        examples = {}
        for content_type in content_types:
            top = db.query(ContentMemory).filter(
                ContentMemory.content_type == content_type,
                ContentMemory.is_top_performer == True
            ).order_by(
                ContentMemory.engagement_score.desc()
            ).limit(5).all()
            if top:
                examples[content_type] = []
                for mem in top:
                    post = db.query(Post).filter(Post.id == mem.post_id).first()
                    if post:
                        examples[content_type].append({
                            "content": post.content,
                            "platform": mem.platform,
                            "engagement_score": mem.engagement_score,
                            # NOTE(review): post.metrics appears to be an ORM
                            # relationship (PostMetrics is imported as a model
                            # above); if so, yaml.dump below cannot represent
                            # it — confirm it is a plain dict, or serialize
                            # it explicitly before dumping.
                            "metrics": post.metrics,
                            "analysis": {
                                "hook_type": mem.hook_type,
                                "topics": mem.topics
                            }
                        })
        if not examples:
            return {"status": "skipped", "reason": "no_top_performers"}
        # worker/tasks/ -> project root -> app/prompts/examples/best_posts.yaml
        yaml_path = Path(__file__).parent.parent.parent / "app" / "prompts" / "examples" / "best_posts.yaml"
        with open(yaml_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty file; fall back to a fresh
            # dict so the key assignments below don't raise TypeError.
            data = yaml.safe_load(f) or {}
        data["examples"] = examples
        # Guard against a file that lacks a metadata section.
        data.setdefault("metadata", {})
        data["metadata"]["last_updated"] = datetime.utcnow().isoformat()
        data["metadata"]["auto_update"] = True
        with open(yaml_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True)
        logger.info(f"best_posts.yaml actualizado con {sum(len(v) for v in examples.values())} ejemplos")
        return {
            "status": "updated",
            "examples_by_type": {k: len(v) for k, v in examples.items()}
        }
    except Exception as e:
        logger.error(f"Error actualizando best_posts.yaml: {e}")
        raise
    finally:
        db.close()

View File

@@ -1,9 +1,16 @@
"""
Tareas de generación de contenido.
Usa el nuevo Content Generation Engine v2 con:
- Quality scoring
- Anti-repetición via Context Engine
- Almacenamiento en Content Memory
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
from worker.celery_app import celery_app
from app.core.database import SessionLocal
@@ -14,6 +21,8 @@ from app.models.product import Product
from app.models.service import Service
from app.services.content_generator import content_generator
logger = logging.getLogger(__name__)
def run_async(coro):
"""Helper para ejecutar coroutines en Celery."""
@@ -21,6 +30,31 @@ def run_async(coro):
return loop.run_until_complete(coro)
def _save_to_memory(
    post_id: int,
    content: str,
    content_type: str,
    platform: str,
    quality_score: Optional[int] = None,
    quality_breakdown: Optional[Dict] = None,
    template_used: Optional[str] = None
):
    """
    Queue persisting generated content to ContentMemory as a Celery task.

    Dispatching asynchronously keeps the main generation path non-blocking.
    """
    # Imported here to avoid a circular import between task modules.
    from worker.tasks.content_memory import analyze_and_save_content
    memory_kwargs = {
        "post_id": post_id,
        "content": content,
        "content_type": content_type,
        "platform": platform,
        "quality_score": quality_score,
        "quality_breakdown": quality_breakdown,
        "template_used": template_used,
    }
    analyze_and_save_content.delay(**memory_kwargs)
@celery_app.task(name="worker.tasks.generate_content.generate_scheduled_content")
def generate_scheduled_content():
"""
@@ -40,6 +74,7 @@ def generate_scheduled_content():
ContentCalendar.is_active == True
).all()
generated = 0
for entry in entries:
# Verificar si es la hora correcta
if entry.time.hour != current_hour:
@@ -52,20 +87,23 @@ def generate_scheduled_content():
category_filter=entry.category_filter,
requires_approval=entry.requires_approval
)
generated += 1
elif entry.content_type == "producto":
generate_product_post.delay(
platforms=entry.platforms,
requires_approval=entry.requires_approval
)
generated += 1
elif entry.content_type == "servicio":
generate_service_post.delay(
platforms=entry.platforms,
requires_approval=entry.requires_approval
)
generated += 1
return f"Procesadas {len(entries)} entradas del calendario"
return f"Procesadas {len(entries)} entradas, {generated} generaciones iniciadas"
finally:
db.close()
@@ -75,9 +113,18 @@ def generate_scheduled_content():
def generate_tip_post(
platforms: list,
category_filter: str = None,
requires_approval: bool = False
requires_approval: bool = False,
use_quality_check: bool = True
):
"""Generar un post de tip tech."""
"""
Generar un post de tip tech con quality scoring.
Args:
platforms: Lista de plataformas destino
category_filter: Categoría específica (opcional)
requires_approval: Si requiere aprobación manual
use_quality_check: Si usar validación de calidad
"""
db = SessionLocal()
try:
@@ -96,23 +143,66 @@ def generate_tip_post(
).first()
if not tip:
return "No hay tips disponibles"
return {"status": "skipped", "reason": "no_tips_available"}
# Generar contenido para cada plataforma
content_by_platform = {}
quality_info = {}
for platform in platforms:
content = run_async(
content_generator.generate_tip_tech(
category=tip.category,
platform=platform,
template=tip.template
if use_quality_check:
# Usar generación con validación de calidad
result = run_async(
content_generator.generate_with_quality_check(
template_name="tip_tech",
variables={
"category": tip.category,
"difficulty_level": "principiante",
"target_audience": "profesionales tech"
},
platform=platform,
db=db,
max_attempts=2
)
)
)
content_by_platform[platform] = content
content_by_platform[platform] = result["content"]
quality_info[platform] = {
"score": result.get("quality_score"),
"breakdown": result.get("score_breakdown"),
"attempts": result.get("attempts", 1)
}
else:
# Generación simple (fallback)
content = run_async(
content_generator.generate_tip_tech(
category=tip.category,
platform=platform,
template=tip.template,
db=db
)
)
content_by_platform[platform] = content
# Obtener mejor score entre plataformas
best_score = None
best_breakdown = None
total_attempts = 1
if quality_info:
scores = [q.get("score") for q in quality_info.values() if q.get("score")]
if scores:
best_score = max(scores)
breakdowns = [q.get("breakdown") for q in quality_info.values() if q.get("breakdown")]
if breakdowns:
best_breakdown = breakdowns[0]
attempts = [q.get("attempts", 1) for q in quality_info.values()]
if attempts:
total_attempts = max(attempts)
# Crear post
main_platform = platforms[0]
post = Post(
content=content_by_platform.get(platforms[0], ""),
content=content_by_platform.get(main_platform, ""),
content_type="tip_tech",
platforms=platforms,
content_x=content_by_platform.get("x"),
@@ -122,7 +212,10 @@ def generate_tip_post(
status="pending_approval" if requires_approval else "scheduled",
scheduled_at=datetime.utcnow() + timedelta(minutes=5),
approval_required=requires_approval,
tip_template_id=tip.id
tip_template_id=tip.id,
quality_score=best_score,
score_breakdown=best_breakdown,
generation_attempts=total_attempts
)
db.add(post)
@@ -133,7 +226,33 @@ def generate_tip_post(
db.commit()
return f"Post de tip generado: {post.id}"
# Guardar en memoria (async)
_save_to_memory(
post_id=post.id,
content=post.content,
content_type="tip_tech",
platform=main_platform,
quality_score=best_score,
quality_breakdown=best_breakdown,
template_used="tip_tech"
)
logger.info(
f"Tip generado: post_id={post.id}, score={best_score}, "
f"attempts={total_attempts}, category={tip.category}"
)
return {
"status": "success",
"post_id": post.id,
"quality_score": best_score,
"attempts": total_attempts
}
except Exception as e:
logger.error(f"Error generando tip: {e}")
db.rollback()
raise
finally:
db.close()
@@ -143,9 +262,18 @@ def generate_tip_post(
def generate_product_post(
platforms: list,
product_id: int = None,
requires_approval: bool = True
requires_approval: bool = True,
use_quality_check: bool = True
):
"""Generar un post de producto."""
"""
Generar un post de producto con quality scoring.
Args:
platforms: Lista de plataformas destino
product_id: ID del producto específico (opcional)
requires_approval: Si requiere aprobación manual
use_quality_check: Si usar validación de calidad
"""
db = SessionLocal()
try:
@@ -161,22 +289,66 @@ def generate_product_post(
).first()
if not product:
return "No hay productos disponibles"
return {"status": "skipped", "reason": "no_products_available"}
# Generar contenido
content_by_platform = {}
quality_info = {}
for platform in platforms:
content = run_async(
content_generator.generate_product_post(
product=product.to_dict(),
platform=platform
if use_quality_check:
result = run_async(
content_generator.generate_with_quality_check(
template_name="product_post",
variables={
"product_name": product.name,
"product_description": product.description or "",
"price": product.price,
"category": product.category,
"specs": product.specs or {},
"highlights": product.highlights or []
},
platform=platform,
db=db,
max_attempts=2
)
)
)
content_by_platform[platform] = content
content_by_platform[platform] = result["content"]
quality_info[platform] = {
"score": result.get("quality_score"),
"breakdown": result.get("score_breakdown"),
"attempts": result.get("attempts", 1)
}
else:
content = run_async(
content_generator.generate_product_post(
product=product.to_dict(),
platform=platform,
db=db
)
)
content_by_platform[platform] = content
# Obtener mejor score
best_score = None
best_breakdown = None
total_attempts = 1
if quality_info:
scores = [q.get("score") for q in quality_info.values() if q.get("score")]
if scores:
best_score = max(scores)
breakdowns = [q.get("breakdown") for q in quality_info.values() if q.get("breakdown")]
if breakdowns:
best_breakdown = breakdowns[0]
attempts = [q.get("attempts", 1) for q in quality_info.values()]
if attempts:
total_attempts = max(attempts)
# Crear post
main_platform = platforms[0]
post = Post(
content=content_by_platform.get(platforms[0], ""),
content=content_by_platform.get(main_platform, ""),
content_type="producto",
platforms=platforms,
content_x=content_by_platform.get("x"),
@@ -187,7 +359,10 @@ def generate_product_post(
scheduled_at=datetime.utcnow() + timedelta(minutes=5),
approval_required=requires_approval,
product_id=product.id,
image_url=product.main_image
image_url=product.main_image,
quality_score=best_score,
score_breakdown=best_breakdown,
generation_attempts=total_attempts
)
db.add(post)
@@ -197,7 +372,33 @@ def generate_product_post(
db.commit()
return f"Post de producto generado: {post.id}"
# Guardar en memoria
_save_to_memory(
post_id=post.id,
content=post.content,
content_type="producto",
platform=main_platform,
quality_score=best_score,
quality_breakdown=best_breakdown,
template_used="product_post"
)
logger.info(
f"Producto generado: post_id={post.id}, product={product.name}, "
f"score={best_score}"
)
return {
"status": "success",
"post_id": post.id,
"product_id": product.id,
"quality_score": best_score
}
except Exception as e:
logger.error(f"Error generando producto: {e}")
db.rollback()
raise
finally:
db.close()
@@ -207,9 +408,18 @@ def generate_product_post(
def generate_service_post(
platforms: list,
service_id: int = None,
requires_approval: bool = True
requires_approval: bool = True,
use_quality_check: bool = True
):
"""Generar un post de servicio."""
"""
Generar un post de servicio con quality scoring.
Args:
platforms: Lista de plataformas destino
service_id: ID del servicio específico (opcional)
requires_approval: Si requiere aprobación manual
use_quality_check: Si usar validación de calidad
"""
db = SessionLocal()
try:
@@ -224,22 +434,66 @@ def generate_service_post(
).first()
if not service:
return "No hay servicios disponibles"
return {"status": "skipped", "reason": "no_services_available"}
# Generar contenido
content_by_platform = {}
quality_info = {}
for platform in platforms:
content = run_async(
content_generator.generate_service_post(
service=service.to_dict(),
platform=platform
if use_quality_check:
result = run_async(
content_generator.generate_with_quality_check(
template_name="service_post",
variables={
"service_name": service.name,
"service_description": service.description or "",
"category": service.category,
"target_sectors": service.target_sectors or [],
"benefits": service.benefits or [],
"call_to_action": service.call_to_action or "Contáctanos"
},
platform=platform,
db=db,
max_attempts=2
)
)
)
content_by_platform[platform] = content
content_by_platform[platform] = result["content"]
quality_info[platform] = {
"score": result.get("quality_score"),
"breakdown": result.get("score_breakdown"),
"attempts": result.get("attempts", 1)
}
else:
content = run_async(
content_generator.generate_service_post(
service=service.to_dict(),
platform=platform,
db=db
)
)
content_by_platform[platform] = content
# Obtener mejor score
best_score = None
best_breakdown = None
total_attempts = 1
if quality_info:
scores = [q.get("score") for q in quality_info.values() if q.get("score")]
if scores:
best_score = max(scores)
breakdowns = [q.get("breakdown") for q in quality_info.values() if q.get("breakdown")]
if breakdowns:
best_breakdown = breakdowns[0]
attempts = [q.get("attempts", 1) for q in quality_info.values()]
if attempts:
total_attempts = max(attempts)
# Crear post
main_platform = platforms[0]
post = Post(
content=content_by_platform.get(platforms[0], ""),
content=content_by_platform.get(main_platform, ""),
content_type="servicio",
platforms=platforms,
content_x=content_by_platform.get("x"),
@@ -250,7 +504,10 @@ def generate_service_post(
scheduled_at=datetime.utcnow() + timedelta(minutes=5),
approval_required=requires_approval,
service_id=service.id,
image_url=service.main_image
image_url=service.main_image,
quality_score=best_score,
score_breakdown=best_breakdown,
generation_attempts=total_attempts
)
db.add(post)
@@ -260,7 +517,106 @@ def generate_service_post(
db.commit()
return f"Post de servicio generado: {post.id}"
# Guardar en memoria
_save_to_memory(
post_id=post.id,
content=post.content,
content_type="servicio",
platform=main_platform,
quality_score=best_score,
quality_breakdown=best_breakdown,
template_used="service_post"
)
logger.info(
f"Servicio generado: post_id={post.id}, service={service.name}, "
f"score={best_score}"
)
return {
"status": "success",
"post_id": post.id,
"service_id": service.id,
"quality_score": best_score
}
except Exception as e:
logger.error(f"Error generando servicio: {e}")
db.rollback()
raise
finally:
db.close()
@celery_app.task(name="worker.tasks.generate_content.generate_thread")
def generate_thread(
    topic: str,
    num_posts: int = 5,
    requires_approval: bool = True
):
    """
    Generate an educational thread.

    Args:
        topic: Thread topic.
        num_posts: Number of posts in the thread.
        requires_approval: Whether the posts need manual approval.
    """
    db = SessionLocal()
    try:
        thread_contents = run_async(
            content_generator.generate_thread(
                topic=topic,
                num_posts=num_posts,
                db=db
            )
        )
        if not thread_contents:
            return {"status": "error", "reason": "no_posts_generated"}
        status = "pending_approval" if requires_approval else "scheduled"
        created_posts = []
        # One linked Post per thread entry, scheduled one minute apart so
        # they publish in order. Threads are primarily an X feature.
        for offset, text in enumerate(thread_contents):
            thread_post = Post(
                content=text,
                content_type="hilo_educativo",
                platforms=["x"],
                content_x=text,
                status=status,
                scheduled_at=datetime.utcnow() + timedelta(minutes=5 + offset),
                approval_required=requires_approval,
            )
            db.add(thread_post)
            created_posts.append(thread_post)
        db.commit()
        # The first post represents the whole thread in content memory.
        if created_posts:
            _save_to_memory(
                post_id=created_posts[0].id,
                content="\n\n".join(thread_contents),
                content_type="hilo_educativo",
                platform="x",
                template_used="thread"
            )
        logger.info(f"Hilo generado: {len(created_posts)} posts sobre '{topic}'")
        return {
            "status": "success",
            "post_ids": [p.id for p in created_posts],
            "topic": topic
        }
    except Exception as e:
        logger.error(f"Error generando hilo: {e}")
        db.rollback()
        raise
    finally:
        db.close()