feat: Add Content Generation Engine v2 with quality scoring

Major improvements to AI content generation:

## New Components (app/services/ai/)
- PromptLibrary: YAML-based prompt templates with inheritance
- ContextEngine: Anti-repetition and best performers tracking
- ContentGeneratorV2: Enhanced generation with dynamic parameters
- PlatformAdapter: Platform-specific content adaptation
- ContentValidator: AI-powered quality scoring (0-100)

## Prompt Library (app/prompts/)
- 3 personalities: default, educational, promotional
- 5 templates: tip_tech, product_post, service_post, thread, response
- 4 platform configs: x, threads, instagram, facebook
- Few-shot examples by category: ia, productividad, seguridad

## Database Changes
- New table: content_memory (tracks generated content)
- New columns in posts: quality_score, score_breakdown, generation_attempts

## New API Endpoints (/api/v2/generate/)
- POST /generate - Generation with quality check
- POST /generate/batch - Batch generation
- POST /quality/evaluate - Evaluate content quality
- GET /templates, /personalities, /platforms - List configs

## Celery Tasks
- update_engagement_scores (every 6h)
- cleanup_old_memory (monthly)
- refresh_best_posts_yaml (weekly)

## Tests
- Comprehensive tests for all AI engine components

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-28 20:55:28 +00:00
parent f458f809ca
commit 11b0ba46fa
36 changed files with 6266 additions and 55 deletions

View File

@@ -19,6 +19,7 @@ from app.models.odoo_sync_log import OdooSyncLog
from app.models.ab_test import ABTest, ABTestVariant
from app.models.recycled_post import RecycledPost
from app.models.thread_series import ThreadSeries, ThreadPost
from app.models.content_memory import ContentMemory
__all__ = [
"Base",
@@ -38,5 +39,6 @@ __all__ = [
"ABTestVariant",
"RecycledPost",
"ThreadSeries",
"ThreadPost"
"ThreadPost",
"ContentMemory"
]

View File

@@ -0,0 +1,158 @@
"""
Modelo ContentMemory - Memoria de contenido para el Context Engine.
Este modelo almacena análisis de posts generados para:
- Evitar repetición de temas y frases
- Identificar posts de alto rendimiento para few-shot learning
- Trackear patrones de éxito
"""
from datetime import datetime
from sqlalchemy import (
Column, Integer, String, Text, Float, Boolean,
DateTime, ForeignKey, JSON
)
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.orm import relationship
from app.core.database import Base
class ContentMemory(Base):
    """
    Memory of generated content.

    Stores a semantic analysis of each generated post so the Context Engine
    can avoid repeating topics/phrases and learn from high-performing posts
    (few-shot example selection).
    """
    __tablename__ = "content_memory"

    id = Column(Integer, primary_key=True, index=True)

    # Link to the original post (one memory row per post).
    post_id = Column(Integer, ForeignKey("posts.id"), nullable=False, unique=True, index=True)

    # === Content analysis ===

    # Detected topics/categories, e.g. ["ia", "productividad", "python"]
    topics = Column(ARRAY(String), nullable=True)

    # Distinctive phrases used (tracked to avoid repetition),
    # e.g. ["la regla 2-2-2", "el 90% ignora esto"]
    key_phrases = Column(ARRAY(String), nullable=True)

    # Hook type used, e.g. "pregunta", "dato_impactante", "tip_directo", "historia"
    hook_type = Column(String(50), nullable=True, index=True)

    # Semantic summary of the content (for similarity comparison)
    content_summary = Column(Text, nullable=True)

    # Content embedding (for future semantic search).
    # Null for now; can be migrated to pgvector later.
    content_embedding = Column(JSON, nullable=True)

    # === Success metrics ===

    # Computed engagement score (normalized to 0-100)
    engagement_score = Column(Float, nullable=True, index=True)

    # Metric breakdown, e.g. {"likes": 45, "comments": 12, "shares": 8, "saves": 3}
    engagement_breakdown = Column(JSON, nullable=True)

    # Is this post in the top 20% by engagement?
    is_top_performer = Column(Boolean, default=False, index=True)

    # Quality score assigned at generation time (0-100)
    quality_score = Column(Integer, nullable=True)

    # Breakdown of the quality score
    quality_breakdown = Column(JSON, nullable=True)

    # === Few-shot example usage tracking ===

    # Times this post has been used as a few-shot example
    times_used_as_example = Column(Integer, default=0)

    # Last time it was used as an example
    last_used_as_example = Column(DateTime, nullable=True)

    # === Metadata ===

    # Platform the content was originally generated for
    platform = Column(String(20), nullable=True, index=True)

    # Content type
    content_type = Column(String(50), nullable=True, index=True)

    # Personality used for generation
    personality_used = Column(String(50), nullable=True)

    # Template used for generation
    template_used = Column(String(50), nullable=True)

    # Timestamps.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving to
    # timezone-aware defaults would need a coordinated project-wide migration.
    created_at = Column(DateTime, default=datetime.utcnow, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self) -> str:
        return f"<ContentMemory post_id={self.post_id} score={self.engagement_score}>"

    def to_dict(self) -> dict:
        """Convert to a plain (JSON-serializable) dictionary."""
        return {
            "id": self.id,
            "post_id": self.post_id,
            "topics": self.topics,
            "key_phrases": self.key_phrases,
            "hook_type": self.hook_type,
            "content_summary": self.content_summary,
            "engagement_score": self.engagement_score,
            "engagement_breakdown": self.engagement_breakdown,
            "is_top_performer": self.is_top_performer,
            "quality_score": self.quality_score,
            "quality_breakdown": self.quality_breakdown,
            "times_used_as_example": self.times_used_as_example,
            "platform": self.platform,
            "content_type": self.content_type,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }

    def mark_as_top_performer(self) -> None:
        """Mark this content as a top performer."""
        self.is_top_performer = True
        self.updated_at = datetime.utcnow()

    def record_example_usage(self) -> None:
        """Record that this content was used as a few-shot example.

        The column default (0) is only applied at INSERT time, so a freshly
        constructed, unflushed instance still has ``None`` here; guard against
        ``None`` to avoid a ``TypeError`` on the increment.
        """
        self.times_used_as_example = (self.times_used_as_example or 0) + 1
        self.last_used_as_example = datetime.utcnow()
        self.updated_at = datetime.utcnow()

    def update_engagement(self, metrics: dict) -> None:
        """
        Update engagement metrics and recompute the normalized score.

        Args:
            metrics: Dict with likes, comments, shares, saves, etc.
                     (retweets count as shares; bookmarks count as saves).
        """
        self.engagement_breakdown = metrics

        # Weighted raw score: likes + comments*2 + shares*3 + saves*2.
        # Missing keys count as zero.
        likes = metrics.get("likes", 0)
        comments = metrics.get("comments", 0)
        shares = metrics.get("shares", 0) + metrics.get("retweets", 0)
        saves = metrics.get("saves", 0) + metrics.get("bookmarks", 0)

        raw_score = likes + (comments * 2) + (shares * 3) + (saves * 2)

        # Simple normalization capped at 100: a raw score of 50 maps to 100.
        # (Tune the 50-point baseline against real historical averages.)
        self.engagement_score = min(100, (raw_score / 50) * 100)
        self.updated_at = datetime.utcnow()

View File

@@ -102,6 +102,11 @@ class Post(Base):
recycled_from_id = Column(Integer, ForeignKey("posts.id"), nullable=True)
recycle_count = Column(Integer, default=0) # Times this post has been recycled
# AI Generation Quality
quality_score = Column(Integer, nullable=True, index=True) # 0-100 score from validator
score_breakdown = Column(JSON, nullable=True) # Detailed scoring breakdown
generation_attempts = Column(Integer, default=1) # Times regenerated before acceptance
# Timestamps
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
@@ -136,7 +141,10 @@ class Post(Base):
"ab_test_id": self.ab_test_id,
"is_recyclable": self.is_recyclable,
"recycled_from_id": self.recycled_from_id,
"recycle_count": self.recycle_count
"recycle_count": self.recycle_count,
"quality_score": self.quality_score,
"score_breakdown": self.score_breakdown,
"generation_attempts": self.generation_attempts
}
def get_content_for_platform(self, platform: str) -> str: