feat: Add Content Generation Engine v2 with quality scoring

Major improvements to AI content generation:

## New Components (app/services/ai/)
- PromptLibrary: YAML-based prompt templates with inheritance
- ContextEngine: Anti-repetition and best performers tracking
- ContentGeneratorV2: Enhanced generation with dynamic parameters
- PlatformAdapter: Platform-specific content adaptation
- ContentValidator: AI-powered quality scoring (0-100)

## Prompt Library (app/prompts/)
- 3 personalities: default, educational, promotional
- 5 templates: tip_tech, product_post, service_post, thread, response
- 4 platform configs: x, threads, instagram, facebook
- Few-shot examples by category: ia, productividad, seguridad

## Database Changes
- New table: content_memory (tracks generated content)
- New columns in posts: quality_score, score_breakdown, generation_attempts

## New API Endpoints (/api/v2/generate/)
- POST /generate - Generation with quality check
- POST /generate/batch - Batch generation
- POST /quality/evaluate - Evaluate content quality
- GET /templates, /personalities, /platforms - List configs

## Celery Tasks
- update_engagement_scores (every 6h)
- cleanup_old_memory (monthly)
- refresh_best_posts_yaml (weekly)

## Tests
- Comprehensive tests for all AI engine components

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-28 20:55:28 +00:00
parent f458f809ca
commit 11b0ba46fa
36 changed files with 6266 additions and 55 deletions

View File

@@ -19,6 +19,7 @@ from app.models.odoo_sync_log import OdooSyncLog
from app.models.ab_test import ABTest, ABTestVariant
from app.models.recycled_post import RecycledPost
from app.models.thread_series import ThreadSeries, ThreadPost
from app.models.content_memory import ContentMemory
__all__ = [
"Base",
@@ -38,5 +39,6 @@ __all__ = [
"ABTestVariant",
"RecycledPost",
"ThreadSeries",
"ThreadPost"
"ThreadPost",
"ContentMemory"
]

View File

@@ -0,0 +1,158 @@
"""
Modelo ContentMemory - Memoria de contenido para el Context Engine.
Este modelo almacena análisis de posts generados para:
- Evitar repetición de temas y frases
- Identificar posts de alto rendimiento para few-shot learning
- Trackear patrones de éxito
"""
from datetime import datetime
from sqlalchemy import (
Column, Integer, String, Text, Float, Boolean,
DateTime, ForeignKey, JSON
)
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.orm import relationship
from app.core.database import Base
class ContentMemory(Base):
    """
    Memory of generated content.

    Stores a semantic analysis of each generated post so the Context Engine
    can avoid repeating topics/phrases and learn from high-performing posts
    (few-shot example selection).
    """
    __tablename__ = "content_memory"

    id = Column(Integer, primary_key=True, index=True)

    # Link to the original post (one memory row per post).
    post_id = Column(Integer, ForeignKey("posts.id"), nullable=False, unique=True, index=True)

    # === Content analysis ===

    # Detected topics/categories, e.g. ["ia", "productividad", "python"]
    topics = Column(ARRAY(String), nullable=True)

    # Distinctive phrases used (tracked to avoid repetition),
    # e.g. ["la regla 2-2-2", "el 90% ignora esto"]
    key_phrases = Column(ARRAY(String), nullable=True)

    # Hook type used, e.g. "pregunta", "dato_impactante", "tip_directo", "historia"
    hook_type = Column(String(50), nullable=True, index=True)

    # Semantic summary of the content (for similarity comparison)
    content_summary = Column(Text, nullable=True)

    # Content embedding (for future semantic search).
    # Null for now; can be migrated to pgvector later.
    content_embedding = Column(JSON, nullable=True)

    # === Success metrics ===

    # Computed engagement score (normalized to 0-100)
    engagement_score = Column(Float, nullable=True, index=True)

    # Metric breakdown, e.g. {"likes": 45, "comments": 12, "shares": 8, "saves": 3}
    engagement_breakdown = Column(JSON, nullable=True)

    # Is this post in the top 20% by engagement?
    is_top_performer = Column(Boolean, default=False, index=True)

    # Quality score assigned at generation time (0-100)
    quality_score = Column(Integer, nullable=True)

    # Breakdown of the quality score
    quality_breakdown = Column(JSON, nullable=True)

    # === Few-shot example usage tracking ===

    # Times this post has been used as a few-shot example
    times_used_as_example = Column(Integer, default=0)

    # Last time it was used as an example
    last_used_as_example = Column(DateTime, nullable=True)

    # === Metadata ===

    # Platform the content was originally generated for
    platform = Column(String(20), nullable=True, index=True)

    # Content type
    content_type = Column(String(50), nullable=True, index=True)

    # Personality used for generation
    personality_used = Column(String(50), nullable=True)

    # Template used for generation
    template_used = Column(String(50), nullable=True)

    # Timestamps.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving to
    # timezone-aware defaults would need a coordinated project-wide migration.
    created_at = Column(DateTime, default=datetime.utcnow, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self) -> str:
        return f"<ContentMemory post_id={self.post_id} score={self.engagement_score}>"

    def to_dict(self) -> dict:
        """Convert to a plain (JSON-serializable) dictionary."""
        return {
            "id": self.id,
            "post_id": self.post_id,
            "topics": self.topics,
            "key_phrases": self.key_phrases,
            "hook_type": self.hook_type,
            "content_summary": self.content_summary,
            "engagement_score": self.engagement_score,
            "engagement_breakdown": self.engagement_breakdown,
            "is_top_performer": self.is_top_performer,
            "quality_score": self.quality_score,
            "quality_breakdown": self.quality_breakdown,
            "times_used_as_example": self.times_used_as_example,
            "platform": self.platform,
            "content_type": self.content_type,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }

    def mark_as_top_performer(self) -> None:
        """Mark this content as a top performer."""
        self.is_top_performer = True
        self.updated_at = datetime.utcnow()

    def record_example_usage(self) -> None:
        """Record that this content was used as a few-shot example.

        The column default (0) is only applied at INSERT time, so a freshly
        constructed, unflushed instance still has ``None`` here; guard against
        ``None`` to avoid a ``TypeError`` on the increment.
        """
        self.times_used_as_example = (self.times_used_as_example or 0) + 1
        self.last_used_as_example = datetime.utcnow()
        self.updated_at = datetime.utcnow()

    def update_engagement(self, metrics: dict) -> None:
        """
        Update engagement metrics and recompute the normalized score.

        Args:
            metrics: Dict with likes, comments, shares, saves, etc.
                     (retweets count as shares; bookmarks count as saves).
        """
        self.engagement_breakdown = metrics

        # Weighted raw score: likes + comments*2 + shares*3 + saves*2.
        # Missing keys count as zero.
        likes = metrics.get("likes", 0)
        comments = metrics.get("comments", 0)
        shares = metrics.get("shares", 0) + metrics.get("retweets", 0)
        saves = metrics.get("saves", 0) + metrics.get("bookmarks", 0)

        raw_score = likes + (comments * 2) + (shares * 3) + (saves * 2)

        # Simple normalization capped at 100: a raw score of 50 maps to 100.
        # (Tune the 50-point baseline against real historical averages.)
        self.engagement_score = min(100, (raw_score / 50) * 100)
        self.updated_at = datetime.utcnow()

View File

@@ -102,6 +102,11 @@ class Post(Base):
recycled_from_id = Column(Integer, ForeignKey("posts.id"), nullable=True)
recycle_count = Column(Integer, default=0) # Times this post has been recycled
# AI Generation Quality
quality_score = Column(Integer, nullable=True, index=True) # 0-100 score from validator
score_breakdown = Column(JSON, nullable=True) # Detailed scoring breakdown
generation_attempts = Column(Integer, default=1) # Times regenerated before acceptance
# Timestamps
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
@@ -136,7 +141,10 @@ class Post(Base):
"ab_test_id": self.ab_test_id,
"is_recyclable": self.is_recyclable,
"recycled_from_id": self.recycled_from_id,
"recycle_count": self.recycle_count
"recycle_count": self.recycle_count,
"quality_score": self.quality_score,
"score_breakdown": self.score_breakdown,
"generation_attempts": self.generation_attempts
}
def get_content_for_platform(self, platform: str) -> str: