From 5f04aa0cce3b09c8f33429cfd464cb47c04a5889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Consultor=C3=ADa=20AS?= Date: Thu, 29 Jan 2026 10:04:58 +0000 Subject: [PATCH] fix: Resolve duplicate interaction detection and add usernames - Add processed_ids set to prevent duplicate inserts when a tweet is both a mention and a comment (same external_id) - Enhance get_mentions() to fetch usernames via user expansions - Update fetch_interactions to prefer username over numeric author_id Co-Authored-By: Claude Opus 4.5 --- app/publishers/x_publisher.py | 13 +++++++++++-- worker/tasks/fetch_interactions.py | 20 ++++++++++++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/app/publishers/x_publisher.py b/app/publishers/x_publisher.py index 780ee91..4cbcf1c 100644 --- a/app/publishers/x_publisher.py +++ b/app/publishers/x_publisher.py @@ -189,22 +189,31 @@ class XPublisher(BasePublisher): me = self.client.get_me() user_id = me.data.id - # Obtener menciones + # Obtener menciones con información de usuarios mentions = self.client.get_users_mentions( id=user_id, since_id=since_id, max_results=50, - tweet_fields=['created_at', 'author_id', 'conversation_id'] + tweet_fields=['created_at', 'author_id', 'conversation_id'], + user_fields=['username'], + expansions=['author_id'] ) if not mentions.data: return [] + # Crear mapa de usuarios para obtener usernames + users_map = {} + if mentions.includes and 'users' in mentions.includes: + for user in mentions.includes['users']: + users_map[str(user.id)] = user.username + return [ { "id": str(tweet.id), "text": tweet.text, "author_id": str(tweet.author_id), + "username": users_map.get(str(tweet.author_id), str(tweet.author_id)), "created_at": tweet.created_at.isoformat() if tweet.created_at else None } for tweet in mentions.data diff --git a/worker/tasks/fetch_interactions.py b/worker/tasks/fetch_interactions.py index fe7d5ab..9afc785 100644 --- a/worker/tasks/fetch_interactions.py +++ b/worker/tasks/fetch_interactions.py @@ -45,6 +45,10 @@ def fetch_platform_interactions(platform: str): try: publisher = get_publisher(platform) + # Set para trackear external_ids procesados en esta ejecución + # Evita duplicados cuando un tweet es tanto mención como comentario + processed_ids = set() + # Obtener menciones mentions = run_async(publisher.get_mentions()) new_mentions = 0 @@ -52,17 +56,23 @@ def fetch_platform_interactions(platform: str): for mention in mentions: external_id = mention.get("id") - # Verificar si ya existe + # Verificar si ya existe en DB o fue procesado en esta ejecución + if external_id in processed_ids: + continue + existing = db.query(Interaction).filter( Interaction.external_id == external_id ).first() if not existing: + # Obtener username (preferir username sobre author_id) + author_username = mention.get("username") or mention.get("author_id", "unknown") + interaction = Interaction( platform=platform, interaction_type="mention", external_id=external_id, - author_username=mention.get("username", mention.get("author_id", "unknown")), + author_username=author_username, author_name=mention.get("name"), content=mention.get("text", mention.get("message")), interaction_at=datetime.fromisoformat( @@ -70,6 +80,7 @@ def fetch_platform_interactions(platform: str): ) if mention.get("created_at") else datetime.utcnow() ) db.add(interaction) + processed_ids.add(external_id) new_mentions += 1 # Obtener comentarios de posts recientes @@ -90,6 +101,10 @@ def fetch_platform_interactions(platform: str): for comment in comments: external_id = comment.get("id") + # Verificar si ya fue procesado como mención o existe en DB + if external_id in processed_ids: + continue + existing = db.query(Interaction).filter( Interaction.external_id == external_id ).first() @@ -120,6 +135,7 @@ def fetch_platform_interactions(platform: str): ) if comment.get("created_at") or comment.get("timestamp") or comment.get("created_time") else datetime.utcnow() ) db.add(interaction) + processed_ids.add(external_id) new_comments += 1 db.commit()