fix: Prevent duplicate interaction inserts and add usernames

- Add processed_ids set to prevent duplicate inserts when a tweet is
  both a mention and a comment (same external_id)
- Enhance get_mentions() to fetch usernames via user expansions
- Update fetch_interactions to prefer username over numeric author_id,
  falling back to author_id when username is missing or empty

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-29 10:04:58 +00:00
parent f4f0a2d230
commit 5f04aa0cce
2 changed files with 29 additions and 4 deletions

View File

@@ -45,6 +45,10 @@ def fetch_platform_interactions(platform: str):
try:
publisher = get_publisher(platform)
# Set para trackear external_ids procesados en esta ejecución
# Evita duplicados cuando un tweet es tanto mención como comentario
processed_ids = set()
# Obtener menciones
mentions = run_async(publisher.get_mentions())
new_mentions = 0
@@ -52,17 +56,23 @@ def fetch_platform_interactions(platform: str):
for mention in mentions:
external_id = mention.get("id")
# Verificar si ya existe
# Verificar si ya existe en DB o fue procesado en esta ejecución
if external_id in processed_ids:
continue
existing = db.query(Interaction).filter(
Interaction.external_id == external_id
).first()
if not existing:
# Obtener username (preferir username sobre author_id)
author_username = mention.get("username") or mention.get("author_id", "unknown")
interaction = Interaction(
platform=platform,
interaction_type="mention",
external_id=external_id,
author_username=mention.get("username", mention.get("author_id", "unknown")),
author_username=author_username,
author_name=mention.get("name"),
content=mention.get("text", mention.get("message")),
interaction_at=datetime.fromisoformat(
@@ -70,6 +80,7 @@ def fetch_platform_interactions(platform: str):
) if mention.get("created_at") else datetime.utcnow()
)
db.add(interaction)
processed_ids.add(external_id)
new_mentions += 1
# Obtener comentarios de posts recientes
@@ -90,6 +101,10 @@ def fetch_platform_interactions(platform: str):
for comment in comments:
external_id = comment.get("id")
# Verificar si ya fue procesado como mención o existe en DB
if external_id in processed_ids:
continue
existing = db.query(Interaction).filter(
Interaction.external_id == external_id
).first()
@@ -120,6 +135,7 @@ def fetch_platform_interactions(platform: str):
) if comment.get("created_at") or comment.get("timestamp") or comment.get("created_time") else datetime.utcnow()
)
db.add(interaction)
processed_ids.add(external_id)
new_comments += 1
db.commit()