fix: Resolve duplicate interaction detection and add usernames
- Add processed_ids set to prevent duplicate inserts when a tweet is both a mention and a comment (same external_id)
- Enhance get_mentions() to fetch usernames via user expansions
- Update fetch_interactions to prefer username over numeric author_id

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -189,22 +189,31 @@ class XPublisher(BasePublisher):
|
|||||||
me = self.client.get_me()
|
me = self.client.get_me()
|
||||||
user_id = me.data.id
|
user_id = me.data.id
|
||||||
|
|
||||||
# Obtener menciones
|
# Obtener menciones con información de usuarios
|
||||||
mentions = self.client.get_users_mentions(
|
mentions = self.client.get_users_mentions(
|
||||||
id=user_id,
|
id=user_id,
|
||||||
since_id=since_id,
|
since_id=since_id,
|
||||||
max_results=50,
|
max_results=50,
|
||||||
tweet_fields=['created_at', 'author_id', 'conversation_id']
|
tweet_fields=['created_at', 'author_id', 'conversation_id'],
|
||||||
|
user_fields=['username'],
|
||||||
|
expansions=['author_id']
|
||||||
)
|
)
|
||||||
|
|
||||||
if not mentions.data:
|
if not mentions.data:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Crear mapa de usuarios para obtener usernames
|
||||||
|
users_map = {}
|
||||||
|
if mentions.includes and 'users' in mentions.includes:
|
||||||
|
for user in mentions.includes['users']:
|
||||||
|
users_map[str(user.id)] = user.username
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"id": str(tweet.id),
|
"id": str(tweet.id),
|
||||||
"text": tweet.text,
|
"text": tweet.text,
|
||||||
"author_id": str(tweet.author_id),
|
"author_id": str(tweet.author_id),
|
||||||
|
"username": users_map.get(str(tweet.author_id), str(tweet.author_id)),
|
||||||
"created_at": tweet.created_at.isoformat() if tweet.created_at else None
|
"created_at": tweet.created_at.isoformat() if tweet.created_at else None
|
||||||
}
|
}
|
||||||
for tweet in mentions.data
|
for tweet in mentions.data
|
||||||
|
|||||||
@@ -45,6 +45,10 @@ def fetch_platform_interactions(platform: str):
|
|||||||
try:
|
try:
|
||||||
publisher = get_publisher(platform)
|
publisher = get_publisher(platform)
|
||||||
|
|
||||||
|
# Set para trackear external_ids procesados en esta ejecución
|
||||||
|
# Evita duplicados cuando un tweet es tanto mención como comentario
|
||||||
|
processed_ids = set()
|
||||||
|
|
||||||
# Obtener menciones
|
# Obtener menciones
|
||||||
mentions = run_async(publisher.get_mentions())
|
mentions = run_async(publisher.get_mentions())
|
||||||
new_mentions = 0
|
new_mentions = 0
|
||||||
@@ -52,17 +56,23 @@ def fetch_platform_interactions(platform: str):
|
|||||||
for mention in mentions:
|
for mention in mentions:
|
||||||
external_id = mention.get("id")
|
external_id = mention.get("id")
|
||||||
|
|
||||||
# Verificar si ya existe
|
# Verificar si ya existe en DB o fue procesado en esta ejecución
|
||||||
|
if external_id in processed_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
existing = db.query(Interaction).filter(
|
existing = db.query(Interaction).filter(
|
||||||
Interaction.external_id == external_id
|
Interaction.external_id == external_id
|
||||||
).first()
|
).first()
|
||||||
|
|
||||||
if not existing:
|
if not existing:
|
||||||
|
# Obtener username (preferir username sobre author_id)
|
||||||
|
author_username = mention.get("username") or mention.get("author_id", "unknown")
|
||||||
|
|
||||||
interaction = Interaction(
|
interaction = Interaction(
|
||||||
platform=platform,
|
platform=platform,
|
||||||
interaction_type="mention",
|
interaction_type="mention",
|
||||||
external_id=external_id,
|
external_id=external_id,
|
||||||
author_username=mention.get("username", mention.get("author_id", "unknown")),
|
author_username=author_username,
|
||||||
author_name=mention.get("name"),
|
author_name=mention.get("name"),
|
||||||
content=mention.get("text", mention.get("message")),
|
content=mention.get("text", mention.get("message")),
|
||||||
interaction_at=datetime.fromisoformat(
|
interaction_at=datetime.fromisoformat(
|
||||||
@@ -70,6 +80,7 @@ def fetch_platform_interactions(platform: str):
|
|||||||
) if mention.get("created_at") else datetime.utcnow()
|
) if mention.get("created_at") else datetime.utcnow()
|
||||||
)
|
)
|
||||||
db.add(interaction)
|
db.add(interaction)
|
||||||
|
processed_ids.add(external_id)
|
||||||
new_mentions += 1
|
new_mentions += 1
|
||||||
|
|
||||||
# Obtener comentarios de posts recientes
|
# Obtener comentarios de posts recientes
|
||||||
@@ -90,6 +101,10 @@ def fetch_platform_interactions(platform: str):
|
|||||||
for comment in comments:
|
for comment in comments:
|
||||||
external_id = comment.get("id")
|
external_id = comment.get("id")
|
||||||
|
|
||||||
|
# Verificar si ya fue procesado como mención o existe en DB
|
||||||
|
if external_id in processed_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
existing = db.query(Interaction).filter(
|
existing = db.query(Interaction).filter(
|
||||||
Interaction.external_id == external_id
|
Interaction.external_id == external_id
|
||||||
).first()
|
).first()
|
||||||
@@ -120,6 +135,7 @@ def fetch_platform_interactions(platform: str):
|
|||||||
) if comment.get("created_at") or comment.get("timestamp") or comment.get("created_time") else datetime.utcnow()
|
) if comment.get("created_at") or comment.get("timestamp") or comment.get("created_time") else datetime.utcnow()
|
||||||
)
|
)
|
||||||
db.add(interaction)
|
db.add(interaction)
|
||||||
|
processed_ids.add(external_id)
|
||||||
new_comments += 1
|
new_comments += 1
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|||||||
Reference in New Issue
Block a user