Migrate Twitter to tweepy AsyncStream for faster, more reliable updates
This commit is contained in:
parent
6a0c6ca8d5
commit
5ced32ca2b
4 changed files with 107 additions and 70 deletions
2
.flake8
2
.flake8
|
@ -5,6 +5,8 @@ exclude =
|
||||||
extend-ignore =
|
extend-ignore =
|
||||||
Q0, E501, C812, E203, W503, # These default to arguing with Black. We might configure some of them eventually
|
Q0, E501, C812, E203, W503, # These default to arguing with Black. We might configure some of them eventually
|
||||||
ANN001, # Ignore self and cls annotations
|
ANN001, # Ignore self and cls annotations
|
||||||
|
ANN002, ANN003, # Ignore *args and **kwargs
|
||||||
|
ANN101, # Ignore self
|
||||||
ANN204, ANN206, # return annotations for special methods and class methods
|
ANN204, ANN206, # return annotations for special methods and class methods
|
||||||
D105, D107, # Missing Docstrings in magic method and __init__
|
D105, D107, # Missing Docstrings in magic method and __init__
|
||||||
S311, # Standard pseudo-random generators are not suitable for security/cryptographic purposes.
|
S311, # Standard pseudo-random generators are not suitable for security/cryptographic purposes.
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
"""JARVIS Twitter sync."""
|
"""JARVIS Twitter sync."""
|
||||||
import asyncio
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import tweepy
|
import tweepy.asynchronous
|
||||||
from dis_snek import Snake
|
from dis_snek import Snake
|
||||||
from dis_snek.models.discord.embed import Embed
|
from dis_snek.models.discord.embed import Embed
|
||||||
from jarvis_core.db import q
|
from jarvis_core.db import q
|
||||||
from jarvis_core.db.models import TwitterAccount, TwitterFollow
|
from jarvis_core.db.models import TwitterAccount, TwitterFollow
|
||||||
|
from tweepy.asynchronous import AsyncStream
|
||||||
|
from tweepy.models import Status
|
||||||
|
|
||||||
from jarvis_tasks.config import TaskConfig
|
from jarvis_tasks.config import TaskConfig
|
||||||
from jarvis_tasks.util import build_embed
|
from jarvis_tasks.util import build_embed
|
||||||
|
@ -17,14 +18,13 @@ config = TaskConfig.from_yaml()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def tweet_embeds(tweet: tweepy.models.Status) -> List[Embed]:
|
def tweet_embeds(tweet: Status) -> List[Embed]:
|
||||||
"""
|
"""
|
||||||
Build a tweet embeds.
|
Build a tweet embeds.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
tweet: Tweet to build embeds
|
tweet: Tweet to build embeds
|
||||||
"""
|
"""
|
||||||
logger.debug("Starting Task-twitter")
|
|
||||||
url = f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}"
|
url = f"https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}"
|
||||||
entities = tweet.__dict__.get("extended_entities", {})
|
entities = tweet.__dict__.get("extended_entities", {})
|
||||||
media = entities.get("media", [])
|
media = entities.get("media", [])
|
||||||
|
@ -74,6 +74,78 @@ def tweet_embeds(tweet: tweepy.models.Status) -> List[Embed]:
|
||||||
return embeds
|
return embeds
|
||||||
|
|
||||||
|
|
||||||
|
class JARVISTwitterStream(AsyncStream):
|
||||||
|
"""JARVIS Twitter AsyncStream client."""
|
||||||
|
|
||||||
|
def __init__(self, bot: Snake, *args, **kwargs):
|
||||||
|
if not bot:
|
||||||
|
raise ValueError("Missing bot")
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.bot = bot
|
||||||
|
self.current_filter = None
|
||||||
|
|
||||||
|
async def on_keep_alive(self) -> None:
|
||||||
|
"""Override keep-alive to track new accounts."""
|
||||||
|
await super().on_keep_alive()
|
||||||
|
ids = [x.twitter_id async for x in TwitterAccount.find()]
|
||||||
|
if ids != self.current_filter:
|
||||||
|
logger.debug("Follows have changed, disconnected")
|
||||||
|
self.disconnect()
|
||||||
|
|
||||||
|
async def on_status(self, status: Status) -> None:
|
||||||
|
"""
|
||||||
|
Process new statuses.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status: The status to process
|
||||||
|
"""
|
||||||
|
if status.author.id not in self.current_filter:
|
||||||
|
return
|
||||||
|
logger.debug(f"{status.author.screen_name} sent new tweet")
|
||||||
|
follows = TwitterFollow.find(q(twitter_id=status.author.id))
|
||||||
|
num_follows = 0
|
||||||
|
|
||||||
|
async for follow in follows:
|
||||||
|
num_follows += 1
|
||||||
|
|
||||||
|
guild = await self.bot.fetch_guild(follow.guild)
|
||||||
|
if not guild:
|
||||||
|
logger.warn(f"Follow {follow.id} invalid, deleting")
|
||||||
|
await follow.delete()
|
||||||
|
num_follows -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
channel = await guild.fetch_channel(follow.channel)
|
||||||
|
if not channel:
|
||||||
|
logger.warn(f"Follow {follow.id} invalid, deleting")
|
||||||
|
await follow.delete()
|
||||||
|
num_follows -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
retweet = "retweeted_status" in status.__dict__
|
||||||
|
if retweet and not follow.retweets:
|
||||||
|
continue
|
||||||
|
|
||||||
|
embeds = tweet_embeds(status)
|
||||||
|
mod = "re" if retweet else ""
|
||||||
|
timestamp = int(status.created_at.timestamp())
|
||||||
|
|
||||||
|
try:
|
||||||
|
await channel.send(
|
||||||
|
f"`@{status.user.screen_name}` {mod}tweeted this at <t:{timestamp}:f>",
|
||||||
|
embeds=embeds,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.debug(f"Failed to send message to {channel.id} in {channel.guild.name}")
|
||||||
|
|
||||||
|
if num_follows == 0:
|
||||||
|
logger.warning(f"Account {status.author.screen_name} no longer has followers, removing")
|
||||||
|
account = await TwitterAccount.find_one(q(twitter_id=status.author.id))
|
||||||
|
if account:
|
||||||
|
await account.delete()
|
||||||
|
self.disconnect()
|
||||||
|
|
||||||
|
|
||||||
async def twitter(bot: Snake) -> None:
|
async def twitter(bot: Snake) -> None:
|
||||||
"""
|
"""
|
||||||
Sync tweets in the background.
|
Sync tweets in the background.
|
||||||
|
@ -84,79 +156,41 @@ async def twitter(bot: Snake) -> None:
|
||||||
if not config.twitter:
|
if not config.twitter:
|
||||||
logger.warn("Missing Twitter config, not starting")
|
logger.warn("Missing Twitter config, not starting")
|
||||||
return
|
return
|
||||||
logger.debug("Starting Task-twitter")
|
stream = JARVISTwitterStream(bot=bot, **config.twitter)
|
||||||
auth = tweepy.AppAuthHandler(config.twitter["consumer_key"], config.twitter["consumer_secret"])
|
auth = tweepy.AppAuthHandler(config.twitter["consumer_key"], config.twitter["consumer_secret"])
|
||||||
api = tweepy.API(auth)
|
api = tweepy.API(auth)
|
||||||
|
logger.debug("Starting Task-twitter")
|
||||||
while True:
|
while True:
|
||||||
accounts = TwitterAccount.find()
|
accounts = TwitterAccount.find()
|
||||||
accounts_to_delete = []
|
|
||||||
|
|
||||||
# Go through all actively followed accounts
|
# Go through all actively followed accounts
|
||||||
|
ids = []
|
||||||
|
|
||||||
async for account in accounts:
|
async for account in accounts:
|
||||||
logger.debug(f"Checking account {account.handle}")
|
logger.debug(f"Checking account {account.handle}")
|
||||||
# Check if account needs updated (handle changes)
|
# Check if account needs updated (handle changes)
|
||||||
if account.last_sync + timedelta(hours=1) <= datetime.now(tz=timezone.utc):
|
if account.last_sync + timedelta(hours=1) <= datetime.now(tz=timezone.utc):
|
||||||
logger.debug(f"Account {account.handle} out of sync, updating")
|
logger.debug(f"Account {account.handle} out of sync, updating")
|
||||||
user = api.get_user(user_id=account.twitter_id)
|
try:
|
||||||
|
user = api.get_user(user_id=account.twitter_id)
|
||||||
|
except Exception:
|
||||||
|
logger.warn(f"Failed to get {account.handle}, deleting")
|
||||||
|
await account.delete()
|
||||||
|
continue
|
||||||
|
if not user:
|
||||||
|
logger.warn(f"Failed to get {account.handle}, deleting")
|
||||||
|
await account.delete()
|
||||||
|
continue
|
||||||
account.handle = user.screen_name
|
account.handle = user.screen_name
|
||||||
account.last_sync = datetime.now(tz=timezone.utc)
|
account.last_sync = datetime.now(tz=timezone.utc)
|
||||||
await account.commit()
|
await account.commit()
|
||||||
|
ids.append(account.twitter_id)
|
||||||
|
|
||||||
# Get new tweets
|
# Get new tweets
|
||||||
if tweets := api.user_timeline(user_id=account.twitter_id, since_id=account.last_tweet):
|
logger.debug(f"Starting stream with {len(ids)} accounts")
|
||||||
logger.debug(f"{account.handle} has new tweets")
|
stream.current_filter = ids
|
||||||
tweets = sorted(tweets, key=lambda x: x.id)
|
try:
|
||||||
follows = TwitterFollow.find(q(twitter_id=account.twitter_id))
|
await stream.filter(follow=ids)
|
||||||
follows_to_delete = []
|
except Exception:
|
||||||
num_follows = 0
|
logger.error("Encountered error with stream", stack_info=True)
|
||||||
|
logger.debug("Stream disconnected, updating filters and re-starting")
|
||||||
# Go through follows and send tweet if necessary
|
|
||||||
async for follow in follows:
|
|
||||||
num_follows += 1
|
|
||||||
guild = await bot.fetch_guild(follow.guild)
|
|
||||||
if not guild:
|
|
||||||
logger.warning(f"Follow {follow.id}'s guild no longer exists, deleting")
|
|
||||||
follows_to_delete.append(follow)
|
|
||||||
continue
|
|
||||||
channel = await bot.fetch_channel(follow.channel)
|
|
||||||
if not channel:
|
|
||||||
logger.warning(f"Follow {follow.id}'s channel no longer exists, deleting")
|
|
||||||
follows_to_delete.append(follow)
|
|
||||||
continue
|
|
||||||
for tweet in tweets:
|
|
||||||
retweet = "retweeted_status" in tweet.__dict__
|
|
||||||
if retweet and not follow.retweets:
|
|
||||||
continue
|
|
||||||
|
|
||||||
embeds = tweet_embeds(tweet)
|
|
||||||
mod = "re" if retweet else ""
|
|
||||||
timestamp = int(tweet.created_at.timestamp())
|
|
||||||
|
|
||||||
try:
|
|
||||||
await channel.send(
|
|
||||||
f"`@{account.handle}` {mod}tweeted this at <t:{timestamp}:f>",
|
|
||||||
embeds=embeds,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
logger.debug(
|
|
||||||
f"Failed to send message to {channel.id} in {channel.guild.name}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Delete invalid follows
|
|
||||||
for follow in follows_to_delete:
|
|
||||||
await follow.delete()
|
|
||||||
|
|
||||||
if num_follows == 0:
|
|
||||||
accounts_to_delete.append(account)
|
|
||||||
else:
|
|
||||||
newest = tweets[-1]
|
|
||||||
account.update(q(last_tweet=newest.id))
|
|
||||||
await account.commit()
|
|
||||||
|
|
||||||
# Delete invalid accounts (no follows)
|
|
||||||
for account in accounts_to_delete:
|
|
||||||
logger.info(f"{account.handle} has no followers, removing")
|
|
||||||
await account.delete()
|
|
||||||
|
|
||||||
# Only check once a minute
|
|
||||||
await asyncio.sleep(60)
|
|
||||||
|
|
7
poetry.lock
generated
7
poetry.lock
generated
|
@ -223,7 +223,7 @@ python-versions = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jarvis-core"
|
name = "jarvis-core"
|
||||||
version = "0.8.0"
|
version = "0.8.1"
|
||||||
description = "JARVIS core"
|
description = "JARVIS core"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
|
@ -242,7 +242,7 @@ umongo = "^3.1.0"
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://git.zevaryx.com/stark-industries/jarvis/jarvis-core.git"
|
url = "https://git.zevaryx.com/stark-industries/jarvis/jarvis-core.git"
|
||||||
reference = "main"
|
reference = "main"
|
||||||
resolved_reference = "02b60a29ebbab91d9ad4cf876eb10c2aea6f2231"
|
resolved_reference = "257eaaea951d7f8ed5f6e1c4d01abed6d47e9f0d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marshmallow"
|
name = "marshmallow"
|
||||||
|
@ -451,6 +451,7 @@ optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
aiohttp = {version = ">=3.7.3,<4", optional = true, markers = "extra == \"async\""}
|
||||||
oauthlib = ">=3.2.0,<4"
|
oauthlib = ">=3.2.0,<4"
|
||||||
requests = ">=2.27.0,<3"
|
requests = ">=2.27.0,<3"
|
||||||
requests-oauthlib = ">=1.2.0,<2"
|
requests-oauthlib = ">=1.2.0,<2"
|
||||||
|
@ -530,7 +531,7 @@ multidict = ">=4.0"
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "a07ca72dcff5520aefe9b577b0fcaf6804e488d91cdf4779b81e37ccc5ba082b"
|
content-hash = "ca53ee1d35badd4d592b79ddc8280a04e74281bc5a21f2537d741a3b5650fad1"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
aiofiles = [
|
aiofiles = [
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "jarvis-tasks"
|
name = "jarvis-tasks"
|
||||||
version = "0.5.0"
|
version = "0.6.0"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Your Name <you@example.com>"]
|
authors = ["Your Name <you@example.com>"]
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ python = "^3.10"
|
||||||
jarvis-core = {git = "https://git.zevaryx.com/stark-industries/jarvis/jarvis-core.git", rev = "main"}
|
jarvis-core = {git = "https://git.zevaryx.com/stark-industries/jarvis/jarvis-core.git", rev = "main"}
|
||||||
dis-snek = "*"
|
dis-snek = "*"
|
||||||
aiohttp = "^3.8.1"
|
aiohttp = "^3.8.1"
|
||||||
tweepy = "^4.5.0"
|
tweepy = {extras = ["async"], version = "^4.8.0"}
|
||||||
asyncpraw = "^7.5.0"
|
asyncpraw = "^7.5.0"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
|
|
Loading…
Add table
Reference in a new issue