Optimize get_audience by only fetching IDs

Looking at the tracing data for this function in prod, only ~500ms is
spent in the database. My best guess is that the rest of the time goes to
transferring and instantiating the user objects, which we never actually
use, since we only need their IDs.
This commit is contained in:
Wesley Aptekar-Cassels 2023-04-28 12:51:44 -04:00
parent 7211906697
commit 097cd3ed72

View file

@@ -116,7 +116,7 @@ class ActivityStream(RedisStore):
         )
         # direct messages don't appear in feeds, direct comments/reviews/etc do
         if status.privacy == "direct" and status.status_type == "Note":
-            return []
+            return models.User.objects.none()
         # everybody who could plausibly see this status
         audience = models.User.objects.filter(
@@ -152,11 +152,11 @@ class ActivityStream(RedisStore):
     def get_audience(self, status):
         """given a status, what users should see it"""
         trace.get_current_span().set_attribute("stream_id", self.key)
-        audience = self._get_audience(status)
+        audience = self._get_audience(status).values_list("id", flat=True)
         status_author = models.User.objects.filter(
             is_active=True, local=True, id=status.user.id
-        )
-        return list({user.id for user in list(audience) + list(status_author)})
+        ).values_list("id", flat=True)
+        return list(set(list(audience) + list(status_author)))
     def get_stores_for_users(self, user_ids):
         """convert a list of user ids into redis store ids"""
@@ -186,12 +186,12 @@ class HomeStream(ActivityStream):
         if not audience:
             return []
         # if the user is following the author
-        audience = audience.filter(following=status.user)
+        audience = audience.filter(following=status.user).values_list("id", flat=True)
         # if the user is the post's author
         status_author = models.User.objects.filter(
             is_active=True, local=True, id=status.user.id
-        )
-        return list({user.id for user in list(audience) + list(status_author)})
+        ).values_list("id", flat=True)
+        return list(set(list(audience) + list(status_author)))
     def get_statuses_for_user(self, user):
         return models.Status.privacy_filter(
@@ -240,7 +240,7 @@ class BooksStream(ActivityStream):
         audience = super()._get_audience(status)
         if not audience:
-            return []
+            return models.User.objects.none()
         return audience.filter(shelfbook__book__parent_work=work).distinct()
     def get_audience(self, status):