Optimize add/remove book statuses task queries

The queries as they previously existed required joining together 12
different tables, which is extremely expensive. Splitting it into four
queries means that the individual queries can effectively use the
indexes we have, and should be very fast no matter how many statuses are
in the database.

Removing the .distinct() call is fine, since we're adding them to a set
in Redis anyways, which will take care of the duplicates.

It's a bit ugly that we now make four separate calls to Redis (this
might result in things being slightly slower in cases where there are an
extremely small number of statuses), but doing things differently would
result in significantly more surgery to the existing code, so I've opted
to avoid that for the moment.

Fixes: #2725
This commit is contained in:
Wesley Aptekar-Cassels 2023-03-09 14:18:56 -05:00
parent 00666c4f52
commit 2a5f722f6e
2 changed files with 50 additions and 31 deletions

View file

@ -244,38 +244,38 @@ class BooksStream(ActivityStream):
def add_book_statuses(self, user, book):
"""add statuses about a book to a user's feed"""
work = book.parent_work
statuses = (
models.Status.privacy_filter(
user,
privacy_levels=["public"],
)
.filter(
Q(comment__book__parent_work=work)
| Q(quotation__book__parent_work=work)
| Q(review__book__parent_work=work)
| Q(mention_books__parent_work=work)
)
.distinct()
statuses = models.Status.privacy_filter(
user,
privacy_levels=["public"],
)
self.bulk_add_objects_to_store(statuses, self.stream_id(user))
book_comments = statuses.filter(Q(comment__book__parent_work=work))
book_quotations = statuses.filter(Q(quotation__book__parent_work=work))
book_reviews = statuses.filter(Q(review__book__parent_work=work))
book_mentions = statuses.filter(Q(mention_books__parent_work=work))
self.bulk_add_objects_to_store(book_comments, self.stream_id(user))
self.bulk_add_objects_to_store(book_quotations, self.stream_id(user))
self.bulk_add_objects_to_store(book_reviews, self.stream_id(user))
self.bulk_add_objects_to_store(book_mentions, self.stream_id(user))
def remove_book_statuses(self, user, book):
"""add statuses about a book to a user's feed"""
work = book.parent_work
statuses = (
models.Status.privacy_filter(
user,
privacy_levels=["public"],
)
.filter(
Q(comment__book__parent_work=work)
| Q(quotation__book__parent_work=work)
| Q(review__book__parent_work=work)
| Q(mention_books__parent_work=work)
)
.distinct()
statuses = models.Status.privacy_filter(
user,
privacy_levels=["public"],
)
self.bulk_remove_objects_from_store(statuses, self.stream_id(user))
book_comments = statuses.filter(Q(comment__book__parent_work=work))
book_quotations = statuses.filter(Q(quotation__book__parent_work=work))
book_reviews = statuses.filter(Q(review__book__parent_work=work))
book_mentions = statuses.filter(Q(mention_books__parent_work=work))
self.bulk_remove_objects_from_store(book_comments, self.stream_id(user))
self.bulk_remove_objects_from_store(book_quotations, self.stream_id(user))
self.bulk_remove_objects_from_store(book_reviews, self.stream_id(user))
self.bulk_remove_objects_from_store(book_mentions, self.stream_id(user))
# determine which streams are enabled in settings.py

View file

@ -1,4 +1,6 @@
""" testing activitystreams """
import itertools
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import activitystreams, models
@ -69,12 +71,29 @@ class Activitystreams(TestCase):
shelf=self.local_user.shelf_set.first(),
book=self.book,
)
class RedisMockCounter:
"""keep track of calls to mock redis store"""
calls = []
def bulk_add_objects_to_store(self, objs, store):
"""keep track of bulk_add_objects_to_store calls"""
self.calls.append((objs, store))
redis_mock_counter = RedisMockCounter()
with patch(
"bookwyrm.activitystreams.BooksStream.bulk_add_objects_to_store"
) as redis_mock:
redis_mock.side_effect = redis_mock_counter.bulk_add_objects_to_store
activitystreams.BooksStream().add_book_statuses(self.local_user, self.book)
args = redis_mock.call_args[0]
queryset = args[0]
self.assertEqual(queryset.count(), 1)
self.assertTrue(status in queryset)
self.assertEqual(args[1], f"{self.local_user.id}-books")
self.assertEqual(sum(map(lambda x: x[0].count(), redis_mock_counter.calls)), 1)
self.assertTrue(
status
in itertools.chain.from_iterable(
map(lambda x: x[0], redis_mock_counter.calls)
)
)
for call in redis_mock_counter.calls:
self.assertEqual(call[1], f"{self.local_user.id}-books")