From 2a5f722f6e5ae37e20854372594486e39e55aaa7 Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Thu, 9 Mar 2023 14:18:56 -0500 Subject: [PATCH] Optimize add/remove book statuses task queries The queries as they previously existed required joining together 12 different tables, which is extremely expensive. Splitting it into four queries means that the individual queries can effectively use the indexes we have, and should be very fast no matter how many statuses are in the database. Removing the .distinct() call is fine, since we're adding them to a set in Redis anyways, which will take care of the duplicates. It's a bit ugly that we now make four separate calls to Redis (this might result in things being slightly slower in cases where there are an extremely small number of statuses), but doing things differently would result in significantly more surgery to the existing code, so I've opted to avoid that for the moment. Fixes: #2725 --- bookwyrm/activitystreams.py | 52 +++++++++---------- .../tests/activitystreams/test_booksstream.py | 29 +++++++++-- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index 80774e28d..1bc1e803c 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -244,38 +244,38 @@ class BooksStream(ActivityStream): def add_book_statuses(self, user, book): """add statuses about a book to a user's feed""" work = book.parent_work - statuses = ( - models.Status.privacy_filter( - user, - privacy_levels=["public"], - ) - .filter( - Q(comment__book__parent_work=work) - | Q(quotation__book__parent_work=work) - | Q(review__book__parent_work=work) - | Q(mention_books__parent_work=work) - ) - .distinct() + statuses = models.Status.privacy_filter( + user, + privacy_levels=["public"], ) - self.bulk_add_objects_to_store(statuses, self.stream_id(user)) + + book_comments = statuses.filter(Q(comment__book__parent_work=work)) + book_quotations = statuses.filter(Q(quotation__book__parent_work=work)) + book_reviews = statuses.filter(Q(review__book__parent_work=work)) + book_mentions = statuses.filter(Q(mention_books__parent_work=work)) + + self.bulk_add_objects_to_store(book_comments, self.stream_id(user)) + self.bulk_add_objects_to_store(book_quotations, self.stream_id(user)) + self.bulk_add_objects_to_store(book_reviews, self.stream_id(user)) + self.bulk_add_objects_to_store(book_mentions, self.stream_id(user)) def remove_book_statuses(self, user, book): """add statuses about a book to a user's feed""" work = book.parent_work - statuses = ( - models.Status.privacy_filter( - user, - privacy_levels=["public"], - ) - .filter( - Q(comment__book__parent_work=work) - | Q(quotation__book__parent_work=work) - | Q(review__book__parent_work=work) - | Q(mention_books__parent_work=work) - ) - .distinct() + statuses = models.Status.privacy_filter( + user, + privacy_levels=["public"], ) - self.bulk_remove_objects_from_store(statuses, self.stream_id(user)) + + book_comments = statuses.filter(Q(comment__book__parent_work=work)) + book_quotations = statuses.filter(Q(quotation__book__parent_work=work)) + book_reviews = statuses.filter(Q(review__book__parent_work=work)) + book_mentions = statuses.filter(Q(mention_books__parent_work=work)) + + self.bulk_remove_objects_from_store(book_comments, self.stream_id(user)) + self.bulk_remove_objects_from_store(book_quotations, self.stream_id(user)) + self.bulk_remove_objects_from_store(book_reviews, self.stream_id(user)) + self.bulk_remove_objects_from_store(book_mentions, self.stream_id(user)) # determine which streams are enabled in settings.py diff --git a/bookwyrm/tests/activitystreams/test_booksstream.py b/bookwyrm/tests/activitystreams/test_booksstream.py index dedf488ae..1cd335b30 100644 --- a/bookwyrm/tests/activitystreams/test_booksstream.py +++ b/bookwyrm/tests/activitystreams/test_booksstream.py @@ -1,4 +1,6 @@ """ testing activitystreams """ +import itertools + from unittest.mock import patch from django.test import TestCase from bookwyrm import activitystreams, models @@ -69,12 +71,29 @@ class Activitystreams(TestCase): shelf=self.local_user.shelf_set.first(), book=self.book, ) + + class RedisMockCounter: + """keep track of calls to mock redis store""" + + calls = [] + + def bulk_add_objects_to_store(self, objs, store): + """keep track of bulk_add_objects_to_store calls""" + self.calls.append((objs, store)) + + redis_mock_counter = RedisMockCounter() with patch( "bookwyrm.activitystreams.BooksStream.bulk_add_objects_to_store" ) as redis_mock: + redis_mock.side_effect = redis_mock_counter.bulk_add_objects_to_store activitystreams.BooksStream().add_book_statuses(self.local_user, self.book) - args = redis_mock.call_args[0] - queryset = args[0] - self.assertEqual(queryset.count(), 1) - self.assertTrue(status in queryset) - self.assertEqual(args[1], f"{self.local_user.id}-books") + + self.assertEqual(sum(map(lambda x: x[0].count(), redis_mock_counter.calls)), 1) + self.assertTrue( + status + in itertools.chain.from_iterable( + map(lambda x: x[0], redis_mock_counter.calls) + ) + ) + for call in redis_mock_counter.calls: + self.assertEqual(call[1], f"{self.local_user.id}-books")