From 5446869c389351046a708ddefdab2031d056174a Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Wed, 8 Mar 2023 22:33:32 -0500 Subject: [PATCH 1/5] Change stream_id to take user ID Anywhere we have a user object, we can easily get the user ID in the caller, and this will allow us more flexibility in the future to implement optimizations that involve knowing a user ID without querying the database for the user object. --- bookwyrm/activitystreams.py | 22 +++++++++---------- .../activitystreams/test_abstractstream.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index 80774e28d..f8349eed8 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -13,18 +13,18 @@ from bookwyrm.tasks import app, LOW, MEDIUM, HIGH class ActivityStream(RedisStore): """a category of activity stream (like home, local, books)""" - def stream_id(self, user): + def stream_id(self, user_id): """the redis key for this user's instance of this stream""" - return f"{user.id}-{self.key}" + return f"{user_id}-{self.key}" def unread_id(self, user): """the redis key for this user's unread count for this stream""" - stream_id = self.stream_id(user) + stream_id = self.stream_id(user.id) return f"{stream_id}-unread" def unread_by_status_type_id(self, user): """the redis key for this user's unread count for this stream""" - stream_id = self.stream_id(user) + stream_id = self.stream_id(user.id) return f"{stream_id}-unread-by-type" def get_rank(self, obj): # pylint: disable=no-self-use @@ -52,13 +52,13 @@ class ActivityStream(RedisStore): """add a user's statuses to another user's feed""" # only add the statuses that the viewer should be able to see (ie, not dms) statuses = models.Status.privacy_filter(viewer).filter(user=user) - self.bulk_add_objects_to_store(statuses, self.stream_id(viewer)) + self.bulk_add_objects_to_store(statuses, self.stream_id(viewer.id)) def remove_user_statuses(self, viewer, user): """remove a user's status from another user's feed""" # remove all so that followers only statuses are removed statuses = user.status_set.all() - self.bulk_remove_objects_from_store(statuses, self.stream_id(viewer)) + self.bulk_remove_objects_from_store(statuses, self.stream_id(viewer.id)) def get_activity_stream(self, user): """load the statuses to be displayed""" @@ -66,7 +66,7 @@ class ActivityStream(RedisStore): r.set(self.unread_id(user), 0) r.delete(self.unread_by_status_type_id(user)) - statuses = self.get_store(self.stream_id(user)) + statuses = self.get_store(self.stream_id(user.id)) return ( models.Status.objects.select_subclasses() .filter(id__in=statuses) @@ -95,7 +95,7 @@ class ActivityStream(RedisStore): def populate_streams(self, user): """go from zero to a timeline""" - self.populate_store(self.stream_id(user)) + self.populate_store(self.stream_id(user.id)) def get_audience(self, status): # pylint: disable=no-self-use """given a status, what users should see it""" @@ -137,7 +137,7 @@ class ActivityStream(RedisStore): return audience.distinct() def get_stores_for_object(self, obj): - return [self.stream_id(u) for u in self.get_audience(obj)] + return [self.stream_id(u.id) for u in self.get_audience(obj)] def get_statuses_for_user(self, user): # pylint: disable=no-self-use """given a user, what statuses should they see on this stream""" @@ -257,7 +257,7 @@ class BooksStream(ActivityStream): ) .distinct() ) - self.bulk_add_objects_to_store(statuses, self.stream_id(user)) + self.bulk_add_objects_to_store(statuses, self.stream_id(user.id)) def remove_book_statuses(self, user, book): """add statuses about a book to a user's feed""" @@ -275,7 +275,7 @@ class BooksStream(ActivityStream): ) .distinct() ) - self.bulk_remove_objects_from_store(statuses, self.stream_id(user)) + self.bulk_remove_objects_from_store(statuses, self.stream_id(user.id)) # determine which streams are enabled in settings.py diff --git a/bookwyrm/tests/activitystreams/test_abstractstream.py b/bookwyrm/tests/activitystreams/test_abstractstream.py index af94233f0..d7d4793a1 100644 --- a/bookwyrm/tests/activitystreams/test_abstractstream.py +++ b/bookwyrm/tests/activitystreams/test_abstractstream.py @@ -53,7 +53,7 @@ class Activitystreams(TestCase): def test_activitystream_class_ids(self, *_): """the abstract base class for stream objects""" self.assertEqual( - self.test_stream.stream_id(self.local_user), + self.test_stream.stream_id(self.local_user.id), f"{self.local_user.id}-test", ) self.assertEqual( From 653e8ee81b7cf0774c61fe97c9dc37d8b2e7301d Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Wed, 8 Mar 2023 22:36:02 -0500 Subject: [PATCH 2/5] Change unread_id to take user ID Same reason as described in the prior commit. --- bookwyrm/activitystreams.py | 10 +++++----- bookwyrm/tests/activitystreams/test_abstractstream.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index f8349eed8..ace9eadbd 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -17,9 +17,9 @@ class ActivityStream(RedisStore): """the redis key for this user's instance of this stream""" return f"{user_id}-{self.key}" - def unread_id(self, user): + def unread_id(self, user_id): """the redis key for this user's unread count for this stream""" - stream_id = self.stream_id(user.id) + stream_id = self.stream_id(user_id) return f"{stream_id}-unread" def unread_by_status_type_id(self, user): @@ -39,7 +39,7 @@ class ActivityStream(RedisStore): if increment_unread: for user in self.get_audience(status): # add to the unread status count - pipeline.incr(self.unread_id(user)) + pipeline.incr(self.unread_id(user.id)) # add to the unread status count for status type pipeline.hincrby( self.unread_by_status_type_id(user), get_status_type(status), 1 @@ -63,7 +63,7 @@ class ActivityStream(RedisStore): def get_activity_stream(self, user): """load the statuses to be displayed""" # clear unreads for this feed - r.set(self.unread_id(user), 0) + r.set(self.unread_id(user.id), 0) r.delete(self.unread_by_status_type_id(user)) statuses = self.get_store(self.stream_id(user.id)) @@ -83,7 +83,7 @@ class ActivityStream(RedisStore): def get_unread_count(self, user): """get the unread status count for this user's feed""" - return int(r.get(self.unread_id(user)) or 0) + return int(r.get(self.unread_id(user.id)) or 0) def get_unread_count_by_status_type(self, user): """get the unread status count for this user's feed's status types""" diff --git a/bookwyrm/tests/activitystreams/test_abstractstream.py b/bookwyrm/tests/activitystreams/test_abstractstream.py index d7d4793a1..5f4011bec 100644 --- a/bookwyrm/tests/activitystreams/test_abstractstream.py +++ b/bookwyrm/tests/activitystreams/test_abstractstream.py @@ -57,7 +57,7 @@ class Activitystreams(TestCase): f"{self.local_user.id}-test", ) self.assertEqual( - self.test_stream.unread_id(self.local_user), + self.test_stream.unread_id(self.local_user.id), f"{self.local_user.id}-test-unread", ) From 41e14bdfafb4645994be63df5cc7406f7bf680f0 Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Wed, 8 Mar 2023 22:37:30 -0500 Subject: [PATCH 3/5] Change unread_by_status_type_id to take user ID Same reason as in prior commit. --- bookwyrm/activitystreams.py | 10 +++++----- bookwyrm/tests/activitystreams/test_abstractstream.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index ace9eadbd..dbc6a8107 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -22,9 +22,9 @@ class ActivityStream(RedisStore): stream_id = self.stream_id(user_id) return f"{stream_id}-unread" - def unread_by_status_type_id(self, user): + def unread_by_status_type_id(self, user_id): """the redis key for this user's unread count for this stream""" - stream_id = self.stream_id(user.id) + stream_id = self.stream_id(user_id) return f"{stream_id}-unread-by-type" def get_rank(self, obj): # pylint: disable=no-self-use @@ -42,7 +42,7 @@ class ActivityStream(RedisStore): pipeline.incr(self.unread_id(user.id)) # add to the unread status count for status type pipeline.hincrby( - self.unread_by_status_type_id(user), get_status_type(status), 1 + self.unread_by_status_type_id(user.id), get_status_type(status), 1 ) # and go! @@ -64,7 +64,7 @@ class ActivityStream(RedisStore): """load the statuses to be displayed""" # clear unreads for this feed r.set(self.unread_id(user.id), 0) - r.delete(self.unread_by_status_type_id(user)) + r.delete(self.unread_by_status_type_id(user.id)) statuses = self.get_store(self.stream_id(user.id)) return ( @@ -87,7 +87,7 @@ class ActivityStream(RedisStore): def get_unread_count_by_status_type(self, user): """get the unread status count for this user's feed's status types""" - status_types = r.hgetall(self.unread_by_status_type_id(user)) + status_types = r.hgetall(self.unread_by_status_type_id(user.id)) return { str(key.decode("utf-8")): int(value) or 0 for key, value in status_types.items() diff --git a/bookwyrm/tests/activitystreams/test_abstractstream.py b/bookwyrm/tests/activitystreams/test_abstractstream.py index 5f4011bec..05b453f9a 100644 --- a/bookwyrm/tests/activitystreams/test_abstractstream.py +++ b/bookwyrm/tests/activitystreams/test_abstractstream.py @@ -64,7 +64,7 @@ class Activitystreams(TestCase): def test_unread_by_status_type_id(self, *_): """stream for status type""" self.assertEqual( - self.test_stream.unread_by_status_type_id(self.local_user), + self.test_stream.unread_by_status_type_id(self.local_user.id), f"{self.local_user.id}-test-unread-by-type", ) From 23698dafe56306d05ce8bf1022b4f94b5f2d95b4 Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Thu, 9 Mar 2023 00:28:11 -0500 Subject: [PATCH 4/5] Change get_audience to return list of user IDs This will make it simpler to implement various optimizations. --- bookwyrm/activitystreams.py | 26 ++++++++------- .../activitystreams/test_abstractstream.py | 32 +++++++++---------- .../tests/activitystreams/test_homestream.py | 10 +++--- .../tests/activitystreams/test_localstream.py | 10 +++--- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index dbc6a8107..ea629a318 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -37,12 +37,12 @@ class ActivityStream(RedisStore): pipeline = self.add_object_to_related_stores(status, execute=False) if increment_unread: - for user in self.get_audience(status): + for user_id in self.get_audience(status): # add to the unread status count - pipeline.incr(self.unread_id(user.id)) + pipeline.incr(self.unread_id(user_id)) # add to the unread status count for status type pipeline.hincrby( - self.unread_by_status_type_id(user.id), get_status_type(status), 1 + self.unread_by_status_type_id(user_id), get_status_type(status), 1 ) # and go! @@ -97,7 +97,7 @@ class ActivityStream(RedisStore): """go from zero to a timeline""" self.populate_store(self.stream_id(user.id)) - def get_audience(self, status): # pylint: disable=no-self-use + def _get_audience(self, status): # pylint: disable=no-self-use """given a status, what users should see it""" # direct messages don't appeard in feeds, direct comments/reviews/etc do if status.privacy == "direct" and status.status_type == "Note": @@ -136,8 +136,12 @@ class ActivityStream(RedisStore): ) return audience.distinct() + def get_audience(self, status): # pylint: disable=no-self-use + """given a status, what users should see it""" + return [user.id for user in self._get_audience(status)] + def get_stores_for_object(self, obj): - return [self.stream_id(u.id) for u in self.get_audience(obj)] + return [self.stream_id(user_id) for user_id in self.get_audience(obj)] def get_statuses_for_user(self, user): # pylint: disable=no-self-use """given a user, what statuses should they see on this stream""" @@ -156,8 +160,8 @@ class HomeStream(ActivityStream): key = "home" - def get_audience(self, status): - audience = super().get_audience(status) + def _get_audience(self, status): + audience = super()._get_audience(status) if not audience: return [] return audience.filter( @@ -183,11 +187,11 @@ class LocalStream(ActivityStream): key = "local" - def get_audience(self, status): + def _get_audience(self, status): # this stream wants no part in non-public statuses if status.privacy != "public" or not status.user.local: return [] - return super().get_audience(status) + return super()._get_audience(status) def get_statuses_for_user(self, user): # all public statuses by a local user @@ -202,7 +206,7 @@ class BooksStream(ActivityStream): key = "books" - def get_audience(self, status): + def _get_audience(self, status): """anyone with the mentioned book on their shelves""" # only show public statuses on the books feed, # and only statuses that mention books @@ -217,7 +221,7 @@ class BooksStream(ActivityStream): else status.mention_books.first().parent_work ) - audience = super().get_audience(status) + audience = super()._get_audience(status) if not audience: return [] return audience.filter(shelfbook__book__parent_work=work).distinct() diff --git a/bookwyrm/tests/activitystreams/test_abstractstream.py b/bookwyrm/tests/activitystreams/test_abstractstream.py index 05b453f9a..a9f2cfdd3 100644 --- a/bookwyrm/tests/activitystreams/test_abstractstream.py +++ b/bookwyrm/tests/activitystreams/test_abstractstream.py @@ -118,9 +118,9 @@ class Activitystreams(TestCase): ) users = self.test_stream.get_audience(status) # remote users don't have feeds - self.assertFalse(self.remote_user in users) - self.assertTrue(self.local_user in users) - self.assertTrue(self.another_user in users) + self.assertFalse(self.remote_user.id in users) + self.assertTrue(self.local_user.id in users) + self.assertTrue(self.another_user.id in users) def test_abstractstream_get_audience_direct(self, *_): """get a list of users that should see a status""" @@ -141,9 +141,9 @@ class Activitystreams(TestCase): ) status.mention_users.add(self.local_user) users = self.test_stream.get_audience(status) - self.assertTrue(self.local_user in users) - self.assertFalse(self.another_user in users) - self.assertFalse(self.remote_user in users) + self.assertTrue(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) + self.assertFalse(self.remote_user.id in users) def test_abstractstream_get_audience_followers_remote_user(self, *_): """get a list of users that should see a status""" @@ -153,7 +153,7 @@ class Activitystreams(TestCase): privacy="followers", ) users = self.test_stream.get_audience(status) - self.assertFalse(users.exists()) + self.assertEqual(users, []) def test_abstractstream_get_audience_followers_self(self, *_): """get a list of users that should see a status""" @@ -164,9 +164,9 @@ class Activitystreams(TestCase): book=self.book, ) users = self.test_stream.get_audience(status) - self.assertTrue(self.local_user in users) - self.assertFalse(self.another_user in users) - self.assertFalse(self.remote_user in users) + self.assertTrue(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) + self.assertFalse(self.remote_user.id in users) def test_abstractstream_get_audience_followers_with_mention(self, *_): """get a list of users that should see a status""" @@ -179,9 +179,9 @@ class Activitystreams(TestCase): status.mention_users.add(self.local_user) users = self.test_stream.get_audience(status) - self.assertTrue(self.local_user in users) - self.assertFalse(self.another_user in users) - self.assertFalse(self.remote_user in users) + self.assertTrue(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) + self.assertFalse(self.remote_user.id in users) def test_abstractstream_get_audience_followers_with_relationship(self, *_): """get a list of users that should see a status""" @@ -193,6 +193,6 @@ class Activitystreams(TestCase): book=self.book, ) users = self.test_stream.get_audience(status) - self.assertFalse(self.local_user in users) - self.assertFalse(self.another_user in users) - self.assertFalse(self.remote_user in users) + self.assertFalse(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) + self.assertFalse(self.remote_user.id in users) diff --git a/bookwyrm/tests/activitystreams/test_homestream.py b/bookwyrm/tests/activitystreams/test_homestream.py index 10c806c8e..2dc975523 100644 --- a/bookwyrm/tests/activitystreams/test_homestream.py +++ b/bookwyrm/tests/activitystreams/test_homestream.py @@ -44,7 +44,7 @@ class Activitystreams(TestCase): user=self.remote_user, content="hi", privacy="public" ) users = activitystreams.HomeStream().get_audience(status) - self.assertFalse(users.exists()) + self.assertEqual(users, []) def test_homestream_get_audience_with_mentions(self, *_): """get a list of users that should see a status""" @@ -53,8 +53,8 @@ class Activitystreams(TestCase): ) status.mention_users.add(self.local_user) users = activitystreams.HomeStream().get_audience(status) - self.assertFalse(self.local_user in users) - self.assertFalse(self.another_user in users) + self.assertFalse(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) def test_homestream_get_audience_with_relationship(self, *_): """get a list of users that should see a status""" @@ -63,5 +63,5 @@ class Activitystreams(TestCase): user=self.remote_user, content="hi", privacy="public" ) users = activitystreams.HomeStream().get_audience(status) - self.assertTrue(self.local_user in users) - self.assertFalse(self.another_user in users) + self.assertTrue(self.local_user.id in users) + self.assertFalse(self.another_user.id in users) diff --git a/bookwyrm/tests/activitystreams/test_localstream.py b/bookwyrm/tests/activitystreams/test_localstream.py index d8bfb4fa9..14c5798dc 100644 --- a/bookwyrm/tests/activitystreams/test_localstream.py +++ b/bookwyrm/tests/activitystreams/test_localstream.py @@ -54,8 +54,8 @@ class Activitystreams(TestCase): user=self.local_user, content="hi", privacy="public" ) users = activitystreams.LocalStream().get_audience(status) - self.assertTrue(self.local_user in users) - self.assertTrue(self.another_user in users) + self.assertTrue(self.local_user.id in users) + self.assertTrue(self.another_user.id in users) def test_localstream_get_audience_unlisted(self, *_): """get a list of users that should see a status""" @@ -88,7 +88,7 @@ class Activitystreams(TestCase): ) # yes book, yes audience audience = activitystreams.BooksStream().get_audience(status) - self.assertTrue(self.local_user in audience) + self.assertTrue(self.local_user.id in audience) def test_localstream_get_audience_books_book_field(self, *_): """get a list of users that should see a status""" @@ -102,7 +102,7 @@ class Activitystreams(TestCase): ) # yes book, yes audience audience = activitystreams.BooksStream().get_audience(status) - self.assertTrue(self.local_user in audience) + self.assertTrue(self.local_user.id in audience) def test_localstream_get_audience_books_alternate_edition(self, *_): """get a list of users that should see a status""" @@ -119,7 +119,7 @@ class Activitystreams(TestCase): ) # yes book, yes audience audience = activitystreams.BooksStream().get_audience(status) - self.assertTrue(self.local_user in audience) + self.assertTrue(self.local_user.id in audience) def test_localstream_get_audience_books_non_public(self, *_): """get a list of users that should see a status""" From 56243f6529100dd7c2cd3a23f7c7b4581f0e1c93 Mon Sep 17 00:00:00 2001 From: Wesley Aptekar-Cassels Date: Thu, 9 Mar 2023 00:34:45 -0500 Subject: [PATCH 5/5] Optimize HomeStream.get_audience This splits HomeStream.get_audience into two separate database queries, in order to more effectively take advantage of the indexes we have. Combining the user ID query and the user following query means that Postgres isn't able to use the index we have on the userfollows table. The query planner claims that the userfollows query should be about 20 times faster than it was previously, and the id query should take a negligible amount of time, since it's selecting a single item by primary key. We don't need to worry about duplicates, since there is a constraint preventing a user from following themself. Fixes: #2720 --- bookwyrm/activitystreams.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index ea629a318..2f9e98042 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -160,14 +160,15 @@ class HomeStream(ActivityStream): key = "home" - def _get_audience(self, status): + def get_audience(self, status): audience = super()._get_audience(status) if not audience: return [] - return audience.filter( - Q(id=status.user.id) # if the user is the post's author - | Q(following=status.user) # if the user is following the author - ).distinct() + # if the user is the post's author + ids_self = [user.id for user in audience.filter(Q(id=status.user.id))] + # if the user is following the author + ids_following = [user.id for user in audience.filter(Q(following=status.user))] + return ids_self + ids_following def get_statuses_for_user(self, user): return models.Status.privacy_filter(