From 31babdfa510d88f89d08cbfb56de94cd8c0ac028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Fri, 26 Jan 2024 06:01:34 -0300 Subject: [PATCH 1/2] Always prefer shared inboxes when computing receipent lists This avoids duplicate submissions to remote instances when mentioning followers (i.e., `POST /user/foo/inbox` followed by `POST /inbox`, which results in two separate `add_status` tasks, and might generate duplicates in the target instance). --- bookwyrm/models/activitypub_mixin.py | 2 +- bookwyrm/tests/models/test_activitypub_mixin.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/bookwyrm/models/activitypub_mixin.py b/bookwyrm/models/activitypub_mixin.py index d0a941f43..41772a162 100644 --- a/bookwyrm/models/activitypub_mixin.py +++ b/bookwyrm/models/activitypub_mixin.py @@ -153,7 +153,7 @@ class ActivitypubMixin: mentions = self.recipients if hasattr(self, "recipients") else [] # we always send activities to explicitly mentioned users' inboxes - recipients = [u.inbox for u in mentions or [] if not u.local] + recipients = [u.shared_inbox or u.inbox for u in mentions if not u.local] # unless it's a dm, all the followers should receive the activity if privacy != "direct": diff --git a/bookwyrm/tests/models/test_activitypub_mixin.py b/bookwyrm/tests/models/test_activitypub_mixin.py index cad970412..2f6fad76d 100644 --- a/bookwyrm/tests/models/test_activitypub_mixin.py +++ b/bookwyrm/tests/models/test_activitypub_mixin.py @@ -227,14 +227,18 @@ class ActivitypubMixins(TestCase): shared_inbox="http://example.com/inbox", outbox="https://example.com/users/nutria/outbox", ) - MockSelf = namedtuple("Self", ("privacy", "user")) - mock_self = MockSelf("public", self.local_user) + MockSelf = namedtuple("Self", ("privacy", "user", "recipients")) self.local_user.followers.add(self.remote_user) self.local_user.followers.add(another_remote_user) + mock_self = MockSelf("public", self.local_user, []) recipients = ActivitypubMixin.get_recipients(mock_self) - self.assertEqual(len(recipients), 1) - self.assertEqual(recipients[0], "http://example.com/inbox") + self.assertCountEqual(recipients, ["http://example.com/inbox"]) + + # should also work with recipient that is a follower + mock_self.recipients.append(another_remote_user) + recipients = ActivitypubMixin.get_recipients(mock_self) + self.assertCountEqual(recipients, ["http://example.com/inbox"]) def test_get_recipients_software(self, *_): """should differentiate between bookwyrm and other remote users""" From 8ac873419fe66de65cdddf6a25560ed24c4e4a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Fri, 26 Jan 2024 06:29:59 -0300 Subject: [PATCH 2/2] refactor: eagerly use a set in recipients, get_recipients --- bookwyrm/models/activitypub_mixin.py | 25 +++++++++++++------------ bookwyrm/models/status.py | 6 +++--- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/bookwyrm/models/activitypub_mixin.py b/bookwyrm/models/activitypub_mixin.py index 41772a162..db737b8bc 100644 --- a/bookwyrm/models/activitypub_mixin.py +++ b/bookwyrm/models/activitypub_mixin.py @@ -152,8 +152,9 @@ class ActivitypubMixin: # find anyone who's tagged in a status, for example mentions = self.recipients if hasattr(self, "recipients") else [] - # we always send activities to explicitly mentioned users' inboxes - recipients = [u.shared_inbox or u.inbox for u in mentions if not u.local] + # we always send activities to explicitly mentioned users (using shared inboxes + # where available to avoid duplicate submissions to a given instance) + recipients = {u.shared_inbox or u.inbox for u in mentions if not u.local} # unless it's a dm, all the followers should receive the activity if privacy != "direct": @@ -173,18 +174,18 @@ class ActivitypubMixin: if user: queryset = queryset.filter(following=user) - # ideally, we will send to shared inboxes for efficiency - shared_inboxes = ( - queryset.filter(shared_inbox__isnull=False) - .values_list("shared_inbox", flat=True) - .distinct() + # as above, we prefer shared inboxes if available + recipients.update( + queryset.filter(shared_inbox__isnull=False).values_list( + "shared_inbox", flat=True + ) ) - # but not everyone has a shared inbox - inboxes = queryset.filter(shared_inbox__isnull=True).values_list( - "inbox", flat=True + recipients.update( + queryset.filter(shared_inbox__isnull=True).values_list( + "inbox", flat=True + ) ) - recipients += list(shared_inboxes) + list(inboxes) - return list(set(recipients)) + return list(recipients) def to_activity_dataclass(self): """convert from a model to an activity""" diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index cc44fe2bf..0d1d0d839 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -107,14 +107,14 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): @property def recipients(self): """tagged users who definitely need to get this status in broadcast""" - mentions = [u for u in self.mention_users.all() if not u.local] + mentions = {u for u in self.mention_users.all() if not u.local} if ( hasattr(self, "reply_parent") and self.reply_parent and not self.reply_parent.user.local ): - mentions.append(self.reply_parent.user) - return list(set(mentions)) + mentions.add(self.reply_parent.user) + return list(mentions) @classmethod def ignore_activity(