mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-02-01 11:52:20 +00:00
Fix imports creating duplicate Editions
This fixes bugs at both the sending and receiving instances: As the primary fix is at the sending end, a full fix will need to be collaborative at the network level. **Fixing at the sending end: don't send duplicate GeneratedNotes** The primary fix is to `handle_reading_status`: it calls `create_generated_note` which saves and broadcasts the GeneratedNote. Before this fix, `handle_reading_status` then saved and broadcast the note again, creating two incoming GeneratedNotes for all federated BookWyrm servers, and thus triggering simultaneous or near-simultaneous imports of the associated book within the `tag` reference. This is then dereferenced, creating two identical Works, the first of which ends up empty and the second of which ends up with two identical Editions. **Fixing at the sending end: delay BookStatus broadcasts** My testing doesn't indicate one way or the other whether this happens, but there's a theoretical race condition for incoming editions that are ListItem or ShelfBook items. The Add activity needs to create the Edition and Work if they are not already on the receiving instance, and so does the Comment or GeneratedNote. The fix in `ActivitypubMixin::broadcast` sets a delay on these statuses to ensure that the Add activity is processed first, and thus the book will already exist when the status is processed. **Fixing at the receiving end: prevent dereferencing the incoming Edition** The book attached to a ListItem or ShelfBook is an Edition. This means that when a receiving instance dereferences it, we end up with a recursion: The Edition dereferences the parent Work, which dereferences all the child editions in celery tasks. This includes the Edition that triggered the whole process in the first place. I couldn't create a reproducable test of this, but I've seen evidence that this can cause a race condition where the incoming Edition ends up being duplicated - the task to create child Editions checks for existence before the original triggering Edition has saved. The changes to Note, Fields, and `ActivityObject::to_model` are aimed at preventing this by allowing us to compare related field ids to the original triggering remote_id before running `set_related_field` and excluding them if there is a match.
This commit is contained in:
parent
13381b9b4d
commit
32b041d10c
6 changed files with 45 additions and 13 deletions
|
@ -120,6 +120,7 @@ class ActivityObject:
|
|||
save: bool = True,
|
||||
overwrite: bool = True,
|
||||
allow_external_connections: bool = True,
|
||||
trigger=None,
|
||||
) -> Optional[TBookWyrmModel]:
|
||||
"""convert from an activity to a model instance. Args:
|
||||
model: the django model that this object is being converted to
|
||||
|
@ -133,6 +134,9 @@ class ActivityObject:
|
|||
only update blank fields if false
|
||||
allow_external_connections: look up missing data if true,
|
||||
throw an exception if false and an external connection is needed
|
||||
trigger: the object that originally triggered this
|
||||
self.to_model. e.g. if this is a Work being dereferenced from
|
||||
an incoming Edition
|
||||
"""
|
||||
model = model or get_model_from_type(self.type)
|
||||
|
||||
|
@ -223,6 +227,8 @@ class ActivityObject:
|
|||
related_field_name = model_field.field.name
|
||||
|
||||
for item in values:
|
||||
if trigger and item == trigger.remote_id:
|
||||
continue
|
||||
set_related_field.delay(
|
||||
related_model.__name__,
|
||||
instance.__class__.__name__,
|
||||
|
|
|
@ -50,6 +50,7 @@ class Note(ActivityObject):
|
|||
save=True,
|
||||
overwrite=True,
|
||||
allow_external_connections=True,
|
||||
trigger=None,
|
||||
):
|
||||
instance = super().to_model(
|
||||
model, instance, allow_create, save, overwrite, allow_external_connections
|
||||
|
|
|
@ -129,7 +129,20 @@ class ActivitypubMixin:
|
|||
|
||||
def broadcast(self, activity, sender, software=None, queue=BROADCAST):
|
||||
"""send out an activity"""
|
||||
|
||||
# if we're posting about ShelfBooks, set a delay to give the base activity
|
||||
# time to add the book on remote servers first to avoid race conditions
|
||||
countdown = (
|
||||
10
|
||||
if (
|
||||
isinstance(activity, object)
|
||||
and not isinstance(activity["object"], str)
|
||||
and activity["object"].get("type", None) in ["GeneratedNote", "Comment"]
|
||||
)
|
||||
else 0
|
||||
)
|
||||
broadcast_task.apply_async(
|
||||
countdown=countdown,
|
||||
args=(
|
||||
sender.id,
|
||||
json.dumps(activity, cls=activitypub.ActivityEncoder),
|
||||
|
@ -227,6 +240,7 @@ class ObjectMixin(ActivitypubMixin):
|
|||
return
|
||||
|
||||
try:
|
||||
# TODO: here is where we might use an ActivityPub extension instead
|
||||
# do we have a "pure" activitypub version of this for mastodon?
|
||||
if software != "bookwyrm" and hasattr(self, "pure_content"):
|
||||
pure_activity = self.to_create_activity(user, pure=True)
|
||||
|
|
|
@ -86,7 +86,9 @@ class ActivitypubFieldMixin:
|
|||
raise
|
||||
value = getattr(data, "actor")
|
||||
formatted = self.field_from_activity(
|
||||
value, allow_external_connections=allow_external_connections
|
||||
value,
|
||||
allow_external_connections=allow_external_connections,
|
||||
trigger=instance,
|
||||
)
|
||||
if formatted is None or formatted is MISSING or formatted == {}:
|
||||
return False
|
||||
|
@ -128,7 +130,7 @@ class ActivitypubFieldMixin:
|
|||
return value
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
"""formatter to convert activitypub into a model value"""
|
||||
if value and hasattr(self, "activitypub_wrapper"):
|
||||
value = value.get(self.activitypub_wrapper)
|
||||
|
@ -150,7 +152,9 @@ class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
|
|||
self.load_remote = load_remote
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
"""trigger: the object that triggered this deserialization.
|
||||
For example the Edition for which self is the parent Work"""
|
||||
if not value:
|
||||
return None
|
||||
|
||||
|
@ -160,7 +164,7 @@ class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
|
|||
# only look in the local database
|
||||
return related_model.find_existing(value.serialize())
|
||||
# this is an activitypub object, which we can deserialize
|
||||
return value.to_model(model=related_model)
|
||||
return value.to_model(model=related_model, trigger=trigger)
|
||||
try:
|
||||
# make sure the value looks like a remote id
|
||||
validate_remote_id(value)
|
||||
|
@ -336,7 +340,7 @@ class ManyToManyField(ActivitypubFieldMixin, models.ManyToManyField):
|
|||
return f"{value.instance.remote_id}/{self.name}"
|
||||
return [i.remote_id for i in value.all()]
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
if value is None or value is MISSING:
|
||||
return None
|
||||
if not isinstance(value, list):
|
||||
|
@ -386,7 +390,7 @@ class TagField(ManyToManyField):
|
|||
)
|
||||
return tags
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
if not isinstance(value, list):
|
||||
# GoToSocial DMs and single-user mentions are
|
||||
# sent as objects, not as an array of objects
|
||||
|
@ -481,7 +485,7 @@ class ImageField(ActivitypubFieldMixin, models.ImageField):
|
|||
|
||||
return activitypub.Image(url=url, name=alt)
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
image_slug = value
|
||||
# when it's an inline image (User avatar/icon, Book cover), it's a json
|
||||
# blob, but when it's an attached image, it's just a url
|
||||
|
@ -538,7 +542,7 @@ class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
|
|||
return None
|
||||
return value.isoformat()
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
missing_fields = datetime(1970, 1, 1) # "2022-10" => "2022-10-01"
|
||||
try:
|
||||
date_value = dateutil.parser.parse(value, default=missing_fields)
|
||||
|
@ -556,7 +560,7 @@ class PartialDateField(ActivitypubFieldMixin, PartialDateModel):
|
|||
def field_to_activity(self, value) -> str:
|
||||
return value.partial_isoformat() if value else None
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
# pylint: disable=no-else-return
|
||||
try:
|
||||
return from_partial_isoformat(value)
|
||||
|
@ -584,7 +588,7 @@ class PartialDateField(ActivitypubFieldMixin, PartialDateModel):
|
|||
class HtmlField(ActivitypubFieldMixin, models.TextField):
|
||||
"""a text field for storing html"""
|
||||
|
||||
def field_from_activity(self, value, allow_external_connections=True):
|
||||
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
|
||||
if not value or value == MISSING:
|
||||
return None
|
||||
return clean(value)
|
||||
|
|
|
@ -70,7 +70,9 @@ class ReadingViews(TestCase):
|
|||
},
|
||||
)
|
||||
request.user = self.local_user
|
||||
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
|
||||
with patch(
|
||||
"bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"
|
||||
) as mock:
|
||||
views.ReadingStatus.as_view()(request, "start", self.book.id)
|
||||
|
||||
self.assertEqual(shelf.books.get(), self.book)
|
||||
|
@ -86,6 +88,12 @@ class ReadingViews(TestCase):
|
|||
self.assertEqual(readthrough.user, self.local_user)
|
||||
self.assertEqual(readthrough.book, self.book)
|
||||
|
||||
# Three broadcast tasks:
|
||||
# 1. Create Readthrough
|
||||
# 2. Create post as pure_content (for non-BookWyrm)
|
||||
# 3. Create post with book attached - this should only happen once!
|
||||
self.assertEqual(len(mock.mock_calls), 3)
|
||||
|
||||
def test_start_reading_with_comment(self, *_):
|
||||
"""begin a book"""
|
||||
shelf = self.local_user.shelf_set.get(identifier=models.Shelf.READING)
|
||||
|
|
|
@ -156,8 +156,7 @@ def handle_reading_status(user, shelf, book, privacy):
|
|||
# it's a non-standard shelf, don't worry about it
|
||||
return
|
||||
|
||||
status = create_generated_note(user, message, mention_books=[book], privacy=privacy)
|
||||
status.save()
|
||||
create_generated_note(user, message, mention_books=[book], privacy=privacy)
|
||||
|
||||
|
||||
def load_date_in_user_tz_as_utc(date_str: str, user: models.User) -> datetime:
|
||||
|
|
Loading…
Reference in a new issue