From 2bbc9a16adfb2a79109f6e3ae06b6ca016c8e746 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Sat, 15 Apr 2023 11:36:18 +0200 Subject: [PATCH 1/4] Fix deduplicating books that are on a shelf or in a list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously when the deduplicate_book_data script tried to merge an edition that was on a shelf or in a list then it would fail because when the canonical book was added to the shelf or the list then it wouldn’t set the extra fields of the linking table for the “through” model of the field. These would end up defaulting to NULL, but that is not valid for some of the fields in ShelfItem and ListItem so postgres wouldn’t accept it. To fix that, this patch makes it skip updating fields that have a non-autogenerated linking table. The linking table would appear as a separate model anyway so the book will be moved via that instead. Fixes: #2817 --- .../management/commands/deduplicate_book_data.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index 5ca8496b0..4519d2cff 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -1,7 +1,7 @@ """ PROCEED WITH CAUTION: uses deduplication fields to permanently merge book data objects """ from django.core.management.base import BaseCommand -from django.db.models import Count +from django.db.models import Count, ManyToManyField from bookwyrm import models @@ -12,6 +12,16 @@ def update_related(canonical, obj): (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects ] for (related_field, related_model) in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue related_objs = related_model.objects.filter(**{related_field: obj}) for related_obj in related_objs: print("replacing in", related_model.__name__, related_field, related_obj.id) From 71e2486d017a35f217bfdae7b281465d039f5ce0 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 14 Apr 2023 22:35:07 +0200 Subject: [PATCH 2/4] Move the merging code from deduplicate code to common module That way it can be used in a new command to merge individual items. --- .../commands/deduplicate_book_data.py | 50 ++----------------- bookwyrm/management/merge.py | 50 +++++++++++++++++++ 2 files changed, 53 insertions(+), 47 deletions(-) create mode 100644 bookwyrm/management/merge.py diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index 4519d2cff..dde7d133c 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -1,50 +1,9 @@ """ PROCEED WITH CAUTION: uses deduplication fields to permanently merge book data objects """ from django.core.management.base import BaseCommand -from django.db.models import Count, ManyToManyField +from django.db.models import Count from bookwyrm import models - - -def update_related(canonical, obj): - """update all the models with fk to the object being removed""" - # move related models to canonical - related_models = [ - (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects - ] - for (related_field, related_model) in related_models: - # Skip the ManyToMany fields that aren’t auto-created. These - # should have a corresponding OneToMany field in the model for - # the linking table anyway. If we update it through that model - # instead then we won’t lose the extra fields in the linking - # table. - related_field_obj = related_model._meta.get_field(related_field) - if isinstance(related_field_obj, ManyToManyField): - through = related_field_obj.remote_field.through - if not through._meta.auto_created: - continue - related_objs = related_model.objects.filter(**{related_field: obj}) - for related_obj in related_objs: - print("replacing in", related_model.__name__, related_field, related_obj.id) - try: - setattr(related_obj, related_field, canonical) - related_obj.save() - except TypeError: - getattr(related_obj, related_field).add(canonical) - getattr(related_obj, related_field).remove(obj) - - -def copy_data(canonical, obj): - """try to get the most data possible""" - for data_field in obj._meta.get_fields(): - if not hasattr(data_field, "activitypub_field"): - continue - data_value = getattr(obj, data_field.name) - if not data_value: - continue - if not getattr(canonical, data_field.name): - print("setting data field", data_field.name, data_value) - setattr(canonical, data_field.name, data_value) - canonical.save() +from bookwyrm.management.merge import merge_objects def dedupe_model(model): @@ -71,10 +30,7 @@ def dedupe_model(model): print("keeping", canonical.remote_id) for obj in objs[1:]: print(obj.remote_id) - copy_data(canonical, obj) - update_related(canonical, obj) - # remove the outdated entry - obj.delete() + merge_objects(canonical, obj) class Command(BaseCommand): diff --git a/bookwyrm/management/merge.py b/bookwyrm/management/merge.py new file mode 100644 index 000000000..f55229f18 --- /dev/null +++ b/bookwyrm/management/merge.py @@ -0,0 +1,50 @@ +from django.db.models import ManyToManyField + + +def update_related(canonical, obj): + """update all the models with fk to the object being removed""" + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects + ] + for (related_field, related_model) in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue + related_objs = related_model.objects.filter(**{related_field: obj}) + for related_obj in related_objs: + print("replacing in", related_model.__name__, related_field, related_obj.id) + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(obj) + + +def copy_data(canonical, obj): + """try to get the most data possible""" + for data_field in obj._meta.get_fields(): + if not hasattr(data_field, "activitypub_field"): + continue + data_value = getattr(obj, data_field.name) + if not data_value: + continue + if not getattr(canonical, data_field.name): + print("setting data field", data_field.name, data_value) + setattr(canonical, data_field.name, data_value) + canonical.save() + + +def merge_objects(canonical, obj): + copy_data(canonical, obj) + update_related(canonical, obj) + # remove the outdated entry + obj.delete() From bd893e29de710e12c8de92154e8c8c4d81db0771 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Fri, 14 Apr 2023 23:05:30 +0200 Subject: [PATCH 3/4] Add management commands to merge editions and authors --- bookwyrm/management/commands/merge_authors.py | 12 ++++++++ .../management/commands/merge_editions.py | 12 ++++++++ bookwyrm/management/merge_command.py | 30 +++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 bookwyrm/management/commands/merge_authors.py create mode 100644 bookwyrm/management/commands/merge_editions.py create mode 100644 bookwyrm/management/merge_command.py diff --git a/bookwyrm/management/commands/merge_authors.py b/bookwyrm/management/commands/merge_authors.py new file mode 100644 index 000000000..7465df147 --- /dev/null +++ b/bookwyrm/management/commands/merge_authors.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge author data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two authors by ID""" + + help = "merges specified authors into one" + + MODEL = models.Author diff --git a/bookwyrm/management/commands/merge_editions.py b/bookwyrm/management/commands/merge_editions.py new file mode 100644 index 000000000..9ed696201 --- /dev/null +++ b/bookwyrm/management/commands/merge_editions.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge edition data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two editions by ID""" + + help = "merges specified editions into one" + + MODEL = models.Edition diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py new file mode 100644 index 000000000..c8f44fc80 --- /dev/null +++ b/bookwyrm/management/merge_command.py @@ -0,0 +1,30 @@ +from bookwyrm.management.merge import merge_objects +from django.core.management.base import BaseCommand + + +class MergeCommand(BaseCommand): + """base class for merge commands""" + + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument("--canonical", type=int, required=True) + parser.add_argument("--other", type=int, required=True) + + # pylint: disable=no-self-use,unused-argument + def handle(self, *args, **options): + """merge the two objects""" + model = self.MODEL + + try: + canonical = model.objects.get(id=options["canonical"]) + except model.DoesNotExist: + print("canonical book doesn’t exist!") + return + try: + other = model.objects.get(id=options["other"]) + except model.DoesNotExist: + print("other book doesn’t exist!") + return + + merge_objects(canonical, other) + From e28562949b4c5cbac34310b59d7530038fc48e54 Mon Sep 17 00:00:00 2001 From: Jascha Urbach Date: Sat, 15 Apr 2023 16:31:15 +0200 Subject: [PATCH 4/4] ./bw.-dev black get rid of the black error. --- bookwyrm/management/merge_command.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index c8f44fc80..805dc73fa 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -27,4 +27,3 @@ class MergeCommand(BaseCommand): return merge_objects(canonical, other) -