diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index 5ca8496b0..dde7d133c 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -3,38 +3,7 @@ merge book data objects """ from django.core.management.base import BaseCommand from django.db.models import Count from bookwyrm import models - - -def update_related(canonical, obj): - """update all the models with fk to the object being removed""" - # move related models to canonical - related_models = [ - (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects - ] - for (related_field, related_model) in related_models: - related_objs = related_model.objects.filter(**{related_field: obj}) - for related_obj in related_objs: - print("replacing in", related_model.__name__, related_field, related_obj.id) - try: - setattr(related_obj, related_field, canonical) - related_obj.save() - except TypeError: - getattr(related_obj, related_field).add(canonical) - getattr(related_obj, related_field).remove(obj) - - -def copy_data(canonical, obj): - """try to get the most data possible""" - for data_field in obj._meta.get_fields(): - if not hasattr(data_field, "activitypub_field"): - continue - data_value = getattr(obj, data_field.name) - if not data_value: - continue - if not getattr(canonical, data_field.name): - print("setting data field", data_field.name, data_value) - setattr(canonical, data_field.name, data_value) - canonical.save() +from bookwyrm.management.merge import merge_objects def dedupe_model(model): @@ -61,10 +30,7 @@ def dedupe_model(model): print("keeping", canonical.remote_id) for obj in objs[1:]: print(obj.remote_id) - copy_data(canonical, obj) - update_related(canonical, obj) - # remove the outdated entry - obj.delete() + merge_objects(canonical, obj) class Command(BaseCommand): diff --git a/bookwyrm/management/commands/merge_authors.py b/bookwyrm/management/commands/merge_authors.py new file mode 100644 index 000000000..7465df147 --- /dev/null +++ b/bookwyrm/management/commands/merge_authors.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge author data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two authors by ID""" + + help = "merges specified authors into one" + + MODEL = models.Author diff --git a/bookwyrm/management/commands/merge_editions.py b/bookwyrm/management/commands/merge_editions.py new file mode 100644 index 000000000..9ed696201 --- /dev/null +++ b/bookwyrm/management/commands/merge_editions.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge edition data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two editions by ID""" + + help = "merges specified editions into one" + + MODEL = models.Edition diff --git a/bookwyrm/management/merge.py b/bookwyrm/management/merge.py new file mode 100644 index 000000000..f55229f18 --- /dev/null +++ b/bookwyrm/management/merge.py @@ -0,0 +1,50 @@ +from django.db.models import ManyToManyField + + +def update_related(canonical, obj): + """update all the models with fk to the object being removed""" + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects + ] + for (related_field, related_model) in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue + related_objs = related_model.objects.filter(**{related_field: obj}) + for related_obj in related_objs: + print("replacing in", related_model.__name__, related_field, related_obj.id) + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(obj) + + +def copy_data(canonical, obj): + """try to get the most data possible""" + for data_field in obj._meta.get_fields(): + if not hasattr(data_field, "activitypub_field"): + continue + data_value = getattr(obj, data_field.name) + if not data_value: + continue + if not getattr(canonical, data_field.name): + print("setting data field", data_field.name, data_value) + setattr(canonical, data_field.name, data_value) + canonical.save() + + +def merge_objects(canonical, obj): + copy_data(canonical, obj) + update_related(canonical, obj) + # remove the outdated entry + obj.delete() diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py new file mode 100644 index 000000000..805dc73fa --- /dev/null +++ b/bookwyrm/management/merge_command.py @@ -0,0 +1,29 @@ +from bookwyrm.management.merge import merge_objects +from django.core.management.base import BaseCommand + + +class MergeCommand(BaseCommand): + """base class for merge commands""" + + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument("--canonical", type=int, required=True) + parser.add_argument("--other", type=int, required=True) + + # pylint: disable=no-self-use,unused-argument + def handle(self, *args, **options): + """merge the two objects""" + model = self.MODEL + + try: + canonical = model.objects.get(id=options["canonical"]) + except model.DoesNotExist: + print("canonical book doesn’t exist!") + return + try: + other = model.objects.get(id=options["other"]) + except model.DoesNotExist: + print("other book doesn’t exist!") + return + + merge_objects(canonical, other)