BookDataModel: add dry_run argument to merge_into

This commit is contained in:
Bart Schuurmans 2024-03-05 17:12:51 +01:00
parent fb82c7a579
commit 4a690e675a
3 changed files with 39 additions and 14 deletions

View file

@@ -6,7 +6,7 @@ from django.db.models import Count
from bookwyrm import models
def dedupe_model(model):
def dedupe_model(model, dry_run=False):
"""combine duplicate editions and update related models"""
print(f"deduplicating {model.__name__}:")
fields = model._meta.get_fields()
@@ -27,10 +27,13 @@ def dedupe_model(model):
print("----------")
objs = model.objects.filter(**{field.name: value}).order_by("id")
canonical = objs.first()
print(f"merging into {canonical.remote_id} based on {field.name} {value}:")
action = "would merge" if dry_run else "merging"
print(
f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
)
for obj in objs[1:]:
print(f"- {obj.remote_id}")
absorbed_fields = obj.merge_into(canonical)
absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
print(f" absorbed fields: {absorbed_fields}")
@@ -39,9 +42,17 @@ class Command(BaseCommand):
help = "merges duplicate book data"
def add_arguments(self, parser):
"""add the arguments for this command"""
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
"""run deduplications"""
dedupe_model(models.Edition)
dedupe_model(models.Work)
dedupe_model(models.Author)
dedupe_model(models.Edition, dry_run=options["dry_run"])
dedupe_model(models.Work, dry_run=options["dry_run"])
dedupe_model(models.Author, dry_run=options["dry_run"])

View file

@@ -8,6 +8,11 @@ class MergeCommand(BaseCommand):
"""add the arguments for this command"""
parser.add_argument("--canonical", type=int, required=True)
parser.add_argument("--other", type=int, required=True)
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
@@ -25,6 +30,8 @@ class MergeCommand(BaseCommand):
print("other book doesnt exist!")
return
absorbed_fields = other.merge_into(canonical)
print(f"{other.remote_id} has been merged into {canonical.remote_id}")
absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"])
action = "would be" if options["dry_run"] else "has been"
print(f"{other.remote_id} {action} merged into {canonical.remote_id}")
print(f"absorbed fields: {absorbed_fields}")

View file

@@ -110,12 +110,16 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
"""only send book data updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software, **kwargs)
def merge_into(self, canonical: Self) -> Dict[str, Any]:
def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]:
"""merge this entity into another entity"""
if canonical.id == self.id:
raise ValueError(f"Cannot merge {self} into itself")
absorbed_fields = canonical.absorb_data_from(self)
absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run)
if dry_run:
return absorbed_fields
canonical.save()
self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
@@ -149,7 +153,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
self.delete()
return absorbed_fields
def absorb_data_from(self, other: Self) -> Dict[str, Any]:
def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]:
"""fill empty fields with values from another entity"""
absorbed_fields = {}
for data_field in self._meta.get_fields():
@@ -162,7 +166,8 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
if isinstance(data_field, fields.ArrayField):
if new_values := list(set(other_value) - set(canonical_value)):
# append at the end (in no particular order)
setattr(self, data_field.name, canonical_value + new_values)
if not dry_run:
setattr(self, data_field.name, canonical_value + new_values)
absorbed_fields[data_field.name] = new_values
elif isinstance(data_field, fields.PartialDateField):
if (
@ -170,11 +175,13 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
or (other_value.has_day and not canonical_value.has_day)
or (other_value.has_month and not canonical_value.has_month)
):
setattr(self, data_field.name, other_value)
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
else:
if not canonical_value:
setattr(self, data_field.name, other_value)
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
return absorbed_fields