mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-23 08:36:32 +00:00
BookDataModel: add dry_run argument to merge_into
This commit is contained in:
parent
fb82c7a579
commit
4a690e675a
3 changed files with 39 additions and 14 deletions
|
@ -6,7 +6,7 @@ from django.db.models import Count
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
|
|
||||||
|
|
||||||
def dedupe_model(model):
|
def dedupe_model(model, dry_run=False):
|
||||||
"""combine duplicate editions and update related models"""
|
"""combine duplicate editions and update related models"""
|
||||||
print(f"deduplicating {model.__name__}:")
|
print(f"deduplicating {model.__name__}:")
|
||||||
fields = model._meta.get_fields()
|
fields = model._meta.get_fields()
|
||||||
|
@ -27,10 +27,13 @@ def dedupe_model(model):
|
||||||
print("----------")
|
print("----------")
|
||||||
objs = model.objects.filter(**{field.name: value}).order_by("id")
|
objs = model.objects.filter(**{field.name: value}).order_by("id")
|
||||||
canonical = objs.first()
|
canonical = objs.first()
|
||||||
print(f"merging into {canonical.remote_id} based on {field.name} {value}:")
|
action = "would merge" if dry_run else "merging"
|
||||||
|
print(
|
||||||
|
f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
|
||||||
|
)
|
||||||
for obj in objs[1:]:
|
for obj in objs[1:]:
|
||||||
print(f"- {obj.remote_id}")
|
print(f"- {obj.remote_id}")
|
||||||
absorbed_fields = obj.merge_into(canonical)
|
absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
|
||||||
print(f" absorbed fields: {absorbed_fields}")
|
print(f" absorbed fields: {absorbed_fields}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,9 +42,17 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
help = "merges duplicate book data"
|
help = "merges duplicate book data"
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
"""add the arguments for this command"""
|
||||||
|
parser.add_argument(
|
||||||
|
"--dry_run",
|
||||||
|
action="store_true",
|
||||||
|
help="don't actually merge, only print what would happen",
|
||||||
|
)
|
||||||
|
|
||||||
# pylint: disable=no-self-use,unused-argument
|
# pylint: disable=no-self-use,unused-argument
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
"""run deduplications"""
|
"""run deduplications"""
|
||||||
dedupe_model(models.Edition)
|
dedupe_model(models.Edition, dry_run=options["dry_run"])
|
||||||
dedupe_model(models.Work)
|
dedupe_model(models.Work, dry_run=options["dry_run"])
|
||||||
dedupe_model(models.Author)
|
dedupe_model(models.Author, dry_run=options["dry_run"])
|
||||||
|
|
|
@ -8,6 +8,11 @@ class MergeCommand(BaseCommand):
|
||||||
"""add the arguments for this command"""
|
"""add the arguments for this command"""
|
||||||
parser.add_argument("--canonical", type=int, required=True)
|
parser.add_argument("--canonical", type=int, required=True)
|
||||||
parser.add_argument("--other", type=int, required=True)
|
parser.add_argument("--other", type=int, required=True)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dry_run",
|
||||||
|
action="store_true",
|
||||||
|
help="don't actually merge, only print what would happen",
|
||||||
|
)
|
||||||
|
|
||||||
# pylint: disable=no-self-use,unused-argument
|
# pylint: disable=no-self-use,unused-argument
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
@ -25,6 +30,8 @@ class MergeCommand(BaseCommand):
|
||||||
print("other book doesn’t exist!")
|
print("other book doesn’t exist!")
|
||||||
return
|
return
|
||||||
|
|
||||||
absorbed_fields = other.merge_into(canonical)
|
absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"])
|
||||||
print(f"{other.remote_id} has been merged into {canonical.remote_id}")
|
|
||||||
|
action = "would be" if options["dry_run"] else "has been"
|
||||||
|
print(f"{other.remote_id} {action} merged into {canonical.remote_id}")
|
||||||
print(f"absorbed fields: {absorbed_fields}")
|
print(f"absorbed fields: {absorbed_fields}")
|
||||||
|
|
|
@ -110,12 +110,16 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
||||||
"""only send book data updates to other bookwyrm instances"""
|
"""only send book data updates to other bookwyrm instances"""
|
||||||
super().broadcast(activity, sender, software=software, **kwargs)
|
super().broadcast(activity, sender, software=software, **kwargs)
|
||||||
|
|
||||||
def merge_into(self, canonical: Self) -> Dict[str, Any]:
|
def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]:
|
||||||
"""merge this entity into another entity"""
|
"""merge this entity into another entity"""
|
||||||
if canonical.id == self.id:
|
if canonical.id == self.id:
|
||||||
raise ValueError(f"Cannot merge {self} into itself")
|
raise ValueError(f"Cannot merge {self} into itself")
|
||||||
|
|
||||||
absorbed_fields = canonical.absorb_data_from(self)
|
absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run)
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
return absorbed_fields
|
||||||
|
|
||||||
canonical.save()
|
canonical.save()
|
||||||
|
|
||||||
self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
|
self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
|
||||||
|
@ -149,7 +153,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
||||||
self.delete()
|
self.delete()
|
||||||
return absorbed_fields
|
return absorbed_fields
|
||||||
|
|
||||||
def absorb_data_from(self, other: Self) -> Dict[str, Any]:
|
def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]:
|
||||||
"""fill empty fields with values from another entity"""
|
"""fill empty fields with values from another entity"""
|
||||||
absorbed_fields = {}
|
absorbed_fields = {}
|
||||||
for data_field in self._meta.get_fields():
|
for data_field in self._meta.get_fields():
|
||||||
|
@ -162,7 +166,8 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
||||||
if isinstance(data_field, fields.ArrayField):
|
if isinstance(data_field, fields.ArrayField):
|
||||||
if new_values := list(set(other_value) - set(canonical_value)):
|
if new_values := list(set(other_value) - set(canonical_value)):
|
||||||
# append at the end (in no particular order)
|
# append at the end (in no particular order)
|
||||||
setattr(self, data_field.name, canonical_value + new_values)
|
if not dry_run:
|
||||||
|
setattr(self, data_field.name, canonical_value + new_values)
|
||||||
absorbed_fields[data_field.name] = new_values
|
absorbed_fields[data_field.name] = new_values
|
||||||
elif isinstance(data_field, fields.PartialDateField):
|
elif isinstance(data_field, fields.PartialDateField):
|
||||||
if (
|
if (
|
||||||
|
@ -170,11 +175,13 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
||||||
or (other_value.has_day and not canonical_value.has_day)
|
or (other_value.has_day and not canonical_value.has_day)
|
||||||
or (other_value.has_month and not canonical_value.has_month)
|
or (other_value.has_month and not canonical_value.has_month)
|
||||||
):
|
):
|
||||||
setattr(self, data_field.name, other_value)
|
if not dry_run:
|
||||||
|
setattr(self, data_field.name, other_value)
|
||||||
absorbed_fields[data_field.name] = other_value
|
absorbed_fields[data_field.name] = other_value
|
||||||
else:
|
else:
|
||||||
if not canonical_value:
|
if not canonical_value:
|
||||||
setattr(self, data_field.name, other_value)
|
if not dry_run:
|
||||||
|
setattr(self, data_field.name, other_value)
|
||||||
absorbed_fields[data_field.name] = other_value
|
absorbed_fields[data_field.name] = other_value
|
||||||
return absorbed_fields
|
return absorbed_fields
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue