Merge pull request #2821 from bpeel/merge-commands

Add management commands to merge a pair of editions or authors
This commit is contained in:
Mouse Reeve 2023-04-25 16:27:12 -07:00 committed by GitHub
commit 8fa89f5ece
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 105 additions and 36 deletions

View file

@ -3,38 +3,7 @@ merge book data objects """
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models import Count from django.db.models import Count
from bookwyrm import models from bookwyrm import models
from bookwyrm.management.merge import merge_objects
def update_related(canonical, obj):
    """Re-point every object that references ``obj`` at ``canonical`` instead.

    Walks the reverse relations of ``canonical``; for each one, finds the
    rows of the referring model that point at ``obj`` and swaps the
    reference over to ``canonical``.
    """
    # (forward field name, model declaring that field) for each reverse relation
    referrers = [
        (relation.remote_field.name, relation.related_model)
        for relation in canonical._meta.related_objects
    ]
    for field_name, referring_model in referrers:
        lookup = {field_name: obj}
        for referrer in referring_model.objects.filter(**lookup):
            print("replacing in", referring_model.__name__, field_name, referrer.id)
            try:
                setattr(referrer, field_name, canonical)
                referrer.save()
            except TypeError:
                # presumably the field is many-to-many and cannot be assigned
                # directly, so swap the member through its manager instead
                getattr(referrer, field_name).add(canonical)
                getattr(referrer, field_name).remove(obj)
def copy_data(canonical, obj):
    """Fill empty fields on ``canonical`` with values from ``obj``, then save.

    Only fields that carry an ``activitypub_field`` attribute are
    considered. A value is copied when ``obj`` has a truthy value and
    ``canonical`` has a falsy one; existing data is never overwritten.
    """
    for field in obj._meta.get_fields():
        if hasattr(field, "activitypub_field"):
            incoming = getattr(obj, field.name)
            if incoming and not getattr(canonical, field.name):
                print("setting data field", field.name, incoming)
                setattr(canonical, field.name, incoming)
    # saved once at the end, whether or not anything was copied
    canonical.save()
def dedupe_model(model): def dedupe_model(model):
@ -61,10 +30,7 @@ def dedupe_model(model):
print("keeping", canonical.remote_id) print("keeping", canonical.remote_id)
for obj in objs[1:]: for obj in objs[1:]:
print(obj.remote_id) print(obj.remote_id)
copy_data(canonical, obj) merge_objects(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()
class Command(BaseCommand): class Command(BaseCommand):

View file

@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: permanently merges two author data
objects specified by ID """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand
class Command(MergeCommand):
    """Management command that merges one author into another, both given by ID."""

    # model whose instances this command merges
    MODEL = models.Author
    help = "merges specified authors into one"

View file

@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: permanently merges two edition data
objects specified by ID """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand
class Command(MergeCommand):
    """Management command that merges one edition into another, both given by ID."""

    # model whose instances this command merges
    MODEL = models.Edition
    help = "merges specified editions into one"

View file

@ -0,0 +1,50 @@
from django.db.models import ManyToManyField
def update_related(canonical, obj):
    """Re-point everything that references ``obj`` at ``canonical`` instead.

    Walks the reverse relations of ``canonical``; for each one, finds the
    rows of the referring model that point at ``obj`` and swaps the
    reference over to ``canonical``.
    """
    # (forward field name, model declaring that field) for each reverse relation
    referrers = [
        (relation.remote_field.name, relation.related_model)
        for relation in canonical._meta.related_objects
    ]
    for field_name, referring_model in referrers:
        # Skip the ManyToMany fields that aren't auto-created. These
        # should have a corresponding OneToMany field in the model for
        # the linking table anyway. If we update it through that model
        # instead then we won't lose the extra fields in the linking
        # table.
        field = referring_model._meta.get_field(field_name)
        if (
            isinstance(field, ManyToManyField)
            and not field.remote_field.through._meta.auto_created
        ):
            continue

        for referrer in referring_model.objects.filter(**{field_name: obj}):
            print("replacing in", referring_model.__name__, field_name, referrer.id)
            try:
                setattr(referrer, field_name, canonical)
                referrer.save()
            except TypeError:
                # presumably the field is many-to-many and cannot be assigned
                # directly, so swap the member through its manager instead
                getattr(referrer, field_name).add(canonical)
                getattr(referrer, field_name).remove(obj)
def copy_data(canonical, obj):
    """Fill empty fields on ``canonical`` with values from ``obj``, then save.

    Only fields that carry an ``activitypub_field`` attribute are
    considered. A value is copied when ``obj`` has a truthy value and
    ``canonical`` has a falsy one; existing data is never overwritten.
    """
    for field in obj._meta.get_fields():
        if hasattr(field, "activitypub_field"):
            incoming = getattr(obj, field.name)
            if incoming and not getattr(canonical, field.name):
                print("setting data field", field.name, incoming)
                setattr(canonical, field.name, incoming)
    # saved once at the end, whether or not anything was copied
    canonical.save()
def merge_objects(canonical, obj):
    """Merge ``obj`` into ``canonical`` and permanently delete ``obj``.

    Copies missing data onto ``canonical``, moves everything that
    references ``obj`` over to ``canonical``, then deletes ``obj``.

    Raises:
        ValueError: if ``canonical`` and ``obj`` compare equal. Deleting
            ``obj`` would then also delete the record meant to be kept.
    """
    # Django model instances compare equal when they share a concrete model
    # and primary key, so this also catches two separately loaded copies of
    # the same row.
    if canonical == obj:
        raise ValueError("cannot merge an object into itself")

    copy_data(canonical, obj)
    update_related(canonical, obj)
    # remove the outdated entry
    obj.delete()

View file

@ -0,0 +1,29 @@
from bookwyrm.management.merge import merge_objects
from django.core.management.base import BaseCommand
class MergeCommand(BaseCommand):
    """base class for merge commands

    Subclasses set ``MODEL`` to the model class whose instances are merged.
    The object given by ``--other`` is merged into the one given by
    ``--canonical`` and then deleted.
    """

    # model class to merge; concrete commands must override this
    MODEL = None

    def add_arguments(self, parser):
        """add the arguments for this command"""
        parser.add_argument("--canonical", type=int, required=True)
        parser.add_argument("--other", type=int, required=True)

    # pylint: disable=no-self-use,unused-argument
    def handle(self, *args, **options):
        """merge the two objects

        Prints a message and returns without changing anything when the two
        IDs are identical or when either object cannot be found.
        """
        model = self.MODEL
        model_name = model.__name__

        # Merging deletes the "other" object, so merging an object into
        # itself would delete the very record that should be kept.
        if options["canonical"] == options["other"]:
            print(f"cannot merge {model_name} {options['canonical']} into itself!")
            return

        try:
            canonical = model.objects.get(id=options["canonical"])
        except model.DoesNotExist:
            print(f"canonical {model_name} doesn't exist!")
            return
        try:
            other = model.objects.get(id=options["other"])
        except model.DoesNotExist:
            print(f"other {model_name} doesn't exist!")
            return

        merge_objects(canonical, other)