mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-06-05 23:09:33 +00:00
BookDataModel: implement merge_into method
This commit is contained in:
parent
6b672eda44
commit
37c7447555
|
@ -1,9 +1,9 @@
|
|||
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
|
||||
merge book data objects """
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Count
|
||||
from bookwyrm import models
|
||||
from bookwyrm.management.merge import merge_objects
|
||||
|
||||
|
||||
def dedupe_model(model):
|
||||
|
@ -16,7 +16,8 @@ def dedupe_model(model):
|
|||
dupes = (
|
||||
model.objects.values(field.name)
|
||||
.annotate(Count(field.name))
|
||||
.filter(**{"%s__count__gt" % field.name: 1})
|
||||
.filter(merged_into__isnull=True, **{"%s__count__gt" % field.name: 1})
|
||||
.exclude(**{field.name: "", "%s__isnull" % field.name: True})
|
||||
)
|
||||
|
||||
for dupe in dupes:
|
||||
|
@ -30,13 +31,14 @@ def dedupe_model(model):
|
|||
print("keeping", canonical.remote_id)
|
||||
for obj in objs[1:]:
|
||||
print(obj.remote_id)
|
||||
merge_objects(canonical, obj)
|
||||
obj.merge_into(canonical)
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""deduplicate allllll the book data models"""
|
||||
|
||||
help = "merges duplicate book data"
|
||||
|
||||
# pylint: disable=no-self-use,unused-argument
|
||||
def handle(self, *args, **options):
|
||||
"""run deduplications"""
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
from django.db.models import ManyToManyField
|
||||
|
||||
|
||||
def update_related(canonical, obj):
|
||||
"""update all the models with fk to the object being removed"""
|
||||
# move related models to canonical
|
||||
related_models = [
|
||||
(r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
|
||||
]
|
||||
for (related_field, related_model) in related_models:
|
||||
# Skip the ManyToMany fields that aren’t auto-created. These
|
||||
# should have a corresponding OneToMany field in the model for
|
||||
# the linking table anyway. If we update it through that model
|
||||
# instead then we won’t lose the extra fields in the linking
|
||||
# table.
|
||||
related_field_obj = related_model._meta.get_field(related_field)
|
||||
if isinstance(related_field_obj, ManyToManyField):
|
||||
through = related_field_obj.remote_field.through
|
||||
if not through._meta.auto_created:
|
||||
continue
|
||||
related_objs = related_model.objects.filter(**{related_field: obj})
|
||||
for related_obj in related_objs:
|
||||
print("replacing in", related_model.__name__, related_field, related_obj.id)
|
||||
try:
|
||||
setattr(related_obj, related_field, canonical)
|
||||
related_obj.save()
|
||||
except TypeError:
|
||||
getattr(related_obj, related_field).add(canonical)
|
||||
getattr(related_obj, related_field).remove(obj)
|
||||
|
||||
|
||||
def copy_data(canonical, obj):
|
||||
"""try to get the most data possible"""
|
||||
for data_field in obj._meta.get_fields():
|
||||
if not hasattr(data_field, "activitypub_field"):
|
||||
continue
|
||||
data_value = getattr(obj, data_field.name)
|
||||
if not data_value:
|
||||
continue
|
||||
if not getattr(canonical, data_field.name):
|
||||
print("setting data field", data_field.name, data_value)
|
||||
setattr(canonical, data_field.name, data_value)
|
||||
canonical.save()
|
||||
|
||||
|
||||
def merge_objects(canonical, obj):
|
||||
copy_data(canonical, obj)
|
||||
update_related(canonical, obj)
|
||||
# remove the outdated entry
|
||||
obj.delete()
|
|
@ -1,4 +1,3 @@
|
|||
from bookwyrm.management.merge import merge_objects
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
|
@ -26,4 +25,4 @@ class MergeCommand(BaseCommand):
|
|||
print("other book doesn’t exist!")
|
||||
return
|
||||
|
||||
merge_objects(canonical, other)
|
||||
other.merge_into(canonical)
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
from itertools import chain
|
||||
import re
|
||||
from typing import Any
|
||||
from typing_extensions import Self
|
||||
|
||||
from django.contrib.postgres.search import SearchVectorField
|
||||
from django.contrib.postgres.indexes import GinIndex
|
||||
from django.core.cache import cache
|
||||
from django.db import models, transaction
|
||||
from django.db.models import Prefetch
|
||||
from django.db.models import Prefetch, ManyToManyField
|
||||
from django.dispatch import receiver
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from model_utils import FieldTracker
|
||||
|
@ -112,6 +113,58 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
|||
"""only send book data updates to other bookwyrm instances"""
|
||||
super().broadcast(activity, sender, software=software, **kwargs)
|
||||
|
||||
def merge_into(self, canonical: Self) -> None:
|
||||
"""merge this entity into another entity"""
|
||||
if canonical.id == self.id:
|
||||
raise ValueError(f"Cannot merge {self} into itself")
|
||||
if canonical.merged_into:
|
||||
raise ValueError(
|
||||
f"Cannot merge {self} into {canonical} because "
|
||||
f"{canonical} has itself been merged into {canonical.merged_into}"
|
||||
)
|
||||
canonical.adopt_data_from(self)
|
||||
canonical.save()
|
||||
|
||||
# move related models to canonical
|
||||
related_models = [
|
||||
(r.remote_field.name, r.related_model) for r in self._meta.related_objects
|
||||
]
|
||||
# pylint: disable=protected-access
|
||||
for related_field, related_model in related_models:
|
||||
# Skip the ManyToMany fields that aren’t auto-created. These
|
||||
# should have a corresponding OneToMany field in the model for
|
||||
# the linking table anyway. If we update it through that model
|
||||
# instead then we won’t lose the extra fields in the linking
|
||||
# table.
|
||||
# pylint: disable=protected-access
|
||||
related_field_obj = related_model._meta.get_field(related_field)
|
||||
if isinstance(related_field_obj, ManyToManyField):
|
||||
through = related_field_obj.remote_field.through
|
||||
if not through._meta.auto_created:
|
||||
continue
|
||||
related_objs = related_model.objects.filter(**{related_field: self})
|
||||
for related_obj in related_objs:
|
||||
try:
|
||||
setattr(related_obj, related_field, canonical)
|
||||
related_obj.save()
|
||||
except TypeError:
|
||||
getattr(related_obj, related_field).add(canonical)
|
||||
getattr(related_obj, related_field).remove(self)
|
||||
|
||||
self.merged_into = canonical
|
||||
self.save()
|
||||
|
||||
def adopt_data_from(self, other: Self) -> None:
|
||||
"""fill empty fields with values from another entity"""
|
||||
for data_field in self._meta.get_fields():
|
||||
if not hasattr(data_field, "activitypub_field"):
|
||||
continue
|
||||
data_value = getattr(other, data_field.name)
|
||||
if not data_value:
|
||||
continue
|
||||
if not getattr(self, data_field.name):
|
||||
setattr(self, data_field.name, data_value)
|
||||
|
||||
|
||||
class Book(BookDataModel):
|
||||
"""a generic book, which can mean either an edition or a work"""
|
||||
|
|
Loading…
Reference in a new issue