Merge pull request #3299 from Minnozz/absorb

Track which Author/Work/Edition a duplicate has been merged into
This commit is contained in:
Mouse Reeve 2024-04-09 05:55:44 -05:00 committed by GitHub
commit 73630331d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 405 additions and 116 deletions

View file

@ -1,13 +1,14 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently """ PROCEED WITH CAUTION: uses deduplication fields to permanently
merge book data objects """ merge book data objects """
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models import Count from django.db.models import Count
from bookwyrm import models from bookwyrm import models
from bookwyrm.management.merge import merge_objects
def dedupe_model(model): def dedupe_model(model, dry_run=False):
"""combine duplicate editions and update related models""" """combine duplicate editions and update related models"""
print(f"deduplicating {model.__name__}:")
fields = model._meta.get_fields() fields = model._meta.get_fields()
dedupe_fields = [ dedupe_fields = [
f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field
@ -16,30 +17,42 @@ def dedupe_model(model):
dupes = ( dupes = (
model.objects.values(field.name) model.objects.values(field.name)
.annotate(Count(field.name)) .annotate(Count(field.name))
.filter(**{"%s__count__gt" % field.name: 1}) .filter(**{f"{field.name}__count__gt": 1})
.exclude(**{field.name: ""})
.exclude(**{f"{field.name}__isnull": True})
) )
for dupe in dupes: for dupe in dupes:
value = dupe[field.name] value = dupe[field.name]
if not value or value == "":
continue
print("----------") print("----------")
print(dupe)
objs = model.objects.filter(**{field.name: value}).order_by("id") objs = model.objects.filter(**{field.name: value}).order_by("id")
canonical = objs.first() canonical = objs.first()
print("keeping", canonical.remote_id) action = "would merge" if dry_run else "merging"
print(
f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
)
for obj in objs[1:]: for obj in objs[1:]:
print(obj.remote_id) print(f"- {obj.remote_id}")
merge_objects(canonical, obj) absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
print(f" absorbed fields: {absorbed_fields}")
class Command(BaseCommand): class Command(BaseCommand):
"""deduplicate allllll the book data models""" """deduplicate allllll the book data models"""
help = "merges duplicate book data" help = "merges duplicate book data"
def add_arguments(self, parser):
"""add the arguments for this command"""
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument # pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options): def handle(self, *args, **options):
"""run deduplications""" """run deduplications"""
dedupe_model(models.Edition) dedupe_model(models.Edition, dry_run=options["dry_run"])
dedupe_model(models.Work) dedupe_model(models.Work, dry_run=options["dry_run"])
dedupe_model(models.Author) dedupe_model(models.Author, dry_run=options["dry_run"])

View file

@ -1,50 +0,0 @@
from django.db.models import ManyToManyField
def update_related(canonical, obj):
"""update all the models with fk to the object being removed"""
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
]
for (related_field, related_model) in related_models:
# Skip the ManyToMany fields that arent auto-created. These
# should have a corresponding OneToMany field in the model for
# the linking table anyway. If we update it through that model
# instead then we wont lose the extra fields in the linking
# table.
related_field_obj = related_model._meta.get_field(related_field)
if isinstance(related_field_obj, ManyToManyField):
through = related_field_obj.remote_field.through
if not through._meta.auto_created:
continue
related_objs = related_model.objects.filter(**{related_field: obj})
for related_obj in related_objs:
print("replacing in", related_model.__name__, related_field, related_obj.id)
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(obj)
def copy_data(canonical, obj):
"""try to get the most data possible"""
for data_field in obj._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
data_value = getattr(obj, data_field.name)
if not data_value:
continue
if not getattr(canonical, data_field.name):
print("setting data field", data_field.name, data_value)
setattr(canonical, data_field.name, data_value)
canonical.save()
def merge_objects(canonical, obj):
copy_data(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()

View file

@ -1,4 +1,3 @@
from bookwyrm.management.merge import merge_objects
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
@ -9,6 +8,11 @@ class MergeCommand(BaseCommand):
"""add the arguments for this command""" """add the arguments for this command"""
parser.add_argument("--canonical", type=int, required=True) parser.add_argument("--canonical", type=int, required=True)
parser.add_argument("--other", type=int, required=True) parser.add_argument("--other", type=int, required=True)
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument # pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options): def handle(self, *args, **options):
@ -26,4 +30,8 @@ class MergeCommand(BaseCommand):
print("other book doesnt exist!") print("other book doesnt exist!")
return return
merge_objects(canonical, other) absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"])
action = "would be" if options["dry_run"] else "has been"
print(f"{other.remote_id} {action} merged into {canonical.remote_id}")
print(f"absorbed fields: {absorbed_fields}")

View file

@ -0,0 +1,48 @@
# Generated by Django 3.2.24 on 2024-02-28 21:30
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0196_merge_pr3134_into_main"),
]
operations = [
migrations.CreateModel(
name="MergedBook",
fields=[
("deleted_id", models.IntegerField(primary_key=True, serialize=False)),
(
"merged_into",
models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
related_name="absorbed",
to="bookwyrm.book",
),
),
],
options={
"abstract": False,
},
),
migrations.CreateModel(
name="MergedAuthor",
fields=[
("deleted_id", models.IntegerField(primary_key=True, serialize=False)),
(
"merged_into",
models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
related_name="absorbed",
to="bookwyrm.author",
),
),
],
options={
"abstract": False,
},
),
]

View file

@ -1,4 +1,5 @@
""" database schema for info about authors """ """ database schema for info about authors """
import re import re
from typing import Tuple, Any from typing import Tuple, Any
@ -10,13 +11,15 @@ from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
from bookwyrm.utils.db import format_trigger from bookwyrm.utils.db import format_trigger
from .book import BookDataModel from .book import BookDataModel, MergedAuthor
from . import fields from . import fields
class Author(BookDataModel): class Author(BookDataModel):
"""basic biographic info""" """basic biographic info"""
merged_model = MergedAuthor
wikipedia_link = fields.CharField( wikipedia_link = fields.CharField(
max_length=255, blank=True, null=True, deduplication_field=True max_length=255, blank=True, null=True, deduplication_field=True
) )

View file

@ -1,13 +1,15 @@
""" database schema for books and shelves """ """ database schema for books and shelves """
from itertools import chain from itertools import chain
import re import re
from typing import Any from typing import Any, Dict
from typing_extensions import Self
from django.contrib.postgres.search import SearchVectorField from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.indexes import GinIndex
from django.core.cache import cache from django.core.cache import cache
from django.db import models, transaction from django.db import models, transaction
from django.db.models import Prefetch from django.db.models import Prefetch, ManyToManyField
from django.dispatch import receiver from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from model_utils import FieldTracker from model_utils import FieldTracker
@ -108,10 +110,115 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
"""only send book data updates to other bookwyrm instances""" """only send book data updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software, **kwargs) super().broadcast(activity, sender, software=software, **kwargs)
def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]:
"""merge this entity into another entity"""
if canonical.id == self.id:
raise ValueError(f"Cannot merge {self} into itself")
absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run)
if dry_run:
return absorbed_fields
canonical.save()
self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in self._meta.related_objects
]
# pylint: disable=protected-access
for related_field, related_model in related_models:
# Skip the ManyToMany fields that arent auto-created. These
# should have a corresponding OneToMany field in the model for
# the linking table anyway. If we update it through that model
# instead then we wont lose the extra fields in the linking
# table.
# pylint: disable=protected-access
related_field_obj = related_model._meta.get_field(related_field)
if isinstance(related_field_obj, ManyToManyField):
through = related_field_obj.remote_field.through
if not through._meta.auto_created:
continue
related_objs = related_model.objects.filter(**{related_field: self})
for related_obj in related_objs:
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(self)
self.delete()
return absorbed_fields
def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]:
"""fill empty fields with values from another entity"""
absorbed_fields = {}
for data_field in self._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
canonical_value = getattr(self, data_field.name)
other_value = getattr(other, data_field.name)
if not other_value:
continue
if isinstance(data_field, fields.ArrayField):
if new_values := list(set(other_value) - set(canonical_value)):
# append at the end (in no particular order)
if not dry_run:
setattr(self, data_field.name, canonical_value + new_values)
absorbed_fields[data_field.name] = new_values
elif isinstance(data_field, fields.PartialDateField):
if (
(not canonical_value)
or (other_value.has_day and not canonical_value.has_day)
or (other_value.has_month and not canonical_value.has_month)
):
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
else:
if not canonical_value:
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
return absorbed_fields
class MergedBookDataModel(models.Model):
"""a BookDataModel instance that has been merged into another instance. kept
to be able to redirect old URLs"""
deleted_id = models.IntegerField(primary_key=True)
class Meta:
"""abstract just like BookDataModel"""
abstract = True
class MergedBook(MergedBookDataModel):
"""an Book that has been merged into another one"""
merged_into = models.ForeignKey(
"Book", on_delete=models.PROTECT, related_name="absorbed"
)
class MergedAuthor(MergedBookDataModel):
"""an Author that has been merged into another one"""
merged_into = models.ForeignKey(
"Author", on_delete=models.PROTECT, related_name="absorbed"
)
class Book(BookDataModel): class Book(BookDataModel):
"""a generic book, which can mean either an edition or a work""" """a generic book, which can mean either an edition or a work"""
merged_model = MergedBook
connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True) connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True)
# book/work metadata # book/work metadata
@ -192,9 +299,13 @@ class Book(BookDataModel):
"""properties of this edition, as a string""" """properties of this edition, as a string"""
items = [ items = [
self.physical_format if hasattr(self, "physical_format") else None, self.physical_format if hasattr(self, "physical_format") else None,
f"{self.languages[0]} language" (
if self.languages and self.languages[0] and self.languages[0] != "English" f"{self.languages[0]} language"
else None, if self.languages
and self.languages[0]
and self.languages[0] != "English"
else None
),
str(self.published_date.year) if self.published_date else None, str(self.published_date.year) if self.published_date else None,
", ".join(self.publishers) if hasattr(self, "publishers") else None, ", ".join(self.publishers) if hasattr(self, "publishers") else None,
] ]

View file

@ -0,0 +1,97 @@
"""test merging Authors, Works and Editions"""
from django.test import TestCase
from django.test.client import Client
from bookwyrm import models
class MergeBookDataModel(TestCase):
"""test merging of subclasses of BookDataModel"""
@classmethod
def setUpTestData(cls): # pylint: disable=invalid-name
"""shared data"""
models.SiteSettings.objects.create()
cls.jrr_tolkien = models.Author.objects.create(
name="J.R.R. Tolkien",
aliases=["JRR Tolkien", "Tolkien"],
bio="This guy wrote about hobbits and stuff.",
openlibrary_key="OL26320A",
isni="0000000121441970",
)
cls.jrr_tolkien_2 = models.Author.objects.create(
name="J.R.R. Tolkien",
aliases=["JRR Tolkien", "John Ronald Reuel Tolkien"],
openlibrary_key="OL26320A",
isni="wrong",
wikidata="Q892",
)
cls.jrr_tolkien_2_id = cls.jrr_tolkien_2.id
# perform merges
cls.jrr_tolkien_absorbed_fields = cls.jrr_tolkien_2.merge_into(cls.jrr_tolkien)
def test_merged_author(self):
"""verify merged author after merge"""
self.assertEqual(self.jrr_tolkien_2.id, None, msg="duplicate should be deleted")
def test_canonical_author(self):
"""verify canonical author data after merge"""
self.assertFalse(
self.jrr_tolkien.id is None, msg="canonical should not be deleted"
)
# identical in canonical and duplicate; should be unchanged
self.assertEqual(self.jrr_tolkien.name, "J.R.R. Tolkien")
self.assertEqual(self.jrr_tolkien.openlibrary_key, "OL26320A")
# present in canonical and absent in duplicate; should be unchanged
self.assertEqual(
self.jrr_tolkien.bio, "This guy wrote about hobbits and stuff."
)
# absent in canonical and present in duplicate; should be absorbed
self.assertEqual(self.jrr_tolkien.wikidata, "Q892")
# scalar value that is different in canonical and duplicate; should be unchanged
self.assertEqual(self.jrr_tolkien.isni, "0000000121441970")
# set value with both matching and non-matching elements; should be the
# union of canonical and duplicate
self.assertEqual(
self.jrr_tolkien.aliases,
[
"JRR Tolkien",
"Tolkien",
"John Ronald Reuel Tolkien",
],
)
def test_merged_author_redirect(self):
"""a web request for a merged author should redirect to the canonical author"""
client = Client()
response = client.get(
f"/author/{self.jrr_tolkien_2_id}/s/jrr-tolkien", follow=True
)
self.assertEqual(response.redirect_chain, [(self.jrr_tolkien.local_path, 301)])
def test_merged_author_activitypub(self):
"""an activitypub request for a merged author should return the data for
the canonical author (including the canonical id)"""
client = Client(HTTP_ACCEPT="application/json")
response = client.get(f"/author/{self.jrr_tolkien_2_id}")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), self.jrr_tolkien.to_activity())
def test_absorbed_fields(self):
"""reported absorbed_fields should be accurate for --dry_run"""
self.assertEqual(
self.jrr_tolkien_absorbed_fields,
{
"aliases": ["John Ronald Reuel Tolkien"],
"wikidata": "Q892",
},
)

View file

@ -67,6 +67,14 @@ class PartialDate(datetime):
# current_timezone and default_timezone. # current_timezone and default_timezone.
return cls.from_datetime(datetime(year, month, day, tzinfo=_westmost_tz)) return cls.from_datetime(datetime(year, month, day, tzinfo=_westmost_tz))
def __eq__(self, other: object) -> bool:
if not isinstance(other, PartialDate):
return NotImplemented
return self.partial_isoformat() == other.partial_isoformat()
def __repr__(self) -> str:
return f"<{self.__class__.__name__} object: {self.partial_isoformat()}>"
class MonthParts(PartialDate): class MonthParts(PartialDate):
"""a date bound into month precision""" """a date bound into month precision"""

View file

@ -1,4 +1,5 @@
""" the good people stuff! the authors! """ """ the good people stuff! the authors! """
from django.contrib.auth.decorators import login_required, permission_required from django.contrib.auth.decorators import login_required, permission_required
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.shortcuts import get_object_or_404, redirect from django.shortcuts import get_object_or_404, redirect
@ -11,7 +12,11 @@ from bookwyrm import forms, models
from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager from bookwyrm.connectors import connector_manager
from bookwyrm.settings import PAGE_LENGTH from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path from bookwyrm.views.helpers import (
is_api_request,
get_mergeable_object_or_404,
maybe_redirect_local_path,
)
# pylint: disable= no-self-use # pylint: disable= no-self-use
@ -21,7 +26,7 @@ class Author(View):
# pylint: disable=unused-argument # pylint: disable=unused-argument
def get(self, request, author_id, slug=None): def get(self, request, author_id, slug=None):
"""landing page for an author""" """landing page for an author"""
author = get_object_or_404(models.Author, id=author_id) author = get_mergeable_object_or_404(models.Author, id=author_id)
if is_api_request(request): if is_api_request(request):
return ActivitypubResponse(author.to_activity()) return ActivitypubResponse(author.to_activity())
@ -56,13 +61,13 @@ class EditAuthor(View):
def get(self, request, author_id): def get(self, request, author_id):
"""info about a book""" """info about a book"""
author = get_object_or_404(models.Author, id=author_id) author = get_mergeable_object_or_404(models.Author, id=author_id)
data = {"author": author, "form": forms.AuthorForm(instance=author)} data = {"author": author, "form": forms.AuthorForm(instance=author)}
return TemplateResponse(request, "author/edit_author.html", data) return TemplateResponse(request, "author/edit_author.html", data)
def post(self, request, author_id): def post(self, request, author_id):
"""edit a author cool""" """edit a author cool"""
author = get_object_or_404(models.Author, id=author_id) author = get_mergeable_object_or_404(models.Author, id=author_id)
form = forms.AuthorForm(request.POST, request.FILES, instance=author) form = forms.AuthorForm(request.POST, request.FILES, instance=author)
if not form.is_valid(): if not form.is_valid():
@ -82,7 +87,7 @@ def update_author_from_remote(request, author_id, connector_identifier):
connector = connector_manager.load_connector( connector = connector_manager.load_connector(
get_object_or_404(models.Connector, identifier=connector_identifier) get_object_or_404(models.Connector, identifier=connector_identifier)
) )
author = get_object_or_404(models.Author, id=author_id) author = get_mergeable_object_or_404(models.Author, id=author_id)
connector.update_author_from_remote(author) connector.update_author_from_remote(author)

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """ """ the good stuff! the books! """
from uuid import uuid4 from uuid import uuid4
from django.contrib.auth.decorators import login_required, permission_required from django.contrib.auth.decorators import login_required, permission_required
@ -15,7 +16,11 @@ from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager, ConnectorException from bookwyrm.connectors import connector_manager, ConnectorException
from bookwyrm.connectors.abstract_connector import get_image from bookwyrm.connectors.abstract_connector import get_image
from bookwyrm.settings import PAGE_LENGTH from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path from bookwyrm.views.helpers import (
is_api_request,
maybe_redirect_local_path,
get_mergeable_object_or_404,
)
# pylint: disable=no-self-use # pylint: disable=no-self-use
@ -40,7 +45,11 @@ class Book(View):
# table, so they never have clashing IDs # table, so they never have clashing IDs
book = ( book = (
models.Edition.viewer_aware_objects(request.user) models.Edition.viewer_aware_objects(request.user)
.filter(Q(id=book_id) | Q(parent_work__id=book_id)) .filter(
Q(id=book_id)
| Q(parent_work__id=book_id)
| Q(absorbed__deleted_id=book_id)
)
.order_by("-edition_rank") .order_by("-edition_rank")
.select_related("parent_work") .select_related("parent_work")
.prefetch_related("authors", "file_links") .prefetch_related("authors", "file_links")
@ -82,11 +91,13 @@ class Book(View):
"book": book, "book": book,
"statuses": paginated.get_page(request.GET.get("page")), "statuses": paginated.get_page(request.GET.get("page")),
"review_count": reviews.count(), "review_count": reviews.count(),
"ratings": reviews.filter( "ratings": (
Q(content__isnull=True) | Q(content="") reviews.filter(Q(content__isnull=True) | Q(content="")).select_related(
).select_related("user") "user"
if not user_statuses )
else None, if not user_statuses
else None
),
"rating": reviews.aggregate(Avg("rating"))["rating__avg"], "rating": reviews.aggregate(Avg("rating"))["rating__avg"],
"lists": lists, "lists": lists,
"update_error": kwargs.get("update_error", False), "update_error": kwargs.get("update_error", False),
@ -130,7 +141,7 @@ class Book(View):
@require_POST @require_POST
def upload_cover(request, book_id): def upload_cover(request, book_id):
"""upload a new cover""" """upload a new cover"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
book.last_edited_by = request.user book.last_edited_by = request.user
url = request.POST.get("cover-url") url = request.POST.get("cover-url")
@ -168,7 +179,7 @@ def set_cover_from_url(url):
@permission_required("bookwyrm.edit_book", raise_exception=True) @permission_required("bookwyrm.edit_book", raise_exception=True)
def add_description(request, book_id): def add_description(request, book_id):
"""upload a new cover""" """upload a new cover"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
description = request.POST.get("description") description = request.POST.get("description")
@ -199,7 +210,9 @@ def update_book_from_remote(request, book_id, connector_identifier):
connector = connector_manager.load_connector( connector = connector_manager.load_connector(
get_object_or_404(models.Connector, identifier=connector_identifier) get_object_or_404(models.Connector, identifier=connector_identifier)
) )
book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) book = get_mergeable_object_or_404(
models.Book.objects.select_subclasses(), id=book_id
)
try: try:
connector.update_book_from_remote(book) connector.update_book_from_remote(book)

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """ """ the good stuff! the books! """
from re import sub, findall from re import sub, findall
from django.contrib.auth.decorators import login_required, permission_required from django.contrib.auth.decorators import login_required, permission_required
from django.contrib.postgres.search import SearchRank, SearchVector from django.contrib.postgres.search import SearchRank, SearchVector
@ -18,9 +19,10 @@ from bookwyrm.utils.isni import (
build_author_from_isni, build_author_from_isni,
augment_author_metadata, augment_author_metadata,
) )
from bookwyrm.views.helpers import get_edition from bookwyrm.views.helpers import get_edition, get_mergeable_object_or_404
from .books import set_cover_from_url from .books import set_cover_from_url
# pylint: disable=no-self-use # pylint: disable=no-self-use
@method_decorator(login_required, name="dispatch") @method_decorator(login_required, name="dispatch")
@method_decorator( @method_decorator(
@ -42,7 +44,7 @@ class EditBook(View):
def post(self, request, book_id): def post(self, request, book_id):
"""edit a book cool""" """edit a book cool"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
form = forms.EditionForm(request.POST, request.FILES, instance=book) form = forms.EditionForm(request.POST, request.FILES, instance=book)
@ -130,7 +132,7 @@ class CreateBook(View):
with transaction.atomic(): with transaction.atomic():
book = form.save(request) book = form.save(request)
parent_work = get_object_or_404(models.Work, id=parent_work_id) parent_work = get_mergeable_object_or_404(models.Work, id=parent_work_id)
book.parent_work = parent_work book.parent_work = parent_work
if authors: if authors:
@ -295,7 +297,7 @@ class ConfirmEditBook(View):
if not book.parent_work: if not book.parent_work:
work_match = request.POST.get("parent_work") work_match = request.POST.get("parent_work")
if work_match and work_match != "0": if work_match and work_match != "0":
work = get_object_or_404(models.Work, id=work_match) work = get_mergeable_object_or_404(models.Work, id=work_match)
else: else:
work = models.Work.objects.create(title=form.cleaned_data["title"]) work = models.Work.objects.create(title=form.cleaned_data["title"])
work.authors.set(book.authors.all()) work.authors.set(book.authors.all())

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """ """ the good stuff! the books! """
from functools import reduce from functools import reduce
import operator import operator
@ -7,7 +8,7 @@ from django.core.cache import cache as django_cache
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.db import transaction from django.db import transaction
from django.db.models import Q from django.db.models import Q
from django.shortcuts import get_object_or_404, redirect from django.shortcuts import redirect
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from django.views import View from django.views import View
from django.views.decorators.http import require_POST from django.views.decorators.http import require_POST
@ -15,7 +16,7 @@ from django.views.decorators.http import require_POST
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.settings import PAGE_LENGTH from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404
# pylint: disable=no-self-use # pylint: disable=no-self-use
@ -24,7 +25,7 @@ class Editions(View):
def get(self, request, book_id): def get(self, request, book_id):
"""list of editions of a book""" """list of editions of a book"""
work = get_object_or_404(models.Work, id=book_id) work = get_mergeable_object_or_404(models.Work, id=book_id)
if is_api_request(request): if is_api_request(request):
return ActivitypubResponse(work.to_edition_list(**request.GET)) return ActivitypubResponse(work.to_edition_list(**request.GET))
@ -83,7 +84,7 @@ class Editions(View):
def switch_edition(request): def switch_edition(request):
"""switch your copy of a book to a different edition""" """switch your copy of a book to a different edition"""
edition_id = request.POST.get("edition") edition_id = request.POST.get("edition")
new_edition = get_object_or_404(models.Edition, id=edition_id) new_edition = get_mergeable_object_or_404(models.Edition, id=edition_id)
shelfbooks = models.ShelfBook.objects.filter( shelfbooks = models.ShelfBook.objects.filter(
book__parent_work=new_edition.parent_work, shelf__user=request.user book__parent_work=new_edition.parent_work, shelf__user=request.user
) )

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """ """ the good stuff! the books! """
from django.contrib.auth.decorators import login_required, permission_required from django.contrib.auth.decorators import login_required, permission_required
from django.db import transaction from django.db import transaction
from django.shortcuts import get_object_or_404, redirect from django.shortcuts import get_object_or_404, redirect
@ -8,6 +9,7 @@ from django.utils.decorators import method_decorator
from django.views.decorators.http import require_POST from django.views.decorators.http import require_POST
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.views.helpers import get_mergeable_object_or_404
# pylint: disable=no-self-use # pylint: disable=no-self-use
@ -20,7 +22,7 @@ class BookFileLinks(View):
def get(self, request, book_id): def get(self, request, book_id):
"""view links""" """view links"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
annotated_links = get_annotated_links(book) annotated_links = get_annotated_links(book)
data = {"book": book, "links": annotated_links} data = {"book": book, "links": annotated_links}
@ -36,7 +38,7 @@ class BookFileLinks(View):
# this form shouldn't ever really get here, since it's just a dropdown # this form shouldn't ever really get here, since it's just a dropdown
# get the data again rather than redirecting # get the data again rather than redirecting
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
annotated_links = get_annotated_links(book, form=form) annotated_links = get_annotated_links(book, form=form)
data = {"book": book, "links": annotated_links} data = {"book": book, "links": annotated_links}
@ -75,7 +77,7 @@ class AddFileLink(View):
def get(self, request, book_id): def get(self, request, book_id):
"""Create link form""" """Create link form"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
data = { data = {
"file_link_form": forms.FileLinkForm(), "file_link_form": forms.FileLinkForm(),
"book": book, "book": book,
@ -85,7 +87,9 @@ class AddFileLink(View):
@transaction.atomic @transaction.atomic
def post(self, request, book_id, link_id=None): def post(self, request, book_id, link_id=None):
"""Add a link to a copy of the book you can read""" """Add a link to a copy of the book you can read"""
book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) book = get_mergeable_object_or_404(
models.Book.objects.select_subclasses(), id=book_id
)
link = get_object_or_404(models.FileLink, id=link_id) if link_id else None link = get_object_or_404(models.FileLink, id=link_id) if link_id else None
form = forms.FileLinkForm(request.POST, instance=link) form = forms.FileLinkForm(request.POST, instance=link)
if not form.is_valid(): if not form.is_valid():

View file

@ -1,10 +1,10 @@
""" books belonging to the same series """ """ books belonging to the same series """
from sys import float_info from sys import float_info
from django.views import View from django.views import View
from django.shortcuts import get_object_or_404
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from bookwyrm.views.helpers import is_api_request from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404
from bookwyrm import models from bookwyrm import models
@ -27,7 +27,7 @@ class BookSeriesBy(View):
if is_api_request(request): if is_api_request(request):
pass pass
author = get_object_or_404(models.Author, id=author_id) author = get_mergeable_object_or_404(models.Author, id=author_id)
results = models.Edition.objects.filter(authors=author, series=series_name) results = models.Edition.objects.filter(authors=author, series=series_name)
@ -56,9 +56,11 @@ class BookSeriesBy(View):
sorted(numbered_books, key=sort_by_series) sorted(numbered_books, key=sort_by_series)
+ sorted( + sorted(
dated_books, dated_books,
key=lambda book: book.first_published_date key=lambda book: (
if book.first_published_date book.first_published_date
else book.published_date, if book.first_published_date
else book.published_date
),
) )
+ sorted( + sorted(
unsortable_books, unsortable_books,

View file

@ -1,4 +1,5 @@
""" Helping new users figure out the lay of the land """ """ Helping new users figure out the lay of the land """
import re import re
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
@ -13,6 +14,7 @@ from django.views import View
from bookwyrm import book_search, forms, models from bookwyrm import book_search, forms, models
from bookwyrm.settings import INSTANCE_ACTOR_USERNAME from bookwyrm.settings import INSTANCE_ACTOR_USERNAME
from bookwyrm.suggested_users import suggested_users from bookwyrm.suggested_users import suggested_users
from bookwyrm.views.helpers import get_mergeable_object_or_404
from .preferences.edit_user import save_user_form from .preferences.edit_user import save_user_form
@ -80,8 +82,8 @@ class GetStartedBooks(View):
for k, v in request.POST.items() for k, v in request.POST.items()
if re.match(r"\d+", k) and re.match(r"\d+", v) if re.match(r"\d+", k) and re.match(r"\d+", v)
] ]
for (book_id, shelf_id) in shelve_actions: for book_id, shelf_id in shelve_actions:
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
shelf = get_object_or_404(models.Shelf, id=shelf_id) shelf = get_object_or_404(models.Shelf, id=shelf_id)
models.ShelfBook.objects.create(book=book, shelf=shelf, user=request.user) models.ShelfBook.objects.create(book=book, shelf=shelf, user=request.user)

View file

@ -1,4 +1,5 @@
""" helper functions used in various views """ """ helper functions used in various views """
import re import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
import dateutil.parser import dateutil.parser
@ -8,7 +9,7 @@ from dateutil.parser import ParserError
from requests import HTTPError from requests import HTTPError
from django.db.models import Q from django.db.models import Q
from django.conf import settings as django_settings from django.conf import settings as django_settings
from django.shortcuts import redirect from django.shortcuts import redirect, _get_queryset
from django.http import Http404 from django.http import Http404
from django.utils import translation from django.utils import translation
@ -232,3 +233,19 @@ def redirect_to_referer(request, *args, **kwargs):
# if not, use the args passed you'd normally pass to redirect() # if not, use the args passed you'd normally pass to redirect()
return redirect(*args or "/", **kwargs) return redirect(*args or "/", **kwargs)
# pylint: disable=redefined-builtin,invalid-name
def get_mergeable_object_or_404(klass, id):
"""variant of get_object_or_404 that also redirects if id has been merged
into another object"""
queryset = _get_queryset(klass)
try:
return queryset.get(pk=id)
except queryset.model.DoesNotExist:
try:
return queryset.get(absorbed__deleted_id=id)
except queryset.model.DoesNotExist:
pass
raise Http404(f"No {queryset.model} with ID {id} exists")

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """ """ the good stuff! the books! """
import logging import logging
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.core.cache import cache from django.core.cache import cache
@ -11,6 +12,7 @@ from django.views import View
from django.views.decorators.http import require_POST from django.views.decorators.http import require_POST
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.views.helpers import get_mergeable_object_or_404
from bookwyrm.views.shelf.shelf_actions import unshelve from bookwyrm.views.shelf.shelf_actions import unshelve
from .status import CreateStatus from .status import CreateStatus
from .helpers import get_edition, handle_reading_status, is_api_request from .helpers import get_edition, handle_reading_status, is_api_request
@ -130,7 +132,7 @@ class ReadThrough(View):
def get(self, request, book_id, readthrough_id=None): def get(self, request, book_id, readthrough_id=None):
"""standalone form in case of errors""" """standalone form in case of errors"""
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
form = forms.ReadThroughForm() form = forms.ReadThroughForm()
data = {"form": form, "book": book} data = {"form": form, "book": book}
if readthrough_id: if readthrough_id:
@ -152,7 +154,7 @@ class ReadThrough(View):
) )
form = forms.ReadThroughForm(request.POST) form = forms.ReadThroughForm(request.POST)
if not form.is_valid(): if not form.is_valid():
book = get_object_or_404(models.Edition, id=book_id) book = get_mergeable_object_or_404(models.Edition, id=book_id)
data = {"form": form, "book": book} data = {"form": form, "book": book}
if request.POST.get("id"): if request.POST.get("id"):
data["readthrough"] = get_object_or_404( data["readthrough"] = get_object_or_404(

View file

@ -1,11 +1,12 @@
""" shelf views """ """ shelf views """
from django.db import IntegrityError, transaction from django.db import IntegrityError, transaction
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.shortcuts import get_object_or_404, redirect from django.shortcuts import get_object_or_404, redirect
from django.views.decorators.http import require_POST from django.views.decorators.http import require_POST
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.views.helpers import redirect_to_referer from bookwyrm.views.helpers import redirect_to_referer, get_mergeable_object_or_404
@login_required @login_required
@ -36,7 +37,7 @@ def delete_shelf(request, shelf_id):
@transaction.atomic @transaction.atomic
def shelve(request): def shelve(request):
"""put a book on a user's shelf""" """put a book on a user's shelf"""
book = get_object_or_404(models.Edition, id=request.POST.get("book")) book = get_mergeable_object_or_404(models.Edition, id=request.POST.get("book"))
desired_shelf = get_object_or_404( desired_shelf = get_object_or_404(
request.user.shelf_set, identifier=request.POST.get("shelf") request.user.shelf_set, identifier=request.POST.get("shelf")
) )
@ -97,7 +98,7 @@ def shelve(request):
def unshelve(request, book_id=False): def unshelve(request, book_id=False):
"""remove a book from a user's shelf""" """remove a book from a user's shelf"""
identity = book_id if book_id else request.POST.get("book") identity = book_id if book_id else request.POST.get("book")
book = get_object_or_404(models.Edition, id=identity) book = get_mergeable_object_or_404(models.Edition, id=identity)
shelf_book = get_object_or_404( shelf_book = get_object_or_404(
models.ShelfBook, book=book, shelf__id=request.POST["shelf"] models.ShelfBook, book=book, shelf__id=request.POST["shelf"]
) )

View file

@ -1,4 +1,5 @@
""" what are we here for if not for posting """ """ what are we here for if not for posting """
import re import re
import logging import logging
@ -19,6 +20,7 @@ from markdown import markdown
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.models.report import DELETE_ITEM from bookwyrm.models.report import DELETE_ITEM
from bookwyrm.utils import regex, sanitizer from bookwyrm.utils import regex, sanitizer
from bookwyrm.views.helpers import get_mergeable_object_or_404
from .helpers import handle_remote_webfinger, is_api_request from .helpers import handle_remote_webfinger, is_api_request
from .helpers import load_date_in_user_tz_as_utc, redirect_to_referer from .helpers import load_date_in_user_tz_as_utc, redirect_to_referer
@ -52,7 +54,7 @@ class CreateStatus(View):
def get(self, request, status_type): # pylint: disable=unused-argument def get(self, request, status_type): # pylint: disable=unused-argument
"""compose view (...not used?)""" """compose view (...not used?)"""
book = get_object_or_404(models.Edition, id=request.GET.get("book")) book = get_mergeable_object_or_404(models.Edition, id=request.GET.get("book"))
data = {"book": book} data = {"book": book}
return TemplateResponse(request, "compose.html", data) return TemplateResponse(request, "compose.html", data)
@ -98,7 +100,7 @@ class CreateStatus(View):
# inspect the text for user tags # inspect the text for user tags
content = status.content content = status.content
mentions = find_mentions(request.user, content) mentions = find_mentions(request.user, content)
for (_, mention_user) in mentions.items(): for _, mention_user in mentions.items():
# add them to status mentions fk # add them to status mentions fk
status.mention_users.add(mention_user) status.mention_users.add(mention_user)
content = format_mentions(content, mentions) content = format_mentions(content, mentions)
@ -109,7 +111,7 @@ class CreateStatus(View):
# inspect the text for hashtags # inspect the text for hashtags
hashtags = find_or_create_hashtags(content) hashtags = find_or_create_hashtags(content)
for (_, mention_hashtag) in hashtags.items(): for _, mention_hashtag in hashtags.items():
# add them to status mentions fk # add them to status mentions fk
status.mention_hashtags.add(mention_hashtag) status.mention_hashtags.add(mention_hashtag)
content = format_hashtags(content, hashtags) content = format_hashtags(content, hashtags)
@ -140,7 +142,7 @@ class CreateStatus(View):
def format_mentions(content, mentions): def format_mentions(content, mentions):
"""Detect @mentions and make them links""" """Detect @mentions and make them links"""
for (mention_text, mention_user) in mentions.items(): for mention_text, mention_user in mentions.items():
# turn the mention into a link # turn the mention into a link
content = re.sub( content = re.sub(
rf"(?<!/)\B{mention_text}\b(?!@)", rf"(?<!/)\B{mention_text}\b(?!@)",
@ -152,7 +154,7 @@ def format_mentions(content, mentions):
def format_hashtags(content, hashtags): def format_hashtags(content, hashtags):
"""Detect #hashtags and make them links""" """Detect #hashtags and make them links"""
for (mention_text, mention_hashtag) in hashtags.items(): for mention_text, mention_hashtag in hashtags.items():
# turn the mention into a link # turn the mention into a link
content = re.sub( content = re.sub(
rf"(?<!/)\B{mention_text}\b(?!@)", rf"(?<!/)\B{mention_text}\b(?!@)",