From b3753ab6dac5e872f612cd10c3aa9d396ee8ed63 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 28 Feb 2024 22:31:41 +0100 Subject: [PATCH 01/26] Add MergedBookDataModel --- .../0197_mergedauthor_mergedbook.py | 48 +++++++++++++++++++ bookwyrm/models/book.py | 39 +++++++++++++-- 2 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 bookwyrm/migrations/0197_mergedauthor_mergedbook.py diff --git a/bookwyrm/migrations/0197_mergedauthor_mergedbook.py b/bookwyrm/migrations/0197_mergedauthor_mergedbook.py new file mode 100644 index 000000000..23ca38ab2 --- /dev/null +++ b/bookwyrm/migrations/0197_mergedauthor_mergedbook.py @@ -0,0 +1,48 @@ +# Generated by Django 3.2.24 on 2024-02-28 21:30 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0196_merge_pr3134_into_main"), + ] + + operations = [ + migrations.CreateModel( + name="MergedBook", + fields=[ + ("deleted_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "merged_into", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="absorbed", + to="bookwyrm.book", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="MergedAuthor", + fields=[ + ("deleted_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "merged_into", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="absorbed", + to="bookwyrm.author", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index e167e2138..7a4a8addb 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -1,4 +1,5 @@ """ database schema for books and shelves """ + from itertools import chain import re from typing import Any @@ -192,9 +193,13 @@ class Book(BookDataModel): """properties of this edition, as a string""" items = [ self.physical_format if hasattr(self, "physical_format") else None, - f"{self.languages[0]} language" - if self.languages and self.languages[0] and self.languages[0] != "English" - else None, + ( + f"{self.languages[0]} language" + if self.languages + and self.languages[0] + and self.languages[0] != "English" + else None + ), str(self.published_date.year) if self.published_date else None, ", ".join(self.publishers) if hasattr(self, "publishers") else None, ] @@ -451,6 +456,34 @@ class Edition(Book): return queryset +class MergedBookDataModel(models.Model): + """a BookDataModel instance that has been merged into another instance. kept + to be able to redirect old URLs""" + + deleted_id = models.IntegerField(primary_key=True) + + class Meta: + """abstract just like BookDataModel""" + + abstract = True + + +class MergedAuthor(MergedBookDataModel): + """an Author that has been merged into another one""" + + merged_into = models.ForeignKey( + "Author", on_delete=models.PROTECT, related_name="absorbed" + ) + + +class MergedBook(MergedBookDataModel): + """an Book that has been merged into another one""" + + merged_into = models.ForeignKey( + "Book", on_delete=models.PROTECT, related_name="absorbed" + ) + + def isbn_10_to_13(isbn_10): """convert an isbn 10 into an isbn 13""" isbn_10 = re.sub(r"[^0-9X]", "", isbn_10) From 5e123972e88b751217c82564c7f447dbdc69b48d Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Thu, 22 Feb 2024 10:27:38 +0100 Subject: [PATCH 02/26] BookDataModel: implement merge_into method --- .../commands/deduplicate_book_data.py | 17 +-- bookwyrm/management/merge.py | 50 -------- bookwyrm/management/merge_command.py | 3 +- bookwyrm/models/author.py | 5 +- bookwyrm/models/book.py | 110 +++++++++++++----- 5 files changed, 95 insertions(+), 90 deletions(-) delete mode 100644 bookwyrm/management/merge.py diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index dde7d133c..d2f4ef936 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -1,13 +1,14 @@ """ PROCEED WITH CAUTION: uses deduplication fields to permanently merge book data objects """ + from django.core.management.base import BaseCommand from django.db.models import Count from bookwyrm import models -from bookwyrm.management.merge import merge_objects def dedupe_model(model): """combine duplicate editions and update related models""" + print(f"deduplicating {model.__name__}:") fields = model._meta.get_fields() dedupe_fields = [ f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field @@ -16,27 +17,27 @@ def dedupe_model(model): dupes = ( model.objects.values(field.name) .annotate(Count(field.name)) - .filter(**{"%s__count__gt" % field.name: 1}) + .filter(**{f"{field.name}__count__gt": 1}) + .exclude(**{field.name: ""}) + .exclude(**{f"{field.name}__isnull": True}) ) for dupe in dupes: value = dupe[field.name] - if not value or value == "": - continue print("----------") - print(dupe) objs = model.objects.filter(**{field.name: value}).order_by("id") canonical = objs.first() - print("keeping", canonical.remote_id) + print(f"merging into {canonical.remote_id} based on {field.name} {value}:") for obj in objs[1:]: - print(obj.remote_id) - merge_objects(canonical, obj) + print(f"- {obj.remote_id}") + obj.merge_into(canonical) class Command(BaseCommand): """deduplicate allllll the book data models""" help = "merges duplicate book data" + # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): """run deduplications""" diff --git a/bookwyrm/management/merge.py b/bookwyrm/management/merge.py deleted file mode 100644 index f55229f18..000000000 --- a/bookwyrm/management/merge.py +++ /dev/null @@ -1,50 +0,0 @@ -from django.db.models import ManyToManyField - - -def update_related(canonical, obj): - """update all the models with fk to the object being removed""" - # move related models to canonical - related_models = [ - (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects - ] - for (related_field, related_model) in related_models: - # Skip the ManyToMany fields that aren’t auto-created. These - # should have a corresponding OneToMany field in the model for - # the linking table anyway. If we update it through that model - # instead then we won’t lose the extra fields in the linking - # table. - related_field_obj = related_model._meta.get_field(related_field) - if isinstance(related_field_obj, ManyToManyField): - through = related_field_obj.remote_field.through - if not through._meta.auto_created: - continue - related_objs = related_model.objects.filter(**{related_field: obj}) - for related_obj in related_objs: - print("replacing in", related_model.__name__, related_field, related_obj.id) - try: - setattr(related_obj, related_field, canonical) - related_obj.save() - except TypeError: - getattr(related_obj, related_field).add(canonical) - getattr(related_obj, related_field).remove(obj) - - -def copy_data(canonical, obj): - """try to get the most data possible""" - for data_field in obj._meta.get_fields(): - if not hasattr(data_field, "activitypub_field"): - continue - data_value = getattr(obj, data_field.name) - if not data_value: - continue - if not getattr(canonical, data_field.name): - print("setting data field", data_field.name, data_value) - setattr(canonical, data_field.name, data_value) - canonical.save() - - -def merge_objects(canonical, obj): - copy_data(canonical, obj) - update_related(canonical, obj) - # remove the outdated entry - obj.delete() diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index 805dc73fa..2f3f90c86 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -1,4 +1,3 @@ -from bookwyrm.management.merge import merge_objects from django.core.management.base import BaseCommand @@ -26,4 +25,4 @@ class MergeCommand(BaseCommand): print("other book doesn’t exist!") return - merge_objects(canonical, other) + other.merge_into(canonical) diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py index 154b00ccb..7f40f562c 100644 --- a/bookwyrm/models/author.py +++ b/bookwyrm/models/author.py @@ -1,4 +1,5 @@ """ database schema for info about authors """ + import re from typing import Tuple, Any @@ -9,13 +10,15 @@ from bookwyrm import activitypub from bookwyrm.settings import DOMAIN from bookwyrm.utils.db import format_trigger -from .book import BookDataModel +from .book import BookDataModel, MergedAuthor from . import fields class Author(BookDataModel): """basic biographic info""" + merged_model = MergedAuthor + wikipedia_link = fields.CharField( max_length=255, blank=True, null=True, deduplication_field=True ) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 7a4a8addb..c7235a3f5 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -3,12 +3,13 @@ from itertools import chain import re from typing import Any +from typing_extensions import Self from django.contrib.postgres.search import SearchVectorField from django.contrib.postgres.indexes import GinIndex from django.core.cache import cache from django.db import models, transaction -from django.db.models import Prefetch +from django.db.models import Prefetch, ManyToManyField from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from model_utils import FieldTracker @@ -109,10 +110,89 @@ class BookDataModel(ObjectMixin, BookWyrmModel): """only send book data updates to other bookwyrm instances""" super().broadcast(activity, sender, software=software, **kwargs) + def merge_into(self, canonical: Self) -> None: + """merge this entity into another entity""" + if canonical.id == self.id: + raise ValueError(f"Cannot merge {self} into itself") + + canonical.absorb_data_from(self) + canonical.save() + + self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical) + + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in self._meta.related_objects + ] + # pylint: disable=protected-access + for related_field, related_model in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + # pylint: disable=protected-access + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue + related_objs = related_model.objects.filter(**{related_field: self}) + for related_obj in related_objs: + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(self) + + self.delete() + + def absorb_data_from(self, other: Self) -> None: + """fill empty fields with values from another entity""" + for data_field in self._meta.get_fields(): + if not hasattr(data_field, "activitypub_field"): + continue + data_value = getattr(other, data_field.name) + if not data_value: + continue + if not getattr(self, data_field.name): + setattr(self, data_field.name, data_value) + + +class MergedBookDataModel(models.Model): + """a BookDataModel instance that has been merged into another instance. kept + to be able to redirect old URLs""" + + deleted_id = models.IntegerField(primary_key=True) + + class Meta: + """abstract just like BookDataModel""" + + abstract = True + + +class MergedBook(MergedBookDataModel): + """an Book that has been merged into another one""" + + merged_into = models.ForeignKey( + "Book", on_delete=models.PROTECT, related_name="absorbed" + ) + + +class MergedAuthor(MergedBookDataModel): + """an Author that has been merged into another one""" + + merged_into = models.ForeignKey( + "Author", on_delete=models.PROTECT, related_name="absorbed" + ) + class Book(BookDataModel): """a generic book, which can mean either an edition or a work""" + merged_model = MergedBook + connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True) # book/work metadata @@ -456,34 +536,6 @@ class Edition(Book): return queryset -class MergedBookDataModel(models.Model): - """a BookDataModel instance that has been merged into another instance. kept - to be able to redirect old URLs""" - - deleted_id = models.IntegerField(primary_key=True) - - class Meta: - """abstract just like BookDataModel""" - - abstract = True - - -class MergedAuthor(MergedBookDataModel): - """an Author that has been merged into another one""" - - merged_into = models.ForeignKey( - "Author", on_delete=models.PROTECT, related_name="absorbed" - ) - - -class MergedBook(MergedBookDataModel): - """an Book that has been merged into another one""" - - merged_into = models.ForeignKey( - "Book", on_delete=models.PROTECT, related_name="absorbed" - ) - - def isbn_10_to_13(isbn_10): """convert an isbn 10 into an isbn 13""" isbn_10 = re.sub(r"[^0-9X]", "", isbn_10) From e04cd79ff85a4c0637d721cf831ed1320125bdb4 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Fri, 1 Mar 2024 14:37:52 +0100 Subject: [PATCH 03/26] Redirect to new URL when a merged object is requested --- bookwyrm/views/author.py | 15 ++++++++---- bookwyrm/views/books/books.py | 33 +++++++++++++++++++-------- bookwyrm/views/books/edit_book.py | 10 ++++---- bookwyrm/views/books/editions.py | 9 ++++---- bookwyrm/views/books/links.py | 12 ++++++---- bookwyrm/views/books/series.py | 14 +++++++----- bookwyrm/views/get_started.py | 6 +++-- bookwyrm/views/helpers.py | 19 ++++++++++++++- bookwyrm/views/reading.py | 6 +++-- bookwyrm/views/shelf/shelf_actions.py | 7 +++--- bookwyrm/views/status.py | 12 ++++++---- 11 files changed, 97 insertions(+), 46 deletions(-) diff --git a/bookwyrm/views/author.py b/bookwyrm/views/author.py index 4dcf4c447..56977622f 100644 --- a/bookwyrm/views/author.py +++ b/bookwyrm/views/author.py @@ -1,4 +1,5 @@ """ the good people stuff! the authors! """ + from django.contrib.auth.decorators import login_required, permission_required from django.core.paginator import Paginator from django.shortcuts import get_object_or_404, redirect @@ -11,7 +12,11 @@ from bookwyrm import forms, models from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.connectors import connector_manager from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path +from bookwyrm.views.helpers import ( + is_api_request, + get_mergeable_object_or_404, + maybe_redirect_local_path, +) # pylint: disable= no-self-use @@ -21,7 +26,7 @@ class Author(View): # pylint: disable=unused-argument def get(self, request, author_id, slug=None): """landing page for an author""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) if is_api_request(request): return ActivitypubResponse(author.to_activity()) @@ -56,13 +61,13 @@ class EditAuthor(View): def get(self, request, author_id): """info about a book""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) data = {"author": author, "form": forms.AuthorForm(instance=author)} return TemplateResponse(request, "author/edit_author.html", data) def post(self, request, author_id): """edit a author cool""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) form = forms.AuthorForm(request.POST, request.FILES, instance=author) if not form.is_valid(): @@ -82,7 +87,7 @@ def update_author_from_remote(request, author_id, connector_identifier): connector = connector_manager.load_connector( get_object_or_404(models.Connector, identifier=connector_identifier) ) - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) connector.update_author_from_remote(author) diff --git a/bookwyrm/views/books/books.py b/bookwyrm/views/books/books.py index 565220b6e..bbf041850 100644 --- a/bookwyrm/views/books/books.py +++ b/bookwyrm/views/books/books.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from uuid import uuid4 from django.contrib.auth.decorators import login_required, permission_required @@ -15,7 +16,11 @@ from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.connectors import connector_manager, ConnectorException from bookwyrm.connectors.abstract_connector import get_image from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path +from bookwyrm.views.helpers import ( + is_api_request, + maybe_redirect_local_path, + get_mergeable_object_or_404, +) # pylint: disable=no-self-use @@ -40,7 +45,11 @@ class Book(View): # table, so they never have clashing IDs book = ( models.Edition.viewer_aware_objects(request.user) - .filter(Q(id=book_id) | Q(parent_work__id=book_id)) + .filter( + Q(id=book_id) + | Q(parent_work__id=book_id) + | Q(absorbed__deleted_id=book_id) + ) .order_by("-edition_rank") .select_related("parent_work") .prefetch_related("authors", "file_links") @@ -82,11 +91,13 @@ class Book(View): "book": book, "statuses": paginated.get_page(request.GET.get("page")), "review_count": reviews.count(), - "ratings": reviews.filter( - Q(content__isnull=True) | Q(content="") - ).select_related("user") - if not user_statuses - else None, + "ratings": ( + reviews.filter(Q(content__isnull=True) | Q(content="")).select_related( + "user" + ) + if not user_statuses + else None + ), "rating": reviews.aggregate(Avg("rating"))["rating__avg"], "lists": lists, "update_error": kwargs.get("update_error", False), @@ -130,7 +141,7 @@ class Book(View): @require_POST def upload_cover(request, book_id): """upload a new cover""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) book.last_edited_by = request.user url = request.POST.get("cover-url") @@ -168,7 +179,7 @@ def set_cover_from_url(url): @permission_required("bookwyrm.edit_book", raise_exception=True) def add_description(request, book_id): """upload a new cover""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) description = request.POST.get("description") @@ -199,7 +210,9 @@ def update_book_from_remote(request, book_id, connector_identifier): connector = connector_manager.load_connector( get_object_or_404(models.Connector, identifier=connector_identifier) ) - book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) + book = get_mergeable_object_or_404( + models.Book.objects.select_subclasses(), id=book_id + ) try: connector.update_book_from_remote(book) diff --git a/bookwyrm/views/books/edit_book.py b/bookwyrm/views/books/edit_book.py index ae492374f..b8ceece13 100644 --- a/bookwyrm/views/books/edit_book.py +++ b/bookwyrm/views/books/edit_book.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from re import sub, findall from django.contrib.auth.decorators import login_required, permission_required from django.contrib.postgres.search import SearchRank, SearchVector @@ -18,9 +19,10 @@ from bookwyrm.utils.isni import ( build_author_from_isni, augment_author_metadata, ) -from bookwyrm.views.helpers import get_edition +from bookwyrm.views.helpers import get_edition, get_mergeable_object_or_404 from .books import set_cover_from_url + # pylint: disable=no-self-use @method_decorator(login_required, name="dispatch") @method_decorator( @@ -42,7 +44,7 @@ class EditBook(View): def post(self, request, book_id): """edit a book cool""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) form = forms.EditionForm(request.POST, request.FILES, instance=book) @@ -130,7 +132,7 @@ class CreateBook(View): with transaction.atomic(): book = form.save(request) - parent_work = get_object_or_404(models.Work, id=parent_work_id) + parent_work = get_mergeable_object_or_404(models.Work, id=parent_work_id) book.parent_work = parent_work if authors: @@ -295,7 +297,7 @@ class ConfirmEditBook(View): if not book.parent_work: work_match = request.POST.get("parent_work") if work_match and work_match != "0": - work = get_object_or_404(models.Work, id=work_match) + work = get_mergeable_object_or_404(models.Work, id=work_match) else: work = models.Work.objects.create(title=form.cleaned_data["title"]) work.authors.set(book.authors.all()) diff --git a/bookwyrm/views/books/editions.py b/bookwyrm/views/books/editions.py index a3167fac4..538ff6377 100644 --- a/bookwyrm/views/books/editions.py +++ b/bookwyrm/views/books/editions.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from functools import reduce import operator @@ -7,7 +8,7 @@ from django.core.cache import cache as django_cache from django.core.paginator import Paginator from django.db import transaction from django.db.models import Q -from django.shortcuts import get_object_or_404, redirect +from django.shortcuts import redirect from django.template.response import TemplateResponse from django.views import View from django.views.decorators.http import require_POST @@ -15,7 +16,7 @@ from django.views.decorators.http import require_POST from bookwyrm import forms, models from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request +from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404 # pylint: disable=no-self-use @@ -24,7 +25,7 @@ class Editions(View): def get(self, request, book_id): """list of editions of a book""" - work = get_object_or_404(models.Work, id=book_id) + work = get_mergeable_object_or_404(models.Work, id=book_id) if is_api_request(request): return ActivitypubResponse(work.to_edition_list(**request.GET)) @@ -83,7 +84,7 @@ class Editions(View): def switch_edition(request): """switch your copy of a book to a different edition""" edition_id = request.POST.get("edition") - new_edition = get_object_or_404(models.Edition, id=edition_id) + new_edition = get_mergeable_object_or_404(models.Edition, id=edition_id) shelfbooks = models.ShelfBook.objects.filter( book__parent_work=new_edition.parent_work, shelf__user=request.user ) diff --git a/bookwyrm/views/books/links.py b/bookwyrm/views/books/links.py index 70b91f2d9..4793c6019 100644 --- a/bookwyrm/views/books/links.py +++ b/bookwyrm/views/books/links.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from django.contrib.auth.decorators import login_required, permission_required from django.db import transaction from django.shortcuts import get_object_or_404, redirect @@ -8,6 +9,7 @@ from django.utils.decorators import method_decorator from django.views.decorators.http import require_POST from bookwyrm import forms, models +from bookwyrm.views.helpers import get_mergeable_object_or_404 # pylint: disable=no-self-use @@ -20,7 +22,7 @@ class BookFileLinks(View): def get(self, request, book_id): """view links""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) annotated_links = get_annotated_links(book) data = {"book": book, "links": annotated_links} @@ -36,7 +38,7 @@ class BookFileLinks(View): # this form shouldn't ever really get here, since it's just a dropdown # get the data again rather than redirecting - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) annotated_links = get_annotated_links(book, form=form) data = {"book": book, "links": annotated_links} @@ -75,7 +77,7 @@ class AddFileLink(View): def get(self, request, book_id): """Create link form""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) data = { "file_link_form": forms.FileLinkForm(), "book": book, @@ -85,7 +87,9 @@ class AddFileLink(View): @transaction.atomic def post(self, request, book_id, link_id=None): """Add a link to a copy of the book you can read""" - book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) + book = get_mergeable_object_or_404( + models.Book.objects.select_subclasses(), id=book_id + ) link = get_object_or_404(models.FileLink, id=link_id) if link_id else None form = forms.FileLinkForm(request.POST, instance=link) if not form.is_valid(): diff --git a/bookwyrm/views/books/series.py b/bookwyrm/views/books/series.py index bdc8dccab..eb3a2a04f 100644 --- a/bookwyrm/views/books/series.py +++ b/bookwyrm/views/books/series.py @@ -1,10 +1,10 @@ """ books belonging to the same series """ + from sys import float_info from django.views import View -from django.shortcuts import get_object_or_404 from django.template.response import TemplateResponse -from bookwyrm.views.helpers import is_api_request +from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404 from bookwyrm import models @@ -27,7 +27,7 @@ class BookSeriesBy(View): if is_api_request(request): pass - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) results = models.Edition.objects.filter(authors=author, series=series_name) @@ -56,9 +56,11 @@ class BookSeriesBy(View): sorted(numbered_books, key=sort_by_series) + sorted( dated_books, - key=lambda book: book.first_published_date - if book.first_published_date - else book.published_date, + key=lambda book: ( + book.first_published_date + if book.first_published_date + else book.published_date + ), ) + sorted( unsortable_books, diff --git a/bookwyrm/views/get_started.py b/bookwyrm/views/get_started.py index 511a886ca..9a28dfbca 100644 --- a/bookwyrm/views/get_started.py +++ b/bookwyrm/views/get_started.py @@ -1,4 +1,5 @@ """ Helping new users figure out the lay of the land """ + import re from django.contrib.auth.decorators import login_required @@ -13,6 +14,7 @@ from django.views import View from bookwyrm import book_search, forms, models from bookwyrm.settings import INSTANCE_ACTOR_USERNAME from bookwyrm.suggested_users import suggested_users +from bookwyrm.views.helpers import get_mergeable_object_or_404 from .preferences.edit_user import save_user_form @@ -80,8 +82,8 @@ class GetStartedBooks(View): for k, v in request.POST.items() if re.match(r"\d+", k) and re.match(r"\d+", v) ] - for (book_id, shelf_id) in shelve_actions: - book = get_object_or_404(models.Edition, id=book_id) + for book_id, shelf_id in shelve_actions: + book = get_mergeable_object_or_404(models.Edition, id=book_id) shelf = get_object_or_404(models.Shelf, id=shelf_id) models.ShelfBook.objects.create(book=book, shelf=shelf, user=request.user) diff --git a/bookwyrm/views/helpers.py b/bookwyrm/views/helpers.py index 60d950354..5bbb05033 100644 --- a/bookwyrm/views/helpers.py +++ b/bookwyrm/views/helpers.py @@ -1,4 +1,5 @@ """ helper functions used in various views """ + import re from datetime import datetime, timedelta import dateutil.parser @@ -8,7 +9,7 @@ from dateutil.parser import ParserError from requests import HTTPError from django.db.models import Q from django.conf import settings as django_settings -from django.shortcuts import redirect +from django.shortcuts import redirect, _get_queryset from django.http import Http404 from django.utils import translation @@ -232,3 +233,19 @@ def redirect_to_referer(request, *args, **kwargs): # if not, use the args passed you'd normally pass to redirect() return redirect(*args or "/", **kwargs) + + +# pylint: disable=redefined-builtin,invalid-name +def get_mergeable_object_or_404(klass, id): + """variant of get_object_or_404 that also redirects if id has been merged + into another object""" + queryset = _get_queryset(klass) + try: + return queryset.get(pk=id) + except queryset.model.DoesNotExist: + try: + return queryset.get(absorbed__deleted_id=id) + except queryset.model.DoesNotExist: + pass + + raise Http404(f"No {queryset.model} with ID {id} exists") diff --git a/bookwyrm/views/reading.py b/bookwyrm/views/reading.py index 2ce59b096..478d27990 100644 --- a/bookwyrm/views/reading.py +++ b/bookwyrm/views/reading.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + import logging from django.contrib.auth.decorators import login_required from django.core.cache import cache @@ -11,6 +12,7 @@ from django.views import View from django.views.decorators.http import require_POST from bookwyrm import forms, models +from bookwyrm.views.helpers import get_mergeable_object_or_404 from bookwyrm.views.shelf.shelf_actions import unshelve from .status import CreateStatus from .helpers import get_edition, handle_reading_status, is_api_request @@ -130,7 +132,7 @@ class ReadThrough(View): def get(self, request, book_id, readthrough_id=None): """standalone form in case of errors""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) form = forms.ReadThroughForm() data = {"form": form, "book": book} if readthrough_id: @@ -152,7 +154,7 @@ class ReadThrough(View): ) form = forms.ReadThroughForm(request.POST) if not form.is_valid(): - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) data = {"form": form, "book": book} if request.POST.get("id"): data["readthrough"] = get_object_or_404( diff --git a/bookwyrm/views/shelf/shelf_actions.py b/bookwyrm/views/shelf/shelf_actions.py index f0f5fa159..d68ea4219 100644 --- a/bookwyrm/views/shelf/shelf_actions.py +++ b/bookwyrm/views/shelf/shelf_actions.py @@ -1,11 +1,12 @@ """ shelf views """ + from django.db import IntegrityError, transaction from django.contrib.auth.decorators import login_required from django.shortcuts import get_object_or_404, redirect from django.views.decorators.http import require_POST from bookwyrm import forms, models -from bookwyrm.views.helpers import redirect_to_referer +from bookwyrm.views.helpers import redirect_to_referer, get_mergeable_object_or_404 @login_required @@ -36,7 +37,7 @@ def delete_shelf(request, shelf_id): @transaction.atomic def shelve(request): """put a book on a user's shelf""" - book = get_object_or_404(models.Edition, id=request.POST.get("book")) + book = get_mergeable_object_or_404(models.Edition, id=request.POST.get("book")) desired_shelf = get_object_or_404( request.user.shelf_set, identifier=request.POST.get("shelf") ) @@ -97,7 +98,7 @@ def shelve(request): def unshelve(request, book_id=False): """remove a book from a user's shelf""" identity = book_id if book_id else request.POST.get("book") - book = get_object_or_404(models.Edition, id=identity) + book = get_mergeable_object_or_404(models.Edition, id=identity) shelf_book = get_object_or_404( models.ShelfBook, book=book, shelf__id=request.POST["shelf"] ) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index 34b62d0b4..f2f03405f 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -1,4 +1,5 @@ """ what are we here for if not for posting """ + import re import logging @@ -19,6 +20,7 @@ from markdown import markdown from bookwyrm import forms, models from bookwyrm.models.report import DELETE_ITEM from bookwyrm.utils import regex, sanitizer +from bookwyrm.views.helpers import get_mergeable_object_or_404 from .helpers import handle_remote_webfinger, is_api_request from .helpers import load_date_in_user_tz_as_utc, redirect_to_referer @@ -52,7 +54,7 @@ class CreateStatus(View): def get(self, request, status_type): # pylint: disable=unused-argument """compose view (...not used?)""" - book = get_object_or_404(models.Edition, id=request.GET.get("book")) + book = get_mergeable_object_or_404(models.Edition, id=request.GET.get("book")) data = {"book": book} return TemplateResponse(request, "compose.html", data) @@ -98,7 +100,7 @@ class CreateStatus(View): # inspect the text for user tags content = status.content mentions = find_mentions(request.user, content) - for (_, mention_user) in mentions.items(): + for _, mention_user in mentions.items(): # add them to status mentions fk status.mention_users.add(mention_user) content = format_mentions(content, mentions) @@ -109,7 +111,7 @@ class CreateStatus(View): # inspect the text for hashtags hashtags = find_or_create_hashtags(content) - for (_, mention_hashtag) in hashtags.items(): + for _, mention_hashtag in hashtags.items(): # add them to status mentions fk status.mention_hashtags.add(mention_hashtag) content = format_hashtags(content, hashtags) @@ -140,7 +142,7 @@ class CreateStatus(View): def format_mentions(content, mentions): """Detect @mentions and make them links""" - for (mention_text, mention_user) in mentions.items(): + for mention_text, mention_user in mentions.items(): # turn the mention into a link content = re.sub( rf"(? Date: Sat, 2 Mar 2024 11:34:20 +0100 Subject: [PATCH 04/26] BookDataModel.merge_into: return and log absorbed fields --- .../management/commands/deduplicate_book_data.py | 3 ++- bookwyrm/management/merge_command.py | 4 +++- bookwyrm/models/book.py | 12 ++++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index d2f4ef936..74475a00b 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -30,7 +30,8 @@ def dedupe_model(model): print(f"merging into {canonical.remote_id} based on {field.name} {value}:") for obj in objs[1:]: print(f"- {obj.remote_id}") - obj.merge_into(canonical) + absorbed_fields = obj.merge_into(canonical) + print(f" absorbed fields: {absorbed_fields}") class Command(BaseCommand): diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index 2f3f90c86..0c464600a 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -25,4 +25,6 @@ class MergeCommand(BaseCommand): print("other book doesn’t exist!") return - other.merge_into(canonical) + absorbed_fields = other.merge_into(canonical) + print(f"{other.remote_id} has been merged into {canonical.remote_id}") + print(f"absorbed fields: {absorbed_fields}") diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index c7235a3f5..d7193cbbe 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -2,7 +2,7 @@ from itertools import chain import re -from typing import Any +from typing import Any, Dict from typing_extensions import Self from django.contrib.postgres.search import SearchVectorField @@ -110,12 +110,12 @@ class BookDataModel(ObjectMixin, BookWyrmModel): """only send book data updates to other bookwyrm instances""" super().broadcast(activity, sender, software=software, **kwargs) - def merge_into(self, canonical: Self) -> None: + def merge_into(self, canonical: Self) -> Dict[str, Any]: """merge this entity into another entity""" if canonical.id == self.id: raise ValueError(f"Cannot merge {self} into itself") - canonical.absorb_data_from(self) + absorbed_fields = canonical.absorb_data_from(self) canonical.save() self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical) @@ -147,9 +147,11 @@ class BookDataModel(ObjectMixin, BookWyrmModel): getattr(related_obj, related_field).remove(self) self.delete() + return absorbed_fields - def absorb_data_from(self, other: Self) -> None: + def absorb_data_from(self, other: Self) -> Dict[str, Any]: """fill empty fields with values from another entity""" + absorbed_fields = {} for data_field in self._meta.get_fields(): if not hasattr(data_field, "activitypub_field"): continue @@ -158,6 +160,8 @@ class BookDataModel(ObjectMixin, BookWyrmModel): continue if not getattr(self, data_field.name): setattr(self, data_field.name, data_value) + absorbed_fields[data_field.name] = data_value + return absorbed_fields class MergedBookDataModel(models.Model): From 7fb079cb43ed7260efd796ba65216790814617f4 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Tue, 5 Mar 2024 15:25:35 +0100 Subject: [PATCH 05/26] PartialDate: fix __eq__ method --- bookwyrm/utils/partial_date.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bookwyrm/utils/partial_date.py b/bookwyrm/utils/partial_date.py index 40b89c838..4c9391476 100644 --- a/bookwyrm/utils/partial_date.py +++ b/bookwyrm/utils/partial_date.py @@ -67,6 +67,14 @@ class PartialDate(datetime): # current_timezone and default_timezone. return cls.from_datetime(datetime(year, month, day, tzinfo=_westmost_tz)) + def __eq__(self, other: object) -> bool: + if not isinstance(other, PartialDate): + return NotImplemented + return self.partial_isoformat() == other.partial_isoformat() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} object: {self.partial_isoformat()}>" + class MonthParts(PartialDate): """a date bound into month precision""" From 6f191acb27a23e330cdadf829efebc61c3809477 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Tue, 5 Mar 2024 15:26:12 +0100 Subject: [PATCH 06/26] BookDataModel: fix absorbing data from array and partial date fields --- bookwyrm/models/book.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index d7193cbbe..607426189 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -155,12 +155,27 @@ class BookDataModel(ObjectMixin, BookWyrmModel): for data_field in self._meta.get_fields(): if not hasattr(data_field, "activitypub_field"): continue - data_value = getattr(other, data_field.name) - if not data_value: + canonical_value = getattr(self, data_field.name) + other_value = getattr(other, data_field.name) + if not other_value: continue - if not getattr(self, data_field.name): - setattr(self, data_field.name, data_value) - absorbed_fields[data_field.name] = data_value + if isinstance(data_field, fields.ArrayField): + if new_values := list(set(other_value) - set(canonical_value)): + # append at the end (in no particular order) + setattr(self, data_field.name, canonical_value + new_values) + absorbed_fields[data_field.name] = new_values + elif isinstance(data_field, fields.PartialDateField): + if ( + (not canonical_value) + or (other_value.has_day and not canonical_value.has_day) + or (other_value.has_month and not canonical_value.has_month) + ): + setattr(self, data_field.name, other_value) + absorbed_fields[data_field.name] = other_value + else: + if not canonical_value: + setattr(self, data_field.name, other_value) + absorbed_fields[data_field.name] = other_value return absorbed_fields From fb82c7a579752e68c098e31307aad22e5e07ce4c Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Tue, 5 Mar 2024 15:26:23 +0100 Subject: [PATCH 07/26] Add test for merging authors --- bookwyrm/tests/test_merge.py | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 bookwyrm/tests/test_merge.py diff --git a/bookwyrm/tests/test_merge.py b/bookwyrm/tests/test_merge.py new file mode 100644 index 000000000..933751832 --- /dev/null +++ b/bookwyrm/tests/test_merge.py @@ -0,0 +1,97 @@ +"""test merging Authors, Works and Editions""" + +from django.test import TestCase +from django.test.client import Client + +from bookwyrm import models + + +class MergeBookDataModel(TestCase): + """test merging of subclasses of BookDataModel""" + + @classmethod + def setUpTestData(cls): # pylint: disable=invalid-name + """shared data""" + models.SiteSettings.objects.create() + + cls.jrr_tolkien = models.Author.objects.create( + name="J.R.R. Tolkien", + aliases=["JRR Tolkien", "Tolkien"], + bio="This guy wrote about hobbits and stuff.", + openlibrary_key="OL26320A", + isni="0000000121441970", + ) + cls.jrr_tolkien_2 = models.Author.objects.create( + name="J.R.R. Tolkien", + aliases=["JRR Tolkien", "John Ronald Reuel Tolkien"], + openlibrary_key="OL26320A", + isni="wrong", + wikidata="Q892", + ) + cls.jrr_tolkien_2_id = cls.jrr_tolkien_2.id + + # perform merges + cls.jrr_tolkien_absorbed_fields = cls.jrr_tolkien_2.merge_into(cls.jrr_tolkien) + + def test_merged_author(self): + """verify merged author after merge""" + self.assertEqual(self.jrr_tolkien_2.id, None, msg="duplicate should be deleted") + + def test_canonical_author(self): + """verify canonical author data after merge""" + + self.assertFalse( + self.jrr_tolkien.id is None, msg="canonical should not be deleted" + ) + + # identical in canonical and duplicate; should be unchanged + self.assertEqual(self.jrr_tolkien.name, "J.R.R. Tolkien") + self.assertEqual(self.jrr_tolkien.openlibrary_key, "OL26320A") + + # present in canonical and absent in duplicate; should be unchanged + self.assertEqual( + self.jrr_tolkien.bio, "This guy wrote about hobbits and stuff." + ) + + # absent in canonical and present in duplicate; should be absorbed + self.assertEqual(self.jrr_tolkien.wikidata, "Q892") + + # scalar value that is different in canonical and duplicate; should be unchanged + self.assertEqual(self.jrr_tolkien.isni, "0000000121441970") + + # set value with both matching and non-matching elements; should be the + # union of canonical and duplicate + self.assertEqual( + self.jrr_tolkien.aliases, + [ + "JRR Tolkien", + "Tolkien", + "John Ronald Reuel Tolkien", + ], + ) + + def test_merged_author_redirect(self): + """a web request for a merged author should redirect to the canonical author""" + client = Client() + response = client.get( + f"/author/{self.jrr_tolkien_2_id}/s/jrr-tolkien", follow=True + ) + self.assertEqual(response.redirect_chain, [(self.jrr_tolkien.local_path, 301)]) + + def test_merged_author_activitypub(self): + """an activitypub request for a merged author should return the data for + the canonical author (including the canonical id)""" + client = Client(HTTP_ACCEPT="application/json") + response = client.get(f"/author/{self.jrr_tolkien_2_id}") + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json(), self.jrr_tolkien.to_activity()) + + def test_absorbed_fields(self): + """reported absorbed_fields should be accurate for --dry_run""" + self.assertEqual( + self.jrr_tolkien_absorbed_fields, + { + "aliases": ["John Ronald Reuel Tolkien"], + "wikidata": "Q892", + }, + ) From 4a690e675ae4652b81932eb089949e6d2f13f3ae Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Tue, 5 Mar 2024 17:12:51 +0100 Subject: [PATCH 08/26] BookDataModel: add dry_run argument to merge_into --- .../commands/deduplicate_book_data.py | 23 ++++++++++++++----- bookwyrm/management/merge_command.py | 11 +++++++-- bookwyrm/models/book.py | 19 ++++++++++----- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index 74475a00b..c2d897ce3 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -6,7 +6,7 @@ from django.db.models import Count from bookwyrm import models -def dedupe_model(model): +def dedupe_model(model, dry_run=False): """combine duplicate editions and update related models""" print(f"deduplicating {model.__name__}:") fields = model._meta.get_fields() @@ -27,10 +27,13 @@ def dedupe_model(model): print("----------") objs = model.objects.filter(**{field.name: value}).order_by("id") canonical = objs.first() - print(f"merging into {canonical.remote_id} based on {field.name} {value}:") + action = "would merge" if dry_run else "merging" + print( + f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:" + ) for obj in objs[1:]: print(f"- {obj.remote_id}") - absorbed_fields = obj.merge_into(canonical) + absorbed_fields = obj.merge_into(canonical, dry_run=dry_run) print(f" absorbed fields: {absorbed_fields}") @@ -39,9 +42,17 @@ class Command(BaseCommand): help = "merges duplicate book data" + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) + # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): """run deduplications""" - dedupe_model(models.Edition) - dedupe_model(models.Work) - dedupe_model(models.Author) + dedupe_model(models.Edition, dry_run=options["dry_run"]) + dedupe_model(models.Work, dry_run=options["dry_run"]) + dedupe_model(models.Author, dry_run=options["dry_run"]) diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index 0c464600a..66e60814a 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -8,6 +8,11 @@ class MergeCommand(BaseCommand): """add the arguments for this command""" parser.add_argument("--canonical", type=int, required=True) parser.add_argument("--other", type=int, required=True) + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): @@ -25,6 +30,8 @@ class MergeCommand(BaseCommand): print("other book doesn’t exist!") return - absorbed_fields = other.merge_into(canonical) - print(f"{other.remote_id} has been merged into {canonical.remote_id}") + absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"]) + + action = "would be" if options["dry_run"] else "has been" + print(f"{other.remote_id} {action} merged into {canonical.remote_id}") print(f"absorbed fields: {absorbed_fields}") diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 607426189..5e46a3245 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -110,12 +110,16 @@ class BookDataModel(ObjectMixin, BookWyrmModel): """only send book data updates to other bookwyrm instances""" super().broadcast(activity, sender, software=software, **kwargs) - def merge_into(self, canonical: Self) -> Dict[str, Any]: + def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]: """merge this entity into another entity""" if canonical.id == self.id: raise ValueError(f"Cannot merge {self} into itself") - absorbed_fields = canonical.absorb_data_from(self) + absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run) + + if dry_run: + return absorbed_fields + canonical.save() self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical) @@ -149,7 +153,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel): self.delete() return absorbed_fields - def absorb_data_from(self, other: Self) -> Dict[str, Any]: + def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]: """fill empty fields with values from another entity""" absorbed_fields = {} for data_field in self._meta.get_fields(): @@ -162,7 +166,8 @@ class BookDataModel(ObjectMixin, BookWyrmModel): if isinstance(data_field, fields.ArrayField): if new_values := list(set(other_value) - set(canonical_value)): # append at the end (in no particular order) - setattr(self, data_field.name, canonical_value + new_values) + if not dry_run: + setattr(self, data_field.name, canonical_value + new_values) absorbed_fields[data_field.name] = new_values elif isinstance(data_field, fields.PartialDateField): if ( @@ -170,11 +175,13 @@ class BookDataModel(ObjectMixin, BookWyrmModel): or (other_value.has_day and not canonical_value.has_day) or (other_value.has_month and not canonical_value.has_month) ): - setattr(self, data_field.name, other_value) + if not dry_run: + setattr(self, data_field.name, other_value) absorbed_fields[data_field.name] = other_value else: if not canonical_value: - setattr(self, data_field.name, other_value) + if not dry_run: + setattr(self, data_field.name, other_value) absorbed_fields[data_field.name] = other_value return absorbed_fields From 2272e7a326c751f55d4c37150fee8b346218e501 Mon Sep 17 00:00:00 2001 From: Anthony Date: Fri, 29 Mar 2024 12:07:52 +0100 Subject: [PATCH 09/26] flower 2.0.1 fixes a few link bugs (particularly for favicon) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6b3d838bf..81975fc4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ django-sass-processor==1.2.2 django-storages==1.13.2 django-storages[azure] environs==9.5.0 -flower==2.0.0 +flower==2.0.1 grpcio==1.57.0 # Not a direct dependency, pinned to get a security fix libsass==0.22.0 Markdown==3.4.1 From 75bc4f8cb0d62acbdafe33d4a56d724da9909393 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Fri, 29 Mar 2024 15:04:38 +0100 Subject: [PATCH 10/26] Make nginx config safer Instead of allowing all image files anywhere, and disallowing non-image file under /images/, only allow image files under /images/ and don't match non-image files elsewhere. They get proxied to web instead and result in a 404 there. For example, the old config allowed /exports/foo.jpg to be served, while the new config does not. --- nginx/development | 19 +++++++++---------- nginx/production | 21 ++++++++++----------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/nginx/development b/nginx/development index 2c3a1d02f..f7443968c 100644 --- a/nginx/development +++ b/nginx/development @@ -64,7 +64,7 @@ server { # directly serve static files from the # bookwyrm filesystem using sendfile. # make the logs quieter by not reporting these requests - location ~ ^/static/ { + location /static/ { root /app; try_files $uri =404; add_header X-Cache-Status STATIC; @@ -72,15 +72,14 @@ server { } # same with image files not in static folder - location ~ \.(bmp|ico|jpg|jpeg|png|svg|tif|tiff|webp)$ { - root /app; - try_files $uri =404; - add_header X-Cache-Status STATIC; - access_log off; - } - - # block access to any non-image files from images - location ~ ^/images/ { + location /images/ { + location ~ \.(bmp|ico|jpg|jpeg|png|svg|tif|tiff|webp)$ { + root /app; + try_files $uri =404; + add_header X-Cache-Status STATIC; + access_log off; + } + # block access to any non-image files from images return 403; } diff --git a/nginx/production b/nginx/production index 841ed8afa..a5e910b4b 100644 --- a/nginx/production +++ b/nginx/production @@ -96,23 +96,22 @@ server { # # directly serve static files from the # # bookwyrm filesystem using sendfile. # # make the logs quieter by not reporting these requests -# location ~ ^/static/ { +# location /static/ { # root /app; # try_files $uri =404; # add_header X-Cache-Status STATIC; # access_log off; # } - +# # # same with image files not in static folder -# location ~ \.(bmp|ico|jpg|jpeg|png|svg|tif|tiff|webp)$ { -# root /app; -# try_files $uri =404; -# add_header X-Cache-Status STATIC; -# access_log off; -# } - -# # block access to any non-image files from images -# location ~ ^/images/ { +# location /images/ { +# location ~ \.(bmp|ico|jpg|jpeg|png|svg|tif|tiff|webp)$ { +# root /app; +# try_files $uri =404; +# add_header X-Cache-Status STATIC; +# access_log off; +# } +# # block access to any non-image files from images # return 403; # } # From ffee29d8e2d5c8c430520ac666b5083c75b16499 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Fri, 29 Mar 2024 15:45:35 +0100 Subject: [PATCH 11/26] Fix resource leaks Rewrite places where files (or other resources) are opened but not closed to "with" blocks, which automatically call close() at the end of the scope. Also simplify some tests where images need to be saved to a model field: an opened file can be passed directly to FileField.save(). --- bookwyrm/preview_images.py | 48 +++++++++--------- bookwyrm/settings.py | 1 - .../tests/activitypub/test_base_activity.py | 10 ++-- .../tests/importers/test_calibre_import.py | 6 ++- .../tests/importers/test_goodreads_import.py | 6 ++- bookwyrm/tests/importers/test_importer.py | 6 ++- .../importers/test_librarything_import.py | 6 ++- .../importers/test_openlibrary_import.py | 6 ++- .../tests/importers/test_storygraph_import.py | 6 ++- bookwyrm/tests/models/test_book_model.py | 11 ++--- bookwyrm/tests/models/test_fields.py | 49 +++++++------------ bookwyrm/tests/models/test_status_model.py | 15 +++--- bookwyrm/tests/test_preview_images.py | 17 ++++--- bookwyrm/tests/views/admin/test_federation.py | 8 +-- bookwyrm/tests/views/books/test_book.py | 30 +++++------- bookwyrm/tests/views/imports/test_import.py | 14 +++--- .../tests/views/imports/test_user_import.py | 14 +++--- .../tests/views/preferences/test_edit_user.py | 20 ++++---- bookwyrm/tests/views/test_feed.py | 11 ++--- 19 files changed, 135 insertions(+), 149 deletions(-) diff --git a/bookwyrm/preview_images.py b/bookwyrm/preview_images.py index 995f25bfd..a213490ab 100644 --- a/bookwyrm/preview_images.py +++ b/bookwyrm/preview_images.py @@ -175,11 +175,13 @@ def generate_instance_layer(content_width): site = models.SiteSettings.objects.get() if site.logo_small: - logo_img = Image.open(site.logo_small) + with Image.open(site.logo_small) as logo_img: + logo_img.load() else: try: static_path = os.path.join(settings.STATIC_ROOT, "images/logo-small.png") - logo_img = Image.open(static_path) + with Image.open(static_path) as logo_img: + logo_img.load() except FileNotFoundError: logo_img = None @@ -211,18 +213,9 @@ def generate_instance_layer(content_width): def generate_rating_layer(rating, content_width): """Places components for rating preview""" - try: - icon_star_full = Image.open( - os.path.join(settings.STATIC_ROOT, "images/icons/star-full.png") - ) - icon_star_empty = Image.open( - os.path.join(settings.STATIC_ROOT, "images/icons/star-empty.png") - ) - icon_star_half = Image.open( - os.path.join(settings.STATIC_ROOT, "images/icons/star-half.png") - ) - except FileNotFoundError: - return None + path_star_full = os.path.join(settings.STATIC_ROOT, "images/icons/star-full.png") + path_star_empty = os.path.join(settings.STATIC_ROOT, "images/icons/star-empty.png") + path_star_half = os.path.join(settings.STATIC_ROOT, "images/icons/star-half.png") icon_size = 64 icon_margin = 10 @@ -237,17 +230,23 @@ def generate_rating_layer(rating, content_width): position_x = 0 - for _ in range(math.floor(rating)): - rating_layer_mask.alpha_composite(icon_star_full, (position_x, 0)) - position_x = position_x + icon_size + icon_margin + try: + with Image.open(path_star_full) as icon_star_full: + for _ in range(math.floor(rating)): + rating_layer_mask.alpha_composite(icon_star_full, (position_x, 0)) + position_x = position_x + icon_size + icon_margin - if math.floor(rating) != math.ceil(rating): - rating_layer_mask.alpha_composite(icon_star_half, (position_x, 0)) - position_x = position_x + icon_size + icon_margin + if math.floor(rating) != math.ceil(rating): + with Image.open(path_star_half) as icon_star_half: + rating_layer_mask.alpha_composite(icon_star_half, (position_x, 0)) + position_x = position_x + icon_size + icon_margin - for _ in range(5 - math.ceil(rating)): - rating_layer_mask.alpha_composite(icon_star_empty, (position_x, 0)) - position_x = position_x + icon_size + icon_margin + with Image.open(path_star_empty) as icon_star_empty: + for _ in range(5 - math.ceil(rating)): + rating_layer_mask.alpha_composite(icon_star_empty, (position_x, 0)) + position_x = position_x + icon_size + icon_margin + except FileNotFoundError: + return None rating_layer_mask = rating_layer_mask.getchannel("A") rating_layer_mask = ImageOps.invert(rating_layer_mask) @@ -290,7 +289,8 @@ def generate_preview_image( texts = texts or {} # Cover try: - inner_img_layer = Image.open(picture) + with Image.open(picture) as inner_img_layer: + inner_img_layer.load() inner_img_layer.thumbnail( (inner_img_width, inner_img_height), Image.Resampling.LANCZOS ) diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 77bec0d8e..28d78a3da 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -19,7 +19,6 @@ DOMAIN = env("DOMAIN") with open("VERSION", encoding="utf-8") as f: version = f.read() version = version.replace("\n", "") -f.close() VERSION = version diff --git a/bookwyrm/tests/activitypub/test_base_activity.py b/bookwyrm/tests/activitypub/test_base_activity.py index b529f6ae5..a545beb3e 100644 --- a/bookwyrm/tests/activitypub/test_base_activity.py +++ b/bookwyrm/tests/activitypub/test_base_activity.py @@ -1,12 +1,10 @@ """ tests the base functionality for activitypub dataclasses """ -from io import BytesIO import json import pathlib from unittest.mock import patch from dataclasses import dataclass from django.test import TestCase -from PIL import Image import responses from bookwyrm import activitypub @@ -48,13 +46,11 @@ class BaseActivity(TestCase): # don't try to load the user icon del self.userdata["icon"] - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) - self.image_data = output.getvalue() + with open(image_path, "rb") as image_file: + self.image_data = image_file.read() def test_get_representative_not_existing(self, *_): """test that an instance representative actor is created if it does not exist""" diff --git a/bookwyrm/tests/importers/test_calibre_import.py b/bookwyrm/tests/importers/test_calibre_import.py index dcbe68c64..d7947e65e 100644 --- a/bookwyrm/tests/importers/test_calibre_import.py +++ b/bookwyrm/tests/importers/test_calibre_import.py @@ -9,7 +9,6 @@ from bookwyrm.importers import CalibreImporter from bookwyrm.models.import_job import handle_imported_book -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -20,8 +19,13 @@ class CalibreImport(TestCase): """use a test csv""" self.importer = CalibreImporter() datafile = pathlib.Path(__file__).parent.joinpath("../data/calibre.csv") + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/importers/test_goodreads_import.py b/bookwyrm/tests/importers/test_goodreads_import.py index f0b67cffd..79d58085c 100644 --- a/bookwyrm/tests/importers/test_goodreads_import.py +++ b/bookwyrm/tests/importers/test_goodreads_import.py @@ -16,7 +16,6 @@ def make_date(*args): return datetime.datetime(*args, tzinfo=pytz.UTC) -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -27,8 +26,13 @@ class GoodreadsImport(TestCase): """use a test csv""" self.importer = GoodreadsImporter() datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/importers/test_importer.py b/bookwyrm/tests/importers/test_importer.py index ea10d0f53..39aac22ff 100644 --- a/bookwyrm/tests/importers/test_importer.py +++ b/bookwyrm/tests/importers/test_importer.py @@ -19,7 +19,6 @@ def make_date(*args): return datetime.datetime(*args, tzinfo=pytz.UTC) -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -30,8 +29,13 @@ class GenericImporter(TestCase): """use a test csv""" self.importer = Importer() datafile = pathlib.Path(__file__).parent.joinpath("../data/generic.csv") + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/importers/test_librarything_import.py b/bookwyrm/tests/importers/test_librarything_import.py index 280131115..4d78d242a 100644 --- a/bookwyrm/tests/importers/test_librarything_import.py +++ b/bookwyrm/tests/importers/test_librarything_import.py @@ -16,7 +16,6 @@ def make_date(*args): return datetime.datetime(*args, tzinfo=pytz.UTC) -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -29,8 +28,13 @@ class LibrarythingImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv") # Librarything generates latin encoded exports... + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/importers/test_openlibrary_import.py b/bookwyrm/tests/importers/test_openlibrary_import.py index 05bab0cc2..8f2f120ff 100644 --- a/bookwyrm/tests/importers/test_openlibrary_import.py +++ b/bookwyrm/tests/importers/test_openlibrary_import.py @@ -16,7 +16,6 @@ def make_date(*args): return datetime.datetime(*args, tzinfo=pytz.UTC) -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -27,8 +26,13 @@ class OpenLibraryImport(TestCase): """use a test csv""" self.importer = OpenLibraryImporter() datafile = pathlib.Path(__file__).parent.joinpath("../data/openlibrary.csv") + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/importers/test_storygraph_import.py b/bookwyrm/tests/importers/test_storygraph_import.py index eee27010c..3de2b13a0 100644 --- a/bookwyrm/tests/importers/test_storygraph_import.py +++ b/bookwyrm/tests/importers/test_storygraph_import.py @@ -16,7 +16,6 @@ def make_date(*args): return datetime.datetime(*args, tzinfo=pytz.UTC) -# pylint: disable=consider-using-with @patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") @patch("bookwyrm.activitystreams.populate_stream_task.delay") @patch("bookwyrm.activitystreams.add_book_statuses_task.delay") @@ -27,8 +26,13 @@ class StorygraphImport(TestCase): """use a test csv""" self.importer = StorygraphImporter() datafile = pathlib.Path(__file__).parent.joinpath("../data/storygraph.csv") + # pylint: disable-next=consider-using-with self.csv = open(datafile, "r", encoding=self.importer.encoding) + def tearDown(self): + """close test csv""" + self.csv.close() + @classmethod def setUpTestData(cls): """populate database""" diff --git a/bookwyrm/tests/models/test_book_model.py b/bookwyrm/tests/models/test_book_model.py index c40c94294..5b2b71ba9 100644 --- a/bookwyrm/tests/models/test_book_model.py +++ b/bookwyrm/tests/models/test_book_model.py @@ -1,12 +1,9 @@ """ testing models """ -from io import BytesIO import pathlib import pytest from dateutil.parser import parse -from PIL import Image -from django.core.files.base import ContentFile from django.test import TestCase from django.utils import timezone @@ -130,15 +127,13 @@ class Book(TestCase): ) def test_thumbnail_fields(self): """Just hit them""" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) book = models.Edition.objects.create(title="hello") - book.cover.save("test.jpg", ContentFile(output.getvalue())) + with open(image_path, "rb") as image_file: + book.cover.save("test.jpg", image_file) self.assertIsNotNone(book.cover_bw_book_xsmall_webp.url) self.assertIsNotNone(book.cover_bw_book_xsmall_jpg.url) diff --git a/bookwyrm/tests/models/test_fields.py b/bookwyrm/tests/models/test_fields.py index cc8c54113..2917c8908 100644 --- a/bookwyrm/tests/models/test_fields.py +++ b/bookwyrm/tests/models/test_fields.py @@ -1,5 +1,4 @@ """ testing models """ -from io import BytesIO from collections import namedtuple from dataclasses import dataclass import datetime @@ -10,7 +9,6 @@ from typing import List from unittest import expectedFailure from unittest.mock import patch -from PIL import Image import responses from django.core.exceptions import ValidationError @@ -420,13 +418,11 @@ class ModelFields(TestCase): user = User.objects.create_user( "mouse", "mouse@mouse.mouse", "mouseword", local=True, localname="mouse" ) - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) - user.avatar.save("test.jpg", ContentFile(output.getvalue())) + with open(image_path, "rb") as image_file: + user.avatar.save("test.jpg", image_file) instance = fields.ImageField() @@ -516,30 +512,25 @@ class ModelFields(TestCase): @responses.activate def test_image_field_set_field_from_activity_no_overwrite_with_cover(self, *_): """update a model instance from an activitypub object""" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) - - another_image_file = pathlib.Path(__file__).parent.joinpath( + another_image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/logo.png" ) - another_image = Image.open(another_image_file) - another_output = BytesIO() - another_image.save(another_output, format=another_image.format) instance = fields.ImageField(activitypub_field="cover", name="cover") - responses.add( - responses.GET, - "http://www.example.com/image.jpg", - body=another_image.tobytes(), - status=200, - ) + with open(another_image_path, "rb") as another_image_file: + responses.add( + responses.GET, + "http://www.example.com/image.jpg", + body=another_image_file.read(), + status=200, + ) book = Edition.objects.create(title="hello") - book.cover.save("test.jpg", ContentFile(output.getvalue())) + with open(image_path, "rb") as image_file: + book.cover.save("test.jpg", image_file) cover_size = book.cover.size self.assertIsNotNone(cover_size) @@ -553,24 +544,22 @@ class ModelFields(TestCase): @responses.activate def test_image_field_set_field_from_activity_with_overwrite_with_cover(self, *_): """update a model instance from an activitypub object""" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) book = Edition.objects.create(title="hello") - book.cover.save("test.jpg", ContentFile(output.getvalue())) + with open(image_path, "rb") as image_file: + book.cover.save("test.jpg", image_file) cover_size = book.cover.size self.assertIsNotNone(cover_size) - another_image_file = pathlib.Path(__file__).parent.joinpath( + another_image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/logo.png" ) instance = fields.ImageField(activitypub_field="cover", name="cover") - with open(another_image_file, "rb") as another_image: + with open(another_image_path, "rb") as another_image: responses.add( responses.GET, "http://www.example.com/image.jpg", diff --git a/bookwyrm/tests/models/test_status_model.py b/bookwyrm/tests/models/test_status_model.py index bd2853595..e97febbfa 100644 --- a/bookwyrm/tests/models/test_status_model.py +++ b/bookwyrm/tests/models/test_status_model.py @@ -1,16 +1,13 @@ """ testing models """ from unittest.mock import patch -from io import BytesIO import pathlib import re from django.http import Http404 -from django.core.files.base import ContentFile from django.db import IntegrityError from django.contrib.auth.models import AnonymousUser from django.test import TestCase from django.utils import timezone -from PIL import Image import responses from bookwyrm import activitypub, models, settings @@ -51,14 +48,14 @@ class Status(TestCase): """individual test setup""" self.anonymous_user = AnonymousUser self.anonymous_user.is_authenticated = False - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - with patch("bookwyrm.models.Status.broadcast"): - image.save(output, format=image.format) - self.book.cover.save("test.jpg", ContentFile(output.getvalue())) + with ( + patch("bookwyrm.models.Status.broadcast"), + open(image_path, "rb") as image_file, + ): + self.book.cover.save("test.jpg", image_file) def test_status_generated_fields(self, *_): """setting remote id""" diff --git a/bookwyrm/tests/test_preview_images.py b/bookwyrm/tests/test_preview_images.py index 4f711b38b..12fb56d07 100644 --- a/bookwyrm/tests/test_preview_images.py +++ b/bookwyrm/tests/test_preview_images.py @@ -21,20 +21,20 @@ from bookwyrm.preview_images import ( # pylint: disable=unused-argument # pylint: disable=missing-function-docstring -# pylint: disable=consider-using-with class PreviewImages(TestCase): """every response to a get request, html or json""" def setUp(self): """we need basic test data and mocks""" self.factory = RequestFactory() - avatar_file = pathlib.Path(__file__).parent.joinpath( + avatar_path = pathlib.Path(__file__).parent.joinpath( "../static/images/no_cover.jpg" ) with ( patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch("bookwyrm.activitystreams.populate_stream_task.delay"), patch("bookwyrm.lists_stream.populate_lists_task.delay"), + open(avatar_path, "rb") as avatar_file, ): self.local_user = models.User.objects.create_user( "possum@local.com", @@ -43,8 +43,8 @@ class PreviewImages(TestCase): local=True, localname="possum", avatar=SimpleUploadedFile( - avatar_file, - open(avatar_file, "rb").read(), + avatar_path, + avatar_file.read(), content_type="image/jpeg", ), ) @@ -68,6 +68,7 @@ class PreviewImages(TestCase): patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch("bookwyrm.activitystreams.populate_stream_task.delay"), patch("bookwyrm.lists_stream.populate_lists_task.delay"), + open(avatar_path, "rb") as avatar_file, ): self.remote_user_with_preview = models.User.objects.create_user( "badger@your.domain.here", @@ -78,8 +79,8 @@ class PreviewImages(TestCase): inbox="https://example.com/users/badger/inbox", outbox="https://example.com/users/badger/outbox", avatar=SimpleUploadedFile( - avatar_file, - open(avatar_file, "rb").read(), + avatar_path, + avatar_file.read(), content_type="image/jpeg", ), ) @@ -96,7 +97,7 @@ class PreviewImages(TestCase): settings.ENABLE_PREVIEW_IMAGES = True def test_generate_preview_image(self, *args, **kwargs): - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../static/images/no_cover.jpg" ) @@ -105,7 +106,7 @@ class PreviewImages(TestCase): "text_three": "@possum@local.com", } - result = generate_preview_image(texts=texts, picture=image_file, rating=5) + result = generate_preview_image(texts=texts, picture=image_path, rating=5) self.assertIsInstance(result, Image.Image) self.assertEqual( result.size, (settings.PREVIEW_IMG_WIDTH, settings.PREVIEW_IMG_HEIGHT) diff --git a/bookwyrm/tests/views/admin/test_federation.py b/bookwyrm/tests/views/admin/test_federation.py index 1d0012dde..6dcd5535f 100644 --- a/bookwyrm/tests/views/admin/test_federation.py +++ b/bookwyrm/tests/views/admin/test_federation.py @@ -1,5 +1,4 @@ """ test for app action functionality """ -import os import json from unittest.mock import patch @@ -179,7 +178,6 @@ class FederationViews(TestCase): self.assertEqual(server.application_type, "coolsoft") self.assertEqual(server.status, "blocked") - # pylint: disable=consider-using-with def test_import_blocklist(self): """load a json file with a list of servers to block""" server = models.FederatedServer.objects.create(server_name="hi.there.com") @@ -191,14 +189,13 @@ class FederationViews(TestCase): {"instance": "hi.there.com", "url": "https://explanation.url"}, # existing {"a": "b"}, # invalid ] - json.dump(data, open("file.json", "w")) # pylint: disable=unspecified-encoding view = views.ImportServerBlocklist.as_view() request = self.factory.post( "", { "json_file": SimpleUploadedFile( - "file.json", open("file.json", "rb").read() + "file.json", json.dumps(data).encode("utf-8") ) }, ) @@ -214,6 +211,3 @@ class FederationViews(TestCase): created = models.FederatedServer.objects.get(server_name="server.name") self.assertEqual(created.status, "blocked") self.assertEqual(created.notes, "https://explanation.url") - - # remove file.json after test - os.remove("file.json") diff --git a/bookwyrm/tests/views/books/test_book.py b/bookwyrm/tests/views/books/test_book.py index cb66811a1..ee6e7d8b4 100644 --- a/bookwyrm/tests/views/books/test_book.py +++ b/bookwyrm/tests/views/books/test_book.py @@ -1,8 +1,6 @@ """ test for app action functionality """ -from io import BytesIO import pathlib from unittest.mock import patch -from PIL import Image import responses @@ -161,15 +159,15 @@ class BookViews(TestCase): def test_upload_cover_file(self): """add a cover via file upload""" self.assertFalse(self.book.cover) - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../../static/images/default_avi.jpg" ) form = forms.CoverForm(instance=self.book) - # pylint: disable=consider-using-with - form.data["cover"] = SimpleUploadedFile( - image_file, open(image_file, "rb").read(), content_type="image/jpeg" - ) + with open(image_path, "rb") as image_file: + form.data["cover"] = SimpleUploadedFile( + image_path, image_file.read(), content_type="image/jpeg" + ) request = self.factory.post("", form.data) request.user = self.local_user @@ -296,16 +294,14 @@ class BookViews(TestCase): def _setup_cover_url(): """creates cover url mock""" cover_url = "http://example.com" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) - responses.add( - responses.GET, - cover_url, - body=output.getvalue(), - status=200, - ) + with open(image_path, "rb") as image_file: + responses.add( + responses.GET, + cover_url, + body=image_file.read(), + status=200, + ) return cover_url diff --git a/bookwyrm/tests/views/imports/test_import.py b/bookwyrm/tests/views/imports/test_import.py index 658d95a33..f694b7bf5 100644 --- a/bookwyrm/tests/views/imports/test_import.py +++ b/bookwyrm/tests/views/imports/test_import.py @@ -81,13 +81,13 @@ class ImportViews(TestCase): form.data["source"] = "Goodreads" form.data["privacy"] = "public" form.data["include_reviews"] = False - csv_file = pathlib.Path(__file__).parent.joinpath("../../data/goodreads.csv") - form.data["csv_file"] = SimpleUploadedFile( - # pylint: disable=consider-using-with - csv_file, - open(csv_file, "rb").read(), - content_type="text/csv", - ) + csv_path = pathlib.Path(__file__).parent.joinpath("../../data/goodreads.csv") + with open(csv_path, "rb") as csv_file: + form.data["csv_file"] = SimpleUploadedFile( + csv_path, + csv_file.read(), + content_type="text/csv", + ) request = self.factory.post("", form.data) request.user = self.local_user diff --git a/bookwyrm/tests/views/imports/test_user_import.py b/bookwyrm/tests/views/imports/test_user_import.py index 4a676a57f..a8214e74e 100644 --- a/bookwyrm/tests/views/imports/test_user_import.py +++ b/bookwyrm/tests/views/imports/test_user_import.py @@ -47,16 +47,16 @@ class ImportUserViews(TestCase): view = views.UserImport.as_view() form = forms.ImportUserForm() - archive_file = pathlib.Path(__file__).parent.joinpath( + archive_path = pathlib.Path(__file__).parent.joinpath( "../../data/bookwyrm_account_export.tar.gz" ) - form.data["archive_file"] = SimpleUploadedFile( - # pylint: disable=consider-using-with - archive_file, - open(archive_file, "rb").read(), - content_type="application/gzip", - ) + with open(archive_path, "rb") as archive_file: + form.data["archive_file"] = SimpleUploadedFile( + archive_path, + archive_file.read(), + content_type="application/gzip", + ) form.data["include_user_settings"] = "" form.data["include_goals"] = "on" diff --git a/bookwyrm/tests/views/preferences/test_edit_user.py b/bookwyrm/tests/views/preferences/test_edit_user.py index 7872e8f6e..c31c8237e 100644 --- a/bookwyrm/tests/views/preferences/test_edit_user.py +++ b/bookwyrm/tests/views/preferences/test_edit_user.py @@ -96,13 +96,13 @@ class EditUserViews(TestCase): form.data["email"] = "wow@email.com" form.data["default_post_privacy"] = "public" form.data["preferred_timezone"] = "UTC" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../../static/images/no_cover.jpg" ) - # pylint: disable=consider-using-with - form.data["avatar"] = SimpleUploadedFile( - image_file, open(image_file, "rb").read(), content_type="image/jpeg" - ) + with open(image_path, "rb") as image_file: + form.data["avatar"] = SimpleUploadedFile( + image_path, image_file.read(), content_type="image/jpeg" + ) request = self.factory.post("", form.data) request.user = self.local_user @@ -119,12 +119,12 @@ class EditUserViews(TestCase): def test_crop_avatar(self, _): """reduce that image size""" - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../../static/images/no_cover.jpg" ) - image = Image.open(image_file) - result = views.preferences.edit_user.crop_avatar(image) + with Image.open(image_path) as image: + result = views.preferences.edit_user.crop_avatar(image) self.assertIsInstance(result, ContentFile) - image_result = Image.open(result) - self.assertEqual(image_result.size, (120, 120)) + with Image.open(result) as image_result: + self.assertEqual(image_result.size, (120, 120)) diff --git a/bookwyrm/tests/views/test_feed.py b/bookwyrm/tests/views/test_feed.py index a57be1023..be4956c64 100644 --- a/bookwyrm/tests/views/test_feed.py +++ b/bookwyrm/tests/views/test_feed.py @@ -1,10 +1,7 @@ """ test for app action functionality """ -from io import BytesIO from unittest.mock import patch import pathlib -from PIL import Image -from django.core.files.base import ContentFile from django.http import Http404 from django.template.response import TemplateResponse from django.test import TestCase @@ -142,12 +139,9 @@ class FeedViews(TestCase): """there are so many views, this just makes sure it LOADS""" view = views.Status.as_view() - image_file = pathlib.Path(__file__).parent.joinpath( + image_path = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): status = models.Review.objects.create( content="hi", @@ -157,7 +151,8 @@ class FeedViews(TestCase): attachment = models.Image.objects.create( status=status, caption="alt text here" ) - attachment.image.save("test.jpg", ContentFile(output.getvalue())) + with open(image_path, "rb") as image_file: + attachment.image.save("test.jpg", image_file) request = self.factory.get("") request.user = self.local_user From fcd0087589d5467eceba4b9fce75e9f52fa9e90d Mon Sep 17 00:00:00 2001 From: Holger Brunn Date: Sat, 30 Mar 2024 01:58:41 +0100 Subject: [PATCH 12/26] [FIX] make sure to get Pillow>=10 compatible pilkit --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 81975fc4d..5bb33ae0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ opentelemetry-instrumentation-django==0.37b0 opentelemetry-instrumentation-psycopg2==0.37b0 opentelemetry-sdk==1.16.0 Pillow==10.2.0 +pilkit>2.0 # dependency of django-imagekit, 2.0 is incompatible with Pillow>=10 protobuf==3.20.* psycopg2==2.9.5 pycryptodome==3.19.1 From f66695193497adc5b8d864b346c8a013ce359732 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Sat, 30 Mar 2024 21:56:41 +0100 Subject: [PATCH 13/26] Update CodeQL workflows to v3 https://github.blog/changelog/2024-01-12-code-scanning-deprecation-of-codeql-action-v2/ --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 51316ef62..014745a52 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +51,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -65,4 +65,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 From 4bbdd0b2d0f4e97e3f9e50aadff437b11276e1a9 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Tue, 2 Apr 2024 21:54:30 +0200 Subject: [PATCH 14/26] Add index on Status.remote_id This field is often used in WHERE-clauses in queries that are very slow on bookwyrm.social. --- ...9_status_bookwyrm_st_remote__06aeba_idx.py | 19 +++++++++++++++++++ bookwyrm/models/status.py | 3 +++ 2 files changed, 22 insertions(+) create mode 100644 bookwyrm/migrations/0199_status_bookwyrm_st_remote__06aeba_idx.py diff --git a/bookwyrm/migrations/0199_status_bookwyrm_st_remote__06aeba_idx.py b/bookwyrm/migrations/0199_status_bookwyrm_st_remote__06aeba_idx.py new file mode 100644 index 000000000..5d2513698 --- /dev/null +++ b/bookwyrm/migrations/0199_status_bookwyrm_st_remote__06aeba_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-04-02 19:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0198_book_search_vector_author_aliases"), + ] + + operations = [ + migrations.AddIndex( + model_name="status", + index=models.Index( + fields=["remote_id"], name="bookwyrm_st_remote__06aeba_idx" + ), + ), + ] diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index f6235dab6..a9c678cb5 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -80,6 +80,9 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): """default sorting""" ordering = ("-published_date",) + indexes = [ + models.Index(fields=["remote_id"]), + ] def save(self, *args, **kwargs): """save and notify""" From ea0ade955b83c9e0f3d29b0948ffa5699611cc63 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:45:11 +0000 Subject: [PATCH 15/26] Bump pillow from 10.2.0 to 10.3.0 Bumps [pillow](https://github.com/python-pillow/Pillow) from 10.2.0 to 10.3.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/10.2.0...10.3.0) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 81975fc4d..218035926 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,7 +26,7 @@ opentelemetry-instrumentation-celery==0.37b0 opentelemetry-instrumentation-django==0.37b0 opentelemetry-instrumentation-psycopg2==0.37b0 opentelemetry-sdk==1.16.0 -Pillow==10.2.0 +Pillow==10.3.0 protobuf==3.20.* psycopg2==2.9.5 pycryptodome==3.19.1 From 5cfe7eca6f3c177cc436812e45dfb03288b5fbc2 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 3 Apr 2024 21:06:24 +0200 Subject: [PATCH 16/26] Add index for finding all statuses in a thread --- ...0_status_bookwyrm_st_thread__cf064f_idx.py | 19 +++++++++++++++++++ bookwyrm/models/status.py | 1 + 2 files changed, 20 insertions(+) create mode 100644 bookwyrm/migrations/0200_status_bookwyrm_st_thread__cf064f_idx.py diff --git a/bookwyrm/migrations/0200_status_bookwyrm_st_thread__cf064f_idx.py b/bookwyrm/migrations/0200_status_bookwyrm_st_thread__cf064f_idx.py new file mode 100644 index 000000000..daca654c7 --- /dev/null +++ b/bookwyrm/migrations/0200_status_bookwyrm_st_thread__cf064f_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-04-03 19:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0199_status_bookwyrm_st_remote__06aeba_idx"), + ] + + operations = [ + migrations.AddIndex( + model_name="status", + index=models.Index( + fields=["thread_id"], name="bookwyrm_st_thread__cf064f_idx" + ), + ), + ] diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index a9c678cb5..546a8d6c8 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -82,6 +82,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): ordering = ("-published_date",) indexes = [ models.Index(fields=["remote_id"]), + models.Index(fields=["thread_id"]), ] def save(self, *args, **kwargs): From 4d5a30d9533cec6f137cf25b85fad9415e122cfe Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 3 Apr 2024 21:11:27 +0200 Subject: [PATCH 17/26] Add index for looking up KeyPair by remote id --- ..._keypair_bookwyrm_ke_remote__472927_idx.py | 19 +++++++++++++++++++ bookwyrm/models/user.py | 7 +++++++ 2 files changed, 26 insertions(+) create mode 100644 bookwyrm/migrations/0201_keypair_bookwyrm_ke_remote__472927_idx.py diff --git a/bookwyrm/migrations/0201_keypair_bookwyrm_ke_remote__472927_idx.py b/bookwyrm/migrations/0201_keypair_bookwyrm_ke_remote__472927_idx.py new file mode 100644 index 000000000..e3d27a11b --- /dev/null +++ b/bookwyrm/migrations/0201_keypair_bookwyrm_ke_remote__472927_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-04-03 19:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0200_status_bookwyrm_st_thread__cf064f_idx"), + ] + + operations = [ + migrations.AddIndex( + model_name="keypair", + index=models.Index( + fields=["remote_id"], name="bookwyrm_ke_remote__472927_idx" + ), + ), + ] diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index 89fd39b73..e24143e8e 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -509,6 +509,13 @@ class KeyPair(ActivitypubMixin, BookWyrmModel): activity_serializer = activitypub.PublicKey serialize_reverse_fields = [("owner", "owner", "id")] + class Meta: + """indexes""" + + indexes = [ + models.Index(fields=["remote_id"]), + ] + def get_remote_id(self): # self.owner is set by the OneToOneField on User return f"{self.owner.remote_id}/#main-key" From 0501ce39cdd22a57b484b48f987dc342bbedf9e6 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 3 Apr 2024 21:15:24 +0200 Subject: [PATCH 18/26] Add index for looking up User by username --- ...202_user_bookwyrm_us_usernam_b2546d_idx.py | 19 +++++++++++++++++++ bookwyrm/models/user.py | 7 +++++++ 2 files changed, 26 insertions(+) create mode 100644 bookwyrm/migrations/0202_user_bookwyrm_us_usernam_b2546d_idx.py diff --git a/bookwyrm/migrations/0202_user_bookwyrm_us_usernam_b2546d_idx.py b/bookwyrm/migrations/0202_user_bookwyrm_us_usernam_b2546d_idx.py new file mode 100644 index 000000000..d8666fe3f --- /dev/null +++ b/bookwyrm/migrations/0202_user_bookwyrm_us_usernam_b2546d_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-04-03 19:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0201_keypair_bookwyrm_ke_remote__472927_idx"), + ] + + operations = [ + migrations.AddIndex( + model_name="user", + index=models.Index( + fields=["username"], name="bookwyrm_us_usernam_b2546d_idx" + ), + ), + ] diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index e24143e8e..8c1b79e45 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -198,6 +198,13 @@ class User(OrderedCollectionPageMixin, AbstractUser): hotp_secret = models.CharField(max_length=32, default=None, blank=True, null=True) hotp_count = models.IntegerField(default=0, blank=True, null=True) + class Meta(AbstractUser.Meta): + """indexes""" + + indexes = [ + models.Index(fields=["username"]), + ] + @property def active_follower_requests(self): """Follow requests from active users""" From 464a0298c6de25780020201ee2f397939d443cd3 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 3 Apr 2024 21:23:24 +0200 Subject: [PATCH 19/26] Add index for finding active (and local) users --- ...203_user_bookwyrm_us_is_acti_972dc4_idx.py | 19 +++++++++++++++++++ bookwyrm/models/user.py | 1 + 2 files changed, 20 insertions(+) create mode 100644 bookwyrm/migrations/0203_user_bookwyrm_us_is_acti_972dc4_idx.py diff --git a/bookwyrm/migrations/0203_user_bookwyrm_us_is_acti_972dc4_idx.py b/bookwyrm/migrations/0203_user_bookwyrm_us_is_acti_972dc4_idx.py new file mode 100644 index 000000000..b07f1c8a9 --- /dev/null +++ b/bookwyrm/migrations/0203_user_bookwyrm_us_is_acti_972dc4_idx.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-04-03 19:22 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0202_user_bookwyrm_us_usernam_b2546d_idx"), + ] + + operations = [ + migrations.AddIndex( + model_name="user", + index=models.Index( + fields=["is_active", "local"], name="bookwyrm_us_is_acti_972dc4_idx" + ), + ), + ] diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index 8c1b79e45..0ec2c6529 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -203,6 +203,7 @@ class User(OrderedCollectionPageMixin, AbstractUser): indexes = [ models.Index(fields=["username"]), + models.Index(fields=["is_active", "local"]), ] @property From 321397a349cc9f22e9b976e6edb249896460b78e Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 3 Apr 2024 21:27:40 +0200 Subject: [PATCH 20/26] Specify which column DISTINCT should apply to --- bookwyrm/activitystreams.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index 42f99e209..5030005d7 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -139,14 +139,14 @@ class ActivityStream(RedisStore): | ( Q(following=status.user) & Q(following=status.reply_parent.user) ) # if the user is following both authors - ).distinct() + ) # only visible to the poster's followers and tagged users elif status.privacy == "followers": audience = audience.filter( Q(following=status.user) # if the user is following the author ) - return audience.distinct() + return audience.distinct("id") @tracer.start_as_current_span("ActivityStream.get_audience") def get_audience(self, status): From 439cb3ccaaddec6fa110bb1dc81561fd1710bac4 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Thu, 4 Apr 2024 13:15:31 +0200 Subject: [PATCH 21/26] Remove unnecessary conversions between list and set --- bookwyrm/activitystreams.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index 5030005d7..b8f4ed985 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -156,7 +156,7 @@ class ActivityStream(RedisStore): status_author = models.User.objects.filter( is_active=True, local=True, id=status.user.id ).values_list("id", flat=True) - return list(set(list(audience) + list(status_author))) + return list(set(audience) | set(status_author)) def get_stores_for_users(self, user_ids): """convert a list of user ids into redis store ids""" @@ -191,7 +191,7 @@ class HomeStream(ActivityStream): status_author = models.User.objects.filter( is_active=True, local=True, id=status.user.id ).values_list("id", flat=True) - return list(set(list(audience) + list(status_author))) + return list(set(audience) | set(status_author)) def get_statuses_for_user(self, user): return models.Status.privacy_filter( From e1c54b2933bbfab278a2d49a5716911903f4385e Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Thu, 4 Apr 2024 13:47:51 +0200 Subject: [PATCH 22/26] Remove optimizations with adverse effects `if not audience` actually causes the entire query to be evaluated, before .values_list() is called. --- bookwyrm/activitystreams.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index b8f4ed985..0009ac7a3 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -183,8 +183,6 @@ class HomeStream(ActivityStream): def get_audience(self, status): trace.get_current_span().set_attribute("stream_id", self.key) audience = super()._get_audience(status) - if not audience: - return [] # if the user is following the author audience = audience.filter(following=status.user).values_list("id", flat=True) # if the user is the post's author @@ -239,9 +237,7 @@ class BooksStream(ActivityStream): ) audience = super()._get_audience(status) - if not audience: - return models.User.objects.none() - return audience.filter(shelfbook__book__parent_work=work).distinct() + return audience.filter(shelfbook__book__parent_work=work) def get_audience(self, status): # only show public statuses on the books feed, From af0bd90c15e6743cd267372d6533041c431282a7 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 9 Apr 2024 05:57:27 -0500 Subject: [PATCH 23/26] Adds merge migration --- bookwyrm/migrations/0204_merge_20240409_1042.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 bookwyrm/migrations/0204_merge_20240409_1042.py diff --git a/bookwyrm/migrations/0204_merge_20240409_1042.py b/bookwyrm/migrations/0204_merge_20240409_1042.py new file mode 100644 index 000000000..ba7513341 --- /dev/null +++ b/bookwyrm/migrations/0204_merge_20240409_1042.py @@ -0,0 +1,14 @@ +# Generated by Django 3.2.25 on 2024-04-09 10:42 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0197_mergedauthor_mergedbook'), + ('bookwyrm', '0203_user_bookwyrm_us_is_acti_972dc4_idx'), + ] + + operations = [ + ] From 3ffbb242a4f647f1317bb7892cf73965f3035ab0 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 9 Apr 2024 05:59:01 -0500 Subject: [PATCH 24/26] Black --- bookwyrm/migrations/0204_merge_20240409_1042.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bookwyrm/migrations/0204_merge_20240409_1042.py b/bookwyrm/migrations/0204_merge_20240409_1042.py index ba7513341..5656ac586 100644 --- a/bookwyrm/migrations/0204_merge_20240409_1042.py +++ b/bookwyrm/migrations/0204_merge_20240409_1042.py @@ -6,9 +6,8 @@ from django.db import migrations class Migration(migrations.Migration): dependencies = [ - ('bookwyrm', '0197_mergedauthor_mergedbook'), - ('bookwyrm', '0203_user_bookwyrm_us_is_acti_972dc4_idx'), + ("bookwyrm", "0197_mergedauthor_mergedbook"), + ("bookwyrm", "0203_user_bookwyrm_us_is_acti_972dc4_idx"), ] - operations = [ - ] + operations = [] From 9d9e64399c5018ab47ee53c19951b7b1f37d88d9 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Wed, 10 Apr 2024 21:26:34 +0200 Subject: [PATCH 25/26] Install same version of eslint in CI as in dev-tools --- .github/workflows/lint-frontend.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint-frontend.yaml b/.github/workflows/lint-frontend.yaml index 21f11ebf3..b0322f371 100644 --- a/.github/workflows/lint-frontend.yaml +++ b/.github/workflows/lint-frontend.yaml @@ -22,7 +22,8 @@ jobs: - uses: actions/checkout@v4 - name: Install modules - run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint + # run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint + run: npm install eslint@^8.9.0 # See .stylelintignore for files that are not linted. # - name: Run stylelint From d5a536ae367ae8197dae6c4e227377f09a36abc5 Mon Sep 17 00:00:00 2001 From: Bart Schuurmans Date: Thu, 11 Apr 2024 14:45:13 +0200 Subject: [PATCH 26/26] Change pilkit constraint to the version that does work --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 957a2c4f7..df0ad6e13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ opentelemetry-instrumentation-django==0.37b0 opentelemetry-instrumentation-psycopg2==0.37b0 opentelemetry-sdk==1.16.0 Pillow==10.3.0 -pilkit>2.0 # dependency of django-imagekit, 2.0 is incompatible with Pillow>=10 +pilkit>=3.0 # dependency of django-imagekit, 2.0 is incompatible with Pillow>=10 protobuf==3.20.* psycopg2==2.9.5 pycryptodome==3.19.1