1
0
Fork 1
mirror of https://github.com/bookwyrm-social/bookwyrm.git synced 2025-04-23 18:54:10 +00:00

Merge branch 'main' into import-tombstone

This commit is contained in:
Hugh Rundle 2025-03-26 17:48:27 +11:00 committed by GitHub
commit 4949feec7f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 117 additions and 20 deletions

View file

@ -120,6 +120,7 @@ class ActivityObject:
save: bool = True,
overwrite: bool = True,
allow_external_connections: bool = True,
trigger=None,
) -> Optional[TBookWyrmModel]:
"""convert from an activity to a model instance. Args:
model: the django model that this object is being converted to
@ -133,6 +134,9 @@ class ActivityObject:
only update blank fields if false
allow_external_connections: look up missing data if true,
throw an exception if false and an external connection is needed
trigger: the object that originally triggered this
self.to_model. e.g. if this is a Work being dereferenced from
an incoming Edition
"""
model = model or get_model_from_type(self.type)
@ -223,6 +227,8 @@ class ActivityObject:
related_field_name = model_field.field.name
for item in values:
if trigger and item == trigger.remote_id:
continue
set_related_field.delay(
related_model.__name__,
instance.__class__.__name__,

View file

@ -50,6 +50,7 @@ class Note(ActivityObject):
save=True,
overwrite=True,
allow_external_connections=True,
trigger=None,
):
instance = super().to_model(
model, instance, allow_create, save, overwrite, allow_external_connections

View file

@ -4,11 +4,10 @@ from abc import ABC, abstractmethod
from typing import Optional, TypedDict, Any, Callable, Union, Iterator
from urllib.parse import quote_plus
# pylint: disable-next=deprecated-module
import imghdr # Deprecated in 3.11 for removal in 3.13; no good alternative yet
import logging
import re
import asyncio
from PIL import Image, UnidentifiedImageError
import requests
from requests.exceptions import RequestException
import aiohttp
@ -370,13 +369,14 @@ def get_image(
return None, None
image_content = ContentFile(resp.content)
extension = imghdr.what(None, image_content.read())
if not extension:
try:
with Image.open(image_content) as im:
extension = str(im.format).lower()
return image_content, extension
except UnidentifiedImageError:
logger.info("File requested was not an image: %s", url)
return None, None
return image_content, extension
class Mapping:
"""associate a local database field with a field in an external dataset"""

View file

@ -1,4 +1,5 @@
""" handle reading a csv from goodreads """
from typing import Optional
from . import Importer
@ -7,3 +8,10 @@ class GoodreadsImporter(Importer):
For a more complete example of overriding see librarything_import.py"""
service = "Goodreads"
def normalize_row(
    self, entry: dict[str, str], mappings: dict[str, Optional[str]]
) -> dict[str, Optional[str]]:
    """Normalize a row via the parent importer and add a ``goodreads_key``.

    The parent implementation builds the normalized dict; the value it
    stored under ``"id"`` is then duplicated under ``"goodreads_key"``
    before the dict is returned.
    """
    row = super().normalize_row(entry, mappings)
    # expose the row's id under the goodreads-specific key as well
    row.update(goodreads_key=row["id"])
    return row

View file

@ -129,7 +129,20 @@ class ActivitypubMixin:
def broadcast(self, activity, sender, software=None, queue=BROADCAST):
"""send out an activity"""
# if we're posting about ShelfBooks, set a delay to give the base activity
# time to add the book on remote servers first to avoid race conditions
countdown = (
10
if (
isinstance(activity, object)
and not isinstance(activity["object"], str)
and activity["object"].get("type", None) in ["GeneratedNote", "Comment"]
)
else 0
)
broadcast_task.apply_async(
countdown=countdown,
args=(
sender.id,
json.dumps(activity, cls=activitypub.ActivityEncoder),
@ -227,6 +240,7 @@ class ObjectMixin(ActivitypubMixin):
return
try:
# TODO: here is where we might use an ActivityPub extension instead
# do we have a "pure" activitypub version of this for mastodon?
if software != "bookwyrm" and hasattr(self, "pure_content"):
pure_activity = self.to_create_activity(user, pure=True)

View file

@ -86,7 +86,9 @@ class ActivitypubFieldMixin:
raise
value = getattr(data, "actor")
formatted = self.field_from_activity(
value, allow_external_connections=allow_external_connections
value,
allow_external_connections=allow_external_connections,
trigger=instance,
)
if formatted is None or formatted is MISSING or formatted == {}:
return False
@ -128,7 +130,7 @@ class ActivitypubFieldMixin:
return value
# pylint: disable=unused-argument
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
"""formatter to convert activitypub into a model value"""
if value and hasattr(self, "activitypub_wrapper"):
value = value.get(self.activitypub_wrapper)
@ -150,7 +152,9 @@ class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
self.load_remote = load_remote
super().__init__(*args, **kwargs)
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
"""trigger: the object that triggered this deserialization.
For example the Edition for which self is the parent Work"""
if not value:
return None
@ -160,7 +164,7 @@ class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
# only look in the local database
return related_model.find_existing(value.serialize())
# this is an activitypub object, which we can deserialize
return value.to_model(model=related_model)
return value.to_model(model=related_model, trigger=trigger)
try:
# make sure the value looks like a remote id
validate_remote_id(value)
@ -336,7 +340,7 @@ class ManyToManyField(ActivitypubFieldMixin, models.ManyToManyField):
return f"{value.instance.remote_id}/{self.name}"
return [i.remote_id for i in value.all()]
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
if value is None or value is MISSING:
return None
if not isinstance(value, list):
@ -386,7 +390,7 @@ class TagField(ManyToManyField):
)
return tags
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
if not isinstance(value, list):
# GoToSocial DMs and single-user mentions are
# sent as objects, not as an array of objects
@ -481,7 +485,7 @@ class ImageField(ActivitypubFieldMixin, models.ImageField):
return activitypub.Image(url=url, name=alt)
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
image_slug = value
# when it's an inline image (User avatar/icon, Book cover), it's a json
# blob, but when it's an attached image, it's just a url
@ -538,7 +542,7 @@ class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
return None
return value.isoformat()
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
missing_fields = datetime(1970, 1, 1) # "2022-10" => "2022-10-01"
try:
date_value = dateutil.parser.parse(value, default=missing_fields)
@ -556,7 +560,7 @@ class PartialDateField(ActivitypubFieldMixin, PartialDateModel):
def field_to_activity(self, value) -> str:
return value.partial_isoformat() if value else None
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
# pylint: disable=no-else-return
try:
return from_partial_isoformat(value)
@ -584,7 +588,7 @@ class PartialDateField(ActivitypubFieldMixin, PartialDateModel):
class HtmlField(ActivitypubFieldMixin, models.TextField):
"""a text field for storing html"""
def field_from_activity(self, value, allow_external_connections=True):
def field_from_activity(self, value, allow_external_connections=True, trigger=None):
if not value or value == MISSING:
return None
return clean(value)

Binary file not shown.

After

(image error) Size: 6.6 KiB

View file

@ -63,6 +63,7 @@ class GoodreadsImport(TestCase):
self.assertEqual(import_items[0].data["Book Id"], "42036538")
self.assertEqual(import_items[0].normalized_data["isbn_13"], '="9781250313195"')
self.assertEqual(import_items[0].normalized_data["isbn_10"], '="1250313198"')
self.assertEqual(import_items[0].normalized_data["goodreads_key"], "42036538")
self.assertEqual(import_items[1].index, 1)
self.assertEqual(import_items[1].data["Book Id"], "52691223")

View file

@ -1,9 +1,14 @@
""" test searching for books """
import os
import re
from PIL import Image
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.test import TestCase
from bookwyrm.settings import BASE_URL
from bookwyrm.utils import regex
from bookwyrm.utils.images import remove_uploaded_image_exif
from bookwyrm.utils.validate import validate_url_domain
@ -24,3 +29,18 @@ class TestUtils(TestCase):
self.assertIsNone(
validate_url_domain("https://up-to-no-good.tld/bad-actor.exe")
)
def test_remove_uploaded_image_exif(self):
    """Check that EXIF data is removed from image"""
    image_path = "bookwyrm/tests/data/default_avi_exif.jpg"
    with open(image_path, "rb") as image_file:
        # Wrap the fixture the same way an uploaded form file would arrive
        source = InMemoryUploadedFile(
            image_file,
            "cover",
            "default_avi_exif.jpg",
            "image/jpeg",
            os.fstat(image_file.fileno()).st_size,
            None,
        )
        sanitized_file = remove_uploaded_image_exif(source)
        # Use a context manager so the re-opened image is closed once the
        # assertion has run, instead of leaking the handle.
        with Image.open(sanitized_file.open()) as sanitized_image:
            self.assertNotIn("exif", sanitized_image.info)

View file

@ -70,7 +70,9 @@ class ReadingViews(TestCase):
},
)
request.user = self.local_user
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
with patch(
"bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"
) as mock:
views.ReadingStatus.as_view()(request, "start", self.book.id)
self.assertEqual(shelf.books.get(), self.book)
@ -86,6 +88,12 @@ class ReadingViews(TestCase):
self.assertEqual(readthrough.user, self.local_user)
self.assertEqual(readthrough.book, self.book)
# Three broadcast tasks:
# 1. Create Readthrough
# 2. Create post as pure_content (for non-BookWyrm)
# 3. Create post with book attached - this should only happen once!
self.assertEqual(len(mock.mock_calls), 3)
def test_start_reading_with_comment(self, *_):
"""begin a book"""
shelf = self.local_user.shelf_set.get(identifier=models.Shelf.READING)

27
bookwyrm/utils/images.py Normal file
View file

@ -0,0 +1,27 @@
""" Image utilities """
from io import BytesIO
from PIL import Image
from django.core.files.uploadedfile import InMemoryUploadedFile
def remove_uploaded_image_exif(source: InMemoryUploadedFile) -> InMemoryUploadedFile:
    """Removes EXIF data from provided image and returns a sanitized copy

    Args:
        source: the uploaded image file to sanitize.

    Returns:
        A new InMemoryUploadedFile holding the re-encoded image, carrying
        over the field name, file name, content type and charset of
        ``source``. Its underlying buffer is rewound to the start so it can
        be read immediately.
    """
    buffer = BytesIO()

    with Image.open(source) as image:
        # Drop the EXIF blob from the image metadata before re-encoding
        image.info.pop("exif", None)

        save_options = {"format": image.format}
        if image.format == "JPEG":
            # re-use the source JPEG's quality settings when re-encoding
            save_options["quality"] = "keep"
        image.save(buffer, **save_options)

    # Measure the payload without copying it, then rewind: saving leaves the
    # stream position past the written data, and a consumer that reads
    # without seeking first would otherwise get no bytes.
    size = buffer.getbuffer().nbytes
    buffer.seek(0)

    return InMemoryUploadedFile(
        buffer,
        source.field_name,
        source.name,
        source.content_type,
        size,
        source.charset,
    )

View file

@ -17,6 +17,7 @@ from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager, ConnectorException
from bookwyrm.connectors.abstract_connector import get_image
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.utils.images import remove_uploaded_image_exif
from bookwyrm.views.helpers import (
is_api_request,
maybe_redirect_local_path,
@ -158,7 +159,7 @@ def upload_cover(request, book_id):
if not form.is_valid() or not form.files.get("cover"):
return redirect(book.local_path)
book.cover = form.files["cover"]
book.cover = remove_uploaded_image_exif(form.files["cover"])
book.save()
return redirect(book.local_path)

View file

@ -1,6 +1,7 @@
""" the good stuff! the books! """
from re import sub, findall
from django.contrib.auth.decorators import login_required, permission_required
from django.contrib.postgres.search import SearchRank, SearchVector
from django.db import transaction
@ -12,6 +13,7 @@ from django.views.decorators.http import require_POST
from django.views import View
from bookwyrm import book_search, forms, models
from bookwyrm.utils.images import remove_uploaded_image_exif
# from bookwyrm.activitypub.base_activity import ActivityObject
from bookwyrm.utils.isni import (
@ -71,6 +73,8 @@ class EditBook(View):
image = set_cover_from_url(url)
if image:
book.cover.save(*image, save=False)
elif "cover" in form.files:
book.cover = remove_uploaded_image_exif(form.files["cover"])
book.save()
return redirect(f"/book/{book.id}")
@ -142,6 +146,8 @@ class CreateBook(View):
image = set_cover_from_url(url)
if image:
book.cover.save(*image, save=False)
elif "cover" in form.files:
book.cover = remove_uploaded_image_exif(form.files["cover"])
book.save()
return redirect(f"/book/{book.id}")
@ -311,6 +317,8 @@ class ConfirmEditBook(View):
image = set_cover_from_url(url)
if image:
book.cover.save(*image, save=False)
elif "cover" in form.files:
book.cover = remove_uploaded_image_exif(form.files["cover"])
# we don't tell the world when creating a book
book.save(broadcast=False)

View file

@ -156,8 +156,7 @@ def handle_reading_status(user, shelf, book, privacy):
# it's a non-standard shelf, don't worry about it
return
status = create_generated_note(user, message, mention_books=[book], privacy=privacy)
status.save()
create_generated_note(user, message, mention_books=[book], privacy=privacy)
def load_date_in_user_tz_as_utc(date_str: str, user: models.User) -> datetime: