From 754e24812b63fb6a2d39215f01bee5892ac5948f Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 1 Feb 2022 21:18:25 -0800 Subject: [PATCH 1/3] Check image extensions before saving --- bookwyrm/connectors/abstract_connector.py | 12 +++++++++++- bookwyrm/models/fields.py | 8 ++------ bookwyrm/views/books/books.py | 8 +++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 5ed57df1f..8e998979f 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,7 +1,9 @@ """ functionality outline for a book data connector """ from abc import ABC, abstractmethod +import imghdr import logging +from django.core.files.base import ContentFile from django.db import transaction import requests from requests.exceptions import RequestException @@ -291,9 +293,17 @@ def get_image(url, timeout=10): except RequestException as err: logger.exception(err) return None + if not resp.ok: return None - return resp + + image_content = ContentFile(resp.content) + extension = imghdr.what(None, image_content.read()) + if not extension: + logger.exception("File requested was not an image: %s", url) + return None + + return image_content, extension class Mapping: diff --git a/bookwyrm/models/fields.py b/bookwyrm/models/fields.py index e61f912e5..b506c11ca 100644 --- a/bookwyrm/models/fields.py +++ b/bookwyrm/models/fields.py @@ -1,6 +1,5 @@ """ activitypub-aware django model fields """ from dataclasses import MISSING -import imghdr import re from uuid import uuid4 from urllib.parse import urljoin @@ -9,7 +8,6 @@ import dateutil.parser from dateutil.parser import ParserError from django.contrib.postgres.fields import ArrayField as DjangoArrayField from django.core.exceptions import ValidationError -from django.core.files.base import ContentFile from django.db import models from django.forms import ClearableFileInput, ImageField as DjangoImageField from django.utils import timezone @@ -443,12 +441,10 @@ class ImageField(ActivitypubFieldMixin, models.ImageField): except ValidationError: return None - response = get_image(url) - if not response: + image_content, extension = get_image(url) + if not image_content: return None - image_content = ContentFile(response.content) - extension = imghdr.what(None, image_content.read()) or "" image_name = f"{uuid4()}.{extension}" return [image_name, image_content] diff --git a/bookwyrm/views/books/books.py b/bookwyrm/views/books/books.py index 7de2d0d20..e04230bac 100644 --- a/bookwyrm/views/books/books.py +++ b/bookwyrm/views/books/books.py @@ -2,7 +2,6 @@ from uuid import uuid4 from django.contrib.auth.decorators import login_required, permission_required -from django.core.files.base import ContentFile from django.core.paginator import Paginator from django.db.models import Avg, Q from django.http import Http404 @@ -144,13 +143,12 @@ def upload_cover(request, book_id): def set_cover_from_url(url): """load it from a url""" try: - image_file = get_image(url) + image_content, extension = get_image(url) except: # pylint: disable=bare-except return None - if not image_file: + if not image_content: return None - image_name = str(uuid4()) + "." + url.split(".")[-1] - image_content = ContentFile(image_file.content) + image_name = str(uuid4()) + "." + extension return [image_name, image_content] From 194c69f512d11dd6ff05604395a2b7abaf4ae667 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 2 Feb 2022 07:09:35 -0800 Subject: [PATCH 2/3] Fixes return values of null responses --- bookwyrm/connectors/abstract_connector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 8e998979f..0ab135b87 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -292,16 +292,16 @@ def get_image(url, timeout=10): ) except RequestException as err: logger.exception(err) - return None + return None, None if not resp.ok: - return None + return None, None image_content = ContentFile(resp.content) extension = imghdr.what(None, image_content.read()) if not extension: logger.exception("File requested was not an image: %s", url) - return None + return None, None return image_content, extension From 6fd3ac0cb13ed6ed1fe1c582729c89157401be13 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 2 Feb 2022 09:23:06 -0800 Subject: [PATCH 3/3] Changes how test image data is loaded --- bookwyrm/tests/models/test_fields.py | 73 ++++++++++++++-------------- bw-dev | 1 + 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/bookwyrm/tests/models/test_fields.py b/bookwyrm/tests/models/test_fields.py index 5bb7fecc6..f7386c2e4 100644 --- a/bookwyrm/tests/models/test_fields.py +++ b/bookwyrm/tests/models/test_fields.py @@ -443,18 +443,17 @@ class ModelFields(TestCase): image_file = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) - instance = fields.ImageField() - responses.add( - responses.GET, - "http://www.example.com/image.jpg", - body=image.tobytes(), - status=200, - ) + with open(image_file, "rb") as image_data: + responses.add( + responses.GET, + "http://www.example.com/image.jpg", + body=image_data.read(), + status=200, + content_type="image/jpeg", + stream=True, + ) loaded_image = instance.field_from_activity("http://www.example.com/image.jpg") self.assertIsInstance(loaded_image, list) self.assertIsInstance(loaded_image[1], ContentFile) @@ -465,18 +464,18 @@ class ModelFields(TestCase): image_file = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) instance = fields.ImageField(activitypub_field="cover", name="cover") - responses.add( - responses.GET, - "http://www.example.com/image.jpg", - body=image.tobytes(), - status=200, - ) + with open(image_file, "rb") as image_data: + responses.add( + responses.GET, + "http://www.example.com/image.jpg", + body=image_data.read(), + content_type="image/jpeg", + status=200, + stream=True, + ) book = Edition.objects.create(title="hello") MockActivity = namedtuple("MockActivity", ("cover")) @@ -491,18 +490,18 @@ class ModelFields(TestCase): image_file = pathlib.Path(__file__).parent.joinpath( "../../static/images/default_avi.jpg" ) - image = Image.open(image_file) - output = BytesIO() - image.save(output, format=image.format) instance = fields.ImageField(activitypub_field="cover", name="cover") - responses.add( - responses.GET, - "http://www.example.com/image.jpg", - body=image.tobytes(), - status=200, - ) + with open(image_file, "rb") as image_data: + responses.add( + responses.GET, + "http://www.example.com/image.jpg", + body=image_data.read(), + status=200, + content_type="image/jpeg", + stream=True, + ) book = Edition.objects.create(title="hello") MockActivity = namedtuple("MockActivity", ("cover")) @@ -565,18 +564,18 @@ class ModelFields(TestCase): another_image_file = pathlib.Path(__file__).parent.joinpath( "../../static/images/logo.png" ) - another_image = Image.open(another_image_file) - another_output = BytesIO() - another_image.save(another_output, format=another_image.format) instance = fields.ImageField(activitypub_field="cover", name="cover") - responses.add( - responses.GET, - "http://www.example.com/image.jpg", - body=another_image.tobytes(), - status=200, - ) + with open(another_image_file, "rb") as another_image: + responses.add( + responses.GET, + "http://www.example.com/image.jpg", + body=another_image.read(), + status=200, + content_type="image/jpeg", + stream=True, + ) MockActivity = namedtuple("MockActivity", ("cover")) mock_activity = MockActivity("http://www.example.com/image.jpg") diff --git a/bw-dev b/bw-dev index 00faa6a62..77d337297 100755 --- a/bw-dev +++ b/bw-dev @@ -209,6 +209,7 @@ case "$CMD" in echo " build" echo " clean" echo " black" + echo " prettier" echo " populate_streams [--stream=]" echo " populate_suggestions" echo " generate_thumbnails"