From 6f31dc56005584052aab5728f9e151804edf7e15 Mon Sep 17 00:00:00 2001 From: Henri Dickson <90480431+alphatownsman@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:06:03 -0500 Subject: [PATCH] fix potential vulnerability when fetching remote json data --- activities/models/post.py | 4 +++- activities/services/search.py | 8 ++++---- activities/views/debug.py | 6 ++++-- core/json.py | 5 ++--- tests/activities/models/test_post.py | 2 ++ tests/api/test_search.py | 3 +-- tests/users/models/test_identity.py | 3 +++ users/models/identity.py | 7 +++---- 8 files changed, 22 insertions(+), 16 deletions(-) diff --git a/activities/models/post.py b/activities/models/post.py index fef4db4..cede409 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -30,6 +30,7 @@ from activities.models.post_types import ( ) from core.exceptions import ActivityPubFormatError from core.html import ContentRenderer, FediverseHtmlParser +from core.json import json_from_response from core.ld import ( canonicalise, format_ld_date, @@ -1033,8 +1034,9 @@ class Post(StatorModel): {response.content}, ) try: + json_data = json_from_response(response) post = cls.by_ap( - canonicalise(response.json(), include_security=True), + canonicalise(json_data, include_security=True), create=True, update=True, fetch_author=True, diff --git a/activities/services/search.py b/activities/services/search.py index 8ed05e6..3d09d39 100644 --- a/activities/services/search.py +++ b/activities/services/search.py @@ -83,11 +83,11 @@ class SearchService: if response.status_code >= 400: return None - json_data = json_from_response(response) - if not json_data: + try: + json_data = json_from_response(response) + document = canonicalise(json_data, include_security=True) + except ValueError: return None - - document = canonicalise(json_data, include_security=True) type = document.get("type", "unknown").lower() # Is it an identity? diff --git a/activities/views/debug.py b/activities/views/debug.py index d838905..bd9e52a 100644 --- a/activities/views/debug.py +++ b/activities/views/debug.py @@ -5,6 +5,7 @@ from django import forms from django.utils.decorators import method_decorator from django.views.generic import FormView, TemplateView +from core.json import json_from_response from core.ld import canonicalise from users.decorators import admin_required from users.models import SystemActor @@ -50,8 +51,9 @@ class JsonViewer(FormView): result = f"Error response: {response.status_code}\n{response.content}" else: try: - document = canonicalise(response.json(), include_security=True) - except json.JSONDecodeError as ex: + json_data = json_from_response(response) + document = canonicalise(json_data, include_security=True) + except ValueError as ex: result = str(ex) else: context["raw_result"] = json.dumps(response.json(), indent=2) diff --git a/core/json.py b/core/json.py index 1a71e97..b46259a 100644 --- a/core/json.py +++ b/core/json.py @@ -3,19 +3,18 @@ import json from httpx import Response JSON_CONTENT_TYPES = [ - "application/json", "application/ld+json", "application/activity+json", ] -def json_from_response(response: Response) -> dict | None: +def json_from_response(response: Response) -> dict: content_type, *parameters = ( response.headers.get("Content-Type", "invalid").lower().split(";") ) if content_type not in JSON_CONTENT_TYPES: - return None + raise ValueError(f"Invalid content type: {content_type}") charset = None diff --git a/tests/activities/models/test_post.py b/tests/activities/models/test_post.py index 825a3bb..2860b2a 100644 --- a/tests/activities/models/test_post.py +++ b/tests/activities/models/test_post.py @@ -13,6 +13,7 @@ def test_fetch_post(httpx_mock: HTTPXMock, config_system): """ httpx_mock.add_response( url="https://example.com/test-actor", + headers={"Content-Type": "application/activity+json"}, json={ "@context": [ "https://www.w3.org/ns/activitystreams", @@ -23,6 +24,7 @@ def test_fetch_post(httpx_mock: HTTPXMock, config_system): ) httpx_mock.add_response( url="https://example.com/test-post", + headers={"Content-Type": "application/activity+json"}, json={ "@context": [ "https://www.w3.org/ns/activitystreams", diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 389535f..dabb4c8 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -86,8 +86,7 @@ def test_search_not_found(httpx_mock: HTTPXMock, api_client): @pytest.mark.parametrize( "content_type", [ - "application/json", - "application/ld+json", + 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"', "application/activity+json", ], ) diff --git a/tests/users/models/test_identity.py b/tests/users/models/test_identity.py index 3b8ac35..c83e2e4 100644 --- a/tests/users/models/test_identity.py +++ b/tests/users/models/test_identity.py @@ -109,6 +109,7 @@ def test_fetch_actor(httpx_mock, config_system): # Trigger actor fetch httpx_mock.add_response( url="https://example.com/.well-known/webfinger?resource=acct:test@example.com", + headers={"Content-Type": "application/activity+json"}, json={ "subject": "acct:test@example.com", "aliases": [ @@ -130,6 +131,7 @@ def test_fetch_actor(httpx_mock, config_system): ) httpx_mock.add_response( url="https://example.com/test-actor/", + headers={"Content-Type": "application/activity+json"}, json={ "@context": [ "https://www.w3.org/ns/activitystreams", @@ -170,6 +172,7 @@ def test_fetch_actor(httpx_mock, config_system): ) httpx_mock.add_response( url="https://example.com/test-actor/collections/featured/", + headers={"Content-Type": "application/activity+json"}, json={ "type": "Collection", "totalItems": 1, diff --git a/users/models/identity.py b/users/models/identity.py index f65ad0b..b8d6d88 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -857,7 +857,8 @@ class Identity(StatorModel): return [] try: - data = canonicalise(response.json(), include_security=True) + json_data = json_from_response(response) + data = canonicalise(json_data, include_security=True) items: list[dict | str] = [] if "orderedItems" in data: items = list(reversed(data["orderedItems"])) @@ -917,10 +918,8 @@ class Identity(StatorModel): "Client error fetching actor: %d %s", status_code, self.actor_uri ) return False - json_data = json_from_response(response) - if not json_data: - return False try: + json_data = json_from_response(response) document = canonicalise(json_data, include_security=True) except ValueError: # servers with empty or invalid responses are inevitable