Improve search api json parsing (#662)

2025-01-10 22:25:25 +00:00 · 2023-11-19 13:32:35 -05:00 · 2023-11-19 13:32:35 -05:00 · 81d019ad0d
commit 81d019ad0d
parent 5267e4108c
2 changed files with 148 additions and 7 deletions
--- a/activities/services/search.py
+++ b/activities/services/search.py
@ -1,3 +1,5 @@
+import json
+
 import httpx

 from activities.models import Hashtag, Post
@ -15,6 +17,32 @@ class SearchService:
        self.query = query.strip()
        self.identity = identity

+    def _json(self, response: httpx.Response) -> dict | None:
+        """
+        Decodes the JSON body of a response, honouring a declared charset
+
+        Returns None when the Content-Type is not one of the accepted JSON
+        media types, or when the body cannot be decoded or parsed.
+        """
+        content_type, *parameters = (
+            response.headers.get("Content-Type", "invalid").lower().split(";")
+        )
+
+        # Whitespace around the media type is legal ("application/json ;...")
+        if content_type.strip() not in [
+            "application/json",
+            "application/ld+json",
+            "application/activity+json",
+        ]:
+            return None
+
+        charset = None
+
+        for parameter in parameters:
+            # partition() tolerates parameters without "=" (e.g. a trailing
+            # ";") and values that themselves contain "=", both of which make
+            # a two-name unpack of split("=") raise ValueError
+            key, _, value = parameter.partition("=")
+            if key.strip() == "charset":
+                # parameter values may be quoted: charset="utf-8"
+                charset = value.strip().strip('"').strip()
+
+        try:
+            if charset:
+                return json.loads(response.content.decode(charset))
+            # if no charset informed, default to
+            # httpx json encoding inference
+            return response.json()
+        except (LookupError, ValueError):
+            # LookupError: unknown charset name. ValueError covers both
+            # UnicodeDecodeError and json.JSONDecodeError. The body comes
+            # from a remote server, so treat it as "no usable document"
+            return None
+
    def search_identities_handle(self) -> set[Identity]:
        """
        Searches for identities by their handles
@ -81,14 +109,12 @@ class SearchService:
            return None
        if response.status_code >= 400:
            return None
-        content_type = response.headers.get("Content-Type", "").lower()
-        if content_type not in [
-            "application/json",
-            "application/ld+json",
-            "application/activity+json",
-        ]:
+
+        json_data = self._json(response)
+        if not json_data:
            return None
-        document = canonicalise(response.json(), include_security=True)
+
+        document = canonicalise(json_data, include_security=True)
        type = document.get("type", "unknown").lower()

        # Is it an identity?
--- a/tests/api/test_search.py
+++ b/tests/api/test_search.py
@ -0,0 +1,115 @@
+import pytest
+from pytest_httpx import HTTPXMock
+
+# ActivityStreams "Person" document for the fictional account
+# searchtest@search.example.com. test_search encodes this text with each
+# charset under test and serves it as the mocked remote response body.
+test_account_json = r"""
+{
+   "@context":[
+      "https://www.w3.org/ns/activitystreams",
+      "https://w3id.org/security/v1",
+      {
+         "manuallyApprovesFollowers":"as:manuallyApprovesFollowers",
+         "toot":"http://joinmastodon.org/ns#",
+         "featured":{
+            "@id":"toot:featured",
+            "@type":"@id"
+         },
+         "featuredTags":{
+            "@id":"toot:featuredTags",
+            "@type":"@id"
+         },
+         "movedTo":{
+            "@id":"as:movedTo",
+            "@type":"@id"
+         },
+         "schema":"http://schema.org#",
+         "PropertyValue":"schema:PropertyValue",
+         "value":"schema:value",
+         "discoverable":"toot:discoverable",
+         "Device":"toot:Device",
+         "deviceId":"toot:deviceId",
+         "messageType":"toot:messageType",
+         "cipherText":"toot:cipherText",
+         "suspended":"toot:suspended",
+         "memorial":"toot:memorial",
+         "indexable":"toot:indexable"
+      }
+   ],
+   "id":"https://search.example.com/users/searchtest",
+   "type":"Person",
+   "following":"https://search.example.com/users/searchtest/following",
+   "followers":"https://search.example.com/users/searchtest/followers",
+   "inbox":"https://search.example.com/users/searchtest/inbox",
+   "outbox":"https://search.example.com/users/searchtest/outbox",
+   "featured":"https://search.example.com/users/searchtest/collections/featured",
+   "featuredTags":"https://search.example.com/users/searchtest/collections/tags",
+   "preferredUsername":"searchtest",
+   "name":"searchtest",
+   "summary":"<p>The official searchtest account for the instance.</p>",
+   "url":"https://search.example.com/@searchtest",
+   "manuallyApprovesFollowers":false,
+   "discoverable":true,
+   "indexable":false,
+   "published":"2018-05-09T00:00:00Z",
+   "memorial":false,
+   "devices":"https://search.example.com/users/searchtest/collections/devices",
+   "endpoints":{
+      "sharedInbox":"https://search.example.com/inbox"
+   }
+}
+"""
+
+
+@pytest.mark.django_db
+def test_search_not_found(httpx_mock: HTTPXMock, api_client):
+    """
+    A remote lookup answered with a 404 yields empty result lists
+    """
+    httpx_mock.add_response(status_code=404)
+
+    query = {"q": "https://notfound.example.com"}
+    reply = api_client.get(
+        "/api/v2/search",
+        content_type="application/json",
+        data=query,
+    )
+    payload = reply.json()
+
+    # Same keys, same order as the individual assertions they replace
+    for section in ("accounts", "statuses", "hashtags"):
+        assert payload[section] == []
+
+
+@pytest.mark.django_db
+@pytest.mark.parametrize("encoding", ["utf-8", "iso-8859-1"])
+@pytest.mark.parametrize(
+    "content_type",
+    ["application/json", "application/ld+json", "application/activity+json"],
+)
+def test_search(
+    content_type: str,
+    encoding: str,
+    httpx_mock: HTTPXMock,
+    api_client,
+):
+    """
+    Searching by URL finds the remote account for every accepted JSON
+    media type combined with every declared charset
+    """
+    # The mocked remote server declares the charset it actually encoded with
+    header_value = f"{content_type}; charset={encoding}"
+    encoded_body = test_account_json.encode(encoding)
+    httpx_mock.add_response(
+        headers={"Content-Type": header_value},
+        content=encoded_body,
+    )
+
+    query = {"q": "https://search.example.com/users/searchtest"}
+    reply = api_client.get(
+        "/api/v2/search",
+        content_type="application/json",
+        data=query,
+    )
+    accounts = reply.json()["accounts"]
+
+    assert len(accounts) == 1
+    account = accounts[0]
+    assert account["acct"] == "searchtest@search.example.com"
+    assert account["username"] == "searchtest"