Improve search api json parsing (#662)

This commit is contained in:
Humberto Rocha 2023-11-19 13:32:35 -05:00 committed by GitHub
parent 5267e4108c
commit 81d019ad0d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 148 additions and 7 deletions

View file

@ -1,3 +1,5 @@
import json
import httpx import httpx
from activities.models import Hashtag, Post from activities.models import Hashtag, Post
@ -15,6 +17,32 @@ class SearchService:
self.query = query.strip() self.query = query.strip()
self.identity = identity self.identity = identity
def _json(self, response: httpx.Response) -> dict | None:
    """
    Parses a response body as JSON, honouring the charset declared in its
    Content-Type header.

    Returns None when the media type is not one of the accepted JSON
    types. When a non-empty charset parameter is present, the raw body is
    decoded with it before parsing; otherwise decoding is left to
    response.json().

    Decoding and parsing errors are not caught here: an unknown charset
    (LookupError), undecodable bytes (UnicodeDecodeError) or invalid JSON
    (json.JSONDecodeError) propagate to the caller.
    """
    content_type, *parameters = (
        response.headers.get("Content-Type", "invalid").lower().split(";")
    )

    # Strip so that optional whitespace before the ";" separator
    # ("application/json ; charset=utf-8") does not defeat the comparison.
    if content_type.strip() not in (
        "application/json",
        "application/ld+json",
        "application/activity+json",
    ):
        return None

    charset = None

    for parameter in parameters:
        # partition() always yields three parts, so an empty parameter
        # (trailing ";"), a bare token without "=", or a value that itself
        # contains "=" no longer blows up with ValueError on unpacking.
        key, _, value = parameter.partition("=")
        if key.strip() == "charset":
            # Parameter values may be sent quoted (charset="utf-8");
            # the quotes are not part of the codec name.
            charset = value.strip().strip("\"'")

    if charset:
        return json.loads(response.content.decode(charset))

    # if no charset informed, default to
    # httpx json encoding inference
    return response.json()
def search_identities_handle(self) -> set[Identity]: def search_identities_handle(self) -> set[Identity]:
""" """
Searches for identities by their handles Searches for identities by their handles
@ -81,14 +109,12 @@ class SearchService:
return None return None
if response.status_code >= 400: if response.status_code >= 400:
return None return None
content_type = response.headers.get("Content-Type", "").lower()
if content_type not in [ json_data = self._json(response)
"application/json", if not json_data:
"application/ld+json",
"application/activity+json",
]:
return None return None
document = canonicalise(response.json(), include_security=True)
document = canonicalise(json_data, include_security=True)
type = document.get("type", "unknown").lower() type = document.get("type", "unknown").lower()
# Is it an identity? # Is it an identity?

115
tests/api/test_search.py Normal file
View file

@ -0,0 +1,115 @@
import pytest
from pytest_httpx import HTTPXMock
# Raw JSON-LD document describing a "Person" named "searchtest" on
# search.example.com (ActivityStreams context plus "toot:" extensions).
# Kept as text, not a dict, so tests can encode it with different charsets.
test_account_json = r"""
{
"@context":[
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
{
"manuallyApprovesFollowers":"as:manuallyApprovesFollowers",
"toot":"http://joinmastodon.org/ns#",
"featured":{
"@id":"toot:featured",
"@type":"@id"
},
"featuredTags":{
"@id":"toot:featuredTags",
"@type":"@id"
},
"movedTo":{
"@id":"as:movedTo",
"@type":"@id"
},
"schema":"http://schema.org#",
"PropertyValue":"schema:PropertyValue",
"value":"schema:value",
"discoverable":"toot:discoverable",
"Device":"toot:Device",
"deviceId":"toot:deviceId",
"messageType":"toot:messageType",
"cipherText":"toot:cipherText",
"suspended":"toot:suspended",
"memorial":"toot:memorial",
"indexable":"toot:indexable"
}
],
"id":"https://search.example.com/users/searchtest",
"type":"Person",
"following":"https://search.example.com/users/searchtest/following",
"followers":"https://search.example.com/users/searchtest/followers",
"inbox":"https://search.example.com/users/searchtest/inbox",
"outbox":"https://search.example.com/users/searchtest/outbox",
"featured":"https://search.example.com/users/searchtest/collections/featured",
"featuredTags":"https://search.example.com/users/searchtest/collections/tags",
"preferredUsername":"searchtest",
"name":"searchtest",
"summary":"<p>The official searchtest account for the instance.</p>",
"url":"https://search.example.com/@searchtest",
"manuallyApprovesFollowers":false,
"discoverable":true,
"indexable":false,
"published":"2018-05-09T00:00:00Z",
"memorial":false,
"devices":"https://search.example.com/users/searchtest/collections/devices",
"endpoints":{
"sharedInbox":"https://search.example.com/inbox"
}
}
"""
@pytest.mark.django_db
def test_search_not_found(httpx_mock: HTTPXMock, api_client):
    """
    A URL query whose remote fetch answers 404 yields empty result lists.
    """
    # Every outgoing request made during the search gets a 404 back
    httpx_mock.add_response(status_code=404)

    query = {
        "q": "https://notfound.example.com",
    }
    payload = api_client.get(
        "/api/v2/search",
        content_type="application/json",
        data=query,
    ).json()

    # Checked in the same order as the individual result sections
    for section in ("accounts", "statuses", "hashtags"):
        assert payload[section] == []
@pytest.mark.django_db
@pytest.mark.parametrize("encoding", ["utf-8", "iso-8859-1"])
@pytest.mark.parametrize(
    "content_type",
    ["application/json", "application/ld+json", "application/activity+json"],
)
def test_search(
    content_type: str,
    encoding: str,
    httpx_mock: HTTPXMock,
    api_client,
):
    """
    Searching by URL finds the account when the remote response declares
    a charset alongside each accepted JSON content type.
    """
    # The mocked remote server announces the charset in its Content-Type
    # header and sends the document encoded accordingly.
    declared_type = f"{content_type}; charset={encoding}"
    remote_body = test_account_json.encode(encoding)
    httpx_mock.add_response(
        headers={"Content-Type": declared_type},
        content=remote_body,
    )

    query = {
        "q": "https://search.example.com/users/searchtest",
    }
    payload = api_client.get(
        "/api/v2/search",
        content_type="application/json",
        data=query,
    ).json()

    accounts = payload["accounts"]
    assert len(accounts) == 1

    found = accounts[0]
    assert found["acct"] == "searchtest@search.example.com"
    assert found["username"] == "searchtest"