From 93ccb5dd32f4e3942d08814e5a86fb30c90c9ff6 Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Sun, 4 Dec 2022 21:13:33 -0700 Subject: [PATCH] Make search work with URLs --- activities/models/post.py | 36 ++++++++----- activities/views/search.py | 88 +++++++++++++++++++++++++++++--- templates/activities/search.html | 23 +++++++-- users/models/identity.py | 4 +- 4 files changed, 124 insertions(+), 27 deletions(-) diff --git a/activities/models/post.py b/activities/models/post.py index aa4be16..f8a5e75 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -3,7 +3,7 @@ from typing import Dict, Iterable, List, Optional, Set import httpx import urlman -from asgiref.sync import sync_to_async +from asgiref.sync import async_to_sync, sync_to_async from django.contrib.postgres.indexes import GinIndex from django.db import models, transaction from django.template.defaultfilters import linebreaks_filter @@ -16,6 +16,7 @@ from core.html import sanitize_post, strip_html from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from stator.models import State, StateField, StateGraph, StatorModel from users.models.identity import Identity +from users.models.system_actor import SystemActor class PostStates(StateGraph): @@ -609,19 +610,28 @@ class Post(StatorModel): return cls.objects.get(object_uri=object_uri) except cls.DoesNotExist: if fetch: - # Go grab the data from the URI - response = httpx.get( - object_uri, - headers={"Accept": "application/json"}, - follow_redirects=True, - ) - if 200 <= response.status_code < 300: - return cls.by_ap( - canonicalise(response.json(), include_security=True), - create=True, - update=True, + try: + response = async_to_sync(SystemActor().signed_request)( + method="get", uri=object_uri ) - raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") + except (httpx.RequestError, httpx.ConnectError): + raise cls.DoesNotExist(f"Could not fetch {object_uri}") + if response.status_code in [404, 410]: + raise cls.DoesNotExist(f"No post at {object_uri}") + if response.status_code >= 500: + raise cls.DoesNotExist(f"Server error fetching {object_uri}") + if response.status_code >= 400: + raise cls.DoesNotExist( + f"Error fetching post from {object_uri}: {response.status_code}", + {response.content}, + ) + return cls.by_ap( + canonicalise(response.json(), include_security=True), + create=True, + update=True, + ) + else: + raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") @classmethod def handle_create_ap(cls, data): diff --git a/activities/views/search.py b/activities/views/search.py index 8bdef78..ab37e17 100644 --- a/activities/views/search.py +++ b/activities/views/search.py @@ -1,11 +1,14 @@ from typing import Set +import httpx from asgiref.sync import async_to_sync from django import forms from django.views.generic import FormView -from activities.models import Hashtag +from activities.models import Hashtag, Post +from core.ld import canonicalise from users.models import Domain, Identity, IdentityStates +from users.models.system_actor import SystemActor class Search(FormView): @@ -14,11 +17,20 @@ class Search(FormView): class form_class(forms.Form): query = forms.CharField( - help_text="Search for a user by @username@domain or hashtag by #tagname", + help_text="Search for:\nA user by @username@domain or their profile URL\nA hashtag by #tagname\nA post by its URL", widget=forms.TextInput(attrs={"type": "search", "autofocus": "autofocus"}), ) - def search_identities(self, query: str): + def search_identities_handle(self, query: str): + """ + Searches for identities by their handles + """ + + # Short circuit if it's obviously not for us + if "://" in query: + return set() + + # Try to fetch the user by handle query = query.lstrip("@") results: Set[Identity] = set() if "@" in query: @@ -52,12 +64,65 @@ class Search(FormView): results.add(identity) return results + def search_url(self, query: str) -> Post | Identity | None: + """ + Searches for an identity or post by URL. + """ + + # Short circuit if it's obviously not for us + if "://" not in query: + return None + + # Clean up query + query = query.strip() + + # Fetch the provided URL as the system actor to retrieve the AP JSON + try: + response = async_to_sync(SystemActor().signed_request)( + method="get", uri=query + ) + except (httpx.RequestError, httpx.ConnectError): + return None + if response.status_code >= 400: + return None + document = canonicalise(response.json(), include_security=True) + type = document.get("type", "unknown").lower() + + # Is it an identity? + if type == "person": + # Try and retrieve the profile by actor URI + identity = Identity.by_actor_uri(document["id"], create=True) + if identity and identity.state == IdentityStates.outdated: + async_to_sync(identity.fetch_actor)() + return identity + + # Is it a post? + elif type == "note": + # Try and retrieve the post by URI + # (we do not trust the JSON we just got - fetch from source!) + try: + post = Post.by_object_uri(document["id"], fetch=True) + # We may need to live-fetch the identity too + if post.author.state == IdentityStates.outdated: + async_to_sync(post.author.fetch_actor)() + return post + except Post.DoesNotExist: + return None + + # Dunno what it is + else: + return None + def search_hashtags(self, query: str): + """ + Searches for hashtags by their name + """ + + # Short circuit out if it's obviously not a hashtag + if "@" in query or "://" in query: + return set() + results: Set[Hashtag] = set() - - if "@" in query: - return results - query = query.lstrip("#") for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]: results.add(hashtag) @@ -68,10 +133,17 @@ class Search(FormView): def form_valid(self, form): query = form.cleaned_data["query"].lower() results = { - "identities": self.search_identities(query), + "identities": self.search_identities_handle(query), "hashtags": self.search_hashtags(query), + "posts": set(), } + url_result = self.search_url(query) + if isinstance(url_result, Identity): + results["identities"].add(url_result) + if isinstance(url_result, Post): + results["posts"].add(url_result) + # Render results context = self.get_context_data(form=form) context["results"] = results diff --git a/templates/activities/search.html b/templates/activities/search.html index 84b2cc7..58964b2 100644 --- a/templates/activities/search.html +++ b/templates/activities/search.html @@ -18,6 +18,14 @@ {% include "activities/_identity.html" %} {% endfor %} {% endif %} + {% if results.posts %} +

Posts

+
+ {% for post in results.posts %} + {% include "activities/_post.html" %} + {% endfor %} +
+ {% endif %} {% if results.hashtags %}

Hashtags

@@ -26,10 +34,15 @@ {% endfor %}
{% endif %} - {% if results and not results.identities and not results.hashtags %} -

No results (yet)

-

No results found — not yet, at least. The search swamphens are still - rooting around behind the scenes and may yet turn something up. If you try your search - again after a moment, you might get lucky!

+ {% if results and not results.identities and not results.hashtags and not results.posts %} +

No results

+

+ We could not find anything matching your query. +

+

+ If you're trying to find a post or profile on another server, + try again in a few moments - if the other end is overloaded, it + can take some time to fetch the details. +

{% endif %} {% endblock %} diff --git a/users/models/identity.py b/users/models/identity.py index c435713..bbedceb 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -241,9 +241,11 @@ class Identity(StatorModel): @property def handle(self): + if self.username is None: + return "(unknown user)" if self.domain_id: return f"{self.username}@{self.domain_id}" - return f"{self.username}@unknown.invalid" + return f"{self.username}@(unknown server)" @property def data_age(self) -> float: