Make search work with URLs

This commit is contained in:
Andrew Godwin 2022-12-04 21:13:33 -07:00
parent 28ac03c42f
commit 93ccb5dd32
4 changed files with 124 additions and 27 deletions

View file

@ -3,7 +3,7 @@ from typing import Dict, Iterable, List, Optional, Set
import httpx import httpx
import urlman import urlman
from asgiref.sync import sync_to_async from asgiref.sync import async_to_sync, sync_to_async
from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.indexes import GinIndex
from django.db import models, transaction from django.db import models, transaction
from django.template.defaultfilters import linebreaks_filter from django.template.defaultfilters import linebreaks_filter
@ -16,6 +16,7 @@ from core.html import sanitize_post, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from stator.models import State, StateField, StateGraph, StatorModel from stator.models import State, StateField, StateGraph, StatorModel
from users.models.identity import Identity from users.models.identity import Identity
from users.models.system_actor import SystemActor
class PostStates(StateGraph): class PostStates(StateGraph):
@ -609,19 +610,28 @@ class Post(StatorModel):
return cls.objects.get(object_uri=object_uri) return cls.objects.get(object_uri=object_uri)
except cls.DoesNotExist: except cls.DoesNotExist:
if fetch: if fetch:
# Go grab the data from the URI try:
response = httpx.get( response = async_to_sync(SystemActor().signed_request)(
object_uri, method="get", uri=object_uri
headers={"Accept": "application/json"},
follow_redirects=True,
)
if 200 <= response.status_code < 300:
return cls.by_ap(
canonicalise(response.json(), include_security=True),
create=True,
update=True,
) )
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") except (httpx.RequestError, httpx.ConnectError):
raise cls.DoesNotExist(f"Could not fetch {object_uri}")
if response.status_code in [404, 410]:
raise cls.DoesNotExist(f"No post at {object_uri}")
if response.status_code >= 500:
raise cls.DoesNotExist(f"Server error fetching {object_uri}")
if response.status_code >= 400:
raise cls.DoesNotExist(
f"Error fetching post from {object_uri}: {response.status_code}",
{response.content},
)
return cls.by_ap(
canonicalise(response.json(), include_security=True),
create=True,
update=True,
)
else:
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}")
@classmethod @classmethod
def handle_create_ap(cls, data): def handle_create_ap(cls, data):

View file

@ -1,11 +1,14 @@
from typing import Set from typing import Set
import httpx
from asgiref.sync import async_to_sync from asgiref.sync import async_to_sync
from django import forms from django import forms
from django.views.generic import FormView from django.views.generic import FormView
from activities.models import Hashtag from activities.models import Hashtag, Post
from core.ld import canonicalise
from users.models import Domain, Identity, IdentityStates from users.models import Domain, Identity, IdentityStates
from users.models.system_actor import SystemActor
class Search(FormView): class Search(FormView):
@ -14,11 +17,20 @@ class Search(FormView):
class form_class(forms.Form): class form_class(forms.Form):
query = forms.CharField( query = forms.CharField(
help_text="Search for a user by @username@domain or hashtag by #tagname", help_text="Search for:\nA user by @username@domain or their profile URL\nA hashtag by #tagname\nA post by its URL",
widget=forms.TextInput(attrs={"type": "search", "autofocus": "autofocus"}), widget=forms.TextInput(attrs={"type": "search", "autofocus": "autofocus"}),
) )
def search_identities(self, query: str): def search_identities_handle(self, query: str):
"""
Searches for identities by their handles
"""
# Short circuit if it's obviously not for us
if "://" in query:
return set()
# Try to fetch the user by handle
query = query.lstrip("@") query = query.lstrip("@")
results: Set[Identity] = set() results: Set[Identity] = set()
if "@" in query: if "@" in query:
@ -52,12 +64,65 @@ class Search(FormView):
results.add(identity) results.add(identity)
return results return results
def search_url(self, query: str) -> Post | Identity | None:
    """
    Searches for an identity or post by URL.

    The URL is fetched as the system actor, the returned JSON is
    canonicalised, and the result is dispatched on the document's "type":
    "person" resolves to an Identity and "note" resolves to a Post.

    Returns None when the query does not look like a URL, the request
    fails, the remote answers with a 4xx/5xx status, the body is not
    valid JSON, the document lacks a usable "type"/"id", or the document
    is of a type we do not handle.
    """
    # Short circuit if it's obviously not for us
    if "://" not in query:
        return None
    # Clean up query
    query = query.strip()
    # Fetch the provided URL as the system actor to retrieve the AP JSON
    try:
        response = async_to_sync(SystemActor().signed_request)(
            method="get", uri=query
        )
    except httpx.RequestError:
        # httpx.ConnectError is a subclass of RequestError, so this one
        # clause covers connection failures as well.
        return None
    if response.status_code >= 400:
        return None
    # Users can paste any URL here; a 2xx HTML page is not an error on the
    # remote side but would blow up JSON decoding, so treat it as "no result".
    try:
        payload = response.json()
    except ValueError:
        return None
    document = canonicalise(payload, include_security=True)
    doc_type = document.get("type", "unknown")
    doc_id = document.get("id")
    # A non-string type (e.g. a list of types) or a missing id cannot be
    # resolved below, so bail out instead of raising.
    if not isinstance(doc_type, str) or not doc_id:
        return None
    doc_type = doc_type.lower()
    # Is it an identity?
    if doc_type == "person":
        # Try and retrieve the profile by actor URI
        identity = Identity.by_actor_uri(doc_id, create=True)
        if identity and identity.state == IdentityStates.outdated:
            async_to_sync(identity.fetch_actor)()
        return identity
    # Is it a post?
    if doc_type == "note":
        # Try and retrieve the post by URI
        # (we do not trust the JSON we just got - fetch from source!)
        try:
            post = Post.by_object_uri(doc_id, fetch=True)
        except Post.DoesNotExist:
            return None
        # We may need to live-fetch the identity too
        if post.author.state == IdentityStates.outdated:
            async_to_sync(post.author.fetch_actor)()
        return post
    # Dunno what it is
    return None
def search_hashtags(self, query: str): def search_hashtags(self, query: str):
"""
Searches for hashtags by their name
"""
# Short circuit out if it's obviously not a hashtag
if "@" in query or "://" in query:
return set()
results: Set[Hashtag] = set() results: Set[Hashtag] = set()
if "@" in query:
return results
query = query.lstrip("#") query = query.lstrip("#")
for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]: for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]:
results.add(hashtag) results.add(hashtag)
@ -68,10 +133,17 @@ class Search(FormView):
def form_valid(self, form): def form_valid(self, form):
query = form.cleaned_data["query"].lower() query = form.cleaned_data["query"].lower()
results = { results = {
"identities": self.search_identities(query), "identities": self.search_identities_handle(query),
"hashtags": self.search_hashtags(query), "hashtags": self.search_hashtags(query),
"posts": set(),
} }
url_result = self.search_url(query)
if isinstance(url_result, Identity):
results["identities"].add(url_result)
if isinstance(url_result, Post):
results["posts"].add(url_result)
# Render results # Render results
context = self.get_context_data(form=form) context = self.get_context_data(form=form)
context["results"] = results context["results"] = results

View file

@ -18,6 +18,14 @@
{% include "activities/_identity.html" %} {% include "activities/_identity.html" %}
{% endfor %} {% endfor %}
{% endif %} {% endif %}
{% if results.posts %}
<h2>Posts</h2>
<section class="icon-menu">
{% for post in results.posts %}
{% include "activities/_post.html" %}
{% endfor %}
</section>
{% endif %}
{% if results.hashtags %} {% if results.hashtags %}
<h2>Hashtags</h2> <h2>Hashtags</h2>
<section class="icon-menu"> <section class="icon-menu">
@ -26,10 +34,15 @@
{% endfor %} {% endfor %}
</section> </section>
{% endif %} {% endif %}
{% if results and not results.identities and not results.hashtags %} {% if results and not results.identities and not results.hashtags and not results.posts %}
<h2>No results (yet)</h2> <h2>No results</h2>
<p>No results found — not yet, at least. The search swamphens are still <p>
rooting around behind the scenes and may yet turn something up. If you try your search We could not find anything matching your query.
again after a moment, you might get lucky!</p> </p>
<p>
If you're trying to find a post or profile on another server,
try again in a few moments - if the other end is overloaded, it
can take some time to fetch the details.
</p>
{% endif %} {% endif %}
{% endblock %} {% endblock %}

View file

@ -241,9 +241,11 @@ class Identity(StatorModel):
@property @property
def handle(self): def handle(self):
if self.username is None:
return "(unknown user)"
if self.domain_id: if self.domain_id:
return f"{self.username}@{self.domain_id}" return f"{self.username}@{self.domain_id}"
return f"{self.username}@unknown.invalid" return f"{self.username}@(unknown server)"
@property @property
def data_age(self) -> float: def data_age(self) -> float: