Make search work with URLs

This commit is contained in:
Andrew Godwin 2022-12-04 21:13:33 -07:00
parent 28ac03c42f
commit 93ccb5dd32
4 changed files with 124 additions and 27 deletions

View file

@ -3,7 +3,7 @@ from typing import Dict, Iterable, List, Optional, Set
import httpx
import urlman
from asgiref.sync import sync_to_async
from asgiref.sync import async_to_sync, sync_to_async
from django.contrib.postgres.indexes import GinIndex
from django.db import models, transaction
from django.template.defaultfilters import linebreaks_filter
@ -16,6 +16,7 @@ from core.html import sanitize_post, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from stator.models import State, StateField, StateGraph, StatorModel
from users.models.identity import Identity
from users.models.system_actor import SystemActor
class PostStates(StateGraph):
@ -609,19 +610,28 @@ class Post(StatorModel):
return cls.objects.get(object_uri=object_uri)
except cls.DoesNotExist:
if fetch:
# Go grab the data from the URI
response = httpx.get(
object_uri,
headers={"Accept": "application/json"},
follow_redirects=True,
)
if 200 <= response.status_code < 300:
return cls.by_ap(
canonicalise(response.json(), include_security=True),
create=True,
update=True,
try:
response = async_to_sync(SystemActor().signed_request)(
method="get", uri=object_uri
)
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}")
except (httpx.RequestError, httpx.ConnectError):
raise cls.DoesNotExist(f"Could not fetch {object_uri}")
if response.status_code in [404, 410]:
raise cls.DoesNotExist(f"No post at {object_uri}")
if response.status_code >= 500:
raise cls.DoesNotExist(f"Server error fetching {object_uri}")
if response.status_code >= 400:
raise cls.DoesNotExist(
f"Error fetching post from {object_uri}: {response.status_code}",
{response.content},
)
return cls.by_ap(
canonicalise(response.json(), include_security=True),
create=True,
update=True,
)
else:
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}")
@classmethod
def handle_create_ap(cls, data):

View file

@ -1,11 +1,14 @@
from typing import Set
import httpx
from asgiref.sync import async_to_sync
from django import forms
from django.views.generic import FormView
from activities.models import Hashtag
from activities.models import Hashtag, Post
from core.ld import canonicalise
from users.models import Domain, Identity, IdentityStates
from users.models.system_actor import SystemActor
class Search(FormView):
@ -14,11 +17,20 @@ class Search(FormView):
class form_class(forms.Form):
query = forms.CharField(
help_text="Search for a user by @username@domain or hashtag by #tagname",
help_text="Search for:\nA user by @username@domain or their profile URL\nA hashtag by #tagname\nA post by its URL",
widget=forms.TextInput(attrs={"type": "search", "autofocus": "autofocus"}),
)
def search_identities(self, query: str):
def search_identities_handle(self, query: str):
"""
Searches for identities by their handles
"""
# Short circuit if it's obviously not for us
if "://" in query:
return set()
# Try to fetch the user by handle
query = query.lstrip("@")
results: Set[Identity] = set()
if "@" in query:
@ -52,12 +64,65 @@ class Search(FormView):
results.add(identity)
return results
def search_url(self, query: str) -> Post | Identity | None:
    """
    Searches for an identity or post by URL.

    The URL is fetched with a request signed as the system actor, the
    returned JSON is canonicalised, and the result is dispatched on the
    document's ``type``:

    * ``person`` - looked up (and created if needed) by actor URI.
    * ``note`` - looked up by object URI, re-fetching from the source.

    Returns the matching ``Identity`` or ``Post``, or ``None`` when the
    query is not a URL, the fetch fails, the response is an HTTP error or
    is not a usable JSON document, or the document type is not handled.

    NOTE(review): form_valid lowercases the query before calling this;
    URL paths can be case-sensitive, so that may break some lookups -
    confirm whether the raw query should be passed here instead.
    """
    # Short circuit if it's obviously not for us
    if "://" not in query:
        return None
    # Clean up query
    query = query.strip()
    # Fetch the provided URL as the system actor to retrieve the AP JSON
    try:
        response = async_to_sync(SystemActor().signed_request)(
            method="get", uri=query
        )
    except httpx.RequestError:
        # ConnectError is a subclass of RequestError, so this covers both
        return None
    if response.status_code >= 400:
        return None
    # Plenty of URLs serve HTML or other non-JSON content; treat that as
    # "nothing found" rather than letting the decode error escape.
    try:
        payload = response.json()
    except ValueError:
        return None
    if not isinstance(payload, dict):
        return None
    document = canonicalise(payload, include_security=True)
    object_type = document.get("type", "unknown")
    object_id = document.get("id")
    # Without a string type and an id there is nothing we can look up
    if not isinstance(object_type, str) or not object_id:
        return None
    object_type = object_type.lower()
    # Is it an identity?
    if object_type == "person":
        # Try and retrieve the profile by actor URI
        identity = Identity.by_actor_uri(object_id, create=True)
        if identity and identity.state == IdentityStates.outdated:
            async_to_sync(identity.fetch_actor)()
        return identity
    # Is it a post?
    if object_type == "note":
        # Try and retrieve the post by URI
        # (we do not trust the JSON we just got - fetch from source!)
        try:
            post = Post.by_object_uri(object_id, fetch=True)
        except Post.DoesNotExist:
            return None
        # We may need to live-fetch the identity too
        if post.author.state == IdentityStates.outdated:
            async_to_sync(post.author.fetch_actor)()
        return post
    # Dunno what it is
    return None
def search_hashtags(self, query: str):
"""
Searches for hashtags by their name
"""
# Short circuit out if it's obviously not a hashtag
if "@" in query or "://" in query:
return set()
results: Set[Hashtag] = set()
if "@" in query:
return results
query = query.lstrip("#")
for hashtag in Hashtag.objects.public().hashtag_or_alias(query)[:10]:
results.add(hashtag)
@ -68,10 +133,17 @@ class Search(FormView):
def form_valid(self, form):
query = form.cleaned_data["query"].lower()
results = {
"identities": self.search_identities(query),
"identities": self.search_identities_handle(query),
"hashtags": self.search_hashtags(query),
"posts": set(),
}
url_result = self.search_url(query)
if isinstance(url_result, Identity):
results["identities"].add(url_result)
if isinstance(url_result, Post):
results["posts"].add(url_result)
# Render results
context = self.get_context_data(form=form)
context["results"] = results

View file

@ -18,6 +18,14 @@
{% include "activities/_identity.html" %}
{% endfor %}
{% endif %}
{% if results.posts %}
<h2>Posts</h2>
<section class="icon-menu">
{% for post in results.posts %}
{% include "activities/_post.html" %}
{% endfor %}
</section>
{% endif %}
{% if results.hashtags %}
<h2>Hashtags</h2>
<section class="icon-menu">
@ -26,10 +34,15 @@
{% endfor %}
</section>
{% endif %}
{% if results and not results.identities and not results.hashtags %}
<h2>No results (yet)</h2>
<p>No results found — not yet, at least. The search swamphens are still
rooting around behind the scenes and may yet turn something up. If you try your search
again after a moment, you might get lucky!</p>
{% if results and not results.identities and not results.hashtags and not results.posts %}
<h2>No results</h2>
<p>
We could not find anything matching your query.
</p>
<p>
If you're trying to find a post or profile on another server,
try again in a few moments - if the other end is overloaded, it
can take some time to fetch the details.
</p>
{% endif %}
{% endblock %}

View file

@ -241,9 +241,11 @@ class Identity(StatorModel):
@property
def handle(self):
if self.username is None:
return "(unknown user)"
if self.domain_id:
return f"{self.username}@{self.domain_id}"
return f"{self.username}@unknown.invalid"
return f"{self.username}@(unknown server)"
@property
def data_age(self) -> float: