Refactoring inbox processing to smaller tasks (#647)

This commit is contained in:
Osma Ahvenlampi 2023-10-26 19:01:03 +03:00 committed by GitHub
parent 9368996a5b
commit 039adae797
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 147 additions and 57 deletions

View file

@ -884,7 +884,7 @@ class Post(StatorModel):
except IntegrityError:
# despite previous checks, a parallel thread managed
# to create the same object already
post = cls.by_object_uri(object_uri=data["id"])
raise TryAgainLater()
else:
raise cls.DoesNotExist(f"No post with ID {data['id']}", data)
if update or created:
@ -1014,7 +1014,7 @@ class Post(StatorModel):
response = SystemActor().signed_request(
method="get", uri=object_uri
)
except (httpx.HTTPError, ssl.SSLCertVerificationError):
except (httpx.HTTPError, ssl.SSLCertVerificationError, ValueError):
raise cls.DoesNotExist(f"Could not fetch {object_uri}")
if response.status_code in [404, 410]:
raise cls.DoesNotExist(f"No post at {object_uri}")
@ -1072,7 +1072,7 @@ class Post(StatorModel):
if data["actor"] != data["object"]["attributedTo"]:
raise ValueError("Create actor does not match its Post object", data)
# Create it, stator will fan it out locally
cls.by_ap(data["object"], create=True, update=True)
cls.by_ap(data["object"], create=True, update=True, fetch_author=True)
@classmethod
def handle_update_ap(cls, data):

View file

@ -295,6 +295,8 @@ class LDSignature:
Verifies a document
"""
try:
# causing side effects to the original document is bad form
document = document.copy()
# Strip out the signature from the incoming document
signature = document.pop("signature")
# Create the options document
@ -322,7 +324,7 @@ class LDSignature:
hashes.SHA256(),
)
except InvalidSignature:
raise VerificationError("Signature mismatch")
raise VerificationError("LDSignature mismatch")
@classmethod
def create_signature(

View file

@ -12,6 +12,7 @@ def test_sign_ld(keypair):
"""
# Create the signature
document = {
"@context": ["https://www.w3.org/ns/activitystreams"],
"id": "https://example.com/test-create",
"type": "Create",
"actor": "https://example.com/test-actor",
@ -38,6 +39,7 @@ def test_verifying_ld(keypair):
Tests verifying JSON-LD signatures from a known-good document
"""
document = {
"@context": ["https://www.w3.org/ns/activitystreams"],
"id": "https://example.com/test-create",
"type": "Create",
"actor": "https://example.com/test-actor",
@ -45,13 +47,15 @@ def test_verifying_ld(keypair):
"signature": {
"@context": "https://w3id.org/identity/v1",
"creator": "https://example.com/test-actor#test-key",
"created": "2022-11-12T21:41:47Z",
"signatureValue": "nTHfkHqG4hegfnjpHucXtXDLDaIKi2Duk+NeCzqTtkjf4NneXsofbZY2tGew4uAooEe1UeM23PIyjWYnR16KwcD4YY8nMj8L3xY2czwQPScMM9n+KhSHzkWfX+iI4FWKbjpPI8M53EtTRJU+1qEjjmGUx03Ip0vfvT5821etIgvY4wLNhg3y7R8fevnNux+BeytcEV6gM4awJJ6RK0xrWGLyTgDNon5V5aNUjwcV/UVPy9UAQi1KYWtA74/F0Y4oPzL5CTudPpyiViyVHZQaal4r+ExzgSvGztqKxQeT1ya6gLXxbm1YQ+8UiGVSS8zoGhMFDEZWVsRPv7e0jm5wfA==",
"created": "2023-10-25T08:08:47.702Z",
"signatureValue": "ajg4ukZzCtBWjflO1u6MlTc4tBVO6MsqzBr/L+kO5VI2ucutFaUdDa/Kx4W12ZCm9oYvTyMQMnoeELx5BifslRWEeMmo1wWMPXmg2/BMKgm8Spt+Zanq68uTlYGyKvuw1Q0FyNq84N2PbRZRXu2Yhlj2KnAVTRtKrsfEiCg3yNfVQ7lbUpDtlXvXLAq2yBN8H/BnZDoynjaDlafFW9Noq8025q1K/lz5jNzBEL22CSrKsD2qYWq1TK3s3h6SJ+j3J+5s0Ni3F/TH7W/5VeGBpzx4z6MSjmn7aHAS3JNCnAWDW9Rf6yKLg2y5htj6FpexiGcoEjO3VqjLoIP4f/115Q==",
"type": "RsaSignature2017",
},
}
# Ensure it verifies with correct data
LDSignature.verify_signature(document, keypair["public_key"])
# signature should remain in document if it was valid
assert "signature" in document
# Mutate it slightly and ensure it does not verify
with pytest.raises(VerificationError):
document["actor"] = "https://example.com/evil-actor"

View file

@ -199,6 +199,7 @@ def test_fetch_actor(httpx_mock, config_system):
identity.featured_collection_uri
== "https://example.com/test-actor/collections/featured/"
)
identity.fetch_pinned_post_uris(identity.featured_collection_uri)
assert identity.icon_uri == "https://example.com/icon.jpg"
assert identity.image_uri == "https://example.com/image.jpg"
assert identity.summary == "<p>A test user</p>"

View file

@ -34,6 +34,7 @@ from core.uris import (
from stator.exceptions import TryAgainLater
from stator.models import State, StateField, StateGraph, StatorModel
from users.models.domain import Domain
from users.models.inbox_message import InboxMessage
from users.models.system_actor import SystemActor
@ -743,7 +744,7 @@ class Identity(StatorModel):
except (httpx.HTTPError, ssl.SSLCertVerificationError) as ex:
response = getattr(ex, "response", None)
if isinstance(ex, httpx.TimeoutException) or (
response and response.status_code in [408, 504]
response and response.status_code in [408, 429, 504]
):
raise TryAgainLater() from ex
elif (
@ -800,7 +801,7 @@ class Identity(StatorModel):
except (httpx.HTTPError, ssl.SSLCertVerificationError) as ex:
response = getattr(ex, "response", None)
if isinstance(ex, httpx.TimeoutException) or (
response and response.status_code in [408, 504]
response and response.status_code in [408, 429, 504]
):
raise TryAgainLater() from ex
elif (
@ -847,7 +848,6 @@ class Identity(StatorModel):
webfinger if it's available.
"""
from activities.models import Emoji
from users.services import IdentityService
if self.local:
raise ValueError("Cannot fetch local identities")
@ -866,19 +866,14 @@ class Identity(StatorModel):
return False
status_code = response.status_code
if status_code >= 400:
if status_code in [408, 504]:
if status_code in [408, 429, 504]:
raise TryAgainLater()
if status_code == 410 and self.pk:
# Their account got deleted, so let's do the same.
Identity.objects.filter(pk=self.pk).delete()
if status_code < 500 and status_code not in [401, 403, 404, 406, 410]:
logging.info(
f"Client error fetching actor at {self.actor_uri}: {status_code}",
extra={
"identity": self.pk,
"domain": self.domain_id,
"content": response.content,
},
"Client error fetching actor: %d %s", status_code, self.actor_uri
)
return False
try:
@ -886,10 +881,9 @@ class Identity(StatorModel):
except ValueError:
# servers with empty or invalid responses are inevitable
logging.info(
f"Invalid response fetching actor at {self.actor_uri}",
"Invalid response fetching actor %s",
self.actor_uri,
extra={
"identity": self.pk,
"domain": self.domain_id,
"content": response.content,
},
)
@ -948,7 +942,16 @@ class Identity(StatorModel):
self.domain = Domain.get_remote_domain(webfinger_domain)
except TryAgainLater:
# continue with original domain when webfinger times out
logging.info("WebFinger timed out: %s", self.actor_uri)
pass
except ValueError as exc:
logging.info(
"Can't parse WebFinger: %s %s",
exc.args[0],
self.actor_uri,
exc_info=exc,
)
return False
# Emojis (we need the domain so we do them here)
for tag in get_list(document, "tag"):
if tag["type"].lower() in ["toot:emoji", "emoji"]:
@ -973,11 +976,14 @@ class Identity(StatorModel):
with transaction.atomic():
self.save()
# Fetch pinned posts after identity has been fetched and saved
# Fetch pinned posts in a followup task
if self.featured_collection_uri:
featured = self.fetch_pinned_post_uris(self.featured_collection_uri)
service = IdentityService(self)
service.sync_pins(featured)
InboxMessage.create_internal(
{
"type": "SyncPins",
"identity": self.pk,
}
)
return True

View file

@ -141,6 +141,10 @@ class InboxMessageStates(StateGraph):
IdentityService.handle_internal_add_follow(
instance.message["object"]
)
case "syncpins":
IdentityService.handle_internal_sync_pins(
instance.message["object"]
)
case unknown:
return cls.errored
case unknown:

View file

@ -1,3 +1,6 @@
import logging
from django.core.exceptions import MultipleObjectsReturned
from django.db import models, transaction
from django.template.defaultfilters import linebreaks_filter
@ -222,12 +225,14 @@ class IdentityService:
post=post,
state__in=PostInteractionStates.group_active(),
)
except MultipleObjectsReturned as exc:
logging.exception("%s on %s", exc, object_uri)
pass
except Post.DoesNotExist:
# ignore 404s...
pass
except TryAgainLater:
# when fetching a post -> author -> post we can
# get into a state. Ignore this round.
# don't wait for it now, it'll be synced on next refresh
pass
for removed in PostInteraction.objects.filter(
type=PostInteraction.Types.pin,
@ -319,3 +324,22 @@ class IdentityService:
raise ValueError(f"Cannot find identity to follow: {target_identity}")
# Follow!
self.follow(target_identity=target_identity, boosts=payload.get("boosts", True))
@classmethod
def handle_internal_sync_pins(cls, payload):
"""
Handles an inbox message saying we need to sync featured posts
Message format:
{
"type": "SyncPins",
"identity": "90310938129083",
}
"""
# Retrieve ourselves
actor = Identity.objects.get(pk=payload["identity"])
self = cls(actor)
# Get the remote end (may need a fetch)
if actor.featured_collection_uri:
featured = actor.fetch_pinned_post_uris(actor.featured_collection_uri)
self.sync_pins(featured)

View file

@ -1,5 +1,6 @@
import json
import logging
from urllib.parse import urldefrag, urlparse
from django.conf import settings
from django.http import Http404, HttpResponse, HttpResponseBadRequest, JsonResponse
@ -20,9 +21,9 @@ from core.signatures import (
VerificationFormatError,
)
from core.views import StaticContentView
from stator.exceptions import TryAgainLater
from takahe import __version__
from users.models import Identity, InboxMessage, SystemActor
from users.models.domain import Domain
from users.shortcuts import by_handle_or_404
@ -153,50 +154,91 @@ class Inbox(View):
# We don't have an Identity record for the user. No-op
return HttpResponse(status=202)
if not identity.public_key:
# See if we can fetch it right now
try:
identity.fetch_actor()
except TryAgainLater:
logging.warning(
f"Inbox error: timed out fetching actor {document['actor']}"
)
return HttpResponse(status=504)
if not identity.public_key:
logging.warning(f"Inbox error: cannot fetch actor {document['actor']}")
return HttpResponseBadRequest("Cannot retrieve actor")
# See if it's from a blocked user or domain
if identity.blocked or identity.domain.recursively_blocked():
# See if it's from a blocked user or domain - without calling
# fetch_actor, which would fetch data from potentially bad actor
domain = identity.domain
if not domain:
actor_url_parts = urlparse(document["actor"])
domain = Domain.get_remote_domain(actor_url_parts.hostname)
if identity.blocked or domain.recursively_blocked():
# I love to lie! Throw it away!
logging.warning(f"Inbox: Discarded message from {identity.actor_uri}")
logging.info(
"Inbox: Discarded message from blocked %s %s",
"domain" if domain.recursively_blocked() else "user",
identity.actor_uri,
)
return HttpResponse(status=202)
# If there's a "signature" payload, verify against that
if "signature" in document:
# authenticate HTTP signature first, if one is present and the actor
# is already known to us. An invalid signature is an error and message
# should be discarded. NOTE: for previously unknown actors, we
# don't have their public key yet!
if "signature" in request:
try:
LDSignature.verify_signature(document, identity.public_key)
if identity.public_key:
HttpSignature.verify_request(
request,
identity.public_key,
)
logging.debug(
"Inbox: %s from %s has good HTTP signature",
document["type"],
identity,
)
else:
logging.info(
"Inbox: New actor, no key available: %s",
document["actor"],
)
except VerificationFormatError as e:
logging.warning(f"Inbox error: Bad LD signature format: {e.args[0]}")
logging.warning("Inbox error: Bad HTTP signature format: %s", e.args[0])
return HttpResponseBadRequest(e.args[0])
except VerificationError:
logging.warning("Inbox error: Bad LD signature")
logging.warning("Inbox error: Bad HTTP signature from %s", identity)
return HttpResponseUnauthorized("Bad signature")
# Otherwise, verify against the header (assuming it's the same actor)
else:
# Mastodon advices not implementing LD Signatures, but
# they're widely deployed today. Validate it if one exists.
# https://docs.joinmastodon.org/spec/security/#ld
if "signature" in document:
try:
HttpSignature.verify_request(
request,
identity.public_key,
# signatures are identified by the signature block
creator = urldefrag(document["signature"]["creator"]).url
creator_identity = Identity.by_actor_uri(
creator, create=True, transient=True
)
if not creator_identity.public_key:
logging.info("Inbox: New actor, no key available: %s", creator)
# if we can't verify it, we don't keep it
document.pop("signature")
else:
LDSignature.verify_signature(document, creator_identity.public_key)
logging.debug(
"Inbox: %s from %s has good LD signature",
document["type"],
creator_identity,
)
except VerificationFormatError as e:
logging.warning(f"Inbox error: Bad HTTP signature format: {e.args[0]}")
logging.warning("Inbox error: Bad LD signature format: %s", e.args[0])
return HttpResponseBadRequest(e.args[0])
except VerificationError:
logging.warning("Inbox error: Bad HTTP signature")
return HttpResponseUnauthorized("Bad signature")
# An invalid LD Signature might also indicate nothing but
# a syntactical difference between implementations.
# Strip it out if we can't verify it.
if "signature" in document:
document.pop("signature")
logging.info(
"Inbox: Stripping invalid LD signature from %s %s",
creator_identity,
document["id"],
)
if not ("signature" in request or "signature" in document):
logging.debug(
"Inbox: %s from %s is unauthenticated. That's OK.",
document["type"],
identity,
)
# Don't allow injection of internal messages
if document["type"].startswith("__"):

View file

@ -44,7 +44,14 @@ class ViewIdentity(ListView):
)
# If it's remote, redirect to its profile page
if not self.identity.local:
return redirect(self.identity.profile_uri)
if self.identity.profile_uri:
return redirect(self.identity.profile_uri)
elif self.identity.actor_uri:
# gup.pe topic actors don't have profile URLs
return redirect(self.identity.actor_uri)
else:
return Http404("Unknown actor")
# If they're coming in looking for JSON, they want the actor
if request.ap_json:
# Return actor info