From 011c51b3c4228118f145dc86b1d8705b1ac99f55 Mon Sep 17 00:00:00 2001 From: Andrew Godwin Date: Fri, 30 Dec 2022 11:06:38 -0700 Subject: [PATCH] Implement progressive thread parent fetching --- activities/models/post.py | 60 +++++++++++++++++++++++++++-------- activities/services/post.py | 4 ++- core/ld.py | 14 ++++++++ users/models/inbox_message.py | 10 ++++++ users/views/activitypub.py | 4 +++ 5 files changed, 78 insertions(+), 14 deletions(-) diff --git a/activities/models/post.py b/activities/models/post.py index 273586d..6caa439 100644 --- a/activities/models/post.py +++ b/activities/models/post.py @@ -21,10 +21,17 @@ from activities.models.post_types import ( PostTypeDataEncoder, ) from core.html import ContentRenderer, strip_html -from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date +from core.ld import ( + canonicalise, + format_ld_date, + get_list, + get_value_or_map, + parse_ld_date, +) from stator.exceptions import TryAgainLater from stator.models import State, StateField, StateGraph, StatorModel from users.models.identity import Identity, IdentityStates +from users.models.inbox_message import InboxMessage from users.models.system_actor import SystemActor @@ -700,16 +707,7 @@ class Post(StatorModel): if post.type in (cls.Types.article, cls.Types.question): type_data = PostTypeData(__root__=data).__root__ post.type_data = type_data.dict() - # Get content in order of: content value, contentmap.und, any contentmap entry - if "content" in data: - post.content = data["content"] - elif "contentMap" in data: - if "und" in data["contentMap"]: - post.content = data["contentMap"]["und"] - else: - post.content = list(data["contentMap"].values())[0] - else: - raise ValueError("Post has no content or content map") + post.content = get_value_or_map(data, "content", "contentMap") post.summary = data.get("summary") post.sensitive = data.get("sensitive", False) post.url = data.get("url", data["id"]) @@ -723,7 +721,9 @@ class Post(StatorModel): 
mention_identity = Identity.by_actor_uri(tag["href"], create=True) post.mentions.add(mention_identity) elif tag["type"].lower() in ["_:hashtag", "hashtag"]: - post.hashtags.append(tag["name"].lower().lstrip("#")) + post.hashtags.append( + get_value_or_map(tag, "name", "nameMap").lower().lstrip("#") + ) elif tag["type"].lower() in ["toot:emoji", "emoji"]: emoji = Emoji.by_ap_tag(post.author.domain, tag, create=True) post.emojis.add(emoji) @@ -758,10 +758,13 @@ class Post(StatorModel): focal_y=focal_y, ) post.save() + # Potentially schedule a fetch of the reply parent + if post.in_reply_to: + cls.ensure_object_uri(post.in_reply_to) return post @classmethod - def by_object_uri(cls, object_uri, fetch=False): + def by_object_uri(cls, object_uri, fetch=False) -> "Post": """ Gets the post by URI - either looking up locally, or fetching from the other end if it's not here. @@ -798,6 +801,27 @@ class Post(StatorModel): else: raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}") + @classmethod + def ensure_object_uri(cls, object_uri: str): + """ + Sees if the post is in our local set, and if not, schedules a fetch + for it (in the background) + """ + if not object_uri: + raise ValueError("No URI provided!") + try: + cls.by_object_uri(object_uri) + except cls.DoesNotExist: + InboxMessage.objects.create( + message={ + "type": "__internal__", + "object": { + "type": "FetchPost", + "object": object_uri, + }, + } + ) + @classmethod def handle_create_ap(cls, data): """ @@ -848,6 +872,16 @@ class Post(StatorModel): raise ValueError("Actor on delete does not match object") post.delete() + @classmethod + def handle_fetch_internal(cls, data): + """ + Handles an internal fetch-request inbox message + """ + try: + cls.by_object_uri(data["object"]["object"], fetch=True) + except cls.DoesNotExist: + pass + ### OpenGraph API ### def to_opengraph_dict(self) -> dict: diff --git a/activities/services/post.py b/activities/services/post.py index 59f73fe..46e8f6d 100644 --- 
a/activities/services/post.py +++ b/activities/services/post.py @@ -103,8 +103,10 @@ class PostService: ancestors: list[Post] = [] ancestor = self.post while ancestor.in_reply_to and len(ancestors) < num_ancestors: - ancestor = self.queryset().filter(object_uri=ancestor.in_reply_to).first() + object_uri = ancestor.in_reply_to + ancestor = self.queryset().filter(object_uri=object_uri).first() if ancestor is None: + Post.ensure_object_uri(object_uri) break if ancestor.state in [PostStates.deleted, PostStates.deleted_fanned_out]: break diff --git a/core/ld.py b/core/ld.py index 3c37253..0700e08 100644 --- a/core/ld.py +++ b/core/ld.py @@ -523,6 +523,20 @@ def get_first_image_url(data) -> str | None: return None +def get_value_or_map(data, key, map_key): + """ + Retrieves a value that can either be a top level key (like "name") or + an entry in a map (like nameMap). + """ + if key in data: + return data[key] + if map_key in data: + if "und" in data[map_key]: + return data[map_key]["und"] + return list(data[map_key].values())[0] + raise KeyError(f"Cannot find {key} or {map_key}") + + def media_type_from_filename(filename): _, extension = os.path.splitext(filename) if extension == ".png": diff --git a/users/models/inbox_message.py b/users/models/inbox_message.py index 962c3d2..88a9e65 100644 --- a/users/models/inbox_message.py +++ b/users/models/inbox_message.py @@ -137,6 +137,16 @@ class InboxMessageStates(StateGraph): case "flag": # Received reports await sync_to_async(Report.handle_ap)(instance.message) + case "__internal__": + match instance.message_object_type: + case "fetchpost": + await sync_to_async(Post.handle_fetch_internal)( + instance.message + ) + case unknown: + raise ValueError( + f"Cannot handle activity of type __internal__.{unknown}" + ) case unknown: raise ValueError(f"Cannot handle activity of type {unknown}") return cls.processed diff --git a/users/views/activitypub.py b/users/views/activitypub.py index 774901b..1842f67 100644 --- 
a/users/views/activitypub.py +++ b/users/views/activitypub.py @@ -187,6 +187,10 @@ class Inbox(View): exceptions.capture_message("Inbox error: Bad HTTP signature") return HttpResponseUnauthorized("Bad signature") + # Don't allow injection of internal messages + if document["type"].startswith("__"): + return HttpResponseUnauthorized("Bad type") + # Hand off the item to be processed by the queue InboxMessage.objects.create(message=document) return HttpResponse(status=202)