Implement progressive thread parent fetching

This commit is contained in:
Andrew Godwin 2022-12-30 11:06:38 -07:00
parent a93f03d639
commit 011c51b3c4
5 changed files with 78 additions and 14 deletions

View file

@ -21,10 +21,17 @@ from activities.models.post_types import (
PostTypeDataEncoder,
)
from core.html import ContentRenderer, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from core.ld import (
canonicalise,
format_ld_date,
get_list,
get_value_or_map,
parse_ld_date,
)
from stator.exceptions import TryAgainLater
from stator.models import State, StateField, StateGraph, StatorModel
from users.models.identity import Identity, IdentityStates
from users.models.inbox_message import InboxMessage
from users.models.system_actor import SystemActor
@ -700,16 +707,7 @@ class Post(StatorModel):
if post.type in (cls.Types.article, cls.Types.question):
type_data = PostTypeData(__root__=data).__root__
post.type_data = type_data.dict()
# Get content in order of: content value, contentmap.und, any contentmap entry
if "content" in data:
post.content = data["content"]
elif "contentMap" in data:
if "und" in data["contentMap"]:
post.content = data["contentMap"]["und"]
else:
post.content = list(data["contentMap"].values())[0]
else:
raise ValueError("Post has no content or content map")
post.content = get_value_or_map(data, "content", "contentMap")
post.summary = data.get("summary")
post.sensitive = data.get("sensitive", False)
post.url = data.get("url", data["id"])
@ -723,7 +721,9 @@ class Post(StatorModel):
mention_identity = Identity.by_actor_uri(tag["href"], create=True)
post.mentions.add(mention_identity)
elif tag["type"].lower() in ["_:hashtag", "hashtag"]:
post.hashtags.append(tag["name"].lower().lstrip("#"))
post.hashtags.append(
get_value_or_map(tag, "name", "nameMap").lower().lstrip("#")
)
elif tag["type"].lower() in ["toot:emoji", "emoji"]:
emoji = Emoji.by_ap_tag(post.author.domain, tag, create=True)
post.emojis.add(emoji)
@ -758,10 +758,13 @@ class Post(StatorModel):
focal_y=focal_y,
)
post.save()
# Potentially schedule a fetch of the reply parent
if post.in_reply_to:
cls.ensure_object_uri(post.in_reply_to)
return post
@classmethod
def by_object_uri(cls, object_uri, fetch=False):
def by_object_uri(cls, object_uri, fetch=False) -> "Post":
"""
Gets the post by URI - either looking up locally, or fetching
from the other end if it's not here.
@ -798,6 +801,27 @@ class Post(StatorModel):
else:
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}")
@classmethod
def ensure_object_uri(cls, object_uri: str):
    """
    Makes sure a post with the given URI is (or will be) available locally.

    If the lookup via by_object_uri raises DoesNotExist, an internal
    "FetchPost" inbox message is created so the fetch happens in the
    background rather than inline.

    Raises ValueError if no URI is given.
    """
    if not object_uri:
        raise ValueError("No URI provided!")
    try:
        cls.by_object_uri(object_uri)
    except cls.DoesNotExist:
        # Not known locally - queue an internal message asking for a fetch
        fetch_request = {"type": "FetchPost", "object": object_uri}
        InboxMessage.objects.create(
            message={"type": "__internal__", "object": fetch_request}
        )
@classmethod
def handle_create_ap(cls, data):
"""
@ -848,6 +872,16 @@ class Post(StatorModel):
raise ValueError("Actor on delete does not match object")
post.delete()
@classmethod
def handle_fetch_internal(cls, data):
    """
    Processes an internal inbox message that requests a post fetch.

    The URI to fetch is read from data["object"]["object"]. A DoesNotExist
    raised by the lookup is deliberately ignored.
    """
    target_uri = data["object"]["object"]
    try:
        cls.by_object_uri(target_uri, fetch=True)
    except cls.DoesNotExist:
        # Best effort only: nothing more to do if the post cannot be found
        return
### OpenGraph API ###
def to_opengraph_dict(self) -> dict:

View file

@ -103,8 +103,10 @@ class PostService:
ancestors: list[Post] = []
ancestor = self.post
while ancestor.in_reply_to and len(ancestors) < num_ancestors:
ancestor = self.queryset().filter(object_uri=ancestor.in_reply_to).first()
object_uri = ancestor.in_reply_to
ancestor = self.queryset().filter(object_uri=object_uri).first()
if ancestor is None:
Post.ensure_object_uri(object_uri)
break
if ancestor.state in [PostStates.deleted, PostStates.deleted_fanned_out]:
break

View file

@ -523,6 +523,20 @@ def get_first_image_url(data) -> str | None:
return None
def get_value_or_map(data, key, map_key):
    """
    Retrieves a value that can either be a top level key (like "name") or
    an entry in a map (like nameMap).

    Lookup order is: the plain ``key``, then the "und" entry of the map
    (the tag ActivityStreams uses for an undetermined language), then
    whichever map entry comes first.

    Raises KeyError if neither ``key`` nor a non-empty ``map_key`` map is
    present in ``data``.
    """
    if key in data:
        return data[key]
    if map_key in data:
        value_map = data[map_key]
        # Check for "und" inside the map itself, not in the map's key name
        if "und" in value_map:
            return value_map["und"]
        # Fall back to the first entry; an empty map falls through to the
        # KeyError below rather than failing with an IndexError
        if value_map:
            return next(iter(value_map.values()))
    raise KeyError(f"Cannot find {key} or {map_key}")
def media_type_from_filename(filename):
_, extension = os.path.splitext(filename)
if extension == ".png":

View file

@ -137,6 +137,16 @@ class InboxMessageStates(StateGraph):
case "flag":
# Received reports
await sync_to_async(Report.handle_ap)(instance.message)
case "__internal__":
match instance.message_object_type:
case "fetchpost":
await sync_to_async(Post.handle_fetch_internal)(
instance.message
)
case unknown:
raise ValueError(
f"Cannot handle activity of type __internal__.{unknown}"
)
case unknown:
raise ValueError(f"Cannot handle activity of type {unknown}")
return cls.processed

View file

@ -187,6 +187,10 @@ class Inbox(View):
exceptions.capture_message("Inbox error: Bad HTTP signature")
return HttpResponseUnauthorized("Bad signature")
# Don't allow injection of internal messages
if document["type"].startswith("__"):
return HttpResponseUnauthorized("Bad type")
# Hand off the item to be processed by the queue
InboxMessage.objects.create(message=document)
return HttpResponse(status=202)