# takahe/activities/models/post.py

import datetime
import hashlib
import json
import mimetypes
import re
import ssl
from collections.abc import Iterable
from typing import Optional
from urllib.parse import urlparse
import httpx
import urlman
from asgiref.sync import async_to_sync, sync_to_async
from django.contrib.postgres.indexes import GinIndex
from django.db import models, transaction
from django.template import loader
from django.template.defaultfilters import linebreaks_filter
from django.utils import timezone
from activities.models.emoji import Emoji
from activities.models.fan_out import FanOut
from activities.models.hashtag import Hashtag, HashtagStates
from activities.models.post_types import (
PostTypeData,
PostTypeDataDecoder,
PostTypeDataEncoder,
)
from core.exceptions import capture_message
from core.html import ContentRenderer, strip_html
from core.ld import (
canonicalise,
format_ld_date,
get_list,
get_value_or_map,
parse_ld_date,
)
from core.snowflake import Snowflake
from stator.exceptions import TryAgainLater
from stator.models import State, StateField, StateGraph, StatorModel
from users.models.follow import FollowStates
from users.models.identity import Identity, IdentityStates
from users.models.inbox_message import InboxMessage
from users.models.system_actor import SystemActor
class PostStates(StateGraph):
new = State(try_interval=300)
fanned_out = State(externally_progressed=True)
deleted = State(try_interval=300)
deleted_fanned_out = State(delete_after=24 * 60 * 60)
edited = State(try_interval=300)
edited_fanned_out = State(externally_progressed=True)
new.transitions_to(fanned_out)
fanned_out.transitions_to(deleted)
fanned_out.transitions_to(edited)
deleted.transitions_to(deleted_fanned_out)
edited.transitions_to(edited_fanned_out)
edited_fanned_out.transitions_to(edited)
edited_fanned_out.transitions_to(deleted)
@classmethod
async def targets_fan_out(cls, post: "Post", type_: str) -> None:
# Fan out to each target
for follow in await post.aget_targets():
await FanOut.objects.acreate(
identity=follow,
type=type_,
subject_post=post,
)
@classmethod
async def handle_new(cls, instance: "Post"):
"""
Creates all needed fan-out objects for a new Post.
"""
post = await instance.afetch_full()
# Only fan out if the post was published in the last day or it's local
        # (we don't want to fan out anything older than that which is remote)
if post.local or (timezone.now() - post.published) < datetime.timedelta(days=1):
await cls.targets_fan_out(post, FanOut.Types.post)
await post.ensure_hashtags()
return cls.fanned_out
@classmethod
async def handle_deleted(cls, instance: "Post"):
"""
        Creates all the fan-out objects needed to delete a Post.
"""
post = await instance.afetch_full()
await cls.targets_fan_out(post, FanOut.Types.post_deleted)
return cls.deleted_fanned_out
@classmethod
async def handle_edited(cls, instance: "Post"):
"""
Creates all needed fan-out objects for an edited Post.
"""
post = await instance.afetch_full()
await cls.targets_fan_out(post, FanOut.Types.post_edited)
await post.ensure_hashtags()
return cls.edited_fanned_out
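# Lifecycle sketch (orientation only; these are the transitions declared above,
# driven by Stator's handlers):
#
#   new --handle_new--> fanned_out
#   fanned_out --(local edit)--> edited --handle_edited--> edited_fanned_out
#   fanned_out / edited_fanned_out --(delete)--> deleted --handle_deleted--> deleted_fanned_out
#   deleted_fanned_out becomes eligible for cleanup after 24 hours (delete_after above).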
class PostQuerySet(models.QuerySet):
def not_hidden(self):
query = self.exclude(
state__in=[PostStates.deleted, PostStates.deleted_fanned_out]
)
return query
def public(self, include_replies: bool = False):
query = self.filter(
visibility__in=[
Post.Visibilities.public,
Post.Visibilities.local_only,
],
)
if not include_replies:
return query.filter(in_reply_to__isnull=True)
return query
def local_public(self, include_replies: bool = False):
query = self.filter(
visibility__in=[
Post.Visibilities.public,
Post.Visibilities.local_only,
],
local=True,
)
if not include_replies:
return query.filter(in_reply_to__isnull=True)
return query
def unlisted(self, include_replies: bool = False):
query = self.filter(
visibility__in=[
Post.Visibilities.public,
Post.Visibilities.local_only,
Post.Visibilities.unlisted,
],
)
if not include_replies:
return query.filter(in_reply_to__isnull=True)
return query
def visible_to(self, identity: Identity | None, include_replies: bool = False):
if identity is None:
return self.unlisted(include_replies=include_replies)
query = self.filter(
models.Q(
visibility__in=[
Post.Visibilities.public,
Post.Visibilities.local_only,
Post.Visibilities.unlisted,
]
)
| models.Q(
visibility=Post.Visibilities.followers,
author__inbound_follows__source=identity,
)
| models.Q(
mentions=identity,
)
| models.Q(author=identity)
).distinct()
if not include_replies:
return query.filter(in_reply_to__isnull=True)
return query
def tagged_with(self, hashtag: str | Hashtag):
if isinstance(hashtag, str):
tag_q = models.Q(hashtags__contains=hashtag)
else:
tag_q = models.Q(hashtags__contains=hashtag.hashtag)
if hashtag.aliases:
for alias in hashtag.aliases:
tag_q |= models.Q(hashtags__contains=alias)
return self.filter(tag_q)
class PostManager(models.Manager):
def get_queryset(self):
return PostQuerySet(self.model, using=self._db)
def not_hidden(self):
return self.get_queryset().not_hidden()
def public(self, include_replies: bool = False):
return self.get_queryset().public(include_replies=include_replies)
def local_public(self, include_replies: bool = False):
return self.get_queryset().local_public(include_replies=include_replies)
def unlisted(self, include_replies: bool = False):
return self.get_queryset().unlisted(include_replies=include_replies)
def tagged_with(self, hashtag: str | Hashtag):
return self.get_queryset().tagged_with(hashtag=hashtag)
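# Illustrative manager/queryset usage (the `viewer` identity is hypothetical):
#
#   Post.objects.not_hidden().public()                       # top-level public posts
#   Post.objects.not_hidden().visible_to(viewer, include_replies=True)
#   Post.objects.tagged_with("takahe")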
class Post(StatorModel):
"""
A post (status, toot) that is either local or remote.
"""
class Visibilities(models.IntegerChoices):
public = 0
local_only = 4
unlisted = 1
followers = 2
mentioned = 3
class Types(models.TextChoices):
article = "Article"
audio = "Audio"
event = "Event"
image = "Image"
note = "Note"
page = "Page"
question = "Question"
video = "Video"
id = models.BigIntegerField(primary_key=True, default=Snowflake.generate_post)
# The author (attributedTo) of the post
author = models.ForeignKey(
"users.Identity",
on_delete=models.CASCADE,
related_name="posts",
)
# The state the post is in
state = StateField(PostStates)
# If it is our post or not
local = models.BooleanField()
# The canonical object ID
object_uri = models.CharField(max_length=2048, blank=True, null=True, unique=True)
# Who should be able to see this Post
visibility = models.IntegerField(
choices=Visibilities.choices,
default=Visibilities.public,
)
# The main (HTML) content
content = models.TextField()
type = models.CharField(
max_length=20,
choices=Types.choices,
default=Types.note,
)
type_data = models.JSONField(
blank=True, null=True, encoder=PostTypeDataEncoder, decoder=PostTypeDataDecoder
)
# If the contents of the post are sensitive, and the summary (content
# warning) to show if it is
sensitive = models.BooleanField(default=False)
summary = models.TextField(blank=True, null=True)
# The public, web URL of this Post on the original server
url = models.CharField(max_length=2048, blank=True, null=True)
# The Post it is replying to as an AP ID URI
# (as otherwise we'd have to pull entire threads to use IDs)
in_reply_to = models.CharField(max_length=500, blank=True, null=True)
# The identities the post is directly to (who can see it if not public)
to = models.ManyToManyField(
"users.Identity",
related_name="posts_to",
blank=True,
)
# The identities mentioned in the post
mentions = models.ManyToManyField(
"users.Identity",
related_name="posts_mentioning",
blank=True,
)
# Hashtags in the post
hashtags = models.JSONField(blank=True, null=True)
emojis = models.ManyToManyField(
"activities.Emoji",
related_name="posts_using_emoji",
blank=True,
)
# Like/Boost/etc counts
stats = models.JSONField(blank=True, null=True)
# When the post was originally created (as opposed to when we received it)
published = models.DateTimeField(default=timezone.now)
# If the post has been edited after initial publication
edited = models.DateTimeField(blank=True, null=True)
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)
objects = PostManager()
class Meta:
indexes = [
GinIndex(fields=["hashtags"], name="hashtags_gin"),
models.Index(
fields=["visibility", "local", "published"],
name="ix_post_local_public_published",
),
models.Index(
fields=["visibility", "local", "created"],
name="ix_post_local_public_created",
),
]
class urls(urlman.Urls):
view = "{self.author.urls.view}posts/{self.id}/"
object_uri = "{self.author.actor_uri}posts/{self.id}/"
action_like = "{view}like/"
action_unlike = "{view}unlike/"
action_boost = "{view}boost/"
action_unboost = "{view}unboost/"
action_delete = "{view}delete/"
action_edit = "{view}edit/"
action_report = "{view}report/"
action_reply = "/compose/?reply_to={self.id}"
admin_edit = "/djadmin/activities/post/{self.id}/change/"
def get_scheme(self, url):
return "https"
def get_hostname(self, url):
return self.instance.author.domain.uri_domain
def __str__(self):
return f"{self.author} #{self.id}"
def get_absolute_url(self):
return self.urls.view
def absolute_object_uri(self):
"""
Returns an object URI that is always absolute, for sending out to
other servers.
"""
if self.local:
return self.author.absolute_profile_uri() + f"posts/{self.id}/"
else:
return self.object_uri
def in_reply_to_post(self) -> Optional["Post"]:
"""
Returns the actual Post object we're replying to, if we can find it
"""
if self.in_reply_to is None:
return None
return (
Post.objects.filter(object_uri=self.in_reply_to)
.select_related("author")
.first()
)
ain_reply_to_post = sync_to_async(in_reply_to_post)
### Content cleanup and extraction ###
def clean_type_data(self, value):
PostTypeData.parse_obj(value)
mention_regex = re.compile(
r"(^|[^\w\d\-_/])@([\w\d\-_]+(?:@[\w\d\-_\.]+[\w\d\-_]+)?)"
)
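    # The regex above picks up "@handle" and "@handle@domain.tld" forms; the
    # leading group keeps it from firing inside URLs ("https://example.com/@x")
    # or email-like text ("someone@example.com"). Matches are resolved into
    # Identity records by mentions_from_content() below.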
def _safe_content_note(self, *, local: bool = True):
return ContentRenderer(local=local).render_post(self.content, self)
# def _safe_content_question(self, *, local: bool = True):
# context = {
# "post": self,
# "typed_data": PostTypeData(self.type_data),
# }
# return loader.render_to_string("activities/_type_question.html", context)
def _safe_content_typed(self, *, local: bool = True):
context = {
"post": self,
"sanitized_content": self._safe_content_note(local=local),
"local_display": local,
}
return loader.render_to_string(
(
f"activities/_type_{self.type.lower()}.html",
"activities/_type_unknown.html",
),
context,
)
def safe_content(self, *, local: bool = True):
func = getattr(
self, f"_safe_content_{self.type.lower()}", self._safe_content_typed
)
if callable(func):
return func(local=local)
return self._safe_content_note(local=local) # fallback
def safe_content_local(self):
"""
Returns the content formatted for local display
"""
return self.safe_content(local=True)
def safe_content_remote(self):
"""
Returns the content formatted for remote consumption
"""
return self.safe_content(local=False)
def summary_class(self) -> str:
"""
Returns a CSS class name to identify this summary value
"""
if not self.summary:
return ""
return "summary-" + hashlib.md5(self.summary.encode("utf8")).hexdigest()
@property
def stats_with_defaults(self):
"""
Returns the stats dict with counts of likes/etc. in it
"""
return {
"likes": self.stats.get("likes", 0) if self.stats else 0,
"boosts": self.stats.get("boosts", 0) if self.stats else 0,
"replies": self.stats.get("replies", 0) if self.stats else 0,
}
### Async helpers ###
async def afetch_full(self) -> "Post":
"""
Returns a version of the object with all relations pre-loaded
"""
return (
await Post.objects.select_related("author", "author__domain")
.prefetch_related("mentions", "mentions__domain", "attachments", "emojis")
.aget(pk=self.pk)
)
### Local creation/editing ###
@classmethod
def create_local(
cls,
author: Identity,
content: str,
summary: str | None = None,
sensitive: bool = False,
visibility: int = Visibilities.public,
reply_to: Optional["Post"] = None,
attachments: list | None = None,
) -> "Post":
with transaction.atomic():
# Find mentions in this post
mentions = cls.mentions_from_content(content, author)
if reply_to:
mentions.add(reply_to.author)
# Maintain local-only for replies
if reply_to.visibility == reply_to.Visibilities.local_only:
visibility = reply_to.Visibilities.local_only
# Find hashtags in this post
hashtags = Hashtag.hashtags_from_content(content) or None
# Find emoji in this post
emojis = Emoji.emojis_from_content(content, None)
# Strip all HTML and apply linebreaks filter
content = linebreaks_filter(strip_html(content))
# Make the Post object
post = cls.objects.create(
author=author,
content=content,
summary=summary or None,
sensitive=bool(summary) or sensitive,
local=True,
visibility=visibility,
hashtags=hashtags,
in_reply_to=reply_to.object_uri if reply_to else None,
)
post.object_uri = post.urls.object_uri
post.url = post.absolute_object_uri()
post.mentions.set(mentions)
post.emojis.set(emojis)
if attachments:
post.attachments.set(attachments)
post.save()
# Recalculate parent stats for replies
if reply_to:
reply_to.calculate_stats()
return post
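    # Minimal usage sketch (the `identity` value here is illustrative):
    #
    #   post = Post.create_local(
    #       author=identity,
    #       content="Hello fediverse! #introductions",
    #       visibility=Post.Visibilities.unlisted,
    #   )
    #
    # Stator then drives the new Post from PostStates.new through fan-out.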
def edit_local(
self,
content: str,
summary: str | None = None,
visibility: int = Visibilities.public,
attachments: list | None = None,
):
with transaction.atomic():
# Strip all HTML and apply linebreaks filter
self.content = linebreaks_filter(strip_html(content))
self.summary = summary or None
self.sensitive = bool(summary)
self.visibility = visibility
self.edited = timezone.now()
self.hashtags = Hashtag.hashtags_from_content(content) or None
self.mentions.set(self.mentions_from_content(content, self.author))
self.emojis.set(Emoji.emojis_from_content(content, None))
self.attachments.set(attachments or [])
self.save()
@classmethod
def mentions_from_content(cls, content, author) -> set[Identity]:
mention_hits = cls.mention_regex.findall(content)
mentions = set()
for precursor, handle in mention_hits:
handle = handle.lower()
if "@" in handle:
username, domain = handle.split("@", 1)
else:
username = handle
domain = author.domain_id
identity = Identity.by_username_and_domain(
username=username,
domain=domain,
fetch=True,
)
if identity is not None:
mentions.add(identity)
return mentions
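    # For example (illustrative), content such as "cc @admin and @bob@remote.example"
    # yields the Identity records for both handles: bare usernames resolve against
    # the author's own domain, and unknown remote handles are fetched on demand.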
async def ensure_hashtags(self) -> None:
"""
Ensure any of the already parsed hashtags from this Post
have a corresponding Hashtag record.
"""
# Ensure hashtags
if self.hashtags:
for hashtag in self.hashtags:
tag, _ = await Hashtag.objects.aget_or_create(
hashtag=hashtag,
)
await tag.atransition_perform(HashtagStates.outdated)
def calculate_stats(self, save=True):
"""
Recalculates our stats dict
"""
from activities.models import PostInteraction, PostInteractionStates
self.stats = {
"likes": self.interactions.filter(
type=PostInteraction.Types.like,
state__in=PostInteractionStates.group_active(),
).count(),
"boosts": self.interactions.filter(
type=PostInteraction.Types.boost,
state__in=PostInteractionStates.group_active(),
).count(),
"replies": Post.objects.filter(in_reply_to=self.object_uri).count(),
}
if save:
self.save()
### ActivityPub (outbound) ###
def to_ap(self) -> dict:
"""
Returns the AP JSON for this object
"""
value = {
"to": [],
"cc": [],
"type": self.type,
"id": self.object_uri,
"published": format_ld_date(self.published),
"attributedTo": self.author.actor_uri,
"content": self.safe_content_remote(),
"sensitive": self.sensitive,
"url": self.absolute_object_uri(),
"tag": [],
"attachment": [],
}
if self.type == Post.Types.question and self.type_data:
value[self.type_data.mode] = [
{
"name": option.name,
"type": option.type,
"replies": {"type": "Collection", "totalItems": option.votes},
}
for option in self.type_data.options
]
value["toot:votersCount"] = self.type_data.voter_count
if self.type_data.end_time:
value["endTime"] = format_ld_date(self.type_data.end_time)
if self.summary:
value["summary"] = self.summary
if self.in_reply_to:
value["inReplyTo"] = self.in_reply_to
if self.edited:
value["updated"] = format_ld_date(self.edited)
# Targeting
# TODO: Add followers object
if self.visibility == self.Visibilities.public:
value["to"].append("Public")
elif self.visibility == self.Visibilities.unlisted:
value["cc"].append("Public")
# Mentions
for mention in self.mentions.all():
value["tag"].append(mention.to_ap_tag())
value["cc"].append(mention.actor_uri)
# Hashtags
for hashtag in self.hashtags or []:
value["tag"].append(
{
"href": f"https://{self.author.domain.uri_domain}/tags/{hashtag}/",
"name": f"#{hashtag}",
"type": "Hashtag",
}
)
# Emoji
for emoji in self.emojis.all():
value["tag"].append(emoji.to_ap_tag())
# Attachments
for attachment in self.attachments.all():
value["attachment"].append(attachment.to_ap())
# Remove fields if they're empty
for field in ["to", "cc", "tag", "attachment"]:
if not value[field]:
del value[field]
return value
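    # For a plain public Note this yields roughly (abridged, illustrative):
    #
    #   {
    #       "type": "Note",
    #       "id": "<object_uri>",
    #       "attributedTo": "<author actor URI>",
    #       "to": ["Public"],
    #       "published": "<ld date>",
    #       "content": "<rendered HTML>",
    #       "sensitive": False,
    #       "url": "<absolute object URI>",
    #   }
    #
    # Empty "to"/"cc"/"tag"/"attachment" lists are stripped before returning.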
def to_create_ap(self):
"""
Returns the AP JSON to create this object
"""
object = self.to_ap()
return {
"to": object.get("to", []),
"cc": object.get("cc", []),
"type": "Create",
"id": self.object_uri + "#create",
"actor": self.author.actor_uri,
"object": object,
}
def to_update_ap(self):
"""
Returns the AP JSON to update this object
"""
object = self.to_ap()
return {
"to": object.get("to", []),
"cc": object.get("cc", []),
"type": "Update",
"id": self.object_uri + "#update",
"actor": self.author.actor_uri,
"object": object,
}
def to_delete_ap(self):
"""
        Returns the AP JSON to delete this object
"""
object = self.to_ap()
return {
"to": object.get("to", []),
"cc": object.get("cc", []),
"type": "Delete",
"id": self.object_uri + "#delete",
"actor": self.author.actor_uri,
"object": object,
}
async def aget_targets(self) -> Iterable[Identity]:
"""
        Returns the set of Identities that need to see this post and its changes
"""
targets = set()
async for mention in self.mentions.all():
targets.add(mention)
# Then, if it's not mentions only, also deliver to followers
if self.visibility != Post.Visibilities.mentioned:
async for follower in self.author.inbound_follows.filter(
state__in=FollowStates.group_active()
).select_related("source"):
targets.add(follower.source)
# If it's a reply, always include the original author if we know them
reply_post = await self.ain_reply_to_post()
if reply_post:
targets.add(reply_post.author)
# And if it's a reply to one of our own, we have to re-fan-out to
# the original author's followers
if reply_post.author.local:
async for follower in reply_post.author.inbound_follows.filter(
state__in=FollowStates.group_active()
).select_related("source"):
targets.add(follower.source)
# If this is a remote post or local-only, filter to only include
# local identities
if not self.local or self.visibility == Post.Visibilities.local_only:
targets = {target for target in targets if target.local}
# If it's a local post, include the author
if self.local:
targets.add(self.author)
# Fetch the author's full blocks and remove them as targets
blocks = (
self.author.outbound_blocks.active()
.filter(mute=False)
.select_related("target")
)
async for block in blocks:
targets.remove(block.target)
# Now dedupe the targets based on shared inboxes (we only keep one per
# shared inbox)
deduped_targets = set()
shared_inboxes = set()
for target in targets:
if target.local or not target.shared_inbox_uri:
deduped_targets.add(target)
elif target.shared_inbox_uri not in shared_inboxes:
shared_inboxes.add(target.shared_inbox_uri)
deduped_targets.add(target)
else:
# Their shared inbox is already being sent to
pass
return deduped_targets
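    # Dedupe example (illustrative): three remote followers that all advertise
    # https://mastodon.example/inbox as their shared inbox collapse into one
    # delivery target, while local identities and followers without a shared
    # inbox are always kept individually.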
### ActivityPub (inbound) ###
@classmethod
def by_ap(cls, data, create=False, update=False, fetch_author=False) -> "Post":
"""
Retrieves a Post instance by its ActivityPub JSON object.
Optionally creates one if it's not present.
Raises DoesNotExist if it's not found and create is False,
or it's from a blocked domain.
"""
try:
# Ensure the domain of the object's actor and ID match to prevent injection
if urlparse(data["id"]).hostname != urlparse(data["attributedTo"]).hostname:
raise ValueError("Object's ID domain is different to its author")
except (TypeError, KeyError):
raise ValueError("Object data is not a recognizable ActivityPub object")
# Do we have one with the right ID?
created = False
try:
post: Post = cls.objects.select_related("author__domain").get(
object_uri=data["id"]
)
except cls.DoesNotExist:
if create:
# Resolve the author
author = Identity.by_actor_uri(data["attributedTo"], create=create)
# If the author is not fetched yet, try again later
if author.domain is None:
if fetch_author:
async_to_sync(author.fetch_actor)()
if author.domain is None:
raise TryAgainLater()
else:
raise TryAgainLater()
# If the post is from a blocked domain, stop and drop
if author.domain.blocked:
raise cls.DoesNotExist("Post is from a blocked domain")
post = cls.objects.create(
object_uri=data["id"],
author=author,
content="",
local=False,
type=data["type"],
)
created = True
else:
raise cls.DoesNotExist(f"No post with ID {data['id']}", data)
if update or created:
post.type = data["type"]
if post.type in (cls.Types.article, cls.Types.question):
type_data = PostTypeData(__root__=data).__root__
post.type_data = type_data.dict()
post.content = get_value_or_map(data, "content", "contentMap")
post.summary = data.get("summary")
post.sensitive = data.get("sensitive", False)
post.url = data.get("url", data["id"])
post.published = parse_ld_date(data.get("published"))
post.edited = parse_ld_date(data.get("updated"))
post.in_reply_to = data.get("inReplyTo")
# Mentions and hashtags
post.hashtags = []
for tag in get_list(data, "tag"):
tag_type = tag["type"].lower()
if tag_type == "mention":
mention_identity = Identity.by_actor_uri(tag["href"], create=True)
post.mentions.add(mention_identity)
elif tag_type in ["_:hashtag", "hashtag"]:
post.hashtags.append(
get_value_or_map(tag, "name", "nameMap").lower().lstrip("#")
)
elif tag_type in ["toot:emoji", "emoji"]:
emoji = Emoji.by_ap_tag(post.author.domain, tag, create=True)
post.emojis.add(emoji)
elif tag_type == "edition":
# Bookwyrm Edition is similar to hashtags. There should be a link to
# the book in the Note's content and a post attachment of the cover
# image. No special processing should be needed for ingest.
pass
else:
raise ValueError(f"Unknown tag type {tag['type']}")
# Visibility and to
# (a post is public if it's to:public, otherwise it's unlisted if
# it's cc:public, otherwise it's more limited)
to = [x.lower() for x in get_list(data, "to")]
cc = [x.lower() for x in get_list(data, "cc")]
post.visibility = Post.Visibilities.mentioned
if "public" in to or "as:public" in to:
post.visibility = Post.Visibilities.public
elif "public" in cc or "as:public" in cc:
post.visibility = Post.Visibilities.unlisted
# Attachments
# These have no IDs, so we have to wipe them each time
post.attachments.all().delete()
for attachment in get_list(data, "attachment"):
if "focalPoint" in attachment:
try:
focal_x, focal_y = attachment["focalPoint"]
except (ValueError, TypeError):
focal_x, focal_y = None, None
else:
focal_x, focal_y = None, None
mimetype = attachment.get("mediaType")
if not mimetype or not isinstance(mimetype, str):
mimetype, _ = mimetypes.guess_type(attachment["url"])
if not mimetype:
mimetype = "application/octet-stream"
post.attachments.create(
remote_url=attachment["url"],
mimetype=mimetype,
name=attachment.get("name"),
width=attachment.get("width"),
height=attachment.get("height"),
blurhash=attachment.get("blurhash"),
focal_x=focal_x,
focal_y=focal_y,
)
# Calculate stats in case we have existing replies
post.calculate_stats(save=False)
post.save()
# Potentially schedule a fetch of the reply parent, and recalculate
# its stats if it's here already.
if post.in_reply_to:
try:
parent = cls.by_object_uri(post.in_reply_to)
except cls.DoesNotExist:
try:
cls.ensure_object_uri(post.in_reply_to, reason=post.object_uri)
except ValueError:
capture_message(
f"Cannot fetch ancestor of Post={post.pk}, ancestor_uri={post.in_reply_to}"
)
else:
parent.calculate_stats()
return post
@classmethod
def by_object_uri(cls, object_uri, fetch=False) -> "Post":
"""
Gets the post by URI - either looking up locally, or fetching
from the other end if it's not here.
"""
try:
return cls.objects.get(object_uri=object_uri)
except cls.DoesNotExist:
if fetch:
try:
response = async_to_sync(SystemActor().signed_request)(
method="get", uri=object_uri
)
except (httpx.HTTPError, ssl.SSLCertVerificationError):
raise cls.DoesNotExist(f"Could not fetch {object_uri}")
if response.status_code in [404, 410]:
raise cls.DoesNotExist(f"No post at {object_uri}")
if response.status_code >= 500:
raise cls.DoesNotExist(f"Server error fetching {object_uri}")
if response.status_code >= 400:
raise cls.DoesNotExist(
f"Error fetching post from {object_uri}: {response.status_code}",
                        response.content,
)
try:
post = cls.by_ap(
canonicalise(response.json(), include_security=True),
create=True,
update=True,
fetch_author=True,
)
except (json.JSONDecodeError, ValueError):
raise cls.DoesNotExist(f"Invalid ld+json response for {object_uri}")
# We may need to fetch the author too
if post.author.state == IdentityStates.outdated:
async_to_sync(post.author.fetch_actor)()
return post
else:
raise cls.DoesNotExist(f"Cannot find Post with URI {object_uri}")
@classmethod
def ensure_object_uri(cls, object_uri: str, reason: str | None = None):
"""
Sees if the post is in our local set, and if not, schedules a fetch
for it (in the background)
"""
if not object_uri or "://" not in object_uri:
raise ValueError("URI missing or invalid")
try:
cls.by_object_uri(object_uri)
except cls.DoesNotExist:
InboxMessage.create_internal(
{
"type": "FetchPost",
"object": object_uri,
"reason": reason,
}
)
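    # Fire-and-forget: by_ap() calls this for unknown reply parents, and the
    # resulting "FetchPost" inbox message is processed later (see
    # handle_fetch_internal() below).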
@classmethod
def handle_create_ap(cls, data):
"""
Handles an incoming create request
"""
with transaction.atomic():
# Ensure the Create actor is the Post's attributedTo
if data["actor"] != data["object"]["attributedTo"]:
raise ValueError("Create actor does not match its Post object", data)
# Create it, stator will fan it out locally
cls.by_ap(data["object"], create=True, update=True)
@classmethod
def handle_update_ap(cls, data):
"""
Handles an incoming update request
"""
with transaction.atomic():
            # Ensure the Update actor is the Post's attributedTo
            if data["actor"] != data["object"]["attributedTo"]:
                raise ValueError("Update actor does not match its Post object", data)
# Find it and update it
try:
cls.by_ap(data["object"], create=False, update=True)
except cls.DoesNotExist:
# We don't have a copy - assume we got a delete first and ignore.
pass
@classmethod
def handle_delete_ap(cls, data):
"""
Handles an incoming delete request
"""
with transaction.atomic():
# Is this an embedded object or plain ID?
if isinstance(data["object"], str):
object_uri = data["object"]
else:
object_uri = data["object"]["id"]
# Find our post by ID if we have one
try:
post = cls.by_object_uri(object_uri)
except cls.DoesNotExist:
# It's already been deleted
return
# Ensure the actor on the request authored the post
if not post.author.actor_uri == data["actor"]:
raise ValueError("Actor on delete does not match object")
post.delete()
@classmethod
def handle_fetch_internal(cls, data):
"""
Handles an internal fetch-request inbox message
"""
try:
uri = data["object"]
if "://" in uri:
cls.by_object_uri(uri, fetch=True)
except (cls.DoesNotExist, KeyError):
pass
### OpenGraph API ###
def to_opengraph_dict(self) -> dict:
return {
"og:title": f"{self.author.name} (@{self.author.handle})",
"og:type": "article",
"og:published_time": (self.published or self.created).isoformat(),
"og:modified_time": (
self.edited or self.published or self.created
).isoformat(),
"og:description": (self.summary or self.safe_content_local()),
"og:image:url": self.author.local_icon_url().absolute,
"og:image:height": 85,
"og:image:width": 85,
}
### Mastodon API ###
def to_mastodon_json(self, interactions=None):
reply_parent = None
if self.in_reply_to:
# Load the PK and author.id explicitly to prevent a SELECT on the entire author Identity
reply_parent = (
Post.objects.filter(object_uri=self.in_reply_to)
.only("pk", "author_id")
.first()
)
visibility_mapping = {
self.Visibilities.public: "public",
self.Visibilities.unlisted: "unlisted",
self.Visibilities.followers: "private",
self.Visibilities.mentioned: "direct",
self.Visibilities.local_only: "public",
}
value = {
"id": self.pk,
"uri": self.object_uri,
"created_at": format_ld_date(self.published),
"account": self.author.to_mastodon_json(include_counts=False),
"content": self.safe_content_remote(),
"visibility": visibility_mapping[self.visibility],
"sensitive": self.sensitive,
"spoiler_text": self.summary or "",
"media_attachments": [
attachment.to_mastodon_json() for attachment in self.attachments.all()
],
"mentions": [
mention.to_mastodon_mention_json() for mention in self.mentions.all()
],
"tags": (
[
{
"name": tag,
"url": f"https://{self.author.domain.uri_domain}/tags/{tag}/",
}
for tag in self.hashtags
]
if self.hashtags
else []
),
# Filter in the list comp rather than query because the common case is no emoji in the resultset
# When filter is on emojis like `emojis.usable()` it causes a query that is not cached by prefetch_related
"emojis": [
emoji.to_mastodon_json()
for emoji in self.emojis.all()
if emoji.is_usable
],
"reblogs_count": self.stats_with_defaults["boosts"],
"favourites_count": self.stats_with_defaults["likes"],
"replies_count": self.stats_with_defaults["replies"],
"url": self.absolute_object_uri(),
"in_reply_to_id": reply_parent.pk if reply_parent else None,
"in_reply_to_account_id": (
reply_parent.author_id if reply_parent else None
),
"reblog": None,
"poll": None,
"card": None,
"language": None,
"text": self.safe_content_remote(),
"edited_at": format_ld_date(self.edited) if self.edited else None,
}
if interactions:
value["favourited"] = self.pk in interactions.get("like", [])
value["reblogged"] = self.pk in interactions.get("boost", [])
return value
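    # Typical API usage (illustrative): serialise a page of posts, passing an
    # optional lookup shaped like {"like": {...pks}, "boost": {...pks}} so that
    # "favourited"/"reblogged" reflect the requesting identity:
    #
    #   [post.to_mastodon_json(interactions=interactions) for post in page]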