moviewyrm/bookwyrm/models/activitypub_mixin.py

561 lines
20 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" activitypub model functionality """
from base64 import b64encode
from collections import namedtuple
2021-02-04 20:36:39 +00:00
from functools import reduce
import json
import operator
import logging
from uuid import uuid4
2021-02-04 20:36:39 +00:00
import requests
2021-08-09 01:13:02 +00:00
from requests.exceptions import RequestException
2021-02-04 20:36:39 +00:00
from Crypto.PublicKey import RSA
from Crypto.Signature import pkcs1_15
from Crypto.Hash import SHA256
2021-02-04 20:36:39 +00:00
from django.apps import apps
from django.core.paginator import Paginator
2021-02-04 20:36:39 +00:00
from django.db.models import Q
from django.utils.http import http_date
from bookwyrm import activitypub
from bookwyrm.settings import USER_AGENT, PAGE_LENGTH
2021-02-04 20:36:39 +00:00
from bookwyrm.signatures import make_signature, make_digest
from bookwyrm.tasks import app
from bookwyrm.models.fields import ImageField, ManyToManyField
2021-02-04 20:36:39 +00:00
# module-level logger, named after this module's import path
logger = logging.getLogger(__name__)
# I tried to separate these classes into multiple files but I kept getting
# circular import errors so I gave up. I'm sure it could be done though!
# a stand-in for a model field: it exposes the one method that
# generate_activity calls on every entry of activity_fields
# (the field names are a real one-element tuple, hence the trailing comma)
PropertyField = namedtuple("PropertyField", ("set_activity_from_field",))


# pylint: disable=invalid-name
def set_activity_from_property_field(activity, obj, field):
    """assign a model property value to the activity json

    `field` is a pair: field[0] is the attribute name read from `obj`,
    field[1] is the key written into `activity`"""
    activity[field[1]] = getattr(obj, field[0])


def _make_property_field(field):
    """wrap one property_fields entry so it can be used like a model field

    Building the wrapper in its own function call binds `field` right away.
    A lambda created directly inside a loop or comprehension only looks up
    the loop variable when it is called, so every wrapper would end up
    serializing the last property field."""

    def set_activity_from_field(activity, obj):
        set_activity_from_property_field(activity, obj, field)

    return PropertyField(set_activity_from_field)


class ActivitypubMixin:
    """add this mixin for models that are AP serializable"""

    activity_serializer = lambda: {}
    reverse_unfurl = False

    def __init__(self, *args, **kwargs):
        """collect some info on model fields for later use"""
        self.image_fields = []
        self.many_to_many_fields = []
        self.simple_fields = []  # "simple"
        # sort model fields by type
        for field in self._meta.get_fields():
            # only fields that know how to serialize themselves are relevant
            if not hasattr(field, "field_to_activity"):
                continue

            if isinstance(field, ImageField):
                self.image_fields.append(field)
            elif isinstance(field, ManyToManyField):
                self.many_to_many_fields.append(field)
            else:
                self.simple_fields.append(field)

        # a list of allll the serializable fields
        self.activity_fields = (
            self.image_fields + self.many_to_many_fields + self.simple_fields
        )
        if hasattr(self, "property_fields"):
            # one independent wrapper per property field
            self.activity_fields += [
                _make_property_field(f) for f in self.property_fields
            ]

        # these are separate to avoid infinite recursion issues
        self.deserialize_reverse_fields = getattr(
            self, "deserialize_reverse_fields", []
        )
        self.serialize_reverse_fields = getattr(self, "serialize_reverse_fields", [])

        super().__init__(*args, **kwargs)

    @classmethod
    def find_existing_by_remote_id(cls, remote_id):
        """look up a remote id in the db"""
        return cls.find_existing({"id": remote_id})

    @classmethod
    def find_existing(cls, data):
        """compare data to fields that can be used for deduplication.
        This always includes remote_id, but can also be unique identifiers
        like an isbn for an edition

        Returns the first matching object, or None when there is nothing
        to match on or nothing matches"""
        filters = []
        # grabs all the data from the model to create django queryset filters
        for field in cls._meta.get_fields():
            if (
                not hasattr(field, "deduplication_field")
                or not field.deduplication_field
            ):
                continue

            value = data.get(field.get_activitypub_field())
            if not value:
                continue
            filters.append({field.name: value})

        if hasattr(cls, "origin_id") and "id" in data:
            # kinda janky, but this handles special case for books
            filters.append({"origin_id": data["id"]})

        if not filters:
            # if there are no deduplication fields, it will match the first
            # item no matter what. this shouldn't happen but just in case.
            return None

        objects = cls.objects
        if hasattr(objects, "select_subclasses"):
            objects = objects.select_subclasses()

        # an OR operation on all the match fields, sorry for the dense syntax
        match = objects.filter(reduce(operator.or_, (Q(**f) for f in filters)))
        # there OUGHT to be only one match
        return match.first()

    def broadcast(self, activity, sender, software=None):
        """send out an activity

        The activity is serialized to json here and handed to the
        broadcast task together with the sender's id and the inbox urls"""
        broadcast_task.delay(
            sender.id,
            json.dumps(activity, cls=activitypub.ActivityEncoder),
            self.get_recipients(software=software),
        )

    def get_recipients(self, software=None):
        """figure out which inbox urls to post to

        Returns a de-duplicated list of inbox urls"""
        # first we have to figure out who should receive this activity
        privacy = getattr(self, "privacy", "public")
        # is this activity owned by a user (statuses, lists, shelves), or is it
        # general to the instance (like books)
        user = getattr(self, "user", None)
        user_model = apps.get_model("bookwyrm.User", require_ready=True)
        if not user and isinstance(self, user_model):
            # or maybe the thing itself is a user
            user = self
        # find anyone who's tagged in a status, for example
        mentions = getattr(self, "recipients", [])

        # we always send activities to explicitly mentioned users' inboxes
        recipients = [u.inbox for u in mentions or [] if not u.local]

        # unless it's a dm, all the followers should receive the activity
        if privacy != "direct":
            # we will send this out to a subset of all remote users
            queryset = (
                user_model.viewer_aware_objects(user)
                .filter(
                    local=False,
                )
                .distinct()
            )
            # filter users first by whether they're using the desired software
            # this lets us send book updates only to other bw servers
            if software:
                queryset = queryset.filter(bookwyrm_user=(software == "bookwyrm"))
            # if there's a user, we only want to send to the user's followers
            if user:
                queryset = queryset.filter(following=user)

            # ideally, we will send to shared inboxes for efficiency
            shared_inboxes = (
                queryset.filter(shared_inbox__isnull=False)
                .values_list("shared_inbox", flat=True)
                .distinct()
            )
            # but not everyone has a shared inbox
            inboxes = queryset.filter(shared_inbox__isnull=True).values_list(
                "inbox", flat=True
            )
            recipients += list(shared_inboxes) + list(inboxes)
        return list(set(recipients))

    def to_activity_dataclass(self):
        """convert from a model to an activity"""
        activity = generate_activity(self)
        return self.activity_serializer(**activity)

    def to_activity(self, **kwargs):  # pylint: disable=unused-argument
        """convert from a model to a json activity"""
        return self.to_activity_dataclass().serialize()
2021-02-04 20:36:39 +00:00
class ObjectMixin(ActivitypubMixin):
    """mixin for AP serializable object models: saving one of these
    broadcasts the matching Create, Update or Delete activity"""

    def save(self, *args, created=None, **kwargs):
        """save the object, then broadcast it as created/updated/deleted"""
        # "broadcast" is a bonus kwarg that the base save method would
        # choke on, so it is taken out of kwargs before they are passed along
        should_broadcast = kwargs.pop("broadcast", True)
        is_new = created or not bool(self.id)

        # the normal save happens no matter what
        super().save(*args, **kwargs)

        if not should_broadcast:
            return
        if hasattr(self, "status_type") and self.status_type == "Announce":
            return

        # set for objects that are owned by a user (lists, shelves)
        owner = self.user if hasattr(self, "user") else None

        if is_new:
            # Create activities only go out for objects owned by a local user
            if not owner or not owner.local:
                return

            try:
                # is there a "pure" activitypub version of this for mastodon?
                has_pure_version = hasattr(self, "pure_content")
                if has_pure_version:
                    pure_create = self.to_create_activity(owner, pure=True)
                    self.broadcast(pure_create, owner, software="other")
                # the regular version goes to BW servers only if a pure
                # version was just sent out for everyone else
                target_software = "bookwyrm" if has_pure_version else None
                create = self.to_create_activity(owner)
                self.broadcast(create, owner, software=target_software)
            except AttributeError:
                # janky as heck, this catches the multiple inheritance chain
                # for boosts and ignores this auxiliary broadcast
                pass
            return

        # --- from here on, an existing object is being updated
        if not owner:
            user_model = apps.get_model("bookwyrm.User", require_ready=True)
            if isinstance(self, user_model):
                # users don't have associated users, they ARE users
                owner = self
            elif hasattr(self, "last_edited_by"):
                # book data tracks its last editor
                owner = self.last_edited_by

        # without a known, local user there is nobody to broadcast for
        if not owner or not owner.local:
            return

        # a deletion is flagged on the object itself
        is_deletion = hasattr(self, "deleted") and self.deleted
        if is_deletion:
            outgoing = self.to_delete_activity(owner)
        else:
            outgoing = self.to_update_activity(owner)
        self.broadcast(outgoing, owner)

    def to_create_activity(self, user, **kwargs):
        """wrap the object in a serialized Create activity; when the
        object has content, it is signed with the user's private key"""
        wrapped = self.to_activity_dataclass(**kwargs)
        create_id = self.remote_id + "/activity"

        signature = None
        content = getattr(wrapped, "content", None)
        if content:
            private_key = RSA.import_key(user.key_pair.private_key)
            content_hash = SHA256.new(content.encode("utf8"))
            signed_message = pkcs1_15.new(private_key).sign(content_hash)
            signature = activitypub.Signature(
                creator="%s#main-key" % user.remote_id,
                created=wrapped.published,
                signatureValue=b64encode(signed_message).decode("utf8"),
            )

        create = activitypub.Create(
            id=create_id,
            actor=user.remote_id,
            to=wrapped.to,
            cc=wrapped.cc,
            object=wrapped,
            signature=signature,
        )
        return create.serialize()

    def to_delete_activity(self, user):
        """serialized Delete activity announcing that the object is gone"""
        followers = "%s/followers" % user.remote_id
        deletion = activitypub.Delete(
            id=self.remote_id + "/activity",
            actor=user.remote_id,
            to=[followers],
            cc=["https://www.w3.org/ns/activitystreams#Public"],
            object=self,
        )
        return deletion.serialize()

    def to_update_activity(self, user):
        """serialized Update activity; the id gets a fresh uuid each time"""
        update = activitypub.Update(
            id="%s#update/%s" % (self.remote_id, uuid4()),
            actor=user.remote_id,
            to=["https://www.w3.org/ns/activitystreams#Public"],
            object=self,
        )
        return update.serialize()
class OrderedCollectionPageMixin(ObjectMixin):
    """only the paginator utilities, for models that should not have to
    override ActivitypubMixin's to_activity (ie, for outbox)"""

    @property
    def collection_remote_id(self):
        """the remote id by default; override for a special remote id,
        ie outbox"""
        return self.remote_id

    def to_ordered_collection(
        self, queryset, remote_id=None, page=False, collection_only=False, **kwargs
    ):
        """serialize a queryset as an ordered collection, or as a single
        page of it when `page` is given

        Raises RuntimeError if the queryset is not ordered"""
        if not queryset.ordered:
            raise RuntimeError("queryset must be ordered")

        remote_id = remote_id or self.remote_id

        if page:
            # a list of page values is reduced to its first entry
            if isinstance(page, list) and len(page) > 0:
                page = page[0]
            return to_ordered_collection_page(queryset, remote_id, page=page, **kwargs)

        if not collection_only and hasattr(self, "activity_serializer"):
            serializer = self.activity_serializer
            # start from a dict of the model's own fields
            activity = generate_activity(self)
        else:
            serializer = activitypub.OrderedCollection
            activity = {}

        if remote_id:
            activity["id"] = remote_id

        paginator = Paginator(queryset, PAGE_LENGTH)
        # computed fields that are specific to ordered collections
        activity["totalItems"] = paginator.count
        activity["first"] = "%s?page=1" % remote_id
        activity["last"] = "%s?page=%d" % (remote_id, paginator.num_pages)

        return serializer(**activity)
class OrderedCollectionMixin(OrderedCollectionPageMixin):
    """lets an activitypub model be serialized as an ordered collection"""

    activity_serializer = activitypub.OrderedCollection

    @property
    def collection_queryset(self):
        """the queryset of collected items; each model has to supply its
        own, since a collection usually aggregates a different model"""
        raise NotImplementedError("Model must define collection_queryset")

    def to_activity_dataclass(self, **kwargs):
        """the collection queryset as an ordered collection dataclass"""
        items = self.collection_queryset
        return self.to_ordered_collection(items, **kwargs)

    def to_activity(self, **kwargs):
        """the collection queryset as a serialized ordered collection"""
        items = self.collection_queryset
        collection = self.to_ordered_collection(items, **kwargs)
        return collection.serialize()
class CollectionItemMixin(ActivitypubMixin):
    """mixin for the items that make up an (Ordered)Collection"""

    activity_serializer = activitypub.CollectionItem

    def broadcast(self, activity, sender, software="bookwyrm"):
        """same as the parent broadcast, except that collection updates
        default to being sent to other bookwyrm instances only"""
        super().broadcast(activity, sender, software=software)

    @property
    def privacy(self):
        """the privacy of the parent collection, or direct while the item
        is not approved"""
        parent = getattr(self, self.collection_field)
        return parent.privacy if self.approved else "direct"

    @property
    def recipients(self):
        """the owner of the collection is a direct recipient, unless they
        are a local user"""
        parent = getattr(self, self.collection_field)
        # don't broadcast to yourself
        return [] if parent.user.local else [parent.user]

    def save(self, *args, broadcast=True, **kwargs):
        """save, then broadcast an Add activity for local users"""
        # the normal save happens no matter what
        super().save(*args, **kwargs)

        # list items can be updated, so every save of a local user's item
        # is announced, not only the first one
        if broadcast and self.user.local:
            # adding an obj to the collection
            addition = self.to_add_activity(self.user)
            self.broadcast(addition, self.user)

    def delete(self, *args, broadcast=True, **kwargs):
        """delete, then broadcast a Remove activity for local users"""
        # the activity is built up front, before the item is deleted
        removal = self.to_remove_activity(self.user)
        super().delete(*args, **kwargs)
        if self.user.local and broadcast:
            self.broadcast(removal, self.user)

    def to_add_activity(self, user):
        """serialized Add activity targeting the parent collection
        (AP for shelving a book)"""
        parent = getattr(self, self.collection_field)
        addition = activitypub.Add(
            id="{:s}#add".format(parent.remote_id),
            actor=user.remote_id,
            object=self.to_activity_dataclass(),
            target=parent.remote_id,
        )
        return addition.serialize()

    def to_remove_activity(self, user):
        """serialized Remove activity targeting the parent collection
        (AP for un-shelving a book)"""
        parent = getattr(self, self.collection_field)
        removal = activitypub.Remove(
            id="{:s}#remove".format(parent.remote_id),
            actor=user.remote_id,
            object=self.to_activity_dataclass(),
            target=parent.remote_id,
        )
        return removal.serialize()
2021-02-04 22:36:57 +00:00
class ActivityMixin(ActivitypubMixin):
    """mixin for models that are themselves activities: saving broadcasts
    the activity and deleting broadcasts an Undo of it"""

    def save(self, *args, broadcast=True, **kwargs):
        """save, then broadcast the activity if the acting user is local"""
        super().save(*args, **kwargs)
        # models without a `user` keep the acting user in `user_subject`
        if hasattr(self, "user"):
            actor = self.user
        else:
            actor = self.user_subject
        if broadcast and actor.local:
            self.broadcast(self.to_activity(), actor)

    def delete(self, *args, broadcast=True, **kwargs):
        """nevermind: broadcast an Undo for local users, then delete"""
        if hasattr(self, "user"):
            actor = self.user
        else:
            actor = self.user_subject
        # the Undo goes out first, while the object still exists
        if broadcast and actor.local:
            self.broadcast(self.to_undo_activity(), actor)
        super().delete(*args, **kwargs)

    def to_undo_activity(self):
        """serialized Undo activity wrapping this object"""
        if hasattr(self, "user"):
            actor = self.user
        else:
            actor = self.user_subject
        undo = activitypub.Undo(
            id="%s#undo" % self.remote_id,
            actor=actor.remote_id,
            object=self,
        )
        return undo.serialize()
def generate_activity(obj):
    """build the activity dict for an object from its activity fields

    Reverse relations listed in serialize_reverse_fields are added as
    well, and a missing id falls back to the object's remote id"""
    serialized = {}
    for activity_field in obj.activity_fields:
        activity_field.set_activity_from_field(serialized, obj)

    # reverse lookups, for example the editions of a work
    for model_name, activity_name, sort_by in getattr(
        obj, "serialize_reverse_fields", ()
    ):
        related = getattr(obj, model_name)
        serialized[activity_name] = unfurl_related_field(related, sort_field=sort_by)

    if not serialized.get("id"):
        serialized["id"] = obj.get_remote_id()
    return serialized
def unfurl_related_field(related_field, sort_field=None):
    """serialize a reverse lookup (like a public key owner or the
    attachments of a Status)

    With a sort field and something that has `all`, the result is a list
    with one entry per related item, ordered by that field"""
    if sort_field and hasattr(related_field, "all"):
        unfurled = []
        for item in related_field.order_by(sort_field).all():
            unfurled.append(unfurl_related_field(item))
        return unfurled

    # without reverse_unfurl, only the remote id is given
    if not related_field.reverse_unfurl:
        return related_field.remote_id

    # one-to-one (key pair)
    if hasattr(related_field, "field_to_activity"):
        return related_field.field_to_activity()
    # one-to-many (attachments)
    return related_field.to_activity()
@app.task
def broadcast_task(sender_id, activity, recipients):
    """the celery task for broadcast

    Posts the already-serialized activity to each recipient inbox url on
    behalf of the user with id `sender_id`. Delivery is best effort: a
    failed request is logged and the remaining recipients are still
    attempted."""
    user_model = apps.get_model("bookwyrm.User", require_ready=True)
    sender = user_model.objects.get(id=sender_id)
    for recipient in recipients:
        try:
            sign_and_send(sender, activity, recipient)
        except RequestException as err:
            # don't let one unreachable inbox stop the rest, but leave a
            # trace of the failure rather than dropping it silently
            logger.warning("Could not deliver activity to %s: %s", recipient, err)
def sign_and_send(sender, data, destination, timeout=15):
    """crypto whatever and http junk

    Signs `data` with the sender's key and POSTs it to `destination`.

    `timeout` is handed to requests (in seconds); without one, requests
    waits indefinitely for a server that never answers.

    Returns the response. Raises ValueError if the sender has no private
    key, and a requests exception for failed or timed-out requests and
    for error status codes."""
    now = http_date()

    if not sender.key_pair.private_key:
        # this shouldn't happen. it would be bad if it happened.
        raise ValueError("No private key found for sender")

    digest = make_digest(data)

    response = requests.post(
        destination,
        data=data,
        headers={
            "Date": now,
            "Digest": digest,
            "Signature": make_signature(sender, destination, now, digest),
            "Content-Type": "application/activity+json; charset=utf-8",
            "User-Agent": USER_AGENT,
        },
        timeout=timeout,
    )
    if not response.ok:
        response.raise_for_status()
    return response
# pylint: disable=unused-argument
def to_ordered_collection_page(
    queryset, remote_id, id_only=False, page=1, pure=False, **kwargs
):
    """serialize one page of a paginated queryset

    With `id_only`, the items are just remote ids; otherwise each item is
    serialized with its own to_activity"""
    paginator = Paginator(queryset, PAGE_LENGTH)
    current = paginator.get_page(page)

    if id_only:
        items = [entry.remote_id for entry in current.object_list]
    else:
        items = [entry.to_activity(pure=pure) for entry in current.object_list]

    # links to the neighboring pages, where there are any
    next_page = (
        "%s?page=%d" % (remote_id, current.next_page_number())
        if current.has_next()
        else None
    )
    prev_page = (
        "%s?page=%d" % (remote_id, current.previous_page_number())
        if current.has_previous()
        else None
    )

    return activitypub.OrderedCollectionPage(
        id="%s?page=%s" % (remote_id, page),
        partOf=remote_id,
        orderedItems=items,
        next=next_page,
        prev=prev_page,
    )