moviewyrm/bookwyrm/activitypub/base_activity.py

237 lines
8.6 KiB
Python

''' basics for an activitypub serializer '''
from dataclasses import dataclass, fields, MISSING
from json import JSONEncoder
from django.db.models.fields.related_descriptors \
import ForwardManyToOneDescriptor, ManyToManyDescriptor, \
ReverseManyToOneDescriptor
from django.db.models.fields.files import ImageFileDescriptor
from bookwyrm.connectors import ConnectorException, get_data
class ActivitySerializerError(ValueError):
    ''' raised when activitypub json can't be converted to or from the
    form this module expects (missing field, wrong serializer type,
    unreachable remote id) '''
class ActivityEncoder(JSONEncoder):
    ''' json encoder that can serialize Activity objects '''

    def default(self, o):
        ''' fallback for objects json doesn't know: encode the object
        as its instance attribute dictionary '''
        return o.__dict__
@dataclass
class Link:
    ''' a link object, used for tagging a book in a status '''
    # target of the link
    href: str
    # label for the link
    name: str
    # activitypub type discriminator; subclasses override the default
    type: str = 'Link'
@dataclass
class Mention(Link):
    ''' a Link subtype used for mentioning an actor '''
    # same fields as Link, only the default type differs
    type: str = 'Mention'
@dataclass
class Signature:
    ''' public key block '''
    # all three values are required and stored as given
    creator: str
    created: str
    signatureValue: str
    # signature suite name used when none is supplied
    type: str = 'RsaSignature2017'
@dataclass(init=False)
class ActivityObject:
    ''' base dataclass for activitypub json objects. Keyword arguments
    that aren't declared as fields are ignored on construction '''
    id: str
    type: str

    def __init__(self, **kwargs):
        ''' this lets you pass in an object with fields that aren't in the
        dataclass, which it ignores. Any field in the dataclass is required or
        has a default value.

        Raises ActivitySerializerError when a field with neither a default
        nor a default_factory is not supplied. '''
        for field in fields(self):
            try:
                value = kwargs[field.name]
            except KeyError:
                # MISSING is a sentinel object, so compare by identity
                if field.default is MISSING and \
                        field.default_factory is MISSING:
                    raise ActivitySerializerError(
                        'Missing required field: %s' % field.name)
                # a field that only declares a default_factory is left as
                # MISSING on purpose: to_model uses that sentinel to tell
                # "not supplied" apart from an explicit empty value
                value = field.default
            setattr(self, field.name, value)

    def to_model(self, model, instance=None):
        ''' convert from an activity to a model instance

        model: the model class to build; its activity_serializer must be
            a class this activity is an instance of
        instance: an existing model instance to update. When not given,
            an existing row is looked up by remote id and, failing that,
            a new one is created

        Returns the saved model instance.
        Raises ActivitySerializerError if the activity type doesn't match
        the model's serializer. '''
        if not isinstance(self, model.activity_serializer):
            raise ActivitySerializerError(
                'Wrong activity type "%s" for model "%s" (expects "%s")' % \
                        (self.__class__,
                         model.__name__,
                         model.activity_serializer)
            )

        # check for an existing instance, if we're not updating a known obj
        if not instance:
            instance = find_existing_by_remote_id(model, self.id)
        # TODO: deduplicate books by identifiers

        mapped_fields = {}
        many_to_many_fields = {}
        one_to_many_fields = {}
        image_fields = {}

        for field in model._meta.get_fields():
            # only fields that know how to (de)serialize themselves
            if not hasattr(field, 'field_to_activity'):
                continue
            activitypub_field = field.get_activitypub_field()
            # format the value once (the formatter may be expensive)
            formatted_value = field.field_from_activity(
                getattr(self, activitypub_field))
            # the formatter returns None when there is nothing to set
            if formatted_value is None:
                continue
            model_field = getattr(model, field.name)

            if isinstance(model_field, ForwardManyToOneDescriptor):
                if not formatted_value:
                    continue
                if isinstance(formatted_value, dict) and \
                        formatted_value.get('id'):
                    # if the AP field is a serialized object (as in Add)
                    # or PublicKey
                    related_model = field.related_model
                    related_activity = related_model.activity_serializer
                    mapped_fields[field.name] = related_activity(
                        **formatted_value
                    ).to_model(related_model)
                else:
                    # foreign key remote id resolver (work on Edition, for
                    # example): the field is just a remote_id, as in
                    # every other case
                    fk_model = model_field.field.related_model
                    mapped_fields[field.name] = resolve_remote_id(
                        fk_model, formatted_value)
            elif isinstance(model_field, ManyToManyDescriptor):
                # status mentions book/users
                many_to_many_fields[field.name] = formatted_value
            elif isinstance(model_field, ReverseManyToOneDescriptor):
                # attachments on Status, for example
                one_to_many_fields[field.name] = formatted_value
            elif isinstance(model_field, ImageFileDescriptor):
                # image fields need custom handling
                image_fields[field.name] = formatted_value
            else:
                # plain column: an unsupplied default is stored as None
                if formatted_value is MISSING:
                    formatted_value = None
                mapped_fields[field.name] = formatted_value

        if instance:
            # updating an existing model instance
            for key, mapped_value in mapped_fields.items():
                setattr(instance, key, mapped_value)
            instance.save()
        else:
            # creating a new model instance
            instance = model.objects.create(**mapped_fields)

        # --- these are all fields that can't be saved until after the
        # instance has an id (after it's been saved). ---------------#

        # add images
        for (model_key, value) in image_fields.items():
            # use this field's own value, not whatever the field loop above
            # happened to process last; skip unsupplied/empty images
            if value is MISSING or not value:
                continue
            # presumably value is the positional args for the file field's
            # save(), e.g. (name, content) -- confirm against the formatter
            getattr(instance, model_key).save(*value, save=True)

        # add many to many fields
        for (model_key, values) in many_to_many_fields.items():
            # mention books, mention users, followers
            if values is MISSING or not isinstance(values, list):
                # user followers is a link to an orderedcollection, skip it
                continue
            # kept in its own local so the `model` argument isn't clobbered
            related_model = getattr(instance, model_key).model
            items = []
            for link in values:
                if isinstance(link, dict):
                    # check that the Type matches the model (Status
                    # tags contain both user mentions and book tags)
                    if not related_model.activity_serializer.type == \
                            link.get('type'):
                        continue
                    remote_id = link.get('href')
                else:
                    remote_id = link
                items.append(
                    resolve_remote_id(related_model, remote_id)
                )
            getattr(instance, model_key).set(items)

        # add one to many fields
        # the attribute on the related object that points back at us
        field_name = instance.__class__.__name__.lower()
        for (model_key, values) in one_to_many_fields.items():
            if values is MISSING:
                continue
            related_model = getattr(instance, model_key).model
            for item in values:
                if isinstance(item, str):
                    # a bare remote id
                    item = resolve_remote_id(related_model, item)
                else:
                    # a serialized object
                    item = related_model.activity_serializer(**item)
                    item = item.to_model(related_model)
                setattr(item, field_name, instance)
                item.save()
        return instance

    def serialize(self):
        ''' convert to dictionary with context attr

        Returns a new dict; the instance itself is not modified, so
        "@context" never becomes an attribute of the activity. '''
        data = self.__dict__.copy()
        data['@context'] = 'https://www.w3.org/ns/activitystreams'
        return data
def find_existing_by_remote_id(model, remote_id):
    ''' look in the database for an instance of model with this id.

    Tries remote_id first, then origin_id when the model has that
    attribute. Returns the first match, or whatever the last lookup's
    first() gave back when nothing matched. '''
    manager = model.objects
    # when the manager offers select_subclasses, query through it
    if hasattr(manager, 'select_subclasses'):
        manager = manager.select_subclasses()

    # first, check for an existing copy in the database
    match = manager.filter(remote_id=remote_id).first()
    if match or not hasattr(model, 'origin_id'):
        return match

    # fall back to the origin id
    return manager.filter(origin_id=remote_id).first()
def resolve_remote_id(model, remote_id, refresh=False):
    ''' return the model instance for remote_id, using the database copy
    when there is one and loading it remotely otherwise.

    With refresh=True the remote data is always loaded and an existing
    instance, if any, is updated from it.
    Raises ActivitySerializerError when the remote data can't be loaded. '''
    existing = find_existing_by_remote_id(model, remote_id)
    if existing and not refresh:
        return existing

    # load the data and create the object
    try:
        payload = get_data(remote_id)
    except (ConnectorException, ConnectionError):
        message = 'Could not connect to host for remote_id in %s model: %s' % \
                (model.__name__, remote_id)
        raise ActivitySerializerError(message)

    activity = model.activity_serializer(**payload)
    # if we're refreshing, "existing" will be set and we'll update it
    return activity.to_model(model, instance=existing)