bookwyrm/bookwyrm/models/fields.py

447 lines
15 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" activitypub-aware django model fields """
from dataclasses import MISSING
2020-11-30 18:32:13 +00:00
import re
from uuid import uuid4
2020-12-03 20:35:57 +00:00
import dateutil.parser
from dateutil.parser import ParserError
2020-11-30 22:40:26 +00:00
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
2020-11-30 18:32:13 +00:00
from django.core.exceptions import ValidationError
from django.core.files.base import ContentFile
from django.db import models
2020-12-03 20:35:57 +00:00
from django.utils import timezone
2020-11-30 18:32:13 +00:00
from django.utils.translation import gettext_lazy as _
from bookwyrm import activitypub
from bookwyrm.connectors import get_image
2020-12-17 00:47:05 +00:00
from bookwyrm.sanitize_html import InputHtmlParser
2020-11-30 18:32:13 +00:00
from bookwyrm.settings import DOMAIN
def validate_remote_id(value):
    """make sure the remote_id looks like a url

    Accepts only strings of the form ``http://...`` or ``https://...`` that
    contain no whitespace anywhere.

    Raises:
        ValidationError: if ``value`` is empty, is not a string, or is not
            an http(s) url.
    """
    # fullmatch (instead of match + "$") also rejects a trailing newline, and
    # "s?" only allows the https variant rather than any character after
    # "http". The isinstance check turns non-string input (None, dicts, ...)
    # into a ValidationError instead of a TypeError from the regex engine.
    if not isinstance(value, str) or not re.fullmatch(r"https?://\S+", value):
        raise ValidationError(
            _("%(value)s is not a valid remote_id"),
            params={"value": value},
        )
2020-12-20 02:54:56 +00:00
def validate_localname(value):
    """make sure localnames look okay

    A localname may only contain ascii letters, digits, ``-``, ``_`` and
    ``.``, and must not be empty.

    Raises:
        ValidationError: if ``value`` contains any other character.
    """
    # fullmatch is used because "$" with re.match would still accept a name
    # that ends in a newline
    if not re.fullmatch(r"[A-Za-z\-_\.0-9]+", value):
        raise ValidationError(
            _("%(value)s is not a valid username"),
            params={"value": value},
        )
def validate_username(value):
    """make sure usernames look okay

    A username has the shape ``localname@domain.tld``: both parts may contain
    ascii letters, digits, ``-``, ``_`` and ``.``, and the domain must end in
    a dot followed by at least two lowercase letters.

    Raises:
        ValidationError: if ``value`` does not have that shape.
    """
    # fullmatch is used because "$" with re.match would still accept a
    # username that ends in a newline
    if not re.fullmatch(
        r"[A-Za-z\-_\.0-9]+@[A-Za-z\-_\.0-9]+\.[a-z]{2,}", value
    ):
        raise ValidationError(
            _("%(value)s is not a valid username"),
            params={"value": value},
        )
2020-11-30 18:32:13 +00:00
class ActivitypubFieldMixin:
    """make a database field serializable to and from activitypub"""

    def __init__(
        self,
        *args,
        activitypub_field=None,
        activitypub_wrapper=None,
        deduplication_field=False,
        **kwargs
    ):
        self.deduplication_field = deduplication_field
        # the plain case: the field maps directly onto ``activitypub_field``
        self.activitypub_field = activitypub_field
        if activitypub_wrapper:
            # the wrapped case: the wrapper becomes the key on the activity,
            # and the original field name becomes the key inside the wrapping
            # dict (see field_to_activity / field_from_activity)
            self.activitypub_wrapper = activitypub_field
            self.activitypub_field = activitypub_wrapper
        super().__init__(*args, **kwargs)

    def set_field_from_activity(self, instance, data):
        """copy this field's value from an activity object onto a model

        Nothing is assigned when the converted value is None or MISSING.
        """
        key = self.get_activitypub_field()
        try:
            value = getattr(data, key)
        except AttributeError:
            # massively hack-y workaround for boosts: fall back to "actor"
            # when the activity has no "attributedTo"
            if key != "attributedTo":
                raise
            value = data.actor
        formatted = self.field_from_activity(value)
        if formatted is not None and formatted is not MISSING:
            setattr(instance, self.name, formatted)

    def set_activity_from_field(self, activity, instance):
        """write this field's value from a model into the activity dict"""
        formatted = self.field_to_activity(getattr(instance, self.name))
        if formatted is None:
            return

        key = self.get_activitypub_field()
        # TODO: surely there's a better way
        if key == "attributedTo" and instance.__class__.__name__ == "Boost":
            key = "actor"

        if isinstance(activity.get(key), list):
            # several fields can feed the same list-valued key
            activity[key] += formatted
        else:
            activity[key] = formatted

    def field_to_activity(self, value):
        """formatter to convert a model value into activitypub"""
        if not hasattr(self, "activitypub_wrapper"):
            return value
        return {self.activitypub_wrapper: value}

    def field_from_activity(self, value):
        """formatter to convert activitypub into a model value"""
        if not hasattr(self, "activitypub_wrapper"):
            return value
        return value.get(self.activitypub_wrapper)

    def get_activitypub_field(self):
        """model_field_name to activitypubFieldName"""
        if self.activitypub_field:
            return self.activitypub_field
        # snake_case to camelCase, using only the last dotted component
        first, *rest = self.name.split(".")[-1].split("_")
        return first + "".join(part.title() for part in rest)
2020-12-01 03:01:43 +00:00
2020-11-30 18:32:13 +00:00
class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
    """default (de)serialization for foreign key and one to one"""

    def __init__(self, *args, load_remote=True, **kwargs):
        # when False, field_from_activity only looks in the local database
        self.load_remote = load_remote
        super().__init__(*args, **kwargs)

    def field_from_activity(self, value):
        """turn an activitypub object or a remote id into a related model

        ``value`` may be an object with a truthy ``id`` attribute or a remote
        id string. Returns None for empty values and for anything that is
        neither of those.
        """
        if not value:
            return None

        related_model = self.related_model
        if hasattr(value, "id") and value.id:
            if not self.load_remote:
                # only look in the local database
                return related_model.find_existing(value.serialize())
            # this is an activitypub object, which we can deserialize
            return value.to_model(model=related_model)
        try:
            # make sure the value looks like a remote id
            validate_remote_id(value)
        except (ValidationError, TypeError):
            # we don't know what this is, ignore it. TypeError covers
            # non-string values (such as a dict), which the regex in
            # validate_remote_id cannot be matched against
            return None
        # gets or creates the model field from the remote id
        if not self.load_remote:
            # only look in the local database
            return related_model.find_existing_by_remote_id(value)
        return activitypub.resolve_remote_id(value, model=related_model)
2020-11-30 18:32:13 +00:00
class RemoteIdField(ActivitypubFieldMixin, models.CharField):
    """a url that serves as a unique identifier"""

    def __init__(self, *args, max_length=255, validators=None, **kwargs):
        if not validators:
            validators = [validate_remote_id]
        # for this field, the default is true. false everywhere else.
        deduplicate = kwargs.get("deduplication_field", True)
        super().__init__(
            *args, max_length=max_length, validators=validators, **kwargs
        )
        # assigned after the parent constructor so this default wins
        self.deduplication_field = deduplicate
2020-11-30 18:32:13 +00:00
class UsernameField(ActivitypubFieldMixin, models.CharField):
    """activitypub-aware username field"""

    def __init__(self, activitypub_field="preferredUsername", **kwargs):
        self.activitypub_field = activitypub_field
        duplicate_error = {
            "unique": _("A user with that username already exists."),
        }
        # this starts the constructor chain after ActivitypubFieldMixin, so
        # the mixin's own __init__ does not run and **kwargs are not forwarded
        # I don't totally know why pylint is mad at this, but it makes it work
        super(ActivitypubFieldMixin, self).__init__(  # pylint: disable=bad-super-call
            _("username"),
            max_length=150,
            unique=True,
            validators=[validate_username],
            error_messages=duplicate_error,
        )

    def deconstruct(self):
        """implementation of models.Field deconstruct"""
        name, path, args, kwargs = super().deconstruct()
        # these are all hard-coded in __init__, so they are removed from the
        # deconstructed kwargs
        for fixed in (
            "verbose_name",
            "max_length",
            "unique",
            "validators",
            "error_messages",
        ):
            del kwargs[fixed]
        return name, path, args, kwargs

    def field_to_activity(self, value):
        # keep only what comes before the first "@"
        return value.partition("@")[0]
2020-11-30 18:32:13 +00:00
2021-03-08 16:49:10 +00:00
# the privacy levels a PrivacyField can hold; PrivacyField uses
# PrivacyLevels.choices as its choices
PrivacyLevels = models.TextChoices(
    "Privacy", ["public", "unlisted", "followers", "direct"]
)
class PrivacyField(ActivitypubFieldMixin, models.CharField):
    """this maps to two different activitypub fields ("to" and "cc")"""

    public = "https://www.w3.org/ns/activitystreams#Public"

    def __init__(self, *args, **kwargs):
        # note: keyword arguments are accepted but not passed on
        super().__init__(
            *args, max_length=255, choices=PrivacyLevels.choices, default="public"
        )

    def set_field_from_activity(self, instance, data):
        """work out the privacy level from the activity's to and cc lists"""
        to, cc = data.to, data.cc
        if to == [self.public]:
            level = "public"
        elif cc == []:
            level = "direct"
        elif self.public in cc:
            level = "unlisted"
        else:
            level = "followers"
        setattr(instance, self.name, level)

    def set_activity_from_field(self, activity, instance):
        """fill in the activity's to and cc lists from the privacy level"""
        # explicitly to anyone mentioned (statuses only)
        if hasattr(instance, "mention_users"):
            mentions = [user.remote_id for user in instance.mention_users.all()]
        else:
            mentions = []

        # this is a link to the followers list
        followers_field = instance.user.__class__._meta.get_field("followers")
        followers = followers_field.field_to_activity(instance.user.followers)

        privacy = instance.privacy
        if privacy == "public":
            to, cc = [self.public], [followers] + mentions
        elif privacy == "unlisted":
            to, cc = [followers], [self.public] + mentions
        elif privacy == "followers":
            to, cc = [followers], mentions
        elif privacy == "direct":
            to, cc = mentions, []
        else:
            # any other value leaves the activity untouched
            return
        activity["to"] = to
        activity["cc"] = cc
class ForeignKey(ActivitypubRelatedFieldMixin, models.ForeignKey):
    """activitypub-aware foreign key field"""

    def field_to_activity(self, value):
        # the related object is serialized as its remote id
        return value.remote_id if value else None
class OneToOneField(ActivitypubRelatedFieldMixin, models.OneToOneField):
    """activitypub-aware one to one field"""

    def field_to_activity(self, value):
        # the related object is embedded via its own to_activity()
        return value.to_activity() if value else None
2020-11-30 18:32:13 +00:00
class ManyToManyField(ActivitypubFieldMixin, models.ManyToManyField):
    """activitypub-aware many to many field"""

    def __init__(self, *args, link_only=False, **kwargs):
        # when True, the field is serialized as one url instead of a list of
        # remote ids (see field_to_activity)
        self.link_only = link_only
        super().__init__(*args, **kwargs)

    def set_field_from_activity(self, instance, data):
        """helper function for assigning a value to the field"""
        value = getattr(data, self.get_activitypub_field())
        formatted = self.field_from_activity(value)
        if formatted is None or formatted is MISSING:
            return
        getattr(instance, self.name).set(formatted)
        instance.save(broadcast=False)

    def field_to_activity(self, value):
        """serialize as a "<remote_id>/<field name>" url or a list of ids"""
        if self.link_only:
            return "%s/%s" % (value.instance.remote_id, self.name)
        return [i.remote_id for i in value.all()]

    def field_from_activity(self, value):
        """resolve a list of remote ids into related model instances

        Returns an empty list when the value is None or MISSING, and None
        (meaning "leave the relation alone") when the value is not a list.
        """
        if value is None or value is MISSING:
            return []
        if not isinstance(value, (list, tuple)):
            # a bare string here is a link, like the one link_only produces.
            # Iterating it would walk its characters, resolve nothing, and
            # the resulting empty list would clear the existing relation.
            return None
        items = []
        for remote_id in value:
            try:
                validate_remote_id(remote_id)
            except (ValidationError, TypeError):
                # skip entries that aren't urls, including non-string
                # entries that the validator's regex can't be applied to
                continue
            items.append(
                activitypub.resolve_remote_id(remote_id, model=self.related_model)
            )
        return items
2020-12-01 03:01:43 +00:00
2020-11-30 22:24:31 +00:00
class TagField(ManyToManyField):
    """special case of many to many that uses Tags"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # always serialized under "tag", whatever the model field is called
        self.activitypub_field = "tag"

    def field_to_activity(self, value):
        """serialize every related item as an activitypub Link"""
        return [
            activitypub.Link(
                href=item.remote_id,
                name=getattr(item, item.name_field),
                # users are tagged as mentions; everything else uses the
                # name of its model class
                type=(
                    "Mention"
                    if item.__class__.__name__ == "User"
                    else item.__class__.__name__
                ),
            )
            for item in value.all()
        ]

    def field_from_activity(self, value):
        """resolve the tags whose type matches this field's related model"""
        if not isinstance(value, list):
            return None

        wanted_type = self.related_model.activity_serializer.type
        items = []
        for link_json in value:
            link = activitypub.Link(**link_json)
            tag_type = link.type
            if tag_type == "Mention":
                tag_type = "Person"
            elif tag_type == "Book":
                tag_type = "Edition"
            if tag_type == wanted_type:
                items.append(
                    activitypub.resolve_remote_id(
                        link.href, model=self.related_model
                    )
                )
            # tags can contain multiple types; the others are skipped
        return items
2020-11-30 22:24:31 +00:00
2020-12-17 20:46:05 +00:00
def image_serializer(value, alt):
    """helper for serializing images

    Returns None when ``value`` is falsy or has no ``url`` attribute;
    otherwise an activitypub.Image whose url is prefixed with
    "https://" and DOMAIN, and whose name is ``alt``.
    """
    if not value or not hasattr(value, "url"):
        return None
    return activitypub.Image(url="https://%s%s" % (DOMAIN, value.url), name=alt)
2020-11-30 22:24:31 +00:00
2020-11-30 18:32:13 +00:00
class ImageField(ActivitypubFieldMixin, models.ImageField):
    """activitypub-aware image field"""

    def __init__(self, *args, alt_field=None, **kwargs):
        # name of the attribute on the model instance that holds the alt text
        self.alt_field = alt_field
        super().__init__(*args, **kwargs)

    # pylint: disable=arguments-differ
    def set_field_from_activity(self, instance, data, save=True):
        """helper function for assigning a value to the field"""
        value = getattr(data, self.get_activitypub_field())
        formatted = self.field_from_activity(value)
        if formatted is None or formatted is MISSING:
            return
        # formatted is [file name, file content]
        getattr(instance, self.name).save(*formatted, save=save)

    def set_activity_from_field(self, activity, instance):
        """write the serialized image into the activity dict"""
        value = getattr(instance, self.name)
        if value is None:
            return
        # alt_field defaults to None, and getattr(instance, None) would
        # raise a TypeError, so only look the alt text up when it is set
        alt_text = getattr(instance, self.alt_field) if self.alt_field else None
        formatted = self.field_to_activity(value, alt_text)

        key = self.get_activitypub_field()
        activity[key] = formatted

    def field_to_activity(self, value, alt=None):
        return image_serializer(value, alt)

    def field_from_activity(self, value):
        """download the image behind an activitypub image or url

        Returns ``[file name, ContentFile]``, or None when the value is not
        usable or the image could not be fetched.
        """
        # pylint: disable=import-outside-toplevel
        from urllib.parse import urlparse

        image_slug = value
        # when it's an inline image (User avatar/icon, Book cover), it's a json
        # blob, but when it's an attached image, it's just a url
        if hasattr(image_slug, "url"):
            url = image_slug.url
        elif isinstance(image_slug, str):
            url = image_slug
        else:
            return None

        try:
            validate_remote_id(url)
        except ValidationError:
            return None

        response = get_image(url)
        if not response:
            return None

        # take the extension from the last segment of the url path only, so
        # the file name never picks up a "/", a query string or the host
        # name; a url without an extension gets a bare uuid as its name
        last_segment = urlparse(url).path.rsplit("/", 1)[-1]
        _, dot, extension = last_segment.rpartition(".")
        image_name = str(uuid4())
        if dot and extension:
            image_name += "." + extension
        image_content = ContentFile(response.content)
        return [image_name, image_content]
2020-11-30 22:24:31 +00:00
class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
    """activitypub-aware datetime field"""

    def field_to_activity(self, value):
        """serialize the datetime as an ISO 8601 string (None if empty)"""
        if not value:
            return None
        return value.isoformat()

    def field_from_activity(self, value):
        """parse a date string into a datetime

        Returns None when the value cannot be parsed.
        """
        try:
            date_value = dateutil.parser.parse(value)
            try:
                return timezone.make_aware(date_value)
            except ValueError:
                # make_aware rejected the value (e.g. it already carries a
                # timezone), so it is returned as parsed
                return date_value
        except (ParserError, TypeError, OverflowError):
            # OverflowError is raised by dateutil when a parsed number is too
            # large; treat it like any other unparseable date
            return None
2021-03-08 16:49:10 +00:00
2020-12-17 00:47:05 +00:00
class HtmlField(ActivitypubFieldMixin, models.TextField):
    """a text field for storing html"""

    def field_from_activity(self, value):
        """run incoming html through InputHtmlParser and return its output

        Returns None for empty or MISSING values.
        """
        # MISSING is a sentinel, so it is compared by identity like
        # everywhere else in this module
        if not value or value is MISSING:
            return None
        sanitizer = InputHtmlParser()
        sanitizer.feed(value)
        return sanitizer.get_output()
2021-03-08 16:49:10 +00:00
2020-11-30 22:40:26 +00:00
class ArrayField(ActivitypubFieldMixin, DjangoArrayField):
    """activitypub-aware array field"""

    def field_to_activity(self, value):
        # every element is serialized as its string representation
        return list(map(str, value))
2020-11-30 22:24:31 +00:00
2021-03-08 16:49:10 +00:00
2020-11-30 18:32:13 +00:00
class CharField(ActivitypubFieldMixin, models.CharField):
    """activitypub-aware char field

    Adds nothing of its own: it only combines ActivitypubFieldMixin with
    django's CharField.
    """
2020-11-30 18:32:13 +00:00
class TextField(ActivitypubFieldMixin, models.TextField):
    """activitypub-aware text field

    Adds nothing of its own: it only combines ActivitypubFieldMixin with
    django's TextField.
    """
2020-11-30 18:32:13 +00:00
class BooleanField(ActivitypubFieldMixin, models.BooleanField):
    """activitypub-aware boolean field

    Adds nothing of its own: it only combines ActivitypubFieldMixin with
    django's BooleanField.
    """
2020-11-30 22:24:31 +00:00
class IntegerField(ActivitypubFieldMixin, models.IntegerField):
    """activitypub-aware integer field

    Adds nothing of its own: it only combines ActivitypubFieldMixin with
    django's IntegerField.
    """