moviewyrm/bookwyrm/models/fields.py

539 lines
17 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" activitypub-aware django model fields """
from dataclasses import MISSING
2020-11-30 18:32:13 +00:00
import re
from uuid import uuid4
2021-08-04 14:55:12 +00:00
from urllib.parse import urljoin
2020-11-30 18:32:13 +00:00
2020-12-03 20:35:57 +00:00
import dateutil.parser
from dateutil.parser import ParserError
2020-11-30 22:40:26 +00:00
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
2020-11-30 18:32:13 +00:00
from django.core.exceptions import ValidationError
from django.db import models
2021-06-14 22:19:54 +00:00
from django.forms import ClearableFileInput, ImageField as DjangoImageField
2020-12-03 20:35:57 +00:00
from django.utils import timezone
2020-11-30 18:32:13 +00:00
from django.utils.translation import gettext_lazy as _
2021-08-04 14:55:12 +00:00
from django.utils.encoding import filepath_to_uri
2021-08-03 01:55:08 +00:00
2020-11-30 18:32:13 +00:00
from bookwyrm import activitypub
from bookwyrm.connectors import get_image
2020-12-17 00:47:05 +00:00
from bookwyrm.sanitize_html import InputHtmlParser
2021-08-04 14:55:12 +00:00
from bookwyrm.settings import MEDIA_FULL_URL
2020-11-30 18:32:13 +00:00
def validate_remote_id(value):
    """make sure the remote_id looks like a url

    Accepts only ``http://`` or ``https://`` followed by one or more
    non-whitespace characters.

    Raises:
        ValidationError: if ``value`` is empty or is not such a url.
    """
    # fullmatch (rather than match with "^...$") so that a trailing newline is
    # rejected; "https?" rather than "http.?" so that only the two real schemes
    # are accepted
    if not value or not re.fullmatch(r"https?://\S+", value):
        raise ValidationError(
            _("%(value)s is not a valid remote_id"),
            params={"value": value},
        )
2020-12-20 02:54:56 +00:00
def validate_localname(value):
    """make sure localnames look okay

    A localname may only contain ascii letters, digits, ``-``, ``_`` and ``.``.

    Raises:
        ValidationError: if ``value`` contains any other character.
    """
    # fullmatch instead of "^...$": "$" would also accept a trailing newline
    if not re.fullmatch(r"[A-Za-z\-_\.0-9]+", value):
        raise ValidationError(
            _("%(value)s is not a valid username"),
            params={"value": value},
        )
def validate_username(value):
    """make sure usernames look okay

    A username has the form ``localname@domain.tld``, where the tld is at
    least two lowercase ascii letters.

    Raises:
        ValidationError: if ``value`` does not have that form.
    """
    # fullmatch instead of "^...$": "$" would also accept a trailing newline
    if not re.fullmatch(r"[A-Za-z\-_\.0-9]+@[A-Za-z\-_\.0-9]+\.[a-z]{2,}", value):
        raise ValidationError(
            _("%(value)s is not a valid username"),
            params={"value": value},
        )
2020-11-30 18:32:13 +00:00
class ActivitypubFieldMixin:
    """make a database field serializable to and from activitypub json"""

    def __init__(
        self,
        *args,
        activitypub_field=None,
        activitypub_wrapper=None,
        deduplication_field=False,
        **kwargs,
    ):
        self.deduplication_field = deduplication_field
        if not activitypub_wrapper:
            self.activitypub_field = activitypub_field
        else:
            # the two names trade places on purpose: the wrapper becomes the
            # key written into the activity (see get_activitypub_field), and
            # the field name becomes the key nested inside it (see
            # field_to_activity / field_from_activity)
            self.activitypub_wrapper, self.activitypub_field = (
                activitypub_field,
                activitypub_wrapper,
            )
        super().__init__(*args, **kwargs)

    def set_field_from_activity(self, instance, data, overwrite=True):
        """assign the activity's value to this field on the instance

        Returns True if the instance was changed, False if it was left alone.
        """
        activity_key = self.get_activitypub_field()
        try:
            raw = getattr(data, activity_key)
        except AttributeError:
            # hack-y workaround for boosts: fall back from "attributedTo" to
            # "actor"; any other missing attribute is a real error
            if activity_key != "attributedTo":
                raise
            raw = data.actor

        incoming = self.field_from_activity(raw)
        if incoming is None or incoming is MISSING or incoming == {}:
            return False

        existing = getattr(instance, self.name, None)
        # outside overwrite mode an already-set field is kept as it is
        if existing and not overwrite:
            return False
        # nothing to do if the value is unchanged
        if existing == incoming:
            return False

        setattr(instance, self.name, incoming)
        return True

    def set_activity_from_field(self, activity, instance):
        """write this field's value into the activity json object"""
        serialized = self.field_to_activity(getattr(instance, self.name))
        if serialized is None:
            return

        target = self.get_activitypub_field()
        # TODO: surely there's a better way
        if instance.__class__.__name__ == "Boost" and target == "attributedTo":
            target = "actor"

        # extend a list that is already in the activity, otherwise (re)place
        if isinstance(activity.get(target), list):
            activity[target] += serialized
        else:
            activity[target] = serialized

    def field_to_activity(self, value):
        """formatter to convert a model value into activitypub"""
        if not hasattr(self, "activitypub_wrapper"):
            return value
        return {self.activitypub_wrapper: value}

    def field_from_activity(self, value):
        """formatter to convert activitypub into a model value"""
        if value and hasattr(self, "activitypub_wrapper"):
            return value.get(self.activitypub_wrapper)
        return value

    def get_activitypub_field(self):
        """model_field_name to activitypubFieldName"""
        if self.activitypub_field:
            return self.activitypub_field
        # snake_case -> camelCase, using the last dotted part of the name
        first, *rest = self.name.split(".")[-1].split("_")
        return first + "".join(part.title() for part in rest)
2020-12-01 03:01:43 +00:00
2020-11-30 18:32:13 +00:00
class ActivitypubRelatedFieldMixin(ActivitypubFieldMixin):
    """default (de)serialization for foreign key and one to one"""

    def __init__(self, *args, load_remote=True, **kwargs):
        # when False, only the local database is consulted
        self.load_remote = load_remote
        super().__init__(*args, **kwargs)

    def field_from_activity(self, value):
        """turn an activitypub object or a remote id into a related model

        Returns None for empty values and for anything that is neither an
        object with an id nor a valid remote id.
        """
        if not value:
            return None

        model = self.related_model
        if getattr(value, "id", None):
            # an activitypub object
            if self.load_remote:
                # it can be deserialized directly
                return value.to_model(model=model)
            # only look in the local database
            return model.find_existing(value.serialize())

        try:
            # make sure the value looks like a remote id
            validate_remote_id(value)
        except ValidationError:
            # we don't know what this is, ignore it
            return None

        if self.load_remote:
            # gets or creates the model from the remote id
            return activitypub.resolve_remote_id(value, model=model)
        # only look in the local database
        return model.find_existing_by_remote_id(value)
2020-11-30 18:32:13 +00:00
class RemoteIdField(ActivitypubFieldMixin, models.CharField):
    """a url that serves as a unique identifier"""

    def __init__(self, *args, max_length=255, validators=None, **kwargs):
        if not validators:
            validators = [validate_remote_id]
        # for this field, the default is true. false everywhere else.
        is_dedupe_field = kwargs.get("deduplication_field", True)
        super().__init__(*args, max_length=max_length, validators=validators, **kwargs)
        self.deduplication_field = is_dedupe_field
2020-11-30 18:32:13 +00:00
class UsernameField(ActivitypubFieldMixin, models.CharField):
    """activitypub-aware username field"""

    def __init__(self, activitypub_field="preferredUsername", **kwargs):
        self.activitypub_field = activitypub_field
        # every option is fixed here; the incoming kwargs are not forwarded
        options = {
            "max_length": 150,
            "unique": True,
            "validators": [validate_username],
            "error_messages": {
                "unique": _("A user with that username already exists."),
            },
        }
        # I don't totally know why pylint is mad at this, but it makes it work
        # (the lookup starts after ActivitypubFieldMixin, so its __init__ is skipped)
        super(ActivitypubFieldMixin, self).__init__(  # pylint: disable=bad-super-call
            _("username"), **options
        )

    def deconstruct(self):
        """implementation of models.Field deconstruct"""
        name, path, args, kwargs = super().deconstruct()
        # drop the options that __init__ always sets itself
        for fixed_option in (
            "verbose_name",
            "max_length",
            "unique",
            "validators",
            "error_messages",
        ):
            del kwargs[fixed_option]
        return name, path, args, kwargs

    def field_to_activity(self, value):
        """only the part before the first "@" is serialized"""
        localname, _separator, _domain = value.partition("@")
        return localname
2020-11-30 18:32:13 +00:00
# (stored value, translated label) pairs, passed as `choices` by PrivacyField.
# note that the stored value "direct" is labelled "Private".
PrivacyLevels = [
    ("public", _("Public")),
    ("unlisted", _("Unlisted")),
    ("followers", _("Followers")),
    ("direct", _("Private")),
]
2021-03-08 16:49:10 +00:00
class PrivacyField(ActivitypubFieldMixin, models.CharField):
    """privacy level; this maps to two different activitypub fields (to and cc)"""

    public = "https://www.w3.org/ns/activitystreams#Public"

    def __init__(self, *args, **kwargs):
        # kwargs are accepted but not forwarded: length, choices and default are
        # fixed. NOTE(review): presumably this also keeps kwargs replayed from
        # migrations from clashing with the fixed values -- confirm
        super().__init__(*args, max_length=255, choices=PrivacyLevels, default="public")

    # pylint: disable=invalid-name
    def set_field_from_activity(self, instance, data, overwrite=True):
        """work out the privacy level from the activity's "to" and "cc"

        Returns True if the instance's privacy changed, False otherwise
        (always False when ``overwrite`` is off).

        Raises:
            ValidationError: if the activity has none of the attributes
                "attributedTo", "owner" or "actor".
        """
        if not overwrite:
            return False

        original = getattr(instance, self.name)
        to = data.to
        cc = data.cc

        # we need to figure out who this is to get their followers link
        user_field = next(
            (
                field
                for field in ("attributedTo", "owner", "actor")
                if hasattr(data, field)
            ),
            None,
        )
        if not user_field:
            raise ValidationError("No user field found for privacy", data)
        user = activitypub.resolve_remote_id(getattr(data, user_field), model="User")

        if to == [self.public]:
            privacy = "public"
        elif to == [user.followers_url]:
            privacy = "followers"
        elif cc == []:
            privacy = "direct"
        elif self.public in cc:
            privacy = "unlisted"
        else:
            privacy = "followers"
        setattr(instance, self.name, privacy)

        # report whether the value changed, like the other fields do
        return original != getattr(instance, self.name)

    def set_activity_from_field(self, activity, instance):
        """fill in the activity's "to" and "cc" from the privacy level"""
        # explicitly to anyone mentioned (statuses only)
        mentions = []
        if hasattr(instance, "mention_users"):
            mentions = [u.remote_id for u in instance.mention_users.all()]
        # this is a link to the followers list
        # pylint: disable=protected-access
        followers = instance.user.followers_url
        if instance.privacy == "public":
            activity["to"] = [self.public]
            activity["cc"] = [followers] + mentions
        elif instance.privacy == "unlisted":
            activity["to"] = [followers]
            activity["cc"] = [self.public] + mentions
        elif instance.privacy == "followers":
            activity["to"] = [followers]
            activity["cc"] = mentions
        elif instance.privacy == "direct":
            activity["to"] = mentions
            activity["cc"] = []
class ForeignKey(ActivitypubRelatedFieldMixin, models.ForeignKey):
    """activitypub-aware foreign key field"""

    def field_to_activity(self, value):
        """a related object is serialized as its remote_id, or None if unset"""
        return value.remote_id if value else None
class OneToOneField(ActivitypubRelatedFieldMixin, models.OneToOneField):
    """activitypub-aware one to one field"""

    def field_to_activity(self, value):
        """a related object is serialized in full via to_activity, or None if unset"""
        return value.to_activity() if value else None
2020-11-30 18:32:13 +00:00
class ManyToManyField(ActivitypubFieldMixin, models.ManyToManyField):
    """activitypub-aware many to many field"""

    def __init__(self, *args, link_only=False, **kwargs):
        # when set, the field serializes to a single url instead of a list
        self.link_only = link_only
        super().__init__(*args, **kwargs)

    def set_field_from_activity(self, instance, data, overwrite=True):
        """replace the related set with the activity's items and save

        Returns True if the set was assigned, False if it was left alone.
        """
        # outside overwrite mode, a relation that already has items is kept
        if not overwrite and getattr(instance, self.name).exists():
            return False

        raw = getattr(data, self.get_activitypub_field())
        related_items = self.field_from_activity(raw)
        if related_items is None or related_items is MISSING:
            return False

        manager = getattr(instance, self.name)
        manager.set(related_items)
        instance.save(broadcast=False)
        return True

    def field_to_activity(self, value):
        """either a link built from the owner's remote_id, or a list of remote_ids"""
        if not self.link_only:
            return [related.remote_id for related in value.all()]
        return f"{value.instance.remote_id}/{self.name}"

    def field_from_activity(self, value):
        """resolve a list of remote ids to models, skipping invalid ids"""
        if value is None or value is MISSING or not isinstance(value, list):
            # If this is a link, we currently aren't doing anything with it
            return None

        resolved = []
        for candidate in value:
            try:
                validate_remote_id(candidate)
            except ValidationError:
                continue
            resolved.append(
                activitypub.resolve_remote_id(candidate, model=self.related_model)
            )
        return resolved
2020-12-01 03:01:43 +00:00
2020-11-30 22:24:31 +00:00
class TagField(ManyToManyField):
    """special case of many to many that uses Tags"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # always serialized under "tag", whatever was passed in
        self.activitypub_field = "tag"

    def field_to_activity(self, value):
        """one activitypub Link per related item; users become "Mention" links"""
        links = []
        for related in value.all():
            model_name = related.__class__.__name__
            link_type = "Mention" if model_name == "User" else model_name
            links.append(
                activitypub.Link(
                    href=related.remote_id,
                    name=getattr(related, related.name_field),
                    type=link_type,
                )
            )
        return links

    def field_from_activity(self, value):
        """resolve the links whose type matches this field's related model"""
        if not isinstance(value, list):
            return None

        resolved = []
        for link_json in value:
            link = activitypub.Link(**link_json)
            # map link types onto serializer types before comparing
            tag_type = "Person" if link.type == "Mention" else link.type
            if tag_type == "Book":
                tag_type = "Edition"
            # tags can contain multiple types, only keep the matching ones
            if tag_type == self.related_model.activity_serializer.type:
                resolved.append(
                    activitypub.resolve_remote_id(link.href, model=self.related_model)
                )
        return resolved
2020-11-30 22:24:31 +00:00
class ClearableFileInputWithWarning(ClearableFileInput):
    """clearable file input rendered with the max file size warning template"""

    template_name = "widgets/clearable_file_input_with_warning.html"
2021-06-14 22:19:54 +00:00
class CustomImageField(DjangoImageField):
    """form image field that uses the file input widget with the size warning"""

    widget = ClearableFileInputWithWarning
2020-11-30 18:32:13 +00:00
class ImageField(ActivitypubFieldMixin, models.ImageField):
    """activitypub-aware image field"""

    def __init__(self, *args, alt_field=None, **kwargs):
        # name of the attribute on the model that holds the image's alt text
        self.alt_field = alt_field
        super().__init__(*args, **kwargs)

    # pylint: disable=arguments-differ
    def set_field_from_activity(self, instance, data, save=True, overwrite=True):
        """fetch the activity's image and store it on the instance

        Returns True if an image was saved to the field, False otherwise.
        """
        value = getattr(data, self.get_activitypub_field())
        # check this before field_from_activity, which downloads the image:
        # there's no point fetching something that won't be used
        if not overwrite and getattr(instance, self.name, None):
            return False

        formatted = self.field_from_activity(value)
        if formatted is None or formatted is MISSING:
            return False

        getattr(instance, self.name).save(*formatted, save=save)
        return True

    def set_activity_from_field(self, activity, instance):
        """write the image, with its alt text if any, into the activity"""
        value = getattr(instance, self.name)
        if value is None:
            return
        # alt_field is optional; getattr with a None name raises TypeError
        alt_text = getattr(instance, self.alt_field) if self.alt_field else None
        formatted = self.field_to_activity(value, alt_text)

        key = self.get_activitypub_field()
        activity[key] = formatted

    def field_to_activity(self, value, alt=None):
        """an activitypub Document for the image, or None if it has no url"""
        url = get_absolute_url(value)
        if not url:
            return None
        return activitypub.Document(url=url, name=alt)

    def field_from_activity(self, value):
        """download the image an activity refers to

        Returns a ``[file name, content]`` list, or None if the value is not
        a usable url or nothing could be fetched.
        """
        image_slug = value
        # when it's an inline image (User avatar/icon, Book cover), it's a json
        # blob, but when it's an attached image, it's just a url
        if hasattr(image_slug, "url"):
            url = image_slug.url
        elif isinstance(image_slug, str):
            url = image_slug
        else:
            return None

        try:
            validate_remote_id(url)
        except ValidationError:
            return None

        image_content, extension = get_image(url)
        if not image_content:
            return None

        # stored under a random name, keeping the extension get_image reported
        image_name = f"{uuid4()}.{extension}"
        return [image_name, image_content]

    def formfield(self, **kwargs):
        """special case for forms"""
        return super().formfield(
            **{
                "form_class": CustomImageField,
                **kwargs,
            }
        )
2021-08-04 14:55:12 +00:00
2021-11-10 18:58:02 +00:00
def get_absolute_url(value):
    """returns an absolute URL for the image

    ``value`` must have a ``name`` attribute; None is returned when that
    name is empty. Otherwise the name is converted to a uri and joined onto
    MEDIA_FULL_URL.
    """
    name = value.name
    if not name:
        return None

    url = filepath_to_uri(name)
    if url is None:
        return url
    # strip leading slashes so the path is joined relative to the media url
    return urljoin(MEDIA_FULL_URL, url.lstrip("/"))
2021-08-04 14:55:12 +00:00
2020-11-30 18:32:13 +00:00
2020-11-30 22:24:31 +00:00
class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
    """activitypub-aware datetime field"""

    def field_to_activity(self, value):
        """iso formatted string, or None when there is no value"""
        return value.isoformat() if value else None

    def field_from_activity(self, value):
        """parse a date string; None if it can't be parsed"""
        try:
            parsed = dateutil.parser.parse(value)
            try:
                aware = timezone.make_aware(parsed)
            except ValueError:
                # make_aware refused the value, so use it as parsed
                return parsed
            return aware
        except (ParserError, TypeError):
            return None
2021-03-08 16:49:10 +00:00
2020-12-17 00:47:05 +00:00
class HtmlField(ActivitypubFieldMixin, models.TextField):
    """a text field for storing html"""

    def field_from_activity(self, value):
        """run incoming html through the sanitizer; None for empty or missing values"""
        # MISSING is a sentinel, so compare by identity like the other fields do
        if not value or value is MISSING:
            return None
        sanitizer = InputHtmlParser()
        sanitizer.feed(value)
        return sanitizer.get_output()
2021-03-08 16:49:10 +00:00
2020-11-30 22:40:26 +00:00
class ArrayField(ActivitypubFieldMixin, DjangoArrayField):
    """activitypub-aware array field"""

    def field_to_activity(self, value):
        """every item is serialized as a string"""
        return list(map(str, value))
2020-11-30 22:24:31 +00:00
2021-03-08 16:49:10 +00:00
2020-11-30 18:32:13 +00:00
class CharField(ActivitypubFieldMixin, models.CharField):
    """char field with the default activitypub (de)serialization"""
2021-03-08 16:49:10 +00:00
2020-11-30 18:32:13 +00:00
2021-12-15 20:40:31 +00:00
class URLField(ActivitypubFieldMixin, models.URLField):
    """url field with the default activitypub (de)serialization"""
2020-11-30 18:32:13 +00:00
class TextField(ActivitypubFieldMixin, models.TextField):
    """text field with the default activitypub (de)serialization"""
2021-03-08 16:49:10 +00:00
2020-11-30 18:32:13 +00:00
class BooleanField(ActivitypubFieldMixin, models.BooleanField):
    """boolean field with the default activitypub (de)serialization"""
2021-03-08 16:49:10 +00:00
2020-11-30 22:24:31 +00:00
class IntegerField(ActivitypubFieldMixin, models.IntegerField):
    """activitypub-aware integer field"""
2021-03-19 19:14:59 +00:00
class DecimalField(ActivitypubFieldMixin, models.DecimalField):
    """activitypub-aware decimal field"""

    def field_to_activity(self, value):
        """serialize as a float, or None when the value is falsy"""
        # NOTE(review): a value of zero is also serialized as None -- confirm
        # that this is intended
        return float(value) if value else None