Refactor HTML rendering into one place

Also suppress using external <a> tags for now, until we can separate
them from hashtags properly.
This commit is contained in:
Andrew Godwin 2022-12-20 11:39:45 +00:00
parent d750c7a871
commit 4ad4f468a4
18 changed files with 179 additions and 163 deletions

View file

@ -46,6 +46,7 @@ repos:
rev: v0.991 rev: v0.991
hooks: hooks:
- id: mypy - id: mypy
exclude: "^tests/"
additional_dependencies: additional_dependencies:
[ [
types-pyopenssl, types-pyopenssl,

View file

@ -1,10 +1,11 @@
import re import re
from functools import partial from functools import partial
from typing import ClassVar, cast from typing import ClassVar
import httpx import httpx
import urlman import urlman
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
from cachetools import TTLCache, cached
from django.conf import settings from django.conf import settings
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.db import models from django.db import models
@ -50,17 +51,18 @@ class EmojiStates(StateGraph):
class EmojiQuerySet(models.QuerySet): class EmojiQuerySet(models.QuerySet):
def usable(self, domain: Domain | None = None): def usable(self, domain: Domain | None = None):
if domain is None or domain.local: """
visible_q = models.Q(local=True) Returns all usable emoji, optionally filtering by domain too.
else: """
visible_q = models.Q(public=True) visible_q = models.Q(local=True) | models.Q(public=True)
if Config.system.emoji_unreviewed_are_public: if Config.system.emoji_unreviewed_are_public:
visible_q |= models.Q(public__isnull=True) visible_q |= models.Q(public__isnull=True)
qs = self.filter(visible_q) qs = self.filter(visible_q)
if domain: if domain:
if not domain.local: if not domain.local:
qs = qs.filter(domain=domain) qs = qs.filter(domain=domain)
return qs return qs
@ -136,6 +138,13 @@ class Emoji(StatorModel):
def load_locals(cls) -> dict[str, "Emoji"]: def load_locals(cls) -> dict[str, "Emoji"]:
return {x.shortcode: x for x in Emoji.objects.usable().filter(local=True)} return {x.shortcode: x for x in Emoji.objects.usable().filter(local=True)}
@classmethod
@cached(cache=TTLCache(maxsize=1000, ttl=60))
def for_domain(cls, domain: Domain | None) -> list["Emoji"]:
    """
    Returns the emoji available for the given domain as a list.

    With no domain, this is the contents of ``cls.locals``; otherwise it
    is whatever ``cls.objects.usable(domain)`` yields.

    The result is memoised in a TTLCache (up to 1000 entries, ttl=60), so
    repeated calls with the same arguments hand back the *same* list
    object until the entry expires. Treat the return value as read-only
    and copy it before modifying it.
    """
    if domain:
        return list(cls.objects.usable(domain))
    return list(cls.locals.values())
@property @property
def fullcode(self): def fullcode(self):
return f":{self.shortcode}:" return f":{self.shortcode}:"
@ -164,41 +173,6 @@ class Emoji(StatorModel):
) )
return self.fullcode return self.fullcode
@classmethod
def imageify_emojis(
cls,
content: str,
*,
emojis: list["Emoji"] | EmojiQuerySet | None = None,
include_local: bool = True,
):
"""
Find :emoji: in content and convert to <img>. If include_local is True,
the local emoji will be used as a fallback for any shortcodes not defined
by emojis.
"""
emoji_set = (
cast(list[Emoji], list(cls.locals.values())) if include_local else []
)
if emojis:
if isinstance(emojis, (EmojiQuerySet, list)):
emoji_set.extend(list(emojis))
else:
raise TypeError("Unsupported type for emojis")
possible_matches = {
emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable
}
def replacer(match):
fullcode = match.group(1).lower()
if fullcode in possible_matches:
return possible_matches[fullcode]
return match.group()
return mark_safe(Emoji.emoji_regex.sub(replacer, content))
@classmethod @classmethod
def emojis_from_content(cls, content: str, domain: Domain | None) -> list[str]: def emojis_from_content(cls, content: str, domain: Domain | None) -> list[str]:
""" """

View file

@ -5,7 +5,6 @@ import urlman
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
from django.db import models from django.db import models
from django.utils import timezone from django.utils import timezone
from django.utils.safestring import mark_safe
from core.html import strip_html from core.html import strip_html
from core.models import Config from core.models import Config
@ -176,19 +175,6 @@ class Hashtag(StatorModel):
hashtags = sorted({tag.lower() for tag in hashtag_hits}) hashtags = sorted({tag.lower() for tag in hashtag_hits})
return list(hashtags) return list(hashtags)
@classmethod
def linkify_hashtags(cls, content, domain=None) -> str:
def replacer(match):
hashtag = match.group(1)
if domain:
return f'<a class="hashtag" href="https://{domain.uri_domain}/tags/{hashtag.lower()}/">#{hashtag}</a>'
else:
return (
f'<a class="hashtag" href="/tags/{hashtag.lower()}/">#{hashtag}</a>'
)
return mark_safe(Hashtag.hashtag_regex.sub(replacer, content))
def to_mastodon_json(self): def to_mastodon_json(self):
return { return {
"name": self.hashtag, "name": self.hashtag,

View file

@ -20,8 +20,7 @@ from activities.models.post_types import (
PostTypeDataDecoder, PostTypeDataDecoder,
PostTypeDataEncoder, PostTypeDataEncoder,
) )
from activities.templatetags.emoji_tags import imageify_emojis from core.html import ContentRenderer, strip_html
from core.html import sanitize_post, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from stator.exceptions import TryAgainLater from stator.exceptions import TryAgainLater
from stator.models import State, StateField, StateGraph, StatorModel from stator.models import State, StateField, StateGraph, StatorModel
@ -383,13 +382,7 @@ class Post(StatorModel):
return mark_safe(self.mention_regex.sub(replacer, content)) return mark_safe(self.mention_regex.sub(replacer, content))
def _safe_content_note(self, *, local: bool = True): def _safe_content_note(self, *, local: bool = True):
content = Hashtag.linkify_hashtags( return ContentRenderer(local=local).render_post(self.content, self)
self.linkify_mentions(sanitize_post(self.content), local=local),
domain=None if local else self.author.domain,
)
if local:
content = imageify_emojis(content, self.author.domain)
return content
# def _safe_content_question(self, *, local: bool = True): # def _safe_content_question(self, *, local: bool = True):
# context = { # context = {
@ -432,12 +425,6 @@ class Post(StatorModel):
""" """
return self.safe_content(local=False) return self.safe_content(local=False)
def safe_content_plain(self):
"""
Returns the content formatted as plain text
"""
return self.linkify_mentions(sanitize_post(self.content))
### Async helpers ### ### Async helpers ###
async def afetch_full(self) -> "Post": async def afetch_full(self) -> "Post":
@ -914,7 +901,7 @@ class Post(StatorModel):
"poll": None, "poll": None,
"card": None, "card": None,
"language": None, "language": None,
"text": self.safe_content_plain(), "text": self.safe_content_remote(),
"edited_at": format_ld_date(self.edited) if self.edited else None, "edited_at": format_ld_date(self.edited) if self.edited else None,
} }
if interactions: if interactions:

View file

@ -3,8 +3,6 @@ import datetime
from django import template from django import template
from django.utils import timezone from django.utils import timezone
from activities.models import Hashtag
register = template.Library() register = template.Library()
@ -33,14 +31,3 @@ def timedeltashort(value: datetime.datetime):
years = max(days // 365.25, 1) years = max(days // 365.25, 1)
text = f"{years:0n}y" text = f"{years:0n}y"
return text return text
@register.filter
def linkify_hashtags(value: str):
"""
Convert hashtags in content in to /tags/<hashtag>/ links.
"""
if not value:
return ""
return Hashtag.linkify_hashtags(value)

View file

@ -1,27 +0,0 @@
from cachetools import TTLCache, cached
from django import template
from activities.models import Emoji
from users.models import Domain
register = template.Library()
@cached(cache=TTLCache(maxsize=1000, ttl=60))
def emoji_from_domain(domain: Domain | None) -> list[Emoji]:
if not domain:
return list(Emoji.locals.values())
return list(Emoji.objects.usable(domain))
@register.filter
def imageify_emojis(value: str, arg: Domain | None = None):
"""
Convert hashtags in content in to /tags/<hashtag>/ links.
"""
if not value:
return ""
emojis = emoji_from_domain(arg)
return Emoji.imageify_emojis(value, emojis=emojis)

View file

@ -40,6 +40,11 @@ class Individual(TemplateView):
ancestors, descendants = PostService(self.post_obj).context( ancestors, descendants = PostService(self.post_obj).context(
self.request.identity self.request.identity
) )
print(
self.post_obj.to_mastodon_json(),
self.post_obj.emojis.all(),
self.post_obj.emojis.usable(),
)
return { return {
"identity": self.identity, "identity": self.identity,
"post": self.post_obj, "post": self.post_obj,

View file

@ -5,4 +5,4 @@ from api.views.base import api_router
@api_router.get("/v1/custom_emojis", response=list[CustomEmoji]) @api_router.get("/v1/custom_emojis", response=list[CustomEmoji])
def emojis(request): def emojis(request):
return [e.to_mastodon_json() for e in Emoji.objects.usable()] return [e.to_mastodon_json() for e in Emoji.objects.usable().filter(local=True)]

View file

@ -15,12 +15,12 @@ def allow_a(tag: str, name: str, value: str):
return False return False
def sanitize_post(post_html: str) -> str: def sanitize_html(post_html: str) -> str:
""" """
Only allows a, br, p and span tags, and class attributes. Only allows a, br, p and span tags, and class attributes.
""" """
cleaner = bleach.Cleaner( cleaner = bleach.Cleaner(
tags=["br", "p", "a"], tags=["br", "p"],
attributes={ # type:ignore attributes={ # type:ignore
"a": allow_a, "a": allow_a,
"p": ["class"], "p": ["class"],
@ -50,3 +50,117 @@ def html_to_plaintext(post_html: str) -> str:
# Remove all other HTML and return # Remove all other HTML and return
cleaner = bleach.Cleaner(tags=[], strip=True, filters=[]) cleaner = bleach.Cleaner(tags=[], strip=True, filters=[])
return cleaner.clean(post_html).strip() return cleaner.clean(post_html).strip()
class ContentRenderer:
    """
    Renders HTML for posts, identity fields, and more.

    The `local` parameter affects whether links are absolute (False) or
    relative (True). It also controls emoji rendering: :shortcode: emoji are
    only converted to <img> tags when `local` is True.
    """

    def __init__(self, local: bool):
        self.local = local

    def render_post(self, html: str, post) -> str:
        """
        Given post HTML, normalises it and renders it for presentation.

        The HTML is sanitised first, then mentions and hashtags are linked,
        and (for local rendering only) emoji are turned into images.
        Returns an empty string for empty input; otherwise the result is
        marked safe for template output.
        """
        if not html:
            return ""
        html = sanitize_html(html)
        html = self.linkify_mentions(html, post=post)
        html = self.linkify_hashtags(html, identity=post.author)
        if self.local:
            html = self.imageify_emojis(html, identity=post.author)
        return mark_safe(html)

    def render_identity(self, html: str, identity, strip: bool = False) -> str:
        """
        Given identity field HTML, normalises it and renders it for presentation.

        If `strip` is True the input goes through strip_html rather than
        sanitize_html. Hashtags are linked and (for local rendering only)
        emoji are turned into images; mentions are not linked here.
        Returns an empty string for empty input; otherwise the result is
        marked safe for template output.
        """
        if not html:
            return ""
        if strip:
            html = strip_html(html)
        else:
            html = sanitize_html(html)
        html = self.linkify_hashtags(html, identity=identity)
        if self.local:
            html = self.imageify_emojis(html, identity=identity)
        return mark_safe(html)

    def linkify_mentions(self, html: str, post) -> str:
        """
        Links mentions _in the context of the post_ - as in, using the mentions
        property as the only source (as we might be doing this without other
        DB access allowed)

        Handles that do not correspond to one of the post's mentions are
        left untouched.
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import - confirm before moving).
        from activities.models import Post

        # Map both "username" and "username@domain" to the profile URL
        possible_matches = {}
        for mention in post.mentions.all():
            if self.local:
                url = str(mention.urls.view)
            else:
                url = mention.absolute_profile_uri()
            possible_matches[mention.username] = url
            possible_matches[f"{mention.username}@{mention.domain_id}"] = url

        # Remembers which full handle first claimed each short name, so that a
        # second, different account with the same username is shown in full.
        collapse_name: dict[str, str] = {}

        def replacer(match):
            precursor = match.group(1)
            # NOTE(review): the handle is lower-cased here but the keys of
            # possible_matches are stored as-is - confirm that usernames and
            # domain IDs are always lower case.
            handle = match.group(2).lower()
            if "@" in handle:
                short_handle = handle.split("@", 1)[0]
            else:
                short_handle = handle
            if handle in possible_matches:
                if short_handle not in collapse_name:
                    collapse_name[short_handle] = handle
                elif collapse_name.get(short_handle) != handle:
                    short_handle = handle
                return f'{precursor}<a href="{possible_matches[handle]}">@{short_handle}</a>'
            else:
                return match.group()

        return Post.mention_regex.sub(replacer, html)

    def linkify_hashtags(self, html, identity) -> str:
        """
        Wraps every hashtag in the content in a link to its /tags/<hashtag>/
        page. The link is relative when rendering locally and otherwise
        absolute on the identity's domain; `identity` is only read in the
        non-local case.
        """
        from activities.models import Hashtag

        def replacer(match):
            hashtag = match.group(1)
            if self.local:
                return (
                    f'<a class="hashtag" href="/tags/{hashtag.lower()}/">#{hashtag}</a>'
                )
            else:
                return f'<a class="hashtag" href="https://{identity.domain.uri_domain}/tags/{hashtag.lower()}/">#{hashtag}</a>'

        return Hashtag.hashtag_regex.sub(replacer, html)

    def imageify_emojis(self, html: str, identity, include_local: bool = True) -> str:
        """
        Find :emoji: in content and convert to <img>. The emoji for the
        identity's domain are always considered; if include_local is True,
        the local emoji are considered as well.

        NOTE(review): local emoji are added after the domain's, so when both
        define the same shortcode the local one is the one rendered - confirm
        this precedence is intended.
        """
        from activities.models import Emoji

        # Emoji.for_domain is memoised and returns the cached list itself, so
        # copy it before extending - otherwise every call would grow (or, for
        # a domain-less identity, double) the list stored in the cache.
        emoji_set = list(Emoji.for_domain(identity.domain))
        if include_local:
            emoji_set.extend(Emoji.for_domain(None))

        possible_matches = {
            emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable
        }

        def replacer(match):
            fullcode = match.group(1).lower()
            if fullcode in possible_matches:
                return possible_matches[fullcode]
            # Unknown shortcodes are left exactly as written
            return match.group()

        return Emoji.emoji_regex.sub(replacer, html)

View file

@ -15,6 +15,7 @@ filterwarnings =
[mypy] [mypy]
warn_unused_ignores = True warn_unused_ignores = True
exclude = tests
[mypy-django.*] [mypy-django.*]
ignore_missing_imports = True ignore_missing_imports = True

View file

@ -1,5 +1,4 @@
{% extends "base.html" %} {% extends "base.html" %}
{% load emoji_tags %}
{% block title %}{{ identity }}{% endblock %} {% block title %}{{ identity }}{% endblock %}

View file

@ -1,4 +1,5 @@
from activities.models import Hashtag from activities.models import Hashtag
from core.html import ContentRenderer
def test_hashtag_from_content(): def test_hashtag_from_content():
@ -19,7 +20,7 @@ def test_hashtag_from_content():
def test_linkify_hashtag(): def test_linkify_hashtag():
linkify = Hashtag.linkify_hashtags linkify = lambda html: ContentRenderer(local=True).linkify_hashtags(html, None)
assert linkify("# hashtag") == "# hashtag" assert linkify("# hashtag") == "# hashtag"
assert ( assert (

View file

@ -9,6 +9,16 @@ def test_fetch_post(httpx_mock: HTTPXMock, config_system):
""" """
Tests that a post we don't have locally can be fetched by by_object_uri Tests that a post we don't have locally can be fetched by by_object_uri
""" """
httpx_mock.add_response(
url="https://example.com/test-actor",
json={
"@context": [
"https://www.w3.org/ns/activitystreams",
],
"id": "https://example.com/test-actor",
"type": "Person",
},
)
httpx_mock.add_response( httpx_mock.add_response(
url="https://example.com/test-post", url="https://example.com/test-post",
json={ json={

View file

@ -6,19 +6,19 @@ from core.ld import canonicalise
@pytest.mark.django_db @pytest.mark.django_db
def test_question_post(config_system, identity, remote_identity): def test_question_post(config_system, identity, remote_identity, httpx_mock):
data = { data = {
"cc": [], "cc": [],
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/activity", "id": "https://remote.test/test-actor/statuses/109519951621804608/activity",
"to": identity.absolute_profile_uri(), "to": identity.absolute_profile_uri(),
"type": "Create", "type": "Create",
"actor": "https://fosstodon.org/users/manfre", "actor": "https://remote.test/test-actor/",
"object": { "object": {
"cc": [], "cc": [],
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608", "id": "https://remote.test/test-actor/statuses/109519951621804608",
"to": identity.absolute_profile_uri(), "to": identity.absolute_profile_uri(),
"tag": [], "tag": [],
"url": "https://fosstodon.org/@manfre/109519951621804608", "url": "https://remote.test/test-actor/109519951621804608",
"type": "Question", "type": "Question",
"oneOf": [ "oneOf": [
{ {
@ -35,13 +35,13 @@ def test_question_post(config_system, identity, remote_identity):
"content": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>', "content": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>',
"endTime": "2022-12-18T22:03:59Z", "endTime": "2022-12-18T22:03:59Z",
"replies": { "replies": {
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies", "id": "https://remote.test/test-actor/statuses/109519951621804608/replies",
"type": "Collection", "type": "Collection",
"first": { "first": {
"next": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies?only_other_accounts=true&page=true", "next": "https://remote.test/test-actor/109519951621804608/replies?only_other_accounts=true&page=true",
"type": "CollectionPage", "type": "CollectionPage",
"items": [], "items": [],
"partOf": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies", "partOf": "https://remote.test/test-actor/109519951621804608/replies",
}, },
}, },
"published": "2022-12-15T22:03:59Z", "published": "2022-12-15T22:03:59Z",
@ -50,15 +50,9 @@ def test_question_post(config_system, identity, remote_identity):
"en": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>' "en": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>'
}, },
"as:sensitive": False, "as:sensitive": False,
"attributedTo": "https://fosstodon.org/users/manfre", "attributedTo": "https://remote.test/test-actor/",
"http://ostatus.org#atomUri": "https://fosstodon.org/users/manfre/statuses/109519951621804608", "toot:votersCount": 0,
"http://ostatus.org#conversation": "tag:fosstodon.org,2022-12-15:objectId=69494364:objectType=Conversation",
"http://joinmastodon.org/ns#votersCount": 0,
}, },
"@context": [
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
],
"published": "2022-12-15T22:03:59Z", "published": "2022-12-15T22:03:59Z",
} }

View file

@ -6,7 +6,7 @@ from users.models import Identity
@pytest.mark.django_db @pytest.mark.django_db
def test_post_context(identity: Identity): def test_post_context(identity: Identity, config_system):
""" """
Tests that post context fetching works correctly Tests that post context fetching works correctly
""" """

View file

@ -2,7 +2,7 @@ from datetime import timedelta
from django.utils import timezone from django.utils import timezone
from activities.templatetags.activity_tags import linkify_hashtags, timedeltashort from activities.templatetags.activity_tags import timedeltashort
def test_timedeltashort(): def test_timedeltashort():
@ -22,17 +22,3 @@ def test_timedeltashort():
assert timedeltashort(value - timedelta(days=364)) == "364d" assert timedeltashort(value - timedelta(days=364)) == "364d"
assert timedeltashort(value - timedelta(days=365)) == "1y" assert timedeltashort(value - timedelta(days=365)) == "1y"
assert timedeltashort(value - timedelta(days=366)) == "1y" assert timedeltashort(value - timedelta(days=366)) == "1y"
def test_linkify_hashtags():
"""
Tests that linkify_hashtags works correctly
"""
assert linkify_hashtags(None) == ""
assert linkify_hashtags("") == ""
assert (
linkify_hashtags("#Takahe")
== '<a class="hashtag" href="/tags/takahe/">#Takahe</a>'
)

View file

@ -1,4 +1,4 @@
from core.html import html_to_plaintext, sanitize_post from core.html import html_to_plaintext, sanitize_html
def test_html_to_plaintext(): def test_html_to_plaintext():
@ -17,5 +17,5 @@ def test_html_to_plaintext():
def test_sanitize_post(): def test_sanitize_post():
assert sanitize_post("<p>Hello!</p>") == "<p>Hello!</p>" assert sanitize_html("<p>Hello!</p>") == "<p>Hello!</p>"
assert sanitize_post("<p>It&#39;s great</p>") == "<p>It&#39;s great</p>" assert sanitize_html("<p>It&#39;s great</p>") == "<p>It&#39;s great</p>"

View file

@ -11,7 +11,7 @@ from django.utils import timezone
from django.utils.functional import lazy from django.utils.functional import lazy
from core.exceptions import ActorMismatchError from core.exceptions import ActorMismatchError
from core.html import sanitize_post, strip_html from core.html import ContentRenderer, strip_html
from core.ld import ( from core.ld import (
canonicalise, canonicalise,
format_ld_date, format_ld_date,
@ -192,20 +192,18 @@ class Identity(StatorModel):
@property @property
def safe_summary(self): def safe_summary(self):
from activities.templatetags.emoji_tags import imageify_emojis return ContentRenderer(local=True).render_identity(self.summary, self)
return imageify_emojis(sanitize_post(self.summary), self.domain)
@property @property
def safe_metadata(self): def safe_metadata(self):
from activities.templatetags.emoji_tags import imageify_emojis renderer = ContentRenderer(local=True)
if not self.metadata: if not self.metadata:
return [] return []
return [ return [
{ {
"name": imageify_emojis(strip_html(data["name"]), self.domain), "name": renderer.render_identity(data["name"], self, strip=True),
"value": imageify_emojis(strip_html(data["value"]), self.domain), "value": renderer.render_identity(data["value"], self, strip=True),
} }
for data in self.metadata for data in self.metadata
] ]
@ -279,9 +277,9 @@ class Identity(StatorModel):
""" """
Return the name_or_handle with any HTML substitutions made Return the name_or_handle with any HTML substitutions made
""" """
from activities.templatetags.emoji_tags import imageify_emojis return ContentRenderer(local=True).render_identity(
self.name_or_handle, self, strip=True
return imageify_emojis(self.name_or_handle, self.domain) )
@property @property
def handle(self): def handle(self):