Apply Mastodon style link text shortening (#426)

This commit is contained in:
Michael Manfre 2023-01-16 13:59:46 -05:00 committed by GitHub
parent 54e7755080
commit 9b6ceee490
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 2 deletions

View file

@ -2,6 +2,7 @@ import re
from functools import partial from functools import partial
import bleach import bleach
import bleach.callbacks
from bleach.html5lib_shim import Filter from bleach.html5lib_shim import Filter
from bleach.linkifier import LinkifyFilter from bleach.linkifier import LinkifyFilter
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
@ -90,6 +91,35 @@ def allow_a(tag: str, name: str, value: str):
return False return False
def shorten_link_text(attrs, new=False):
    """
    Applies Mastodon's link shortening behavior where URL text links are
    shortened by removing the scheme and only showing the first 30 chars.

    Orig:
        <a>https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened</a>

    Becomes:
        <a>social.example.com/a-long/path</a>

    Written as a linkify callback: ``attrs`` is the link's attribute dict,
    keyed by ``(namespace, name)`` tuples plus the special ``"_text"`` key
    that holds the link text. The dict is modified in place and returned.
    ``new`` is accepted to match the callback signature and is not used.
    """
    max_length = 30
    href = attrs.get((None, "href"))
    # Fall back to the href when the link has no text of its own
    text = attrs.get("_text") or href
    if text and "://" in text and len(text) > max_length:
        without_scheme = text.split("://", 1)[-1]
        attrs["_text"] = without_scheme[:max_length]
        # Only flag the link as truncated when characters were really cut;
        # a URL that fits once its scheme is removed must not get a "…".
        if len(without_scheme) > max_length:
            attrs[(None, "class")] = " ".join(
                filter(None, [attrs.pop((None, "class"), ""), "ellipsis"])
            )
        # Add the full URL into the title for easier user inspection, but
        # never write an empty/None title when the link has no href.
        if href:
            attrs[(None, "title")] = href
    return attrs
# Callbacks handed to LinkifyFilter via `callbacks=`; in list order: bleach's
# nofollow callback, then the link text shortener.
linkify_callbacks = [bleach.callbacks.nofollow, shorten_link_text]
def sanitize_html(post_html: str) -> str: def sanitize_html(post_html: str) -> str:
""" """
Only allows a, br, p and span tags, and class attributes. Only allows a, br, p and span tags, and class attributes.
@ -100,7 +130,10 @@ def sanitize_html(post_html: str) -> str:
"a": allow_a, "a": allow_a,
"p": ["class"], "p": ["class"],
}, },
filters=[partial(LinkifyFilter, url_re=url_regex), MastodonStrictTagFilter], filters=[
partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks),
MastodonStrictTagFilter,
],
strip=True, strip=True,
) )
return mark_safe(cleaner.clean(post_html)) return mark_safe(cleaner.clean(post_html))
@ -113,7 +146,9 @@ def strip_html(post_html: str, *, linkify: bool = True) -> str:
cleaner = bleach.Cleaner( cleaner = bleach.Cleaner(
tags=[], tags=[],
strip=True, strip=True,
filters=[partial(LinkifyFilter, url_re=url_regex)] if linkify else [], filters=[partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks)]
if linkify
else [],
) )
return mark_safe(cleaner.clean(post_html)) return mark_safe(cleaner.clean(post_html))

View file

@ -392,6 +392,10 @@ img.emoji {
height: 0.8em; height: 0.8em;
} }
/* Draws a trailing "…" after any element carrying the "ellipsis" class,
   which is used to mark link text that has been shortened */
.ellipsis::after {
content: "…";
}
/* Generic markdown styling and sections */ /* Generic markdown styling and sections */
.no-sidebar section { .no-sidebar section {

View file

@ -37,6 +37,23 @@ def test_sanitize_post():
) )
def test_shorten_url():
    """
    A bare long URL gets shortened link text, while an anchor whose text is
    ordinary prose is left with its text untouched.
    """
    full_url = (
        "https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened"
    )

    # Bare URL: linkified, shortened, flagged with "ellipsis" and a title
    shortened_expected = f'<p><a href="{full_url}" rel="nofollow" class="ellipsis" title="{full_url}">social.example.com/a-long/path</a></p>'
    shortened_actual = sanitize_html(f"<p>{full_url}</p>")
    assert shortened_actual == shortened_expected

    # Existing anchor with non-URL text: only rel="nofollow" is added
    prose_input = f'<p><a href="{full_url}">This is a long link text, but cannot be shortened as a URL</a></p>'
    prose_expected = f'<p><a href="{full_url}" rel="nofollow">This is a long link text, but cannot be shortened as a URL</a></p>'
    assert sanitize_html(prose_input) == prose_expected
@pytest.mark.django_db @pytest.mark.django_db
def test_link_preservation(): def test_link_preservation():
""" """