mirror of
https://github.com/jointakahe/takahe.git
synced 2024-11-26 09:11:00 +00:00
Change linkifying to match all (and only) http(s)-prefixed links
This commit is contained in:
parent
202046247c
commit
78d2283458
2 changed files with 32 additions and 2 deletions
22
core/html.py
22
core/html.py
|
@ -1,7 +1,21 @@
|
||||||
|
import re
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
import bleach
|
import bleach
|
||||||
from bleach.linkifier import LinkifyFilter
|
from bleach.linkifier import LinkifyFilter
|
||||||
from django.utils.safestring import mark_safe
|
from django.utils.safestring import mark_safe
|
||||||
|
|
||||||
|
url_regex = re.compile(
|
||||||
|
r"""\(* # Match any opening parentheses.
|
||||||
|
\b(?<![@.])(?:https?://(?:(?:\w+:)?\w+@)?) # http://
|
||||||
|
([\w-]+\.)+(?:[\w-]+)(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
|
||||||
|
(?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
|
||||||
|
# /path/zz (excluding "unsafe" chars from RFC 1738,
|
||||||
|
# except for # and ~, which happen in practice)
|
||||||
|
""",
|
||||||
|
re.IGNORECASE | re.VERBOSE | re.UNICODE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def allow_a(tag: str, name: str, value: str):
|
def allow_a(tag: str, name: str, value: str):
|
||||||
if name in ["href", "title", "class"]:
|
if name in ["href", "title", "class"]:
|
||||||
|
@ -26,7 +40,7 @@ def sanitize_html(post_html: str) -> str:
|
||||||
"p": ["class"],
|
"p": ["class"],
|
||||||
"span": ["class"],
|
"span": ["class"],
|
||||||
},
|
},
|
||||||
filters=[LinkifyFilter],
|
filters=[partial(LinkifyFilter, url_re=url_regex)],
|
||||||
strip=True,
|
strip=True,
|
||||||
)
|
)
|
||||||
return mark_safe(cleaner.clean(post_html))
|
return mark_safe(cleaner.clean(post_html))
|
||||||
|
@ -36,7 +50,11 @@ def strip_html(post_html: str) -> str:
|
||||||
"""
|
"""
|
||||||
Strips all tags from the text, then linkifies it.
|
Strips all tags from the text, then linkifies it.
|
||||||
"""
|
"""
|
||||||
cleaner = bleach.Cleaner(tags=[], strip=True, filters=[LinkifyFilter])
|
cleaner = bleach.Cleaner(
|
||||||
|
tags=[],
|
||||||
|
strip=True,
|
||||||
|
filters=[partial(LinkifyFilter, url_re=url_regex)],
|
||||||
|
)
|
||||||
return mark_safe(cleaner.clean(post_html))
|
return mark_safe(cleaner.clean(post_html))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,3 +19,15 @@ def test_sanitize_post():
|
||||||
|
|
||||||
assert sanitize_html("<p>Hello!</p>") == "<p>Hello!</p>"
|
assert sanitize_html("<p>Hello!</p>") == "<p>Hello!</p>"
|
||||||
assert sanitize_html("<p>It's great</p>") == "<p>It's great</p>"
|
assert sanitize_html("<p>It's great</p>") == "<p>It's great</p>"
|
||||||
|
|
||||||
|
# Note that we only want to linkify things with protocol prefixes to prevent
|
||||||
|
# too many false positives.
|
||||||
|
assert sanitize_html("<p>test.com</p>") == "<p>test.com</p>"
|
||||||
|
assert (
|
||||||
|
sanitize_html("<p>https://test.com</p>")
|
||||||
|
== '<p><a href="https://test.com" rel="nofollow">https://test.com</a></p>'
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
sanitize_html("<p>@someone@subdomain.some-domain.com</p>")
|
||||||
|
== "<p>@someone@subdomain.some-domain.com</p>"
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in a new issue