mirror of
https://github.com/jointakahe/takahe.git
synced 2024-12-23 05:56:29 +00:00
Move linkifying to all http-prefixed links
This commit is contained in:
parent
202046247c
commit
78d2283458
2 changed files with 32 additions and 2 deletions
22
core/html.py
22
core/html.py
|
@ -1,7 +1,21 @@
|
|||
import re
|
||||
from functools import partial
|
||||
|
||||
import bleach
|
||||
from bleach.linkifier import LinkifyFilter
|
||||
from django.utils.safestring import mark_safe
|
||||
|
||||
url_regex = re.compile(
|
||||
r"""\(* # Match any opening parentheses.
|
||||
\b(?<![@.])(?:https?://(?:(?:\w+:)?\w+@)?) # http://
|
||||
([\w-]+\.)+(?:[\w-]+)(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
|
||||
(?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
|
||||
# /path/zz (excluding "unsafe" chars from RFC 1738,
|
||||
# except for # and ~, which happen in practice)
|
||||
""",
|
||||
re.IGNORECASE | re.VERBOSE | re.UNICODE,
|
||||
)
|
||||
|
||||
|
||||
def allow_a(tag: str, name: str, value: str):
|
||||
if name in ["href", "title", "class"]:
|
||||
|
@ -26,7 +40,7 @@ def sanitize_html(post_html: str) -> str:
|
|||
"p": ["class"],
|
||||
"span": ["class"],
|
||||
},
|
||||
filters=[LinkifyFilter],
|
||||
filters=[partial(LinkifyFilter, url_re=url_regex)],
|
||||
strip=True,
|
||||
)
|
||||
return mark_safe(cleaner.clean(post_html))
|
||||
|
@ -36,7 +50,11 @@ def strip_html(post_html: str) -> str:
|
|||
"""
|
||||
Strips all tags from the text, then linkifies it.
|
||||
"""
|
||||
cleaner = bleach.Cleaner(tags=[], strip=True, filters=[LinkifyFilter])
|
||||
cleaner = bleach.Cleaner(
|
||||
tags=[],
|
||||
strip=True,
|
||||
filters=[partial(LinkifyFilter, url_re=url_regex)],
|
||||
)
|
||||
return mark_safe(cleaner.clean(post_html))
|
||||
|
||||
|
||||
|
|
|
@ -19,3 +19,15 @@ def test_sanitize_post():
|
|||
|
||||
assert sanitize_html("<p>Hello!</p>") == "<p>Hello!</p>"
|
||||
assert sanitize_html("<p>It's great</p>") == "<p>It's great</p>"
|
||||
|
||||
# Note that we only want to linkify things with protocol prefixes to prevent
|
||||
# too many false positives.
|
||||
assert sanitize_html("<p>test.com</p>") == "<p>test.com</p>"
|
||||
assert (
|
||||
sanitize_html("<p>https://test.com</p>")
|
||||
== '<p><a href="https://test.com" rel="nofollow">https://test.com</a></p>'
|
||||
)
|
||||
assert (
|
||||
sanitize_html("<p>@someone@subdomain.some-domain.com</p>")
|
||||
== "<p>@someone@subdomain.some-domain.com</p>"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue