2022-12-20 13:55:14 +00:00
|
|
|
import pytest
|
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
from core.html import FediverseHtmlParser
|
2022-11-27 19:09:08 +00:00
|
|
|
|
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
@pytest.mark.django_db
|
|
|
|
def test_parser(identity):
|
|
|
|
"""
|
|
|
|
Validates the HtmlParser in its various output modes
|
|
|
|
"""
|
2022-11-27 19:09:08 +00:00
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
# Basic tag allowance
|
|
|
|
parser = FediverseHtmlParser("<p>Hello!</p><script></script>")
|
|
|
|
assert parser.html == "<p>Hello!</p>"
|
|
|
|
assert parser.plain_text == "Hello!"
|
|
|
|
|
|
|
|
# Newline erasure
|
|
|
|
parser = FediverseHtmlParser("<p>Hi!</p>\n\n<p>How are you?</p>")
|
|
|
|
assert parser.html == "<p>Hi!</p><p>How are you?</p>"
|
|
|
|
assert parser.plain_text == "Hi!\n\nHow are you?"
|
|
|
|
|
|
|
|
# Trying to be evil
|
|
|
|
parser = FediverseHtmlParser("<scri<span></span>pt>")
|
|
|
|
assert "<scr" not in parser.html
|
|
|
|
parser = FediverseHtmlParser("<scri #hashtag pt>")
|
|
|
|
assert "<scr" not in parser.html
|
|
|
|
|
|
|
|
# Entities are escaped
|
|
|
|
parser = FediverseHtmlParser("<p>It's great</p>", find_hashtags=True)
|
|
|
|
assert parser.html == "<p>It's great</p>"
|
|
|
|
assert parser.plain_text == "It's great"
|
|
|
|
assert parser.hashtags == set()
|
|
|
|
|
|
|
|
# Linkify works, but only with protocol prefixes
|
|
|
|
parser = FediverseHtmlParser("<p>test.com</p>")
|
|
|
|
assert parser.html == "<p>test.com</p>"
|
|
|
|
assert parser.plain_text == "test.com"
|
|
|
|
parser = FediverseHtmlParser("<p>https://test.com</p>")
|
2022-11-27 19:09:08 +00:00
|
|
|
assert (
|
2023-01-30 00:46:22 +00:00
|
|
|
parser.html == '<p><a href="https://test.com" rel="nofollow">test.com</a></p>'
|
2022-11-27 19:09:08 +00:00
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.plain_text == "https://test.com"
|
2022-11-27 19:09:08 +00:00
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
# Links are preserved
|
|
|
|
parser = FediverseHtmlParser("<a href='https://takahe.social'>takahe social</a>")
|
2022-11-27 19:09:08 +00:00
|
|
|
assert (
|
2023-01-30 00:46:22 +00:00
|
|
|
parser.html
|
|
|
|
== '<a href="https://takahe.social" rel="nofollow">takahe social</a>'
|
2022-11-27 19:09:08 +00:00
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.plain_text == "https://takahe.social"
|
2022-11-29 05:34:14 +00:00
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
# Very long links are shortened
|
|
|
|
full_url = "https://social.example.com/a-long/path/that-should-be-shortened"
|
|
|
|
parser = FediverseHtmlParser(f"<p>{full_url}</p>")
|
2023-01-18 06:41:33 +00:00
|
|
|
assert (
|
2023-01-30 00:46:22 +00:00
|
|
|
parser.html
|
|
|
|
== f'<p><a href="{full_url}" rel="nofollow" class="ellipsis" title="{full_url.removeprefix("https://")}">social.example.com/a-long/path</a></p>'
|
2023-01-18 06:41:33 +00:00
|
|
|
)
|
2022-12-20 13:10:35 +00:00
|
|
|
assert (
|
2023-01-30 00:46:22 +00:00
|
|
|
parser.plain_text
|
|
|
|
== "https://social.example.com/a-long/path/that-should-be-shortened"
|
2022-12-20 13:10:35 +00:00
|
|
|
)
|
2022-12-20 13:55:14 +00:00
|
|
|
|
2023-01-30 00:46:22 +00:00
|
|
|
# Make sure things that look like mentions are left alone with no mentions supplied.
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
"<p>@test@example.com</p>",
|
|
|
|
find_mentions=True,
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
2023-01-16 18:59:46 +00:00
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.html == "<p>@test@example.com</p>"
|
|
|
|
assert parser.plain_text == "@test@example.com"
|
|
|
|
assert parser.mentions == {"test@example.com"}
|
|
|
|
|
|
|
|
# Make sure mentions work when there is a mention supplied
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
"<p>@test@example.com</p>",
|
|
|
|
mentions=[identity],
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
2023-01-16 18:59:46 +00:00
|
|
|
)
|
2023-05-09 15:55:19 +00:00
|
|
|
assert (
|
|
|
|
parser.html == '<p><a href="/@test@example.com/" class="mention">@test</a></p>'
|
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.plain_text == "@test@example.com"
|
|
|
|
assert parser.mentions == {"test@example.com"}
|
|
|
|
|
|
|
|
# Ensure mentions are case insensitive
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
"<p>@TeSt@ExamPle.com</p>",
|
|
|
|
mentions=[identity],
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
2023-01-16 18:59:46 +00:00
|
|
|
)
|
2023-05-09 15:55:19 +00:00
|
|
|
assert (
|
|
|
|
parser.html == '<p><a href="/@test@example.com/" class="mention">@TeSt</a></p>'
|
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.plain_text == "@TeSt@ExamPle.com"
|
|
|
|
assert parser.mentions == {"test@example.com"}
|
|
|
|
|
|
|
|
# Ensure hashtags are linked, even through spans, but not within hrefs
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
'<a href="http://example.com#notahashtag">something</a> <span>#</span>hashtag <a href="https://example.com/tags/hashtagtwo/">#hashtagtwo</a>',
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
2022-12-20 13:55:14 +00:00
|
|
|
)
|
2023-01-16 05:32:04 +00:00
|
|
|
assert (
|
2023-01-30 00:46:22 +00:00
|
|
|
parser.html
|
2023-02-20 03:35:54 +00:00
|
|
|
== '<a href="http://example.com#notahashtag" rel="nofollow">something</a> <a href="/tags/hashtag/" rel="tag">#hashtag</a> <a href="/tags/hashtagtwo/" rel="tag">#hashtagtwo</a>'
|
2023-01-16 05:32:04 +00:00
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.plain_text == "http://example.com#notahashtag #hashtag #hashtagtwo"
|
|
|
|
assert parser.hashtags == {"hashtag", "hashtagtwo"}
|
|
|
|
|
|
|
|
# Ensure lists are rendered reasonably
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
"<p>List:</p><ul><li>One</li><li>Two</li><li>Three</li></ul><p>End!</p>",
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
2022-12-25 04:04:25 +00:00
|
|
|
)
|
2023-01-30 00:46:22 +00:00
|
|
|
assert parser.html == "<p>List:</p><p>One<br>Two<br>Three</p><p>End!</p>"
|
|
|
|
assert parser.plain_text == "List:\n\nOne\nTwo\nThree\n\nEnd!"
|
2023-03-22 16:43:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.django_db
|
|
|
|
def test_parser_same_name_mentions(remote_identity, remote_identity2):
|
|
|
|
"""
|
|
|
|
Ensure mentions that differ only by link are parsed right
|
|
|
|
"""
|
|
|
|
|
|
|
|
parser = FediverseHtmlParser(
|
|
|
|
'<span class="h-card"><a href="https://remote.test/@test/" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>test</span></a></span> <span class="h-card"><a href="https://remote2.test/@test/" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>test</span></a></span>',
|
|
|
|
mentions=[remote_identity, remote_identity2],
|
|
|
|
find_hashtags=True,
|
|
|
|
find_emojis=True,
|
|
|
|
)
|
|
|
|
assert (
|
|
|
|
parser.html
|
2023-05-09 15:55:19 +00:00
|
|
|
== '<a href="/@test@remote.test/" class="mention">@test</a> <a href="/@test@remote2.test/" class="mention">@test</a>'
|
2023-03-22 16:43:51 +00:00
|
|
|
)
|
|
|
|
assert parser.plain_text == "@test @test"
|