import pytest

from core.html import FediverseHtmlParser

# NOTE(review): the copy of this module that was reviewed had been flattened
# onto a handful of lines and every HTML tag inside its string literals had
# been stripped, leaving it syntactically invalid. The markup below was
# rebuilt from what survived (visible text, comments, keyword arguments and
# the plain_text / mentions / hashtags assertions). Exact attribute lists in
# the expected ``parser.html`` values could NOT be recovered from that copy;
# confirm each one against the parser's real output (or version control)
# before merging.


def _expected_mention_link(href, short_handle):
    """
    Builds the markup the parser is expected to emit for a resolved mention.

    NOTE(review): attribute list is a best-effort reconstruction - confirm
    against FediverseHtmlParser's mention rendering. Kept in one place so a
    correction only has to be made once.
    """
    return (
        f'<span class="h-card"><a href="{href}" class="u-url mention" '
        'rel="nofollow noopener noreferrer" target="_blank">'
        f"@<span>{short_handle}</span></a></span>"
    )


@pytest.mark.django_db
def test_parser(identity):
    """
    Validates the HtmlParser in its various output modes
    """

    # Basic tag allowance
    parser = FediverseHtmlParser("<p>Hello!</p><script></script>")
    assert parser.html == "<p>Hello!</p>"
    assert parser.plain_text == "Hello!"

    # Newline erasure
    parser = FediverseHtmlParser("<p>Hi!</p>\n\n<p>How are you?</p>")
    assert parser.html == "<p>Hi!</p><p>How are you?</p>"
    assert parser.plain_text == "Hi!\n\nHow are you?"

    # Trying to be evil
    # NOTE(review): these two inputs were lost entirely in the reviewed copy
    # (only the comment survived); presumably they checked that a split-up
    # <script> tag cannot be smuggled through - confirm.
    parser = FediverseHtmlParser("<scri<span></span>pt>")
    assert "<scr" not in parser.html
    parser = FediverseHtmlParser("<scri #hashtag pt>")
    assert "<scr" not in parser.html

    # Entities are escaped
    # NOTE(review): the surviving hashtags assertion implies find_hashtags was
    # enabled here; the entity spelling in the expected html is an assumption.
    parser = FediverseHtmlParser("<p>It&#39;s great</p>", find_hashtags=True)
    assert parser.html == "<p>It&#x27;s great</p>"
    assert parser.plain_text == "It's great"
    assert parser.hashtags == set()

    # Linkify works, but only with protocol prefixes
    parser = FediverseHtmlParser("<p>test.com</p>")
    assert parser.html == "<p>test.com</p>"
    assert parser.plain_text == "test.com"
    parser = FediverseHtmlParser("<p>https://test.com</p>")
    # NOTE(review): link attributes reconstructed - confirm.
    assert parser.html == (
        '<p><a href="https://test.com" rel="nofollow">test.com</a></p>'
    )
    assert parser.plain_text == "https://test.com"

    # Links are preserved
    parser = FediverseHtmlParser("<a href='https://takahe.social'>takahe social</a>")
    # NOTE(review): link attributes reconstructed - confirm.
    assert parser.html == (
        '<a href="https://takahe.social" rel="nofollow">takahe social</a>'
    )
    assert parser.plain_text == "https://takahe.social"

    # Very long links are shortened
    full_url = "https://social.example.com/a-long/path/that-should-be-shortened"
    url_without_protocol = full_url.removeprefix("https://")
    parser = FediverseHtmlParser(f"<p>{full_url}</p>")
    # NOTE(review): the surviving text shows the protocol, a visible middle
    # part and a hidden tail; the class/title attributes and the exact split
    # point are reconstructed - confirm.
    assert parser.html == (
        f'<p><a href="{full_url}" rel="nofollow" class="ellipsis" '
        f'title="{url_without_protocol}">'
        '<span class="invisible">https://</span>'
        '<span class="ellipsis">social.example.com/a-long/path</span>'
        '<span class="invisible">/that-should-be-shortened</span>'
        "</a></p>"
    )
    assert (
        parser.plain_text
        == "https://social.example.com/a-long/path/that-should-be-shortened"
    )

    # Make sure things that look like mentions are left alone with no mentions supplied.
    parser = FediverseHtmlParser(
        "<p>@test@example.com</p>",
        find_mentions=True,
        find_hashtags=True,
        find_emojis=True,
    )
    assert parser.html == "<p>@test@example.com</p>"
    assert parser.plain_text == "@test@example.com"
    assert parser.mentions == {"test@example.com"}

    # Make sure mentions work when there is a mention supplied
    # NOTE(review): assumes the ``identity`` fixture is test@example.com and
    # links to /@test@example.com/ - verify against the fixture definition.
    parser = FediverseHtmlParser(
        "<p>@test@example.com</p>",
        mentions=[identity],
        find_hashtags=True,
        find_emojis=True,
    )
    assert parser.html == (
        "<p>" + _expected_mention_link("/@test@example.com/", "test") + "</p>"
    )
    assert parser.plain_text == "@test@example.com"
    assert parser.mentions == {"test@example.com"}

    # Ensure mentions are case insensitive
    parser = FediverseHtmlParser(
        "<p>@TeSt@ExamPle.com</p>",
        mentions=[identity],
        find_hashtags=True,
        find_emojis=True,
    )
    assert parser.html == (
        "<p>" + _expected_mention_link("/@test@example.com/", "TeSt") + "</p>"
    )
    assert parser.plain_text == "@TeSt@ExamPle.com"
    assert parser.mentions == {"test@example.com"}

    # Ensure hashtags are linked, even through spans, but not within hrefs
    parser = FediverseHtmlParser(
        '<a href="http://example.com#notahashtag">something</a> '
        "<span>#</span>hashtag "
        '<a href="https://example.com/tags/hashtagtwo/">#hashtagtwo</a>',
        find_hashtags=True,
        find_emojis=True,
    )
    # NOTE(review): tag-link attributes and URLs reconstructed - confirm.
    assert parser.html == (
        '<a href="http://example.com#notahashtag" rel="nofollow">something</a> '
        '<a href="/tags/hashtag/" rel="tag">#hashtag</a> '
        '<a href="/tags/hashtagtwo/" rel="tag">#hashtagtwo</a>'
    )
    assert parser.plain_text == "http://example.com#notahashtag #hashtag #hashtagtwo"
    assert parser.hashtags == {"hashtag", "hashtagtwo"}

    # Ensure lists are rendered reasonably
    parser = FediverseHtmlParser(
        "<p>List:</p><ul><li>One</li><li>Two</li><li>Three</li></ul><p>End!</p>",
        find_hashtags=True,
        find_emojis=True,
    )
    assert parser.html == "<p>List:</p><p>One<br>Two<br>Three</p><p>End!</p>"
    assert parser.plain_text == "List:\n\nOne\nTwo\nThree\n\nEnd!"


@pytest.mark.django_db
def test_parser_same_name_mentions(remote_identity, remote_identity2):
    """
    Ensure mentions that differ only by link are parsed right
    """

    # NOTE(review): the remote hosts and the incoming mention markup are
    # reconstructed; the fixtures are assumed to be @test on two different
    # remote domains - verify against the fixture definitions.
    parser = FediverseHtmlParser(
        '<span class="h-card"><a href="https://remote.test/@test/" '
        'class="u-url mention" rel="nofollow">@<span>test</span></a></span> '
        '<span class="h-card"><a href="https://remote2.test/@test/" '
        'class="u-url mention" rel="nofollow">@<span>test</span></a></span>',
        mentions=[remote_identity, remote_identity2],
        find_hashtags=True,
        find_emojis=True,
    )
    assert parser.html == (
        _expected_mention_link("/@test@remote.test/", "test")
        + " "
        + _expected_mention_link("/@test@remote2.test/", "test")
    )
    assert parser.plain_text == "@test @test"