Improves tagging regex

This commit is contained in:
Mouse Reeve 2021-01-06 09:45:36 -08:00
parent f87a138d49
commit a25bc2383b
3 changed files with 53 additions and 7 deletions

View file

@@ -293,12 +293,15 @@ def find_mentions(content):
yield (match.group(), mention_user)
def to_markdown(content):
''' catch links and convert to markdown '''
content = re.sub(
def format_links(content):
    ''' wrap bare http(s) urls in html anchor tags

    Each matched url is replaced with ``<a href="URL">LABEL</a>``, where
    LABEL is the url without its ``http://`` / ``https://`` scheme.

    Two kinds of url are deliberately left untouched:

    * a url that is already a double-quoted attribute value, such as
      ``href="https://..."``
    * a url that is the target of a markdown link, ``[text](https://...)``

    :param content: text that may contain bare urls
    :return: ``content`` with every other matched url wrapped in an anchor
    '''
    # A character class such as [^(href=")] does not mean "not preceded by
    # href=" -- it rejects a url preceded by ANY ONE of the characters
    # ( h r e f = " ), so "(https://example.com)" would never be linked.
    # Negative lookbehinds state the two intended exclusions exactly, and
    # since they consume nothing the preceding character no longer needs to
    # be captured and re-emitted.
    return re.sub(
        r'(?<!=")(?<!\]\()'
        r'(https?:\/\/([\w\.\-_]+\.[a-z]{2,}(\/[\w\.\-_\/]+)?))',
        r'<a href="\g<1>">\g<2></a>',
        content)
def to_markdown(content):
''' catch links and convert to markdown '''
content = format_links(content)
content = markdown(content)
# sanitize resulting html
sanitizer = InputHtmlParser()

View file

@@ -447,3 +447,46 @@ class Outgoing(TestCase):
self.assertEqual(reply.user, user)
self.assertTrue(self.remote_user in reply.mention_users.all())
self.assertTrue(self.local_user in reply.mention_users.all())
def test_find_mentions(self):
''' find_mentions yields a (matched text, user) pair for each @ mention '''
# local user that the @nutria mentions below are expected to resolve to
user = models.User.objects.create_user(
'nutria@%s' % DOMAIN, 'nutria@nutria.com', 'password',
local=True, localname='nutria')
# the stored username is the localname qualified with this instance's DOMAIN
self.assertEqual(user.username, 'nutria@%s' % DOMAIN)
# a bare @localname with no domain resolves to the local user
self.assertEqual(
list(outgoing.find_mentions('@nutria'))[0],
('@nutria', user)
)
# text before the mention is not part of the matched string
self.assertEqual(
list(outgoing.find_mentions('leading text @nutria'))[0],
('@nutria', user)
)
# text on both sides of the mention is not part of the matched string
self.assertEqual(
list(outgoing.find_mentions('leading @nutria trailing text'))[0],
('@nutria', user)
)
# a mention carrying a domain resolves to self.remote_user
# (presumably created in setUp, which is not visible here)
self.assertEqual(
list(outgoing.find_mentions('@rat@example.com'))[0],
('@rat@example.com', self.remote_user)
)
# several mentions in one string are yielded in the order they appear
multiple = list(outgoing.find_mentions('@nutria and @rat@example.com'))
self.assertEqual(multiple[0], ('@nutria', user))
self.assertEqual(multiple[1], ('@rat@example.com', self.remote_user))
# with handle_remote_webfinger patched to return a user, the mention
# is paired with that returned user
with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw:
rw.return_value = self.local_user
self.assertEqual(
list(outgoing.find_mentions('@beep@beep.com'))[0],
('@beep@beep.com', self.local_user)
)
# with handle_remote_webfinger patched to return None, nothing is yielded
with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw:
rw.return_value = None
self.assertEqual(list(outgoing.find_mentions('@beep@beep.com')), [])
# the local user addressed by full username (localname@DOMAIN)
# NOTE(review): indentation is not visible here, so it is unclear whether
# this assertion runs inside the patch above -- confirm against the file
self.assertEqual(
list(outgoing.find_mentions('@nutria@%s' % DOMAIN))[0],
('@nutria@%s' % DOMAIN, user)
)

View file

@@ -1,10 +1,10 @@
''' regex pattern strings for usernames, domains and the BookWyrm user agent '''
# NOTE(review): `domain` and `localname` are each assigned twice below, and
# `strict_username` / `full_username` twice further down. Read top to bottom,
# the later assignment wins; the earlier lines look like the pre-change side
# of the diff -- confirm against the actual file.
domain = r'[a-z-A-Z0-9_\-]+\.[a-z]+'
localname = r'@?[a-zA-Z_\-\.0-9]+'
# one or more word characters, hyphens or dots, followed by a dot and at
# least two letters a-z
domain = r'[\w_\-\.]+\.[a-z]{2,}'
# optional leading @, then letters, digits, underscore, hyphen or dot,
# ending on a word boundary
localname = r'@?[a-zA-Z_\-\.0-9]+\b'
# same character set as localname, but the leading @ is required and there
# is no trailing word boundary
strict_localname = r'@[a-zA-Z_\-\.0-9]+'
# a localname optionally followed by @domain
username = r'%s(@%s)?' % (localname, domain)
strict_username = r'%s(@%s)?' % (strict_localname, domain)
full_username = r'%s@%s' % (localname, domain)
# strict_localname with an optional @domain, anchored so the match does not
# start at a word boundary (\B) and does end on one (\b)
strict_username = r'\B%s(@%s)?\b' % (strict_localname, domain)
# localname with a required @domain, anchored the same way
full_username = r'\B%s@%s\b' % (localname, domain)
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
bookwyrm_user_agent = r'\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;'