From f87a138d4988e92a498a1b438a54545439c9eb91 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 5 Jan 2021 21:23:36 -0800 Subject: [PATCH 1/3] Better matching for links in statuses --- bookwyrm/outgoing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index 88377d335..f247b4283 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -296,8 +296,7 @@ def find_mentions(content): def to_markdown(content): ''' catch links and convert to markdown ''' content = re.sub( - r'([^(href=")])(https?:\/\/([A-Za-z\.\-_\/]+' \ - r'\.[A-Za-z]{2,}[A-Za-z\.\-_\/]+))', + r'([^(href=")]|^)(https?:\/\/([\w\.\-_]+\.[a-z]{2,}(\/[\w\.\-_\/]+)?))', r'\g<1><a href="\g<2>">\g<3></a>', content) content = markdown(content) From a25bc2383b667bbb0e0de3dc46f2d42632936add Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 6 Jan 2021 09:45:36 -0800 Subject: [PATCH 2/3] Improves tagging regex --- bookwyrm/outgoing.py | 9 ++++--- bookwyrm/tests/test_outgoing.py | 43 +++++++++++++++++++++++++++++++++ bookwyrm/utils/regex.py | 8 +++--- 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index f247b4283..7838b3351 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -293,12 +293,15 @@ def find_mentions(content): yield (match.group(), mention_user) -def to_markdown(content): - ''' catch links and convert to markdown ''' - content = re.sub( +def format_links(content): + return re.sub( r'([^(href=")]|^)(https?:\/\/([\w\.\-_]+\.[a-z]{2,}(\/[\w\.\-_\/]+)?))', r'\g<1><a href="\g<2>">\g<3></a>', content) + +def to_markdown(content): + ''' catch links and convert to markdown ''' + content = format_links(content) content = markdown(content) # sanitize resulting html sanitizer = InputHtmlParser() diff --git a/bookwyrm/tests/test_outgoing.py b/bookwyrm/tests/test_outgoing.py index e004d1090..c99b89c0b 100644 --- a/bookwyrm/tests/test_outgoing.py +++ b/bookwyrm/tests/test_outgoing.py @@ -447,3 +447,46 @@ class 
Outgoing(TestCase): self.assertEqual(reply.user, user) self.assertTrue(self.remote_user in reply.mention_users.all()) self.assertTrue(self.local_user in reply.mention_users.all()) + + def test_find_mentions(self): + ''' detect and look up @ mentions of users ''' + user = models.User.objects.create_user( + 'nutria@%s' % DOMAIN, 'nutria@nutria.com', 'password', + local=True, localname='nutria') + self.assertEqual(user.username, 'nutria@%s' % DOMAIN) + + self.assertEqual( + list(outgoing.find_mentions('@nutria'))[0], + ('@nutria', user) + ) + self.assertEqual( + list(outgoing.find_mentions('leading text @nutria'))[0], + ('@nutria', user) + ) + self.assertEqual( + list(outgoing.find_mentions('leading @nutria trailing text'))[0], + ('@nutria', user) + ) + self.assertEqual( + list(outgoing.find_mentions('@rat@example.com'))[0], + ('@rat@example.com', self.remote_user) + ) + + multiple = list(outgoing.find_mentions('@nutria and @rat@example.com')) + self.assertEqual(multiple[0], ('@nutria', user)) + self.assertEqual(multiple[1], ('@rat@example.com', self.remote_user)) + + with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw: + rw.return_value = self.local_user + self.assertEqual( + list(outgoing.find_mentions('@beep@beep.com'))[0], + ('@beep@beep.com', self.local_user) + ) + with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw: + rw.return_value = None + self.assertEqual(list(outgoing.find_mentions('@beep@beep.com')), []) + + self.assertEqual( + list(outgoing.find_mentions('@nutria@%s' % DOMAIN))[0], + ('@nutria@%s' % DOMAIN, user) + ) diff --git a/bookwyrm/utils/regex.py b/bookwyrm/utils/regex.py index 9553c913a..b087b5649 100644 --- a/bookwyrm/utils/regex.py +++ b/bookwyrm/utils/regex.py @@ -1,10 +1,10 @@ ''' defining regexes for regularly used concepts ''' -domain = r'[a-z-A-Z0-9_\-]+\.[a-z]+' -localname = r'@?[a-zA-Z_\-\.0-9]+' +domain = r'[\w_\-\.]+\.[a-z]{2,}' +localname = r'@?[a-zA-Z_\-\.0-9]+\b' strict_localname = r'@[a-zA-Z_\-\.0-9]+' username = 
r'%s(@%s)?' % (localname, domain) -strict_username = r'%s(@%s)?' % (strict_localname, domain) -full_username = r'%s@%s' % (localname, domain) +strict_username = r'\B%s(@%s)?\b' % (strict_localname, domain) +full_username = r'\B%s@%s\b' % (localname, domain) # should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2; bookwyrm_user_agent = r'\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;' From 9e07f094ada98aec9c1932bf3c3013c71d7da684 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 6 Jan 2021 10:08:43 -0800 Subject: [PATCH 3/3] Improves link detecting regex --- bookwyrm/outgoing.py | 3 ++- bookwyrm/tests/test_outgoing.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index 7838b3351..b438f5d18 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -294,8 +294,9 @@ def find_mentions(content): def format_links(content): + ''' detect and format links ''' return re.sub( - r'([^(href=")]|^)(https?:\/\/([\w\.\-_]+\.[a-z]{2,}(\/[\w\.\-_\/]+)?))', + r'([^(href=")]|^)(https?:\/\/(%s([\w\.\-_\/])*))' % regex.domain, r'\g<1><a href="\g<2>">\g<3></a>', content) diff --git a/bookwyrm/tests/test_outgoing.py b/bookwyrm/tests/test_outgoing.py index c99b89c0b..498b43d9c 100644 --- a/bookwyrm/tests/test_outgoing.py +++ b/bookwyrm/tests/test_outgoing.py @@ -490,3 +490,16 @@ class Outgoing(TestCase): list(outgoing.find_mentions('@nutria@%s' % DOMAIN))[0], ('@nutria@%s' % DOMAIN, user) ) + + def test_format_links(self): + ''' find and format urls into a tags ''' + url = 'http://www.fish.com/' + self.assertEqual( + outgoing.format_links(url), + '<a href="%s">www.fish.com/</a>' % url) + url = 'https://archive.org/details/dli.granth.72113/page/n25/mode/2up' + self.assertEqual( + outgoing.format_links(url), + '<a href="%s">' \ + 'archive.org/details/dli.granth.72113/page/n25/mode/2up</a>' \ + % url)