mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-03 13:58:43 +00:00
Merge pull request #488 from mouse-reeve/url-regex
Better matching for links in statuses
This commit is contained in:
commit
e7e21c194e
3 changed files with 68 additions and 9 deletions
|
@ -293,13 +293,16 @@ def find_mentions(content):
|
||||||
yield (match.group(), mention_user)
|
yield (match.group(), mention_user)
|
||||||
|
|
||||||
|
|
||||||
def to_markdown(content):
|
def format_links(content):
|
||||||
''' catch links and convert to markdown '''
|
''' detect and format links '''
|
||||||
content = re.sub(
|
return re.sub(
|
||||||
r'([^(href=")])(https?:\/\/([A-Za-z\.\-_\/]+' \
|
r'([^(href=")]|^)(https?:\/\/(%s([\w\.\-_\/])*))' % regex.domain,
|
||||||
r'\.[A-Za-z]{2,}[A-Za-z\.\-_\/]+))',
|
|
||||||
r'\g<1><a href="\g<2>">\g<3></a>',
|
r'\g<1><a href="\g<2>">\g<3></a>',
|
||||||
content)
|
content)
|
||||||
|
|
||||||
|
def to_markdown(content):
|
||||||
|
''' catch links and convert to markdown '''
|
||||||
|
content = format_links(content)
|
||||||
content = markdown(content)
|
content = markdown(content)
|
||||||
# sanitize resulting html
|
# sanitize resulting html
|
||||||
sanitizer = InputHtmlParser()
|
sanitizer = InputHtmlParser()
|
||||||
|
|
|
@ -447,3 +447,59 @@ class Outgoing(TestCase):
|
||||||
self.assertEqual(reply.user, user)
|
self.assertEqual(reply.user, user)
|
||||||
self.assertTrue(self.remote_user in reply.mention_users.all())
|
self.assertTrue(self.remote_user in reply.mention_users.all())
|
||||||
self.assertTrue(self.local_user in reply.mention_users.all())
|
self.assertTrue(self.local_user in reply.mention_users.all())
|
||||||
|
|
||||||
|
def test_find_mentions(self):
|
||||||
|
''' detect and look up @ mentions of users '''
|
||||||
|
user = models.User.objects.create_user(
|
||||||
|
'nutria@%s' % DOMAIN, 'nutria@nutria.com', 'password',
|
||||||
|
local=True, localname='nutria')
|
||||||
|
self.assertEqual(user.username, 'nutria@%s' % DOMAIN)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('@nutria'))[0],
|
||||||
|
('@nutria', user)
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('leading text @nutria'))[0],
|
||||||
|
('@nutria', user)
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('leading @nutria trailing text'))[0],
|
||||||
|
('@nutria', user)
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('@rat@example.com'))[0],
|
||||||
|
('@rat@example.com', self.remote_user)
|
||||||
|
)
|
||||||
|
|
||||||
|
multiple = list(outgoing.find_mentions('@nutria and @rat@example.com'))
|
||||||
|
self.assertEqual(multiple[0], ('@nutria', user))
|
||||||
|
self.assertEqual(multiple[1], ('@rat@example.com', self.remote_user))
|
||||||
|
|
||||||
|
with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw:
|
||||||
|
rw.return_value = self.local_user
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('@beep@beep.com'))[0],
|
||||||
|
('@beep@beep.com', self.local_user)
|
||||||
|
)
|
||||||
|
with patch('bookwyrm.outgoing.handle_remote_webfinger') as rw:
|
||||||
|
rw.return_value = None
|
||||||
|
self.assertEqual(list(outgoing.find_mentions('@beep@beep.com')), [])
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
list(outgoing.find_mentions('@nutria@%s' % DOMAIN))[0],
|
||||||
|
('@nutria@%s' % DOMAIN, user)
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_format_links(self):
|
||||||
|
''' find and format urls into a tags '''
|
||||||
|
url = 'http://www.fish.com/'
|
||||||
|
self.assertEqual(
|
||||||
|
outgoing.format_links(url),
|
||||||
|
'<a href="%s">www.fish.com/</a>' % url)
|
||||||
|
url = 'https://archive.org/details/dli.granth.72113/page/n25/mode/2up'
|
||||||
|
self.assertEqual(
|
||||||
|
outgoing.format_links(url),
|
||||||
|
'<a href="%s">' \
|
||||||
|
'archive.org/details/dli.granth.72113/page/n25/mode/2up</a>' \
|
||||||
|
% url)
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
''' defining regexes for regularly used concepts '''
|
''' defining regexes for regularly used concepts '''
|
||||||
|
|
||||||
domain = r'[a-z-A-Z0-9_\-]+\.[a-z]+'
|
domain = r'[\w_\-\.]+\.[a-z]{2,}'
|
||||||
localname = r'@?[a-zA-Z_\-\.0-9]+'
|
localname = r'@?[a-zA-Z_\-\.0-9]+\b'
|
||||||
strict_localname = r'@[a-zA-Z_\-\.0-9]+'
|
strict_localname = r'@[a-zA-Z_\-\.0-9]+'
|
||||||
username = r'%s(@%s)?' % (localname, domain)
|
username = r'%s(@%s)?' % (localname, domain)
|
||||||
strict_username = r'%s(@%s)?' % (strict_localname, domain)
|
strict_username = r'\B%s(@%s)?\b' % (strict_localname, domain)
|
||||||
full_username = r'%s@%s' % (localname, domain)
|
full_username = r'\B%s@%s\b' % (localname, domain)
|
||||||
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
|
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
|
||||||
bookwyrm_user_agent = r'\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;'
|
bookwyrm_user_agent = r'\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;'
|
||||||
|
|
Loading…
Reference in a new issue