Improves link detecting regex

2025-04-15 06:44:05 +00:00 · 2021-01-06 10:08:43 -08:00 · 2021-01-06 10:08:43 -08:00 · 9e07f094ad
commit 9e07f094ad
parent a25bc2383b
2 changed files with 15 additions and 1 deletions
--- a/bookwyrm/outgoing.py
+++ b/bookwyrm/outgoing.py
@@ -294,8 +294,9 @@ def find_mentions(content):


 def format_links(content):
+    ''' detect and format links '''
    return re.sub(
-        r'([^(href=")]|^)(https?:\/\/([\w\.\-_]+\.[a-z]{2,}(\/[\w\.\-_\/]+)?))',
+        r'([^(href=")]|^)(https?:\/\/(%s([\w\.\-_\/])*))' % regex.domain,
        r'\g<1><a href="\g<2>">\g<3></a>',
        content)

--- a/bookwyrm/tests/test_outgoing.py
+++ b/bookwyrm/tests/test_outgoing.py
@@ -490,3 +490,16 @@ class Outgoing(TestCase):
            list(outgoing.find_mentions('@nutria@%s' % DOMAIN))[0],
            ('@nutria@%s' % DOMAIN, user)
        )
+
+    def test_format_links(self):
+        ''' find and format urls into a tags '''
+        url = 'http://www.fish.com/'
+        self.assertEqual(
+            outgoing.format_links(url),
+            '<a href="%s">www.fish.com/</a>' % url)
+        url = 'https://archive.org/details/dli.granth.72113/page/n25/mode/2up'
+        self.assertEqual(
+            outgoing.format_links(url),
+            '<a href="%s">' \
+                'archive.org/details/dli.granth.72113/page/n25/mode/2up</a>' \
+                % url)