fix link formatting issue, for real this time

2021-08-30 14:23:04 -05:00 · 2021-08-30 14:23:04 -05:00 · 4f321e5f33
commit 4f321e5f33
parent c6d08050e2
2 changed files with 24 additions and 19 deletions
--- a/bookwyrm/tests/views/test_status.py
+++ b/bookwyrm/tests/views/test_status.py
@@ -307,14 +307,14 @@ class StatusViews(TestCase):
        self.assertEqual(
            views.status.format_links(url), '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
        )
-        url = "users.speakeasy.net/~lion/nb/book.pdf"
+        url = "https://users.speakeasy.net/~lion/nb/book.pdf"
        self.assertEqual(
            views.status.format_links(url),
            '<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url,
        )
-        url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
+        url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
        self.assertEqual(
-            views.status.format_links(url), '<a href="%s">%s</a>' % (url, url)
+            views.status.format_links(url), '<a href="%s">%s</a>' % (url, url[8:])
        )

    def test_to_markdown(self, *_):
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -153,32 +153,37 @@ def find_mentions(content):

 def format_links(content):
    """detect and format links"""
+    validator = URLValidator()
    formatted_content = ""
-    for potential_link in content.split():
-        try:
-            # raises an error on anything that's not a valid
-            URLValidator(potential_link)
-        except (ValidationError, UnicodeError):
-            formatted_content += potential_link + " "
-            continue
+    split_content = content.split()
+
+    for index, potential_link in enumerate(split_content):
        wrapped = _wrapped(potential_link)
        if wrapped:
            wrapper_close = potential_link[-1]
            formatted_content += potential_link[0]
            potential_link = potential_link[1:-1]

-        # so we can use everything but the scheme in the presentation of the link
-        url = urlparse(potential_link)
-        link = url.netloc + url.path + url.params
-        if url.query != "":
-            link += "?" + url.query
-        if url.fragment != "":
-            link += "#" + url.fragment
+        try:
+            # raises an error on anything that's not a valid link
+            validator(potential_link)

-        formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
+            # use everything but the scheme in the presentation of the link
+            url = urlparse(potential_link)
+            link = url.netloc + url.path + url.params
+            if url.query != "":
+                link += "?" + url.query
+            if url.fragment != "":
+                link += "#" + url.fragment
+
+            formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
+        except (ValidationError, UnicodeError):
+            formatted_content += potential_link

        if wrapped:
            formatted_content += wrapper_close
+        if index < len(split_content) - 1:
+            formatted_content += " "

    return formatted_content

@@ -194,8 +199,8 @@ def _wrapped(text):

 def to_markdown(content):
    """catch links and convert to markdown"""
-    content = markdown(content)
    content = format_links(content)
+    content = markdown(content)
    # sanitize resulting html
    sanitizer = InputHtmlParser()
    sanitizer.feed(content)