From 5291308677964dbb2678b78e0e2eb170b9d85366 Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:12:05 -0500
Subject: [PATCH 1/7] fix link formatting issues

---
 bookwyrm/views/status.py | 46 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)
diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index 651021b63..e2dd7fd55 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -1,13 +1,18 @@
 """ what are we here for if not for posting """
 import re
 from django.contrib.auth.decorators import login_required
+from django.core.validators import URLValidator
+from django.core.exceptions import ValidationError
 from django.http import HttpResponseBadRequest
 from django.shortcuts import get_object_or_404, redirect
 from django.template.response import TemplateResponse
 from django.utils.decorators import method_decorator
+from django.utils.html import urlize
 from django.views import View
 from markdown import markdown
 
+from urllib.parse import urlparse
+
 from bookwyrm import forms, models
 from bookwyrm.sanitize_html import InputHtmlParser
 from bookwyrm.settings import DOMAIN
@@ -149,12 +154,43 @@ def find_mentions(content):
 
 def format_links(content):
     """detect and format links"""
-    return re.sub(
-        r'([^(href=")]|^|\()(https?:\/\/(%s([\w\.\-_\/+&\?=:;,@#])*))' % regex.DOMAIN,
-        r'\g<1><a href="\g<2>">\g<3></a>',
-        content,
-    )
+    v = URLValidator()
+    formatted_content = ""
+    for potential_link in content.split():
+        try:
+            # raises an error on anything that's not a valid 
+            URLValidator(potential_link)
+        except (ValidationError, UnicodeError):
+            formatted_content += potential_link + " "
+            continue
+        wrapped = _wrapped(potential_link)
+        if wrapped:
+            wrapper_close = potential_link[-1]
+            formatted_content += potential_link[0] 
+            potential_link = potential_link[1:-1]
 
+        # so we can use everything but the scheme in the presentation of the link
+        url = urlparse(potential_link)
+        link = url.netloc + url.path + url.params
+        if url.query != "":
+            link += "?" + url.query 
+        if url.fragment != "":
+            link += "#" + url.fragment
+
+        formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
+
+        if wrapped:
+            formatted_content += wrapper_close 
+
+    return formatted_content
+
+def _wrapped(text):
+    """ check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status """
+    wrappers = [("(", ")"), ("[","]"), ("{", "}")]
+    for w in wrappers:
+        if text[0] == w[0] and text[-1] == w[-1]:
+            return True 
+    return False
 
 def to_markdown(content):
     """catch links and convert to markdown"""

From aa946e3ab7cfa7ddc63a833131ba3b80408d06fc Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:12:27 -0500
Subject: [PATCH 2/7] add more test cases for link formatting

---
 bookwyrm/tests/views/test_status.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py
index d4edee0b3..b782906da 100644
--- a/bookwyrm/tests/views/test_status.py
+++ b/bookwyrm/tests/views/test_status.py
@@ -303,6 +303,21 @@ class StatusViews(TestCase):
             '<a href="%s">openlibrary.org/search'
             "?q=arkady+strugatsky&mode=everything</a>" % url,
         )
+        url = "https://tech.lgbt/@bookwyrm"
+        self.assertEqual(
+            views.status.format_links(url), 
+            '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
+        )
+        url = "users.speakeasy.net/~lion/nb/book.pdf"
+        self.assertEqual(
+            views.status.format_links(url),
+            '<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url
+        )
+        url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
+        self.assertEqual(
+            views.status.format_links(url),
+            '<a href="%s">%s</a>' % (url, url)
+        )
 
     def test_to_markdown(self, *_):
         """this is mostly handled in other places, but nonetheless"""

From ac6438147dfe88e1837719fb9d1650aa4e63f0ae Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:20:56 -0500
Subject: [PATCH 3/7] remove unused import

---
 bookwyrm/views/status.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index e2dd7fd55..a62e1b48b 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -7,7 +7,6 @@ from django.http import HttpResponseBadRequest
 from django.shortcuts import get_object_or_404, redirect
 from django.template.response import TemplateResponse
 from django.utils.decorators import method_decorator
-from django.utils.html import urlize
 from django.views import View
 from markdown import markdown
 

From 6e628fed38bbde01e720a2bf366a5fd9b58695c7 Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:38:00 -0500
Subject: [PATCH 4/7] formatting

---
 bookwyrm/tests/views/test_status.py |  8 +++-----
 bookwyrm/views/status.py            | 16 +++++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py
index b782906da..893e06505 100644
--- a/bookwyrm/tests/views/test_status.py
+++ b/bookwyrm/tests/views/test_status.py
@@ -305,18 +305,16 @@ class StatusViews(TestCase):
         )
         url = "https://tech.lgbt/@bookwyrm"
         self.assertEqual(
-            views.status.format_links(url), 
-            '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
+            views.status.format_links(url), '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
         )
         url = "users.speakeasy.net/~lion/nb/book.pdf"
         self.assertEqual(
             views.status.format_links(url),
-            '<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url
+            '<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url,
         )
         url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
         self.assertEqual(
-            views.status.format_links(url),
-            '<a href="%s">%s</a>' % (url, url)
+            views.status.format_links(url), '<a href="%s">%s</a>' % (url, url)
         )
 
     def test_to_markdown(self, *_):
diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index a62e1b48b..c415e4b29 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -157,7 +157,7 @@ def format_links(content):
     formatted_content = ""
     for potential_link in content.split():
         try:
-            # raises an error on anything that's not a valid 
+            # raises an error on anything that's not a valid
             URLValidator(potential_link)
         except (ValidationError, UnicodeError):
             formatted_content += potential_link + " "
@@ -165,32 +165,34 @@ def format_links(content):
         wrapped = _wrapped(potential_link)
         if wrapped:
             wrapper_close = potential_link[-1]
-            formatted_content += potential_link[0] 
+            formatted_content += potential_link[0]
             potential_link = potential_link[1:-1]
 
         # so we can use everything but the scheme in the presentation of the link
         url = urlparse(potential_link)
         link = url.netloc + url.path + url.params
         if url.query != "":
-            link += "?" + url.query 
+            link += "?" + url.query
         if url.fragment != "":
             link += "#" + url.fragment
 
         formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
 
         if wrapped:
-            formatted_content += wrapper_close 
+            formatted_content += wrapper_close
 
     return formatted_content
 
+
 def _wrapped(text):
-    """ check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status """
-    wrappers = [("(", ")"), ("[","]"), ("{", "}")]
+    """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status"""
+    wrappers = [("(", ")"), ("[", "]"), ("{", "}")]
     for w in wrappers:
         if text[0] == w[0] and text[-1] == w[-1]:
-            return True 
+            return True
     return False
 
+
 def to_markdown(content):
     """catch links and convert to markdown"""
     content = markdown(content)

From 0f481714c01a65e1cbc94c24227c88ad89527e0c Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:47:19 -0500
Subject: [PATCH 5/7] fix pylint errors

---
 bookwyrm/views/status.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index c415e4b29..eeedcc3da 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -1,5 +1,7 @@
 """ what are we here for if not for posting """
 import re
+from urllib.parse import urlparse
+
 from django.contrib.auth.decorators import login_required
 from django.core.validators import URLValidator
 from django.core.exceptions import ValidationError
@@ -8,10 +10,8 @@ from django.shortcuts import get_object_or_404, redirect
 from django.template.response import TemplateResponse
 from django.utils.decorators import method_decorator
 from django.views import View
+
 from markdown import markdown
-
-from urllib.parse import urlparse
-
 from bookwyrm import forms, models
 from bookwyrm.sanitize_html import InputHtmlParser
 from bookwyrm.settings import DOMAIN
@@ -153,7 +153,6 @@ def find_mentions(content):
 
 def format_links(content):
     """detect and format links"""
-    v = URLValidator()
     formatted_content = ""
     for potential_link in content.split():
         try:
@@ -187,8 +186,8 @@ def format_links(content):
 def _wrapped(text):
     """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status"""
     wrappers = [("(", ")"), ("[", "]"), ("{", "}")]
-    for w in wrappers:
-        if text[0] == w[0] and text[-1] == w[-1]:
+    for wrapper in wrappers:
+        if text[0] == wrapper[0] and text[-1] == wrapper[-1]:
             return True
     return False
 

From c6d08050e2a2732706f2d1256648868f5118c6e4 Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 11:51:42 -0500
Subject: [PATCH 6/7] shorten _wrapped docstring

---
 bookwyrm/views/status.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index eeedcc3da..fdc23ac42 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -184,7 +184,7 @@ def format_links(content):
 
 
 def _wrapped(text):
-    """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status"""
+    """check if a line of text is wrapped"""
     wrappers = [("(", ")"), ("[", "]"), ("{", "}")]
     for wrapper in wrappers:
         if text[0] == wrapper[0] and text[-1] == wrapper[-1]:

From 4f321e5f33eaa1de3f728282d5ebe481c7ebbd1d Mon Sep 17 00:00:00 2001
From: reesporte <reesedporter@gmail.com>
Date: Mon, 30 Aug 2021 14:23:04 -0500
Subject: [PATCH 7/7] fix link formatting: call URLValidator instance, format links before markdown

---
 bookwyrm/tests/views/test_status.py |  6 ++---
 bookwyrm/views/status.py            | 37 ++++++++++++++++-------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py
index 893e06505..ff429e20f 100644
--- a/bookwyrm/tests/views/test_status.py
+++ b/bookwyrm/tests/views/test_status.py
@@ -307,14 +307,14 @@ class StatusViews(TestCase):
         self.assertEqual(
             views.status.format_links(url), '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
         )
-        url = "users.speakeasy.net/~lion/nb/book.pdf"
+        url = "https://users.speakeasy.net/~lion/nb/book.pdf"
         self.assertEqual(
             views.status.format_links(url),
             '<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url,
         )
-        url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
+        url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
         self.assertEqual(
-            views.status.format_links(url), '<a href="%s">%s</a>' % (url, url)
+            views.status.format_links(url), '<a href="%s">%s</a>' % (url, url[8:])
         )
 
     def test_to_markdown(self, *_):
diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index fdc23ac42..fe1dfda13 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -153,32 +153,37 @@ def find_mentions(content):
 
 def format_links(content):
     """detect and format links"""
+    validator = URLValidator()
     formatted_content = ""
-    for potential_link in content.split():
-        try:
-            # raises an error on anything that's not a valid
-            URLValidator(potential_link)
-        except (ValidationError, UnicodeError):
-            formatted_content += potential_link + " "
-            continue
+    split_content = content.split()
+
+    for index, potential_link in enumerate(split_content):
         wrapped = _wrapped(potential_link)
         if wrapped:
             wrapper_close = potential_link[-1]
             formatted_content += potential_link[0]
             potential_link = potential_link[1:-1]
 
-        # so we can use everything but the scheme in the presentation of the link
-        url = urlparse(potential_link)
-        link = url.netloc + url.path + url.params
-        if url.query != "":
-            link += "?" + url.query
-        if url.fragment != "":
-            link += "#" + url.fragment
+        try:
+            # raises an error on anything that's not a valid link
+            validator(potential_link)
 
-        formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
+            # use everything but the scheme in the presentation of the link
+            url = urlparse(potential_link)
+            link = url.netloc + url.path + url.params
+            if url.query != "":
+                link += "?" + url.query
+            if url.fragment != "":
+                link += "#" + url.fragment
+
+            formatted_content += '<a href="%s">%s</a>' % (potential_link, link)
+        except (ValidationError, UnicodeError):
+            formatted_content += potential_link
 
         if wrapped:
             formatted_content += wrapper_close
+        if index < len(split_content) - 1:
+            formatted_content += " "
 
     return formatted_content
 
@@ -194,8 +199,8 @@ def _wrapped(text):
 
 def to_markdown(content):
     """catch links and convert to markdown"""
-    content = markdown(content)
     content = format_links(content)
+    content = markdown(content)
     # sanitize resulting html
     sanitizer = InputHtmlParser()
     sanitizer.feed(content)