diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py
index d4edee0b3..ff429e20f 100644
--- a/bookwyrm/tests/views/test_status.py
+++ b/bookwyrm/tests/views/test_status.py
@@ -303,6 +303,19 @@ class StatusViews(TestCase):
'openlibrary.org/search'
"?q=arkady+strugatsky&mode=everything" % url,
)
+ url = "https://tech.lgbt/@bookwyrm"
+ self.assertEqual(
+ views.status.format_links(url), 'tech.lgbt/@bookwyrm' % url
+ )
+ url = "https://users.speakeasy.net/~lion/nb/book.pdf"
+ self.assertEqual(
+ views.status.format_links(url),
+ 'users.speakeasy.net/~lion/nb/book.pdf' % url,
+ )
+ url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
+ self.assertEqual(
+ views.status.format_links(url), '%s' % (url, url[8:])
+ )
def test_to_markdown(self, *_):
"""this is mostly handled in other places, but nonetheless"""
diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py
index 651021b63..fe1dfda13 100644
--- a/bookwyrm/views/status.py
+++ b/bookwyrm/views/status.py
@@ -1,13 +1,17 @@
""" what are we here for if not for posting """
import re
+from urllib.parse import urlparse
+
from django.contrib.auth.decorators import login_required
+from django.core.validators import URLValidator
+from django.core.exceptions import ValidationError
from django.http import HttpResponseBadRequest
from django.shortcuts import get_object_or_404, redirect
from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator
from django.views import View
-from markdown import markdown
+from markdown import markdown
from bookwyrm import forms, models
from bookwyrm.sanitize_html import InputHtmlParser
from bookwyrm.settings import DOMAIN
@@ -149,17 +153,54 @@ def find_mentions(content):
def format_links(content):
    """detect and format links

    Splits ``content`` on runs of whitespace and checks every token with
    Django's ``URLValidator``. A token that is a valid URL is replaced by an
    HTML anchor whose ``href`` is the full URL and whose text is the URL
    without its scheme. A token enclosed in a matching pair of brackets
    (as decided by ``_wrapped``) is validated without its first and last
    character, which are put back around the result. Every other token is
    copied through unchanged.

    The whitespace between tokens (including newlines) is preserved exactly,
    so block structure survives for whatever processes the text afterwards.

    :param content: the text to scan
    :return: the text with each detected URL wrapped in an ``<a>`` tag
    """
    validator = URLValidator()
    # the capturing group makes re.split keep the separators in the result:
    # even indexes hold tokens, odd indexes hold the whitespace between them
    pieces = re.split(r"(\s+)", content)
    formatted = []

    for index, piece in enumerate(pieces):
        # whitespace separators and the empty strings re.split yields for
        # leading/trailing whitespace are never links; copy them as they are
        if index % 2 or not piece:
            formatted.append(piece)
            continue

        opener = closer = ""
        potential_link = piece
        if _wrapped(piece):
            opener, closer = piece[0], piece[-1]
            potential_link = piece[1:-1]

        try:
            # raises an error on anything that's not a valid link
            validator(potential_link)

            # use everything but the scheme in the presentation of the link
            url = urlparse(potential_link)
            link = url.netloc + url.path
            if url.params:
                # urlparse strips the ";" that introduces the parameters
                link += ";" + url.params
            if url.query:
                link += "?" + url.query
            if url.fragment:
                link += "#" + url.fragment
        except (ValidationError, UnicodeError):
            formatted.append(piece)
            continue

        # NOTE(review): the URL is interpolated without HTML escaping; this
        # presumably relies on the caller sanitizing the result -- confirm
        formatted.append(
            '%s<a href="%s">%s</a>%s' % (opener, potential_link, link, closer)
        )

    return "".join(formatted)
+
+
+def _wrapped(text):
+ """check if a line of text is wrapped"""
+ wrappers = [("(", ")"), ("[", "]"), ("{", "}")]
+ for wrapper in wrappers:
+ if text[0] == wrapper[0] and text[-1] == wrapper[-1]:
+ return True
+ return False
def to_markdown(content):
"""catch links and convert to markdown"""
- content = markdown(content)
content = format_links(content)
+ content = markdown(content)
# sanitize resulting html
sanitizer = InputHtmlParser()
sanitizer.feed(content)