diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py index d4edee0b3..ff429e20f 100644 --- a/bookwyrm/tests/views/test_status.py +++ b/bookwyrm/tests/views/test_status.py @@ -303,6 +303,19 @@ class StatusViews(TestCase): 'openlibrary.org/search' "?q=arkady+strugatsky&mode=everything" % url, ) + url = "https://tech.lgbt/@bookwyrm" + self.assertEqual( + views.status.format_links(url), 'tech.lgbt/@bookwyrm' % url + ) + url = "https://users.speakeasy.net/~lion/nb/book.pdf" + self.assertEqual( + views.status.format_links(url), + 'users.speakeasy.net/~lion/nb/book.pdf' % url, + ) + url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps" + self.assertEqual( + views.status.format_links(url), '%s' % (url, url[8:]) + ) def test_to_markdown(self, *_): """this is mostly handled in other places, but nonetheless""" diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index 651021b63..fe1dfda13 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -1,13 +1,17 @@ """ what are we here for if not for posting """ import re +from urllib.parse import urlparse + from django.contrib.auth.decorators import login_required +from django.core.validators import URLValidator +from django.core.exceptions import ValidationError from django.http import HttpResponseBadRequest from django.shortcuts import get_object_or_404, redirect from django.template.response import TemplateResponse from django.utils.decorators import method_decorator from django.views import View -from markdown import markdown +from markdown import markdown from bookwyrm import forms, models from bookwyrm.sanitize_html import InputHtmlParser from bookwyrm.settings import DOMAIN @@ -149,17 +153,54 @@ def find_mentions(content): def format_links(content): """detect and format links""" - return re.sub( - r'([^(href=")]|^|\()(https?:\/\/(%s([\w\.\-_\/+&\?=:;,@#])*))' % regex.DOMAIN, - r'\g<1>\g<3>', - content, - ) + validator = URLValidator() + formatted_content = "" + split_content = content.split() + + for index, potential_link in enumerate(split_content): + wrapped = _wrapped(potential_link) + if wrapped: + wrapper_close = potential_link[-1] + formatted_content += potential_link[0] + potential_link = potential_link[1:-1] + + try: + # raises an error on anything that's not a valid link + validator(potential_link) + + # use everything but the scheme in the presentation of the link + url = urlparse(potential_link) + link = url.netloc + url.path + url.params + if url.query != "": + link += "?" + url.query + if url.fragment != "": + link += "#" + url.fragment + + formatted_content += '%s' % (potential_link, link) + except (ValidationError, UnicodeError): + formatted_content += potential_link + + if wrapped: + formatted_content += wrapper_close + if index < len(split_content) - 1: + formatted_content += " " + + return formatted_content + + +def _wrapped(text): + """check if a line of text is wrapped""" + wrappers = [("(", ")"), ("[", "]"), ("{", "}")] + for wrapper in wrappers: + if text[0] == wrapper[0] and text[-1] == wrapper[-1]: + return True + return False def to_markdown(content): """catch links and convert to markdown""" - content = markdown(content) content = format_links(content) + content = markdown(content) # sanitize resulting html sanitizer = InputHtmlParser() sanitizer.feed(content)