Merge pull request #1343 from reesporte/fix-link-formatter

fix link formatter
This commit is contained in:
Mouse Reeve 2021-08-30 13:37:04 -07:00 committed by GitHub
commit 8d23f1d356
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 61 additions and 7 deletions

View file

@ -303,6 +303,19 @@ class StatusViews(TestCase):
'<a href="%s">openlibrary.org/search' '<a href="%s">openlibrary.org/search'
"?q=arkady+strugatsky&mode=everything</a>" % url, "?q=arkady+strugatsky&mode=everything</a>" % url,
) )
url = "https://tech.lgbt/@bookwyrm"
self.assertEqual(
views.status.format_links(url), '<a href="%s">tech.lgbt/@bookwyrm</a>' % url
)
url = "https://users.speakeasy.net/~lion/nb/book.pdf"
self.assertEqual(
views.status.format_links(url),
'<a href="%s">users.speakeasy.net/~lion/nb/book.pdf</a>' % url,
)
url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps"
self.assertEqual(
views.status.format_links(url), '<a href="%s">%s</a>' % (url, url[8:])
)
def test_to_markdown(self, *_): def test_to_markdown(self, *_):
"""this is mostly handled in other places, but nonetheless""" """this is mostly handled in other places, but nonetheless"""

View file

@ -1,13 +1,17 @@
""" what are we here for if not for posting """ """ what are we here for if not for posting """
import re import re
from urllib.parse import urlparse
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.core.validators import URLValidator
from django.core.exceptions import ValidationError
from django.http import HttpResponseBadRequest from django.http import HttpResponseBadRequest
from django.shortcuts import get_object_or_404, redirect from django.shortcuts import get_object_or_404, redirect
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator from django.utils.decorators import method_decorator
from django.views import View from django.views import View
from markdown import markdown
from markdown import markdown
from bookwyrm import forms, models from bookwyrm import forms, models
from bookwyrm.sanitize_html import InputHtmlParser from bookwyrm.sanitize_html import InputHtmlParser
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
@ -149,17 +153,54 @@ def find_mentions(content):
def format_links(content):
    """detect and format links

    Every whitespace-delimited token in ``content`` is run through Django's
    ``URLValidator``. A token that validates is replaced by an ``<a>`` tag
    whose ``href`` is the token itself and whose text is the same URL without
    its scheme. A token enclosed in a matching pair of brackets (as decided by
    ``_wrapped``) is unwrapped before validation and the brackets are emitted
    outside the anchor. Tokens that do not validate are emitted unchanged.

    The whitespace between tokens (including newlines) is copied through
    verbatim, so line and paragraph breaks survive for whatever processes the
    text next.

    NOTE(review): the URL is interpolated into the markup without HTML
    escaping; this presumably relies on the caller sanitizing the result.
    """
    validator = URLValidator()
    formatted = []
    # the capturing group makes re.split keep the separators: tokens land on
    # the even indexes and the whitespace runs between them on the odd ones
    for index, piece in enumerate(re.split(r"(\s+)", content)):
        # whitespace is passed through as-is; the empty strings re.split yields
        # for leading/trailing whitespace must never reach _wrapped
        if index % 2 or not piece:
            formatted.append(piece)
            continue

        opener = closer = ""
        potential_link = piece
        if _wrapped(piece):
            opener, closer = piece[0], piece[-1]
            potential_link = piece[1:-1]

        try:
            # raises an error on anything that's not a valid link
            validator(potential_link)
            url = urlparse(potential_link)
        except (ValidationError, UnicodeError):
            formatted.append(piece)
            continue

        # use everything but the scheme in the presentation of the link
        link = url.netloc + url.path
        if url.params:
            # urlparse strips the ";" that introduces the params
            link += ";" + url.params
        if url.query:
            link += "?" + url.query
        if url.fragment:
            link += "#" + url.fragment
        formatted.append(
            '%s<a href="%s">%s</a>%s' % (opener, potential_link, link, closer)
        )
    return "".join(formatted)
def _wrapped(text):
"""check if a line of text is wrapped"""
wrappers = [("(", ")"), ("[", "]"), ("{", "}")]
for wrapper in wrappers:
if text[0] == wrapper[0] and text[-1] == wrapper[-1]:
return True
return False
def to_markdown(content): def to_markdown(content):
"""catch links and convert to markdown""" """catch links and convert to markdown"""
content = markdown(content)
content = format_links(content) content = format_links(content)
content = markdown(content)
# sanitize resulting html # sanitize resulting html
sanitizer = InputHtmlParser() sanitizer = InputHtmlParser()
sanitizer.feed(content) sanitizer.feed(content)