From 5291308677964dbb2678b78e0e2eb170b9d85366 Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:12:05 -0500 Subject: [PATCH 1/7] fix link formatting issues --- bookwyrm/views/status.py | 46 +++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index 651021b63..e2dd7fd55 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -1,13 +1,18 @@ """ what are we here for if not for posting """ import re from django.contrib.auth.decorators import login_required +from django.core.validators import URLValidator +from django.core.exceptions import ValidationError from django.http import HttpResponseBadRequest from django.shortcuts import get_object_or_404, redirect from django.template.response import TemplateResponse from django.utils.decorators import method_decorator +from django.utils.html import urlize from django.views import View from markdown import markdown +from urllib.parse import urlparse + from bookwyrm import forms, models from bookwyrm.sanitize_html import InputHtmlParser from bookwyrm.settings import DOMAIN @@ -149,12 +154,43 @@ def find_mentions(content): def format_links(content): """detect and format links""" - return re.sub( - r'([^(href=")]|^|\()(https?:\/\/(%s([\w\.\-_\/+&\?=:;,@#])*))' % regex.DOMAIN, - r'\g<1>\g<3>', - content, - ) + v = URLValidator() + formatted_content = "" + for potential_link in content.split(): + try: + # raises an error on anything that's not a valid + URLValidator(potential_link) + except (ValidationError, UnicodeError): + formatted_content += potential_link + " " + continue + wrapped = _wrapped(potential_link) + if wrapped: + wrapper_close = potential_link[-1] + formatted_content += potential_link[0] + potential_link = potential_link[1:-1] + # so we can use everything but the scheme in the presentation of the link + url = urlparse(potential_link) + link = url.netloc + url.path + url.params + if url.query != "": + link += "?" + url.query + if url.fragment != "": + link += "#" + url.fragment + + formatted_content += '%s' % (potential_link, link) + + if wrapped: + formatted_content += wrapper_close + + return formatted_content + +def _wrapped(text): + """ check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status """ + wrappers = [("(", ")"), ("[","]"), ("{", "}")] + for w in wrappers: + if text[0] == w[0] and text[-1] == w[-1]: + return True + return False def to_markdown(content): """catch links and convert to markdown""" From aa946e3ab7cfa7ddc63a833131ba3b80408d06fc Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:12:27 -0500 Subject: [PATCH 2/7] add more test cases for link formatting --- bookwyrm/tests/views/test_status.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py index d4edee0b3..b782906da 100644 --- a/bookwyrm/tests/views/test_status.py +++ b/bookwyrm/tests/views/test_status.py @@ -303,6 +303,21 @@ class StatusViews(TestCase): 'openlibrary.org/search' "?q=arkady+strugatsky&mode=everything" % url, ) + url = "https://tech.lgbt/@bookwyrm" + self.assertEqual( + views.status.format_links(url), + 'tech.lgbt/@bookwyrm' % url + ) + url = "users.speakeasy.net/~lion/nb/book.pdf" + self.assertEqual( + views.status.format_links(url), + 'users.speakeasy.net/~lion/nb/book.pdf' % url + ) + url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps" + self.assertEqual( + views.status.format_links(url), + '%s' % (url, url) + ) def test_to_markdown(self, *_): """this is mostly handled in other places, but nonetheless""" From ac6438147dfe88e1837719fb9d1650aa4e63f0ae Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:20:56 -0500 Subject: [PATCH 3/7] remove unused import --- bookwyrm/views/status.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index e2dd7fd55..a62e1b48b 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -7,7 +7,6 @@ from django.http import HttpResponseBadRequest from django.shortcuts import get_object_or_404, redirect from django.template.response import TemplateResponse from django.utils.decorators import method_decorator -from django.utils.html import urlize from django.views import View from markdown import markdown From 6e628fed38bbde01e720a2bf366a5fd9b58695c7 Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:38:00 -0500 Subject: [PATCH 4/7] formatting --- bookwyrm/tests/views/test_status.py | 8 +++----- bookwyrm/views/status.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py index b782906da..893e06505 100644 --- a/bookwyrm/tests/views/test_status.py +++ b/bookwyrm/tests/views/test_status.py @@ -305,18 +305,16 @@ class StatusViews(TestCase): ) url = "https://tech.lgbt/@bookwyrm" self.assertEqual( - views.status.format_links(url), - 'tech.lgbt/@bookwyrm' % url + views.status.format_links(url), 'tech.lgbt/@bookwyrm' % url ) url = "users.speakeasy.net/~lion/nb/book.pdf" self.assertEqual( views.status.format_links(url), - 'users.speakeasy.net/~lion/nb/book.pdf' % url + 'users.speakeasy.net/~lion/nb/book.pdf' % url, ) url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps" self.assertEqual( - views.status.format_links(url), - '%s' % (url, url) + views.status.format_links(url), '%s' % (url, url) ) def test_to_markdown(self, *_): diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index a62e1b48b..c415e4b29 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -157,7 +157,7 @@ def format_links(content): formatted_content = "" for potential_link in content.split(): try: - # raises an error on anything that's not a valid + # raises an error on anything that's not a valid URLValidator(potential_link) except (ValidationError, UnicodeError): formatted_content += potential_link + " " @@ -165,32 +165,34 @@ def format_links(content): wrapped = _wrapped(potential_link) if wrapped: wrapper_close = potential_link[-1] - formatted_content += potential_link[0] + formatted_content += potential_link[0] potential_link = potential_link[1:-1] # so we can use everything but the scheme in the presentation of the link url = urlparse(potential_link) link = url.netloc + url.path + url.params if url.query != "": - link += "?" + url.query + link += "?" + url.query if url.fragment != "": link += "#" + url.fragment formatted_content += '%s' % (potential_link, link) if wrapped: - formatted_content += wrapper_close + formatted_content += wrapper_close return formatted_content + def _wrapped(text): - """ check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status """ - wrappers = [("(", ")"), ("[","]"), ("{", "}")] + """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status""" + wrappers = [("(", ")"), ("[", "]"), ("{", "}")] for w in wrappers: if text[0] == w[0] and text[-1] == w[-1]: - return True + return True return False + def to_markdown(content): """catch links and convert to markdown""" content = markdown(content) From 0f481714c01a65e1cbc94c24227c88ad89527e0c Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:47:19 -0500 Subject: [PATCH 5/7] fix pylint errors --- bookwyrm/views/status.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index c415e4b29..eeedcc3da 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -1,5 +1,7 @@ """ what are we here for if not for posting """ import re +from urllib.parse import urlparse + from django.contrib.auth.decorators import login_required from django.core.validators import URLValidator from django.core.exceptions import ValidationError @@ -8,10 +10,8 @@ from django.shortcuts import get_object_or_404, redirect from django.template.response import TemplateResponse from django.utils.decorators import method_decorator from django.views import View + from markdown import markdown - -from urllib.parse import urlparse - from bookwyrm import forms, models from bookwyrm.sanitize_html import InputHtmlParser from bookwyrm.settings import DOMAIN @@ -153,7 +153,6 @@ def find_mentions(content): def format_links(content): """detect and format links""" - v = URLValidator() formatted_content = "" for potential_link in content.split(): try: @@ -187,8 +186,8 @@ def format_links(content): def _wrapped(text): """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status""" wrappers = [("(", ")"), ("[", "]"), ("{", "}")] - for w in wrappers: - if text[0] == w[0] and text[-1] == w[-1]: + for wrapper in wrappers: + if text[0] == wrapper[0] and text[-1] == wrapper[-1]: return True return False From c6d08050e2a2732706f2d1256648868f5118c6e4 Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 11:51:42 -0500 Subject: [PATCH 6/7] bruh --- bookwyrm/views/status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index eeedcc3da..fdc23ac42 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -184,7 +184,7 @@ def format_links(content): def _wrapped(text): - """check if a line of text is wrapped in parentheses, square brackets or curly brackets. return wrapped status""" + """check if a line of text is wrapped""" wrappers = [("(", ")"), ("[", "]"), ("{", "}")] for wrapper in wrappers: if text[0] == wrapper[0] and text[-1] == wrapper[-1]: From 4f321e5f33eaa1de3f728282d5ebe481c7ebbd1d Mon Sep 17 00:00:00 2001 From: reesporte Date: Mon, 30 Aug 2021 14:23:04 -0500 Subject: [PATCH 7/7] fix link formatting issue, for real this time --- bookwyrm/tests/views/test_status.py | 6 ++--- bookwyrm/views/status.py | 37 ++++++++++++++++------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/bookwyrm/tests/views/test_status.py b/bookwyrm/tests/views/test_status.py index 893e06505..ff429e20f 100644 --- a/bookwyrm/tests/views/test_status.py +++ b/bookwyrm/tests/views/test_status.py @@ -307,14 +307,14 @@ class StatusViews(TestCase): self.assertEqual( views.status.format_links(url), 'tech.lgbt/@bookwyrm' % url ) - url = "users.speakeasy.net/~lion/nb/book.pdf" + url = "https://users.speakeasy.net/~lion/nb/book.pdf" self.assertEqual( views.status.format_links(url), 'users.speakeasy.net/~lion/nb/book.pdf' % url, ) - url = "pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps" + url = "https://pkm.one/#/page/The%20Book%20which%20launched%20a%201000%20Note%20taking%20apps" self.assertEqual( - views.status.format_links(url), '%s' % (url, url) + views.status.format_links(url), '%s' % (url, url[8:]) ) def test_to_markdown(self, *_): diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index fdc23ac42..fe1dfda13 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -153,32 +153,37 @@ def find_mentions(content): def format_links(content): """detect and format links""" + validator = URLValidator() formatted_content = "" - for potential_link in content.split(): - try: - # raises an error on anything that's not a valid - URLValidator(potential_link) - except (ValidationError, UnicodeError): - formatted_content += potential_link + " " - continue + split_content = content.split() + + for index, potential_link in enumerate(split_content): wrapped = _wrapped(potential_link) if wrapped: wrapper_close = potential_link[-1] formatted_content += potential_link[0] potential_link = potential_link[1:-1] - # so we can use everything but the scheme in the presentation of the link - url = urlparse(potential_link) - link = url.netloc + url.path + url.params - if url.query != "": - link += "?" + url.query - if url.fragment != "": - link += "#" + url.fragment + try: + # raises an error on anything that's not a valid link + validator(potential_link) - formatted_content += '%s' % (potential_link, link) + # use everything but the scheme in the presentation of the link + url = urlparse(potential_link) + link = url.netloc + url.path + url.params + if url.query != "": + link += "?" + url.query + if url.fragment != "": + link += "#" + url.fragment + + formatted_content += '%s' % (potential_link, link) + except (ValidationError, UnicodeError): + formatted_content += potential_link if wrapped: formatted_content += wrapper_close + if index < len(split_content) - 1: + formatted_content += " " return formatted_content @@ -194,8 +199,8 @@ def _wrapped(text): def to_markdown(content): """catch links and convert to markdown""" - content = markdown(content) content = format_links(content) + content = markdown(content) # sanitize resulting html sanitizer = InputHtmlParser() sanitizer.feed(content)