From 8d266fda4dca59ac106f097d8f4a15fdd1c9ec16 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 8 Apr 2022 15:21:06 -0700 Subject: [PATCH 01/87] Removes unused `related_book` field on notification model --- .../0149_remove_notification_related_book.py | 17 +++++++++++++++++ bookwyrm/models/notification.py | 2 -- bookwyrm/views/notifications.py | 1 - 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 bookwyrm/migrations/0149_remove_notification_related_book.py diff --git a/bookwyrm/migrations/0149_remove_notification_related_book.py b/bookwyrm/migrations/0149_remove_notification_related_book.py new file mode 100644 index 000000000..c976af6c9 --- /dev/null +++ b/bookwyrm/migrations/0149_remove_notification_related_book.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.12 on 2022-04-08 22:20 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0148_alter_user_preferred_language"), + ] + + operations = [ + migrations.RemoveField( + model_name="notification", + name="related_book", + ), + ] diff --git a/bookwyrm/models/notification.py b/bookwyrm/models/notification.py index 417bf7591..28c5b803e 100644 --- a/bookwyrm/models/notification.py +++ b/bookwyrm/models/notification.py @@ -15,7 +15,6 @@ class Notification(BookWyrmModel): """you've been tagged, liked, followed, etc""" user = models.ForeignKey("User", on_delete=models.CASCADE) - related_book = models.ForeignKey("Edition", on_delete=models.CASCADE, null=True) related_user = models.ForeignKey( "User", on_delete=models.CASCADE, null=True, related_name="related_user" ) @@ -38,7 +37,6 @@ class Notification(BookWyrmModel): # there's probably a better way to do this if self.__class__.objects.filter( user=self.user, - related_book=self.related_book, related_user=self.related_user, related_group=self.related_group, related_status=self.related_status, diff --git a/bookwyrm/views/notifications.py b/bookwyrm/views/notifications.py index 0a7a62002..e081b07c3 100644 --- a/bookwyrm/views/notifications.py +++ b/bookwyrm/views/notifications.py @@ -22,7 +22,6 @@ class Notifications(View): "related_import", "related_report", "related_user", - "related_book", "related_list_item", "related_list_item__book", ) From a2a04da49356165e53cbcd557ad2b9ad2a1e75e1 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 9 Apr 2022 09:41:10 -0700 Subject: [PATCH 02/87] Adds many to many related items to notifications --- .../migrations/0150_auto_20220408_2236.py | 128 ++++++++++++++++++ bookwyrm/models/notification.py | 41 ++---- 2 files changed, 142 insertions(+), 27 deletions(-) create mode 100644 bookwyrm/migrations/0150_auto_20220408_2236.py diff --git a/bookwyrm/migrations/0150_auto_20220408_2236.py b/bookwyrm/migrations/0150_auto_20220408_2236.py new file mode 100644 index 000000000..bf1c30487 --- /dev/null +++ b/bookwyrm/migrations/0150_auto_20220408_2236.py @@ -0,0 +1,128 @@ +# Generated by Django 3.2.12 on 2022-04-08 22:36 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0149_remove_notification_related_book"), + ] + + operations = [ + migrations.AddField( + model_name="notification", + name="related_groups", + field=models.ManyToManyField( + related_name="notifications", to="bookwyrm.Group" + ), + ), + migrations.AddField( + model_name="notification", + name="related_list_items", + field=models.ManyToManyField( + related_name="notifications", to="bookwyrm.ListItem" + ), + ), + migrations.AddField( + model_name="notification", + name="related_reports", + field=models.ManyToManyField(to="bookwyrm.Report"), + ), + migrations.AddField( + model_name="notification", + name="related_statuses", + field=models.ManyToManyField( + related_name="notifications", to="bookwyrm.Status" + ), + ), + migrations.AddField( + model_name="notification", + name="related_users", + field=models.ManyToManyField( + related_name="notifications", to=settings.AUTH_USER_MODEL + ), + ), + migrations.AlterField( + model_name="notification", + name="related_group", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="notifications_temp", + to="bookwyrm.group", + ), + ), + migrations.AlterField( + model_name="notification", + name="related_list_item", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="notifications_tmp", + to="bookwyrm.listitem", + ), + ), + migrations.AlterField( + model_name="notification", + name="related_report", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="notifications_tmp", + to="bookwyrm.report", + ), + ), + migrations.RunSQL( + sql=""" + INSERT INTO bookwyrm_notification_related_statuses (notification_id, status_id) + SELECT id, related_status_id + FROM bookwyrm_notification + WHERE bookwyrm_notification.related_status_id IS NOT NULL; + + INSERT INTO bookwyrm_notification_related_users (notification_id, user_id) + SELECT id, related_user_id + FROM bookwyrm_notification + WHERE bookwyrm_notification.related_user_id IS NOT NULL; + + INSERT INTO bookwyrm_notification_related_groups (notification_id, group_id) + SELECT id, related_group_id + FROM bookwyrm_notification + WHERE bookwyrm_notification.related_group_id IS NOT NULL; + + INSERT INTO bookwyrm_notification_related_list_items (notification_id, listitem_id) + SELECT id, related_list_item_id + FROM bookwyrm_notification + WHERE bookwyrm_notification.related_list_item_id IS NOT NULL; + + INSERT INTO bookwyrm_notification_related_reports (notification_id, report_id) + SELECT id, related_report_id + FROM bookwyrm_notification + WHERE bookwyrm_notification.related_report_id IS NOT NULL; + + """, + reverse_sql=migrations.RunSQL.noop, + ), + migrations.RemoveField( + model_name="notification", + name="related_group", + ), + migrations.RemoveField( + model_name="notification", + name="related_list_item", + ), + migrations.RemoveField( + model_name="notification", + name="related_report", + ), + migrations.RemoveField( + model_name="notification", + name="related_status", + ), + migrations.RemoveField( + model_name="notification", + name="related_user", + ), + ] diff --git a/bookwyrm/models/notification.py b/bookwyrm/models/notification.py index 28c5b803e..7b8059e0f 100644 --- a/bookwyrm/models/notification.py +++ b/bookwyrm/models/notification.py @@ -15,38 +15,25 @@ class Notification(BookWyrmModel): """you've been tagged, liked, followed, etc""" user = models.ForeignKey("User", on_delete=models.CASCADE) - related_user = models.ForeignKey( - "User", on_delete=models.CASCADE, null=True, related_name="related_user" - ) - related_group = models.ForeignKey( - "Group", on_delete=models.CASCADE, null=True, related_name="notifications" - ) - related_status = models.ForeignKey("Status", on_delete=models.CASCADE, null=True) - related_import = models.ForeignKey("ImportJob", on_delete=models.CASCADE, null=True) - related_list_item = models.ForeignKey( - "ListItem", on_delete=models.CASCADE, null=True - ) - related_report = models.ForeignKey("Report", on_delete=models.CASCADE, null=True) read = models.BooleanField(default=False) notification_type = models.CharField( max_length=255, choices=NotificationType.choices ) - def save(self, *args, **kwargs): - """save, but don't make dupes""" - # there's probably a better way to do this - if self.__class__.objects.filter( - user=self.user, - related_user=self.related_user, - related_group=self.related_group, - related_status=self.related_status, - related_import=self.related_import, - related_list_item=self.related_list_item, - related_report=self.related_report, - notification_type=self.notification_type, - ).exists(): - return - super().save(*args, **kwargs) + related_users = models.ManyToManyField( + "User", symmetrical=False, related_name="notifications" + ) + related_groups = models.ManyToManyField( + "Group", symmetrical=False, related_name="notifications" + ) + related_statuses = models.ManyToManyField( + "Status", symmetrical=False, related_name="notifications" + ) + related_import = models.ForeignKey("ImportJob", on_delete=models.CASCADE, null=True) + related_list_items = models.ManyToManyField( + "ListItem", symmetrical=False, related_name="notifications" + ) + related_reports = models.ManyToManyField("Report", symmetrical=False) class Meta: """checks if notifcation is in enum list for valid types""" From 516c4a9790f1142d2baa4ef641f5a0fc680de07b Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jul 2022 21:02:44 -0700 Subject: [PATCH 03/87] Add warning to dashboard if email sender looks misconfigured This can be a really obscure error, hopefully this warning will catch potential issues. --- bookwyrm/templates/settings/dashboard/dashboard.html | 11 +++++++++++ bookwyrm/views/admin/dashboard.py | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/bookwyrm/templates/settings/dashboard/dashboard.html b/bookwyrm/templates/settings/dashboard/dashboard.html index cec3e89ac..46d552fb1 100644 --- a/bookwyrm/templates/settings/dashboard/dashboard.html +++ b/bookwyrm/templates/settings/dashboard/dashboard.html @@ -37,6 +37,17 @@
+ {% if email_config_error %} +
+ + {% blocktrans trimmed %} + Your outgoing email address, {{ email_sender }}, may be misconfigured. + {% endblocktrans %} + {% trans "Check the EMAIL_SENDER_NAME and EMAIL_SENDER_DOMAIN in your .env." %} + +
+ {% endif %} + {% if reports %}
diff --git a/bookwyrm/views/admin/dashboard.py b/bookwyrm/views/admin/dashboard.py index b06b6ba00..8b9017bb4 100644 --- a/bookwyrm/views/admin/dashboard.py +++ b/bookwyrm/views/admin/dashboard.py @@ -1,5 +1,7 @@ """ instance overview """ from datetime import timedelta +import re + from dateutil.parser import parse from packaging import version @@ -13,6 +15,7 @@ from django.views import View from bookwyrm import models, settings from bookwyrm.connectors.abstract_connector import get_data from bookwyrm.connectors.connector_manager import ConnectorException +from bookwyrm.utils import regex # pylint: disable= no-self-use @@ -88,6 +91,10 @@ class Dashboard(View): }, ) + email_config_error = re.findall( + r"[\s\@]", settings.EMAIL_SENDER_DOMAIN + ) or not re.match(regex.DOMAIN, settings.EMAIL_SENDER_DOMAIN) + data = { "start": start.strftime("%Y-%m-%d"), "end": end.strftime("%Y-%m-%d"), @@ -109,6 +116,8 @@ class Dashboard(View): "status_stats": status_chart.get_chart(start, end, interval), "register_stats": register_chart.get_chart(start, end, interval), "works_stats": works_chart.get_chart(start, end, interval), + "email_config_error": email_config_error, + "email_sender": f"{settings.EMAIL_SENDER_NAME}@{settings.EMAIL_SENDER_DOMAIN}", } # check version From e16506c1dfbc570172810c668f65716542106bf8 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jul 2022 21:25:07 -0700 Subject: [PATCH 04/87] Fixes logic error in checking relationships I had the logic backwards for pending relationships. --- bookwyrm/models/relationship.py | 4 ++-- bookwyrm/templatetags/interaction.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bookwyrm/models/relationship.py b/bookwyrm/models/relationship.py index 171f45840..313200514 100644 --- a/bookwyrm/models/relationship.py +++ b/bookwyrm/models/relationship.py @@ -218,7 +218,7 @@ def clear_cache(user_subject, user_object): """clear relationship cache""" cache.delete_many( [ - f"relationship-{user_subject.id}-{user_object.id}", - f"relationship-{user_object.id}-{user_subject.id}", + f"cached-relationship-{user_subject.id}-{user_object.id}", + f"cached-relationship-{user_object.id}-{user_subject.id}", ] ) diff --git a/bookwyrm/templatetags/interaction.py b/bookwyrm/templatetags/interaction.py index 89a25420a..40f92dcd6 100644 --- a/bookwyrm/templatetags/interaction.py +++ b/bookwyrm/templatetags/interaction.py @@ -42,7 +42,7 @@ def get_relationship(context, user_object): """caches the relationship between the logged in user and another user""" user = context["request"].user return get_or_set( - f"relationship-{user.id}-{user_object.id}", + f"cached-relationship-{user.id}-{user_object.id}", get_relationship_name, user, user_object, @@ -61,6 +61,6 @@ def get_relationship_name(user, user_object): types["is_blocked"] = True elif user_object in user.following.all(): types["is_following"] = True - elif user_object in user.follower_requests.all(): + elif user in user_object.follower_requests.all(): types["is_follow_pending"] = True return types From 89165fd909ba4f4033f706569d77b2fd5348c8e8 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 3 Jul 2022 08:28:24 -0700 Subject: [PATCH 05/87] Creates helper function for creating charts --- bookwyrm/views/admin/dashboard.py | 177 ++++++++++++++++-------------- 1 file changed, 92 insertions(+), 85 deletions(-) diff --git a/bookwyrm/views/admin/dashboard.py b/bookwyrm/views/admin/dashboard.py index 8b9017bb4..df34fe5bf 100644 --- a/bookwyrm/views/admin/dashboard.py +++ b/bookwyrm/views/admin/dashboard.py @@ -29,96 +29,18 @@ class Dashboard(View): def get(self, request): """list of users""" - interval = int(request.GET.get("days", 1)) - now = timezone.now() - start = request.GET.get("start") - if start: - start = timezone.make_aware(parse(start)) - else: - start = now - timedelta(days=6 * interval) - - end = request.GET.get("end") - end = timezone.make_aware(parse(end)) if end else now - start = start.replace(hour=0, minute=0, second=0) - - user_queryset = models.User.objects.filter(local=True) - user_chart = Chart( - queryset=user_queryset, - queries={ - "total": lambda q, s, e: q.filter( - Q(is_active=True) | Q(deactivation_date__gt=e), - created_date__lte=e, - ).count(), - "active": lambda q, s, e: q.filter( - Q(is_active=True) | Q(deactivation_date__gt=e), - created_date__lte=e, - ) - .filter( - last_active_date__gt=e - timedelta(days=31), - ) - .count(), - }, - ) - - status_queryset = models.Status.objects.filter(user__local=True, deleted=False) - status_chart = Chart( - queryset=status_queryset, - queries={ - "total": lambda q, s, e: q.filter( - created_date__gt=s, - created_date__lte=e, - ).count() - }, - ) - - register_chart = Chart( - queryset=user_queryset, - queries={ - "total": lambda q, s, e: q.filter( - created_date__gt=s, - created_date__lte=e, - ).count() - }, - ) - - works_chart = Chart( - queryset=models.Work.objects, - queries={ - "total": lambda q, s, e: q.filter( - created_date__gt=s, - created_date__lte=e, - ).count() - }, - ) + data = get_charts_and_stats(request) + # Make sure email looks properly configured email_config_error = re.findall( r"[\s\@]", settings.EMAIL_SENDER_DOMAIN ) or not re.match(regex.DOMAIN, settings.EMAIL_SENDER_DOMAIN) - data = { - "start": start.strftime("%Y-%m-%d"), - "end": end.strftime("%Y-%m-%d"), - "interval": interval, - "users": user_queryset.filter(is_active=True).count(), - "active_users": user_queryset.filter( - is_active=True, last_active_date__gte=now - timedelta(days=31) - ).count(), - "statuses": status_queryset.count(), - "works": models.Work.objects.count(), - "reports": models.Report.objects.filter(resolved=False).count(), - "pending_domains": models.LinkDomain.objects.filter( - status="pending" - ).count(), - "invite_requests": models.InviteRequest.objects.filter( - ignored=False, invite__isnull=True - ).count(), - "user_stats": user_chart.get_chart(start, end, interval), - "status_stats": status_chart.get_chart(start, end, interval), - "register_stats": register_chart.get_chart(start, end, interval), - "works_stats": works_chart.get_chart(start, end, interval), - "email_config_error": email_config_error, - "email_sender": f"{settings.EMAIL_SENDER_NAME}@{settings.EMAIL_SENDER_DOMAIN}", - } + data["email_config_error"] = email_config_error + # pylint: disable=line-too-long + data[ + "email_sender" + ] = f"{settings.EMAIL_SENDER_NAME}@{settings.EMAIL_SENDER_DOMAIN}" # check version try: @@ -135,6 +57,91 @@ class Dashboard(View): return TemplateResponse(request, "settings/dashboard/dashboard.html", data) +def get_charts_and_stats(request): + """Defines the dashbaord charts""" + interval = int(request.GET.get("days", 1)) + now = timezone.now() + start = request.GET.get("start") + if start: + start = timezone.make_aware(parse(start)) + else: + start = now - timedelta(days=6 * interval) + + end = request.GET.get("end") + end = timezone.make_aware(parse(end)) if end else now + start = start.replace(hour=0, minute=0, second=0) + + user_queryset = models.User.objects.filter(local=True) + user_chart = Chart( + queryset=user_queryset, + queries={ + "total": lambda q, s, e: q.filter( + Q(is_active=True) | Q(deactivation_date__gt=e), + created_date__lte=e, + ).count(), + "active": lambda q, s, e: q.filter( + Q(is_active=True) | Q(deactivation_date__gt=e), + created_date__lte=e, + ) + .filter( + last_active_date__gt=e - timedelta(days=31), + ) + .count(), + }, + ) + + status_queryset = models.Status.objects.filter(user__local=True, deleted=False) + status_chart = Chart( + queryset=status_queryset, + queries={ + "total": lambda q, s, e: q.filter( + created_date__gt=s, + created_date__lte=e, + ).count() + }, + ) + + register_chart = Chart( + queryset=user_queryset, + queries={ + "total": lambda q, s, e: q.filter( + created_date__gt=s, + created_date__lte=e, + ).count() + }, + ) + + works_chart = Chart( + queryset=models.Work.objects, + queries={ + "total": lambda q, s, e: q.filter( + created_date__gt=s, + created_date__lte=e, + ).count() + }, + ) + return { + "start": start.strftime("%Y-%m-%d"), + "end": end.strftime("%Y-%m-%d"), + "interval": interval, + "users": user_queryset.filter(is_active=True).count(), + "active_users": user_queryset.filter( + is_active=True, last_active_date__gte=now - timedelta(days=31) + ).count(), + "statuses": status_queryset.count(), + "works": models.Work.objects.count(), + "reports": models.Report.objects.filter(resolved=False).count(), + "pending_domains": models.LinkDomain.objects.filter(status="pending").count(), + "invite_requests": models.InviteRequest.objects.filter( + ignored=False, invite__isnull=True + ).count(), + "user_stats": user_chart.get_chart(start, end, interval), + "status_stats": status_chart.get_chart(start, end, interval), + "register_stats": register_chart.get_chart(start, end, interval), + "works_stats": works_chart.get_chart(start, end, interval), + } + + class Chart: """Data for a chart""" From 5d363da175faa8289d94e51778af281add117178 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 3 Jul 2022 11:05:20 -0700 Subject: [PATCH 06/87] Handle getting edition data as dict or string --- bookwyrm/connectors/inventaire.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index 3d5f913bd..df9b2e43a 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -160,12 +160,13 @@ class Connector(AbstractConnector): def create_edition_from_data(self, work, edition_data, instance=None): """pass in the url as data and then call the version in abstract connector""" - try: - data = self.get_book_data(edition_data) - except ConnectorException: - # who, indeed, knows - return - super().create_edition_from_data(work, data, instance=instance) + if isinstance(edition_data, str): + try: + edition_data = self.get_book_data(edition_data) + except ConnectorException: + # who, indeed, knows + return + super().create_edition_from_data(work, edition_data, instance=instance) def get_cover_url(self, cover_blob, *_): """format the relative cover url into an absolute one: From 7f78140015d6e2c9bac86ba9931911009c407dba Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 13:14:22 -0700 Subject: [PATCH 07/87] Uses library for html cleanup --- bookwyrm/models/fields.py | 6 +-- bookwyrm/sanitize_html.py | 71 ---------------------------- bookwyrm/tests/test_sanitize_html.py | 36 ++++++-------- bookwyrm/utils/sanitizer.py | 25 ++++++++++ bookwyrm/views/status.py | 7 +-- requirements.txt | 1 + 6 files changed, 44 insertions(+), 102 deletions(-) delete mode 100644 bookwyrm/sanitize_html.py create mode 100644 bookwyrm/utils/sanitizer.py diff --git a/bookwyrm/models/fields.py b/bookwyrm/models/fields.py index 62c61cc40..785f3397c 100644 --- a/bookwyrm/models/fields.py +++ b/bookwyrm/models/fields.py @@ -16,7 +16,7 @@ from django.utils.encoding import filepath_to_uri from bookwyrm import activitypub from bookwyrm.connectors import get_image -from bookwyrm.sanitize_html import InputHtmlParser +from bookwyrm.utils.sanitizer import clean from bookwyrm.settings import MEDIA_FULL_URL @@ -497,9 +497,7 @@ class HtmlField(ActivitypubFieldMixin, models.TextField): def field_from_activity(self, value): if not value or value == MISSING: return None - sanitizer = InputHtmlParser() - sanitizer.feed(value) - return sanitizer.get_output() + return clean(value) class ArrayField(ActivitypubFieldMixin, DjangoArrayField): diff --git a/bookwyrm/sanitize_html.py b/bookwyrm/sanitize_html.py deleted file mode 100644 index 4edd2818e..000000000 --- a/bookwyrm/sanitize_html.py +++ /dev/null @@ -1,71 +0,0 @@ -""" html parser to clean up incoming text from unknown sources """ -from html.parser import HTMLParser - - -class InputHtmlParser(HTMLParser): # pylint: disable=abstract-method - """Removes any html that isn't allowed_tagsed from a block""" - - def __init__(self): - HTMLParser.__init__(self) - self.allowed_tags = [ - "p", - "blockquote", - "br", - "b", - "i", - "strong", - "em", - "pre", - "a", - "span", - "ul", - "ol", - "li", - ] - self.allowed_attrs = ["href", "rel", "src", "alt"] - self.tag_stack = [] - self.output = [] - # if the html appears invalid, we just won't allow any at all - self.allow_html = True - - def handle_starttag(self, tag, attrs): - """check if the tag is valid""" - if self.allow_html and tag in self.allowed_tags: - allowed_attrs = " ".join( - f'{a}="{v}"' for a, v in attrs if a in self.allowed_attrs - ) - reconstructed = f"<{tag}" - if allowed_attrs: - reconstructed += " " + allowed_attrs - reconstructed += ">" - self.output.append(("tag", reconstructed)) - self.tag_stack.append(tag) - else: - self.output.append(("data", "")) - - def handle_endtag(self, tag): - """keep the close tag""" - if not self.allow_html or tag not in self.allowed_tags: - self.output.append(("data", "")) - return - - if not self.tag_stack or self.tag_stack[-1] != tag: - # the end tag doesn't match the most recent start tag - self.allow_html = False - self.output.append(("data", "")) - return - - self.tag_stack = self.tag_stack[:-1] - self.output.append(("tag", f"")) - - def handle_data(self, data): - """extract the answer, if we're in an answer tag""" - self.output.append(("data", data)) - - def get_output(self): - """convert the output from a list of tuples to a string""" - if self.tag_stack: - self.allow_html = False - if not self.allow_html: - return "".join(v for (k, v) in self.output if k == "data") - return "".join(v for (k, v) in self.output) diff --git a/bookwyrm/tests/test_sanitize_html.py b/bookwyrm/tests/test_sanitize_html.py index 5814f2207..ecdd69793 100644 --- a/bookwyrm/tests/test_sanitize_html.py +++ b/bookwyrm/tests/test_sanitize_html.py @@ -1,7 +1,7 @@ """ make sure only valid html gets to the app """ from django.test import TestCase -from bookwyrm.sanitize_html import InputHtmlParser +from bookwyrm.utils.sanitizer import clean class Sanitizer(TestCase): @@ -10,53 +10,45 @@ class Sanitizer(TestCase): def test_no_html(self): """just text""" input_text = "no html " - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual(input_text, output) def test_valid_html(self): """leave the html untouched""" input_text = "yes html" - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual(input_text, output) def test_valid_html_attrs(self): """and don't remove useful attributes""" input_text = 'yes html' - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual(input_text, output) def test_valid_html_invalid_attrs(self): """do remove un-approved attributes""" input_text = 'yes html' - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual(output, 'yes html') def test_invalid_html(self): """remove all html when the html is malformed""" input_text = "yes html" - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual("yes html", output) input_text = "yes html " - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual("yes html ", output) def test_disallowed_html(self): """remove disallowed html but keep allowed html""" input_text = "
yes html
" - parser = InputHtmlParser() - parser.feed(input_text) - output = parser.get_output() + output = clean(input_text) self.assertEqual(" yes html", output) + + def test_escaped_bracket(self): + """remove > and <""" + input_text = "<dev>hi</div>" + output = clean(input_text) + self.assertEqual("hi", output) diff --git a/bookwyrm/utils/sanitizer.py b/bookwyrm/utils/sanitizer.py new file mode 100644 index 000000000..676921949 --- /dev/null +++ b/bookwyrm/utils/sanitizer.py @@ -0,0 +1,25 @@ +"""Clean user-provided text""" +import bleach + + +def clean(input_text): + """Run through "bleach" """ + return bleach.clean( + input_text, + tags=[ + "p", + "blockquote", + "br", + "b", + "i", + "strong", + "em", + "pre", + "a", + "span", + "ul", + "ol", + "li", + ], + attributes=["href", "rel", "src", "alt"], + ) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index 670ea5717..0dd9e0f80 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -16,9 +16,8 @@ from django.views.decorators.http import require_POST from markdown import markdown from bookwyrm import forms, models -from bookwyrm.sanitize_html import InputHtmlParser from bookwyrm.settings import DOMAIN -from bookwyrm.utils import regex +from bookwyrm.utils import regex, sanitizer from .helpers import handle_remote_webfinger, is_api_request from .helpers import load_date_in_user_tz_as_utc @@ -268,6 +267,4 @@ def to_markdown(content): content = format_links(content) content = markdown(content) # sanitize resulting html - sanitizer = InputHtmlParser() - sanitizer.feed(content) - return sanitizer.get_output() + return sanitizer.clean(content) diff --git a/requirements.txt b/requirements.txt index 0155782cc..3d9f686ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ aiohttp==3.8.1 +bleach==5.0.1 celery==5.2.2 colorthief==0.2.1 Django==3.2.13 From 62aa4bf869625186e0c7f24d259cfb2caac6f4d3 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 13:21:18 -0700 Subject: [PATCH 08/87] Tick version number --- bookwyrm/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index dc0d71f30..e67fb5e1e 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -11,7 +11,7 @@ from django.utils.translation import gettext_lazy as _ env = Env() env.read_env() DOMAIN = env("DOMAIN") -VERSION = "0.4.0" +VERSION = "0.4.1" RELEASE_API = env( "RELEASE_API", From 13376f89708fea095d97fdeed6950da701aa52d9 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 13:24:13 -0700 Subject: [PATCH 09/87] Catches missing reference to previous sanitizer --- bookwyrm/status.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bookwyrm/status.py b/bookwyrm/status.py index 09fbdc06e..de7682ee7 100644 --- a/bookwyrm/status.py +++ b/bookwyrm/status.py @@ -2,15 +2,13 @@ from django.db import transaction from bookwyrm import models -from bookwyrm.sanitize_html import InputHtmlParser +from bookwyrm.utils import sanitizer def create_generated_note(user, content, mention_books=None, privacy="public"): """a note created by the app about user activity""" # sanitize input html - parser = InputHtmlParser() - parser.feed(content) - content = parser.get_output() + content = sanitizer.clean(content) with transaction.atomic(): # create but don't save From 70beb24d95657d5186d73d71599088046dd5e891 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 13:34:09 -0700 Subject: [PATCH 10/87] Removed misleading test This wasn't really testing what I wanted it to. --- bookwyrm/tests/test_sanitize_html.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bookwyrm/tests/test_sanitize_html.py b/bookwyrm/tests/test_sanitize_html.py index ecdd69793..ca1643e8f 100644 --- a/bookwyrm/tests/test_sanitize_html.py +++ b/bookwyrm/tests/test_sanitize_html.py @@ -46,9 +46,3 @@ class Sanitizer(TestCase): input_text = "
yes html
" output = clean(input_text) self.assertEqual(" yes html", output) - - def test_escaped_bracket(self): - """remove > and <""" - input_text = "<dev>hi</div>" - output = clean(input_text) - self.assertEqual("hi", output) From 9d9b7f366a2b6b711c5b6f3609286a26f98486de Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 13:45:28 -0700 Subject: [PATCH 11/87] Use "strip" in bleach This removes forbidden html, rather than leaving them in place but unrendered. --- bookwyrm/tests/test_sanitize_html.py | 6 +++--- bookwyrm/utils/sanitizer.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bookwyrm/tests/test_sanitize_html.py b/bookwyrm/tests/test_sanitize_html.py index ca1643e8f..449acdafb 100644 --- a/bookwyrm/tests/test_sanitize_html.py +++ b/bookwyrm/tests/test_sanitize_html.py @@ -32,14 +32,14 @@ class Sanitizer(TestCase): self.assertEqual(output, 'yes html') def test_invalid_html(self): - """remove all html when the html is malformed""" + """don't allow malformed html""" input_text = "yes html" output = clean(input_text) - self.assertEqual("yes html", output) + self.assertEqual("yes html", output) input_text = "yes html " output = clean(input_text) - self.assertEqual("yes html ", output) + self.assertEqual("yes html ", output) def test_disallowed_html(self): """remove disallowed html but keep allowed html""" diff --git a/bookwyrm/utils/sanitizer.py b/bookwyrm/utils/sanitizer.py index 676921949..f6c87358c 100644 --- a/bookwyrm/utils/sanitizer.py +++ b/bookwyrm/utils/sanitizer.py @@ -22,4 +22,5 @@ def clean(input_text): "li", ], attributes=["href", "rel", "src", "alt"], + strip=True, ) From 5672c73ac420d152858aa57a8e1a2cf132bc0086 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 4 Jul 2022 17:32:55 -0700 Subject: [PATCH 12/87] Show deleted users as red in the user list It can be hard to differentiate at a glance if a user is deleted or suspended -- without this, you would have to read the deactivation reason. By making deletions (moderator and self deletions) red, it's clear at a glance if an account has been permanently deleted or just temporarily suspended. --- bookwyrm/templates/settings/users/user_admin.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bookwyrm/templates/settings/users/user_admin.html b/bookwyrm/templates/settings/users/user_admin.html index 4144f0bde..059e064db 100644 --- a/bookwyrm/templates/settings/users/user_admin.html +++ b/bookwyrm/templates/settings/users/user_admin.html @@ -72,6 +72,12 @@ {% trans "Active" %} + {% elif user.deactivation_reason == "moderator_deletion" or user.deactivation_reason == "self_deletion" %} + + {% trans "Deleted" %} + ({{ user.get_deactivation_reason_display }}) {% else %}