From 276b255f32690165afe5ec9ca6bd6e18b04eaf9b Mon Sep 17 00:00:00 2001 From: Christof Dorner Date: Fri, 17 Feb 2023 19:24:42 +0100 Subject: [PATCH] Post-process status.content field to change hashtag URLs Since the status content already contains rendered HTML when we receive an ActivityPub inbox message, it contains links to the mentioned hashtags on the originating instance. To fix this on the receiving instance we need to post-process the status content after successfully storing the status and its many-to-many fields (the one we're interested in is `mention_hashtags`). Post-processing means that we run a regex against the content to find the anchor tags linking to the originating hashtag and replace the `href` attribute with the URL to the hashtag page on the receiving (local) instance. --- bookwyrm/activitypub/note.py | 47 ++++++++++++++++++- bookwyrm/tests/activitypub/test_note.py | 61 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 bookwyrm/tests/activitypub/test_note.py diff --git a/bookwyrm/activitypub/note.py b/bookwyrm/activitypub/note.py index eb18b8b8a..dcd6e5dba 100644 --- a/bookwyrm/activitypub/note.py +++ b/bookwyrm/activitypub/note.py @@ -1,9 +1,12 @@ """ note serializer and children thereof """ from dataclasses import dataclass, field from typing import Dict, List -from django.apps import apps +import re -from .base_activity import ActivityObject, Link +from django.apps import apps +from django.db import IntegrityError, transaction + +from .base_activity import ActivityObject, ActivitySerializerError, Link from .image import Document @@ -38,6 +41,46 @@ class Note(ActivityObject): updated: str = None type: str = "Note" + # pylint: disable=too-many-arguments + def to_model( + self, + model=None, + instance=None, + allow_create=True, + save=True, + overwrite=True, + allow_external_connections=True, + ): + instance = super().to_model( + model, instance, allow_create, save, overwrite, allow_external_connections + ) 
+ + if instance is None: + return instance + + # Replace links to hashtags in content with local URLs + changed_content = False + for hashtag in instance.mention_hashtags.all(): + updated_content = re.sub( + rf'({hashtag.name})', + rf"\1{hashtag.remote_id}\2", + instance.content, + ) + if instance.content != updated_content: + instance.content = updated_content + changed_content = True + + if not save or not changed_content: + return instance + + with transaction.atomic(): + try: + instance.save(broadcast=False, update_fields=["content"]) + except IntegrityError as e: + raise ActivitySerializerError(e) + + return instance + @dataclass(init=False) class Article(Note): diff --git a/bookwyrm/tests/activitypub/test_note.py b/bookwyrm/tests/activitypub/test_note.py new file mode 100644 index 000000000..76453ea5f --- /dev/null +++ b/bookwyrm/tests/activitypub/test_note.py @@ -0,0 +1,61 @@ +""" tests functionality specifically for the Note ActivityPub dataclass""" +from unittest.mock import patch + +from django.test import TestCase + +from bookwyrm import activitypub +from bookwyrm import models + + +class Note(TestCase): + """the model-linked ActivityPub dataclass for Note-based types""" + + # pylint: disable=invalid-name + def setUp(self): + """create a shared user""" + with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch( + "bookwyrm.activitystreams.populate_stream_task.delay" + ), patch("bookwyrm.lists_stream.populate_lists_task.delay"): + self.user = models.User.objects.create_user( + "mouse", "mouse@mouse.mouse", "mouseword", local=True, localname="mouse" + ) + self.user.remote_id = "https://test-instance.org/user/critic" + self.user.save(broadcast=False, update_fields=["remote_id"]) + + self.book = models.Edition.objects.create( + title="Test Edition", remote_id="http://book.com/book" + ) + + def test_to_model_hashtag_postprocess_content(self): + """test that hashtag links are post-processed and link to local URLs""" + update_data = 
activitypub.Comment( + id="https://test-instance.org/user/critic/comment/42", + attributedTo=self.user.remote_id, + inReplyToBook=self.book.remote_id, + content="

This is interesting " + + '#BookClub

', + published="2023-02-17T23:12:59.398030+00:00", + to=[], + cc=[], + tag=[ + { + "type": "Edition", + "name": "gerald j. books", + "href": "http://book.com/book", + }, + { + "type": "Hashtag", + "name": "#BookClub", + "href": "https://test-instance.org/hashtag/2", + }, + ], + ) + + instance = update_data.to_model(model=models.Status) + self.assertIsNotNone(instance) + hashtag = models.Hashtag.objects.filter(name="#BookClub").first() + self.assertIsNotNone(hashtag) + self.assertEqual( + instance.content, + f'

This is interesting #BookClub

', + )