Post-process status.content field to change hashtag URLs

Since the status content already contains rendered HTML when we receive an
ActivityPub inbox message, it contains links to the mentioned hashtags on the
originating instance.

To fix this on the receiving instance we need to post-process the status content
after successfully storing the status and its many-to-many fields (the one we're
interested in is `mention_hashtags`). Post-processing means that we run a regex against the
content to find the anchor tags linking to the originating hashtag and replace the
`href` attribute with the URL to the hashtag page on the receiving (local) instance.
This commit is contained in:
Christof Dorner 2023-02-17 19:24:42 +01:00
parent 0fd49d2aea
commit 276b255f32
2 changed files with 106 additions and 2 deletions

View file

@ -1,9 +1,12 @@
""" note serializer and children thereof """
from dataclasses import dataclass, field
from typing import Dict, List
from django.apps import apps
import re
from .base_activity import ActivityObject, Link
from django.apps import apps
from django.db import IntegrityError, transaction
from .base_activity import ActivityObject, ActivitySerializerError, Link
from .image import Document
@ -38,6 +41,46 @@ class Note(ActivityObject):
updated: str = None
type: str = "Note"
# pylint: disable=too-many-arguments
def to_model(
    self,
    model=None,
    instance=None,
    allow_create=True,
    save=True,
    overwrite=True,
    allow_external_connections=True,
):
    """convert to a model object, then point hashtag links at local URLs

    Delegates to the parent ``to_model`` with the same arguments. Afterwards
    every ``<a href=".../hashtag/...">NAME</a>`` anchor in the instance's
    ``content`` whose text equals the name of one of the instance's
    ``mention_hashtags`` gets its ``href`` replaced by that hashtag's
    ``remote_id``. If the content changed and ``save`` is true, only the
    ``content`` field is saved again (without broadcasting).

    Returns the instance produced by the parent call, which may be ``None``.
    Raises ``ActivitySerializerError`` if saving the rewritten content fails
    with an ``IntegrityError``.
    """
    instance = super().to_model(
        model, instance, allow_create, save, overwrite, allow_external_connections
    )

    if instance is None:
        return instance

    # NOTE(review): presumably content may be empty or None for some status
    # types -- confirm against the model. There is nothing to rewrite then,
    # and re.sub would raise a TypeError on None.
    if not instance.content:
        return instance

    # Replace links to hashtags in content with local URLs
    changed_content = False
    for hashtag in instance.mention_hashtags.all():
        # The name comes from a remote server, so it must be escaped before
        # being embedded in a pattern (think of "#C++" or "#a.b").
        pattern = (
            rf'(<a href=")[^"]*/hashtag/[^"]*(">{re.escape(hashtag.name)}</a>)'
        )
        # A callable replacement inserts the URL literally; a template string
        # would interpret backslashes and group references inside it.
        updated_content = re.sub(
            pattern,
            lambda match, url=hashtag.remote_id: (
                f"{match.group(1)}{url}{match.group(2)}"
            ),
            instance.content,
        )
        if instance.content != updated_content:
            instance.content = updated_content
            changed_content = True

    if not save or not changed_content:
        return instance

    with transaction.atomic():
        try:
            instance.save(broadcast=False, update_fields=["content"])
        except IntegrityError as e:
            # keep the database error attached as the explicit cause
            raise ActivitySerializerError(e) from e

    return instance
@dataclass(init=False)
class Article(Note):

View file

@ -0,0 +1,61 @@
""" tests functionality specifically for the Note ActivityPub dataclass"""
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import activitypub
from bookwyrm import models
class Note(TestCase):
    """checks for the model-linked ActivityPub dataclass behind Note-like types"""

    # pylint: disable=invalid-name
    def setUp(self):
        """prepare one local user and one edition that the tests rely on"""
        # keep the background task hooks patched out while the user is set up
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ), patch("bookwyrm.lists_stream.populate_lists_task.delay"):
            self.user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "mouseword", local=True, localname="mouse"
            )
            self.user.remote_id = "https://test-instance.org/user/critic"
            self.user.save(broadcast=False, update_fields=["remote_id"])

        self.book = models.Edition.objects.create(
            title="Test Edition", remote_id="http://book.com/book"
        )

    def test_to_model_hashtag_postprocess_content(self):
        """hashtag anchors in incoming content must end up linking to local URLs"""
        book_tag = {
            "type": "Edition",
            "name": "gerald j. books",
            "href": "http://book.com/book",
        }
        hashtag_tag = {
            "type": "Hashtag",
            "name": "#BookClub",
            "href": "https://test-instance.org/hashtag/2",
        }
        # content as the originating instance rendered it: the anchor still
        # points at the hashtag page on that instance
        incoming_html = (
            "<p>This is interesting "
            + '<a href="https://test-instance.org/hashtag/2">#BookClub</a></p>'
        )

        activity = activitypub.Comment(
            id="https://test-instance.org/user/critic/comment/42",
            attributedTo=self.user.remote_id,
            inReplyToBook=self.book.remote_id,
            content=incoming_html,
            published="2023-02-17T23:12:59.398030+00:00",
            to=[],
            cc=[],
            tag=[book_tag, hashtag_tag],
        )

        status = activity.to_model(model=models.Status)
        self.assertIsNotNone(status)

        local_hashtag = models.Hashtag.objects.filter(name="#BookClub").first()
        self.assertIsNotNone(local_hashtag)

        expected_html = (
            f'<p>This is interesting <a href="{local_hashtag.remote_id}">#BookClub</a></p>'
        )
        self.assertEqual(status.content, expected_html)