Merge pull request #2524 from chdorner/feature/tag-support

Initial hashtag support
This commit is contained in:
Mouse Reeve 2023-03-12 16:37:39 -07:00 committed by GitHub
commit 12af5992a3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 635 additions and 13 deletions

View file

@ -3,7 +3,7 @@ import inspect
import sys
from .base_activity import ActivityEncoder, Signature, naive_parse
from .base_activity import Link, Mention
from .base_activity import Link, Mention, Hashtag
from .base_activity import ActivitySerializerError, resolve_remote_id
from .image import Document, Image
from .note import Note, GeneratedNote, Article, Comment, Quotation

View file

@ -186,7 +186,7 @@ class ActivityObject:
# add many to many fields, which have to be set post-save
for field in instance.many_to_many_fields:
# mention books/users, for example
# mention books/users/hashtags, for example
field.set_field_from_activity(
instance,
self,
@ -426,3 +426,10 @@ class Mention(Link):
"""a subtype of Link for mentioning an actor"""
type: str = "Mention"
@dataclass(init=False)
class Hashtag(Link):
    """Link subtype whose activity type is "Hashtag" (a tag entry naming a hashtag)"""

    # value written to / matched against the "type" key of a tag entry
    type: str = "Hashtag"

View file

@ -1,9 +1,12 @@
""" note serializer and children thereof """
from dataclasses import dataclass, field
from typing import Dict, List
from django.apps import apps
import re
from .base_activity import ActivityObject, Link
from django.apps import apps
from django.db import IntegrityError, transaction
from .base_activity import ActivityObject, ActivitySerializerError, Link
from .image import Document
@ -38,6 +41,47 @@ class Note(ActivityObject):
updated: str = None
type: str = "Note"
# pylint: disable=too-many-arguments
def to_model(
    self,
    model=None,
    instance=None,
    allow_create=True,
    save=True,
    overwrite=True,
    allow_external_connections=True,
):
    """Convert the activity to a model instance, then localize hashtag links.

    All arguments are forwarded unchanged to the parent ``to_model``. After
    the parent has built the instance, every ``<a ... data-mention="hashtag">``
    link in the content whose text matches one of the instance's hashtags
    (case-insensitively) gets its ``href`` replaced by that hashtag's
    ``remote_id``.

    Args:
        save: when true and the content was rewritten, the new content is
            persisted with ``update_fields=["content"]`` and no broadcast.
            When false the rewritten content is only set on the instance.

    Returns:
        The model instance, or ``None`` if the parent returned ``None``.

    Raises:
        ActivitySerializerError: if saving the rewritten content raises an
            ``IntegrityError``.
    """
    instance = super().to_model(
        model, instance, allow_create, save, overwrite, allow_external_connections
    )

    if instance is None:
        return instance

    original_content = instance.content
    if not original_content:
        # nothing to rewrite; also avoids passing None to re.sub
        return instance

    # Replace links to hashtags in content with local URLs
    content = original_content
    for hashtag in instance.mention_hashtags.all():
        # the name comes from (possibly remote) user input, so it must be
        # escaped before being embedded in a pattern: "#c++" is not a regex
        pattern = re.compile(
            r'(<a href=")[^"]*(" data-mention="hashtag">'
            + re.escape(hashtag.name)
            + r"</a>)",
            flags=re.IGNORECASE,
        )
        # a callable replacement keeps backslashes in the URL literal
        content = pattern.sub(
            lambda match, href=hashtag.remote_id: (
                f"{match.group(1)}{href}{match.group(2)}"
            ),
            content,
        )

    if content == original_content:
        return instance

    instance.content = content
    if not save:
        return instance

    with transaction.atomic():
        try:
            instance.save(broadcast=False, update_fields=["content"])
        except IntegrityError as e:
            raise ActivitySerializerError(e) from e

    return instance
@dataclass(init=False)
class Article(Note):

View file

@ -0,0 +1,53 @@
# Generated by Django 3.2.16 on 2022-12-17 19:28
import bookwyrm.models.fields
import django.contrib.postgres.fields.citext
from django.db import migrations, models
# Schema migration: creates the Hashtag table and links statuses to hashtags.
class Migration(migrations.Migration):
# must be applied after migration 0174 of the bookwyrm app
dependencies = [
("bookwyrm", "0174_auto_20230130_1240"),
]
operations = [
# new Hashtag model: id, created/updated timestamps, remote_id and name
migrations.CreateModel(
name="Hashtag",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("created_date", models.DateTimeField(auto_now_add=True)),
("updated_date", models.DateTimeField(auto_now=True)),
(
"remote_id",
bookwyrm.models.fields.RemoteIdField(
max_length=255,
null=True,
validators=[bookwyrm.models.fields.validate_remote_id],
),
),
(
# stored in a citext (case-insensitive text) column
"name",
django.contrib.postgres.fields.citext.CICharField(max_length=256),
),
],
options={
"abstract": False,
},
),
# relation from Status to Hashtag, exposed as Status.mention_hashtags
migrations.AddField(
model_name="status",
name="mention_hashtags",
field=bookwyrm.models.fields.TagField(
related_name="mention_hashtag", to="bookwyrm.Hashtag"
),
),
]

View file

@ -34,6 +34,8 @@ from .antispam import EmailBlocklist, IPBlocklist, AutoMod, automod_task
from .notification import Notification
from .hashtag import Hashtag
cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
activity_models = {
c[1].activity_serializer.__name__: c[1]

View file

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import dateutil.parser
from dateutil.parser import ParserError
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
from django.contrib.postgres.fields import CICharField as DjangoCICharField
from django.core.exceptions import ValidationError
from django.db import models
from django.forms import ClearableFileInput, ImageField as DjangoImageField
@ -388,6 +389,15 @@ class TagField(ManyToManyField):
if tag_type != self.related_model.activity_serializer.type:
# tags can contain multiple types
continue
if tag_type == "Hashtag":
# we already have all data to create hashtags,
# no need to fetch from remote
item = self.related_model.activity_serializer(**link_json)
hashtag = item.to_model(model=self.related_model, save=True)
items.append(hashtag)
else:
# for other tag types we fetch them remotely
items.append(
activitypub.resolve_remote_id(
link.href,
@ -546,6 +556,10 @@ class CharField(ActivitypubFieldMixin, models.CharField):
"""activitypub-aware char field"""
class CICharField(ActivitypubFieldMixin, DjangoCICharField):
    """activitypub-aware version of the postgres CICharField"""
class URLField(ActivitypubFieldMixin, models.URLField):
"""activitypub-aware url field"""

View file

@ -0,0 +1,23 @@
""" model for tags """
from bookwyrm import activitypub
from .activitypub_mixin import ActivitypubMixin
from .base_model import BookWyrmModel
from .fields import CICharField
class Hashtag(ActivitypubMixin, BookWyrmModel):
    """a hashtag that statuses can mention"""

    # the tag text; declared with a CICharField and marked as the
    # deduplication field
    name = CICharField(
        activitypub_field="name",
        deduplication_field=True,
        max_length=256,
        null=False,
        blank=False,
    )

    activity_serializer = activitypub.Hashtag
    name_field = "name"

    def __repr__(self):
        """debug representation showing class, id and name"""
        cls = self.__class__
        return f"<{cls} id={self.id} name={self.name}>"

View file

@ -34,6 +34,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
raw_content = models.TextField(blank=True, null=True)
mention_users = fields.TagField("User", related_name="mention_user")
mention_books = fields.TagField("Edition", related_name="mention_book")
mention_hashtags = fields.TagField("Hashtag", related_name="mention_hashtag")
local = models.BooleanField(default=True)
content_warning = fields.CharField(
max_length=500, blank=True, null=True, activitypub_field="summary"

View file

@ -0,0 +1,32 @@
{% extends "layout.html" %}
{% load i18n %}
{% block title %}{{ hashtag }}{% endblock %}
{% block content %}
<div class="container is-max-desktop">
<section class="block">
<header class="block content has-text-centered">
<h1 class="title">{{ hashtag }}</h1>
<p class="subtitle">
{% blocktrans trimmed with site_name=site.name %}
See tagged statuses in the local {{ site_name }} community
{% endblocktrans %}
</p>
</header>
{% for activity in activities %}
<div class="block">
{% include 'snippets/status/status.html' with status=activity %}
</div>
{% endfor %}
{% if not activities %}
<div class="block">
<p>{% trans "No activities for this hashtag yet!" %}</p>
</div>
{% endif %}
{% include 'snippets/pagination.html' with page=activities path=path %}
</section>
</div>
{% endblock %}

View file

@ -183,12 +183,21 @@ class BaseActivity(TestCase):
"name": "gerald j. books",
"href": "http://book.com/book",
},
{
"type": "Hashtag",
"name": "#BookClub",
"href": "http://example.com/tags/BookClub",
},
],
)
update_data.to_model(model=models.Status, instance=status)
self.assertEqual(status.mention_users.first(), self.user)
self.assertEqual(status.mention_books.first(), book)
hashtag = models.Hashtag.objects.filter(name="#BookClub").first()
self.assertIsNotNone(hashtag)
self.assertEqual(status.mention_hashtags.first(), hashtag)
@responses.activate
def test_to_model_one_to_many(self, *_):
"""these are reversed relationships, where the secondary object

View file

@ -0,0 +1,64 @@
""" tests functionality specifically for the Note ActivityPub dataclass"""
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import activitypub
from bookwyrm import models
class Note(TestCase):
    """checks for the model-linked ActivityPub dataclass behind Note-based types"""

    # pylint: disable=invalid-name
    def setUp(self):
        """create the user and the edition every test relies on"""
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"):
            with patch("bookwyrm.activitystreams.populate_stream_task.delay"):
                with patch("bookwyrm.lists_stream.populate_lists_task.delay"):
                    self.user = models.User.objects.create_user(
                        "mouse",
                        "mouse@mouse.mouse",
                        "mouseword",
                        local=True,
                        localname="mouse",
                    )
                    self.user.remote_id = "https://test-instance.org/user/critic"
                    self.user.save(broadcast=False, update_fields=["remote_id"])

        self.book = models.Edition.objects.create(
            title="Test Edition", remote_id="http://book.com/book"
        )

    def test_to_model_hashtag_postprocess_content(self):
        """hashtag links in incoming content must be rewritten to the local URL"""
        book_tag = {
            "type": "Edition",
            "name": "gerald j. books",
            "href": "http://book.com/book",
        }
        hashtag_tag = {
            "type": "Hashtag",
            "name": "#BookClub",
            "href": "https://test-instance.org/hashtag/2",
        }
        incoming_content = (
            "<p>This is interesting "
            + '<a href="https://test-instance.org/hashtag/2" data-mention="hashtag">'
            + "#bookclub</a></p>"
        )
        activity = activitypub.Comment(
            id="https://test-instance.org/user/critic/comment/42",
            attributedTo=self.user.remote_id,
            inReplyToBook=self.book.remote_id,
            content=incoming_content,
            published="2023-02-17T23:12:59.398030+00:00",
            to=[],
            cc=[],
            tag=[book_tag, hashtag_tag],
        )

        status = activity.to_model(model=models.Status)
        self.assertIsNotNone(status)

        hashtag = models.Hashtag.objects.filter(name="#BookClub").first()
        self.assertIsNotNone(hashtag)

        expected_content = (
            "<p>This is interesting "
            + f'<a href="{hashtag.remote_id}" data-mention="hashtag">'
            + "#bookclub</a></p>"
        )
        self.assertEqual(status.content, expected_content)

View file

@ -265,7 +265,7 @@ class Status(TestCase):
self.assertEqual(activity["attachment"][0]["type"], "Document")
self.assertTrue(
re.match(
r"https:\/\/your.domain.here\/images\/covers\/test_[A-z0-9]+.jpg",
r"https:\/\/your.domain.here\/images\/covers\/test(_[A-z0-9]+)?.jpg",
activity["attachment"][0]["url"],
)
)

View file

@ -0,0 +1,197 @@
""" tests for hashtag view """
from unittest.mock import patch
from django.contrib.auth.models import AnonymousUser
from django.http import Http404
from django.template.response import TemplateResponse
from django.test import TestCase
from django.test.client import RequestFactory
from bookwyrm import models, views
from bookwyrm.tests.validate_html import validate_html
class HashtagView(TestCase):
    """hashtag view"""

    def setUp(self):
        """create three local users, a book, and one status tagged #BookClub"""
        self.factory = RequestFactory()
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ), patch("bookwyrm.lists_stream.populate_lists_task.delay"):
            self.local_user = models.User.objects.create_user(
                "mouse@local.com",
                "mouse@mouse.com",
                "mouseword",
                local=True,
                localname="mouse",
                remote_id="https://example.com/users/mouse",
            )
            self.follower_user = models.User.objects.create_user(
                "follower@local.com",
                "follower@email.com",
                "followerword",
                local=True,
                localname="follower",
                remote_id="https://example.com/users/follower",
            )
            self.local_user.followers.add(self.follower_user)
            self.other_user = models.User.objects.create_user(
                "other@local.com",
                "other@email.com",
                "otherword",
                local=True,
                localname="other",
                remote_id="https://example.com/users/other",
            )

        self.work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=self.work,
        )

        self.hashtag_bookclub = models.Hashtag.objects.create(name="#BookClub")
        with patch(
            "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"
        ), patch("bookwyrm.activitystreams.add_status_task.delay"):
            self.statuses_bookclub = [
                models.Comment.objects.create(
                    book=self.book, user=self.local_user, content="#BookClub"
                ),
            ]
        for status in self.statuses_bookclub:
            status.mention_hashtags.add(self.hashtag_bookclub)

        # Use an instance rather than the AnonymousUser class: setting
        # attributes on the class itself mutates global state shared with
        # every other test. An instance already reports
        # is_authenticated == False.
        self.anonymous_user = AnonymousUser()
        models.SiteSettings.objects.create()

    def _create_tagged_status(self, privacy):
        """create a #test hashtag and a comment by local_user that mentions it

        Returns a ``(hashtag, status)`` tuple.
        """
        hashtag = models.Hashtag.objects.create(name="#test")
        with patch(
            "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"
        ), patch("bookwyrm.activitystreams.add_status_task.delay"):
            status = models.Comment.objects.create(
                user=self.local_user,
                book=self.book,
                content="#test",
                privacy=privacy,
            )
            status.mention_hashtags.add(hashtag)
        return hashtag, status

    def _activities_for(self, user, hashtag):
        """request the hashtag page as ``user`` and return its activities page"""
        request = self.factory.get("")
        request.user = user
        result = views.Hashtag.as_view()(request, hashtag.id)
        return result.context_data["activities"]

    def test_hashtag_page(self):
        """just make sure it loads"""
        view = views.Hashtag.as_view()
        request = self.factory.get("")
        request.user = self.local_user

        result = view(request, self.hashtag_bookclub.id)

        self.assertIsInstance(result, TemplateResponse)
        validate_html(result.render())
        self.assertEqual(result.status_code, 200)
        self.assertEqual(len(result.context_data["activities"]), 1)

    def test_privacy_direct(self):
        """ensure statuses with privacy set to direct are always filtered out"""
        hashtag, status = self._create_tagged_status("direct")

        for user in [
            self.local_user,
            self.follower_user,
            self.other_user,
            self.anonymous_user,
        ]:
            self.assertNotIn(status, self._activities_for(user, hashtag))

    def test_privacy_unlisted(self):
        """ensure statuses with privacy set to unlisted are always filtered out"""
        hashtag, status = self._create_tagged_status("unlisted")

        for user in [
            self.local_user,
            self.follower_user,
            self.other_user,
            self.anonymous_user,
        ]:
            self.assertNotIn(status, self._activities_for(user, hashtag))

    def test_privacy_following(self):
        """ensure only creator and followers can see statuses with privacy
        set to followers"""
        hashtag, status = self._create_tagged_status("followers")

        for user in [self.local_user, self.follower_user]:
            self.assertIn(status, self._activities_for(user, hashtag))

        for user in [self.other_user, self.anonymous_user]:
            self.assertNotIn(status, self._activities_for(user, hashtag))

    def test_not_found(self):
        """make sure 404 is rendered"""
        view = views.Hashtag.as_view()
        request = self.factory.get("")
        request.user = self.local_user

        with self.assertRaises(Http404):
            view(request, 42)

    def test_empty(self):
        """hashtag without any statuses should still render"""
        view = views.Hashtag.as_view()
        request = self.factory.get("")
        request.user = self.local_user

        hashtag_empty = models.Hashtag.objects.create(name="#empty")
        result = view(request, hashtag_empty.id)

        self.assertIsInstance(result, TemplateResponse)
        validate_html(result.render())
        self.assertEqual(result.status_code, 200)
        self.assertEqual(len(result.context_data["activities"]), 0)

    def test_logged_out(self):
        """make sure it loads all activities"""
        view = views.Hashtag.as_view()
        request = self.factory.get("")
        request.user = self.anonymous_user

        result = view(request, self.hashtag_bookclub.id)

        self.assertIsInstance(result, TemplateResponse)
        validate_html(result.render())
        self.assertEqual(result.status_code, 200)
        self.assertEqual(len(result.context_data["activities"]), 1)

View file

@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase
from django.test.client import RequestFactory
from bookwyrm import forms, models, views
from bookwyrm.views.status import find_mentions
from bookwyrm.views.status import find_mentions, find_or_create_hashtags
from bookwyrm.settings import DOMAIN
from bookwyrm.tests.validate_html import validate_html
@ -95,6 +95,7 @@ class StatusViews(TestCase):
local=True,
localname="nutria",
)
self.existing_hashtag = models.Hashtag.objects.create(name="#existing")
with patch("bookwyrm.models.user.set_remote_server"):
self.remote_user = models.User.objects.create_user(
"rat",
@ -333,6 +334,71 @@ class StatusViews(TestCase):
result = find_mentions(self.local_user, "@beep@beep.com")
self.assertEqual(result, {})
def test_create_status_hashtags(self, *_):
    """#mention a hashtag in a post

    Posts a comment containing one hashtag that already exists (written in
    a different case) and one new hashtag, then checks that both are
    attached to the status and turned into links in its content.
    """
    view = views.CreateStatus.as_view()
    form = forms.CommentForm(
        {
            "content": "this is an #EXISTING hashtag but all uppercase, "
            + "this one is #NewTag.",
            "user": self.local_user.id,
            "book": self.book.id,
            "privacy": "public",
        }
    )
    request = self.factory.post("", form.data)
    request.user = self.local_user

    view(request, "comment")
    status = models.Status.objects.get()

    hashtags = models.Hashtag.objects.all()
    self.assertEqual(len(hashtags), 2)
    # neither queryset has a guaranteed row order, so compare the
    # members without regard to order
    self.assertCountEqual(status.mention_hashtags.all(), hashtags)

    hashtag_existing = models.Hashtag.objects.filter(name="#existing").first()
    hashtag_new = models.Hashtag.objects.filter(name="#NewTag").first()
    self.assertEqual(
        status.content,
        "<p>this is an "
        + f'<a href="{hashtag_existing.remote_id}" data-mention="hashtag">'
        + "#EXISTING</a> hashtag but all uppercase, this one is "
        + f'<a href="{hashtag_new.remote_id}" data-mention="hashtag">'
        + "#NewTag</a>.</p>",
    )
def test_find_or_create_hashtags(self, *_):
    """detect and look up #hashtags

    Covers text without hashtags, an existing hashtag in several
    positions, creation of a new hashtag, and non-ASCII hashtag names.
    """
    result = find_or_create_hashtags("no hashtag to be found here")
    self.assertEqual(result, {})

    result = find_or_create_hashtags("#existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_or_create_hashtags("leading text #existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_or_create_hashtags("leading #existing trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    # precondition: the tag must not exist yet. Names are stored with
    # their leading "#", so that is the name that has to be looked up.
    self.assertIsNone(models.Hashtag.objects.filter(name="#new").first())
    result = find_or_create_hashtags("leading #new trailing")
    new_hashtag = models.Hashtag.objects.filter(name="#new").first()
    self.assertIsNotNone(new_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    result = find_or_create_hashtags("leading #existing #new trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    result = find_or_create_hashtags("#Braunbär")
    hashtag = models.Hashtag.objects.filter(name="#Braunbär").first()
    self.assertEqual(result["#Braunbär"], hashtag)

    result = find_or_create_hashtags("#ひぐま")
    hashtag = models.Hashtag.objects.filter(name="#ひぐま").first()
    self.assertEqual(result["#ひぐま"], hashtag)
def test_format_links_simple_url(self, *_):
"""find and format urls into a tags"""
url = "http://www.fish.com/"

View file

@ -356,6 +356,15 @@ urlpatterns = [
name="notifications",
),
re_path(r"^directory/?", views.Directory.as_view(), name="directory"),
# hashtag
re_path(
r"^hashtag/(?P<hashtag_id>\d+)/?$", views.Hashtag.as_view(), name="hashtag"
),
re_path(
rf"^hashtag/(?P<hashtag_id>\d+){regex.SLUG}/?$",
views.Hashtag.as_view(),
name="hashtag",
),
# Get started
re_path(
r"^get-started/profile/?$",

View file

@ -7,5 +7,6 @@ USERNAME = rf"{LOCALNAME}(@{DOMAIN})?"
# a strict localname with an optional @domain part, captured as one group
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)"
# a localname that must be followed by @domain
FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"
# "/s/" followed by a possibly empty slug, captured in the named group "slug"
SLUG = r"/s/(?P<slug>[-_a-z0-9]*)"
# a "#" followed by one or more characters that are neither whitespace nor
# one of the punctuation characters listed in the class; the whole tag,
# including the "#", is captured as group 1
HASHTAG = r"(#[^!@#$%^&*(),.?\":{}|<>\s]+)"
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
BOOKWYRM_USER_AGENT = r"\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;"

View file

@ -21,6 +21,6 @@ def clean(input_text):
"ol",
"li",
],
attributes=["href", "rel", "src", "alt"],
attributes=["href", "rel", "src", "alt", "data-mention"],
strip=True,
)

View file

@ -130,6 +130,7 @@ from .group import (
accept_membership,
reject_membership,
)
from .hashtag import Hashtag
from .inbox import Inbox
from .interaction import Favorite, Unfavorite, Boost, Unboost
from .isbn import Isbn

54
bookwyrm/views/hashtag.py Normal file
View file

@ -0,0 +1,54 @@
""" listing statuses for a given hashtag """
from django.core.paginator import Paginator
from django.db.models import Q
from django.views import View
from django.shortcuts import get_object_or_404
from django.template.response import TemplateResponse
from bookwyrm import models
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import maybe_redirect_local_path
# pylint: disable= no-self-use
class Hashtag(View):
    """page showing the statuses that mention one hashtag"""

    # pylint: disable=unused-argument
    def get(self, request, hashtag_id, slug=None):
        """render a paginated list of statuses for the requested hashtag

        Responds with a 404 when no hashtag has the given id. ``slug`` is
        accepted so that slugged URLs resolve, but it is not used here.
        """
        hashtag = get_object_or_404(models.Hashtag, id=hashtag_id)

        redirect_local_path = maybe_redirect_local_path(request, hashtag)
        if redirect_local_path:
            return redirect_local_path

        # start from the statuses privacy_filter returns for this viewer,
        # keep those that mention the hashtag, and always leave out direct
        # and unlisted statuses
        viewable = models.Status.privacy_filter(
            request.user,
        )
        tagged = viewable.filter(
            Q(mention_hashtags=hashtag),
        ).exclude(
            privacy__in=["direct", "unlisted"],
        )
        activities = tagged.select_related(
            "user",
            "reply_parent",
            "review__book",
            "comment__book",
            "quotation__book",
        ).prefetch_related(
            "mention_books",
            "mention_users",
            "attachments",
        )

        page_number = request.GET.get("page", 1)
        page = Paginator(activities, PAGE_LENGTH).get_page(page_number)

        data = {
            "hashtag": hashtag.name,
            "activities": page,
        }
        return TemplateResponse(request, "hashtag.html", data)

View file

@ -115,6 +115,19 @@ class CreateStatus(View):
if status.reply_parent:
status.mention_users.add(status.reply_parent.user)
# inspect the text for hashtags
for (mention_text, mention_hashtag) in find_or_create_hashtags(content).items():
# add them to status mentions fk
status.mention_hashtags.add(mention_hashtag)
# turn the mention into a link
content = re.sub(
rf"{mention_text}\b(?!@)",
rf'<a href="{mention_hashtag.remote_id}" data-mention="hashtag">'
+ rf"{mention_text}</a>",
content,
)
# deduplicate mentions
status.mention_users.set(set(status.mention_users.all()))
@ -237,6 +250,38 @@ def find_mentions(user, content):
return username_dict
def find_or_create_hashtags(content):
    """find the #hashtags in raw status content, creating any that are new

    Hashtags keep the case they were first written in, but look-ups ignore
    case: if #BookWyrm already exists, a status that uses #bookwyrm is
    linked to that existing hashtag instead of creating a second one.

    Returns a dict mapping each hashtag as written in ``content`` to its
    Hashtag object, or an empty dict when there is no content or no hashtag.
    """
    if not content:
        return {}

    # lower-cased tag -> tag exactly as written in the content
    found_hashtags = {}
    for written in re.findall(regex.HASHTAG, content):
        found_hashtags[written.lower()] = written

    if not found_hashtags:
        return {}

    # lower-cased tag -> hashtag that is already stored
    existing = models.Hashtag.objects.filter(
        Q(name__in=found_hashtags.keys())
    ).distinct()
    known_hashtags = {}
    for stored in existing:
        known_hashtags[stored.name.lower()] = stored

    # store every tag that has no existing counterpart
    for lower_name in found_hashtags.keys() - known_hashtags.keys():
        created = models.Hashtag(name=found_hashtags[lower_name])
        created.save()
        known_hashtags[lower_name] = created

    by_written_name = {}
    for lower_name, hashtag in known_hashtags.items():
        by_written_name[found_hashtags[lower_name]] = hashtag
    return by_written_name
def format_links(content):
"""detect and format links"""
validator = URLValidator()