Parse hashtags and store them in CreateStatus view

This commit is contained in:
Christof Dorner 2022-12-17 20:13:57 +01:00
parent c402433587
commit c68304a99b
10 changed files with 174 additions and 2 deletions

View file

@ -3,7 +3,7 @@ import inspect
import sys
from .base_activity import ActivityEncoder, Signature, naive_parse
from .base_activity import Link, Mention
from .base_activity import Link, Mention, Hashtag
from .base_activity import ActivitySerializerError, resolve_remote_id
from .image import Document, Image
from .note import Note, GeneratedNote, Article, Comment, Quotation

View file

@ -426,3 +426,10 @@ class Mention(Link):
"""a subtype of Link for mentioning an actor"""
type: str = "Mention"
@dataclass(init=False)
class Hashtag(Link):
    """a subtype of Link for mentioning a hashtag

    Only overrides the default of the ``type`` field, which is "Hashtag".
    """

    type: str = "Hashtag"

View file

@ -0,0 +1,53 @@
# Generated by Django 3.2.16 on 2022-12-17 19:28
import bookwyrm.models.fields
import django.contrib.postgres.fields.citext
from django.db import migrations, models
class Migration(migrations.Migration):
    """create the Hashtag model and add Status.mention_hashtags"""

    dependencies = [
        ("bookwyrm", "0170_merge_0168_auto_20221205_2331_0169_auto_20221206_0902"),
    ]

    operations = [
        # new table for hashtags
        migrations.CreateModel(
            name="Hashtag",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("created_date", models.DateTimeField(auto_now_add=True)),
                ("updated_date", models.DateTimeField(auto_now=True)),
                (
                    "remote_id",
                    bookwyrm.models.fields.RemoteIdField(
                        max_length=255,
                        null=True,
                        validators=[bookwyrm.models.fields.validate_remote_id],
                    ),
                ),
                (
                    # citext column: the name is compared case-insensitively
                    "name",
                    django.contrib.postgres.fields.citext.CICharField(max_length=256),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # link statuses to the hashtags they mention
        migrations.AddField(
            model_name="status",
            name="mention_hashtags",
            field=bookwyrm.models.fields.TagField(
                related_name="mention_hashtag", to="bookwyrm.Hashtag"
            ),
        ),
    ]

View file

@ -34,6 +34,8 @@ from .antispam import EmailBlocklist, IPBlocklist, AutoMod, automod_task
from .notification import Notification
from .hashtag import Hashtag
cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
activity_models = {
c[1].activity_serializer.__name__: c[1]

View file

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import dateutil.parser
from dateutil.parser import ParserError
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
from django.contrib.postgres.fields import CICharField as DjangoCICharField
from django.core.exceptions import ValidationError
from django.db import models
from django.forms import ClearableFileInput, ImageField as DjangoImageField
@ -546,6 +547,10 @@ class CharField(ActivitypubFieldMixin, models.CharField):
"""activitypub-aware char field"""
class CICharField(ActivitypubFieldMixin, DjangoCICharField):
    """activitypub-aware cichar field

    Combines ActivitypubFieldMixin with the case-insensitive char field
    from django.contrib.postgres.
    """
class URLField(ActivitypubFieldMixin, models.URLField):
"""activitypub-aware url field"""

View file

@ -0,0 +1,19 @@
""" model for tags """
from bookwyrm import activitypub
from .activitypub_mixin import ActivitypubMixin
from .base_model import BookWyrmModel
from .fields import CICharField
class Hashtag(ActivitypubMixin, BookWyrmModel):
    """a hashtag which can be used in statuses"""

    # case-insensitive char field, exposed as the activitypub "name" field
    name = CICharField(
        max_length=256, blank=False, null=False, activitypub_field="name"
    )

    name_field = "name"
    activity_serializer = activitypub.Hashtag

    def __repr__(self):
        """short debugging representation, e.g. <Hashtag id=1 name=#books>"""
        # use the bare class name: interpolating the class object itself would
        # render as "<<class '...'> id=... name=...>"
        return f"<{type(self).__name__} id={self.id} name={self.name}>"

View file

@ -34,6 +34,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
raw_content = models.TextField(blank=True, null=True)
mention_users = fields.TagField("User", related_name="mention_user")
mention_books = fields.TagField("Edition", related_name="mention_book")
mention_hashtags = fields.TagField("Hashtag", related_name="mention_hashtag")
local = models.BooleanField(default=True)
content_warning = fields.CharField(
max_length=500, blank=True, null=True, activitypub_field="summary"

View file

@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase
from django.test.client import RequestFactory
from bookwyrm import forms, models, views
from bookwyrm.views.status import find_mentions
from bookwyrm.views.status import find_mentions, find_hashtags
from bookwyrm.settings import DOMAIN
from bookwyrm.tests.validate_html import validate_html
@ -95,6 +95,7 @@ class StatusViews(TestCase):
local=True,
localname="nutria",
)
self.existing_hashtag = models.Hashtag.objects.create(name="#existing")
with patch("bookwyrm.models.user.set_remote_server"):
self.remote_user = models.User.objects.create_user(
"rat",
@ -333,6 +334,60 @@ class StatusViews(TestCase):
result = find_mentions(self.local_user, "@beep@beep.com")
self.assertEqual(result, {})
def test_create_status_hashtags(self, *_):
    """#mention a hashtag in a post"""
    view = views.CreateStatus.as_view()
    form = forms.CommentForm(
        {
            "content": "this is an #existing hashtag, this is a #new hashtag",
            "user": self.local_user.id,
            "book": self.book.id,
            "privacy": "public",
        }
    )
    request = self.factory.post("", form.data)
    request.user = self.local_user

    view(request, "comment")
    status = models.Status.objects.get()

    hashtags = models.Hashtag.objects.all()
    self.assertEqual(len(hashtags), 2)
    # the pre-existing tag must be re-used and exactly one new tag created
    self.assertEqual({tag.name for tag in hashtags}, {"#existing", "#new"})
    # no order_by is applied to either queryset here, so compare the linked
    # tags without depending on row order
    self.assertCountEqual(status.mention_hashtags.all(), hashtags)
    # TODO: assert tag is linked to a page listing all statuses by tag
def test_find_hashtags(self, *_):
    """detect and look up #hashtags"""
    result = find_hashtags("no hashtag to be found here")
    self.assertEqual(result, {})

    # an existing tag is found wherever it appears in the text
    result = find_hashtags("#existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading text #existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading #existing trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    # an unknown tag is created on the fly; names are stored with the
    # leading "#", so the pre-condition has to look for "#new"
    self.assertIsNone(models.Hashtag.objects.filter(name="#new").first())
    result = find_hashtags("leading #new trailing")
    new_hashtag = models.Hashtag.objects.filter(name="#new").first()
    self.assertIsNotNone(new_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    result = find_hashtags("leading #existing #new trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    # non-ascii tags
    result = find_hashtags("#Braunbär")
    hashtag = models.Hashtag.objects.filter(name="#Braunbär").first()
    self.assertEqual(result["#Braunbär"], hashtag)

    result = find_hashtags("#ひぐま")
    hashtag = models.Hashtag.objects.filter(name="#ひぐま").first()
    self.assertEqual(result["#ひぐま"], hashtag)
def test_format_links_simple_url(self, *_):
"""find and format urls into a tags"""
url = "http://www.fish.com/"

View file

@ -7,5 +7,6 @@ USERNAME = rf"{LOCALNAME}(@{DOMAIN})?"
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)"
FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"
SLUG = r"/s/(?P<slug>[-_a-z0-9]*)"
# a "#" followed by one or more characters that are neither whitespace nor one
# of the punctuation characters listed in the class; the single group captures
# the whole tag including the leading "#"
HASHTAG = r"(#[^!@#$%^&*(),.?\":{}|<>\s]+)"
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
BOOKWYRM_USER_AGENT = r"\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;"

View file

@ -115,6 +115,14 @@ class CreateStatus(View):
if status.reply_parent:
status.mention_users.add(status.reply_parent.user)
# inspect the text for hashtags
for (tag, mention_hashtag) in find_hashtags(content).items():
# add them to status mentions fk
status.mention_hashtags.add(mention_hashtag)
# TODO: turn the mention into a link
content = content
# deduplicate mentions
status.mention_users.set(set(status.mention_users.all()))
@ -237,6 +245,27 @@ def find_mentions(user, content):
return username_dict
def find_hashtags(content):
    """detect #hashtags in raw status content

    Existing hashtags are re-used and unknown ones are created and saved.
    Names are matched without regard to case, so writing "#Books" re-uses a
    stored "#books" instead of creating a second row that differs only in
    case.

    Args:
        content: raw status text; may be empty or None.

    Returns:
        dict mapping every hashtag exactly as it is written in ``content``
        (leading "#" included) to its ``models.Hashtag``. Empty if ``content``
        is falsy or contains no hashtag.
    """
    if not content:
        return {}

    # unique spellings, in order of first appearance
    found = list(dict.fromkeys(re.findall(regex.HASHTAG, content)))
    if not found:
        return {}

    # Hashtag.name is a CICharField, so the lookup is presumably
    # case-insensitive on the database side (verify against the backend);
    # index the hits by lowercased name so the python side matches the same way
    known = {
        hashtag.name.lower(): hashtag
        for hashtag in models.Hashtag.objects.filter(name__in=found)
    }

    hashtag_dict = {}
    for tag_name in found:
        folded = tag_name.lower()
        mention_hashtag = known.get(folded)
        if mention_hashtag is None:
            # first time this tag is seen: store it with the casing used here
            mention_hashtag = models.Hashtag(name=tag_name)
            mention_hashtag.save()
            # remember it so another spelling in the same text re-uses it
            known[folded] = mention_hashtag
        hashtag_dict[tag_name] = mention_hashtag

    return hashtag_dict
def format_links(content):
"""detect and format links"""
validator = URLValidator()