mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-27 18:40:31 +00:00
Parse hashtags and store them in CreateStatus view
This commit is contained in:
parent
c402433587
commit
c68304a99b
10 changed files with 174 additions and 2 deletions
|
@ -3,7 +3,7 @@ import inspect
|
|||
import sys
|
||||
|
||||
from .base_activity import ActivityEncoder, Signature, naive_parse
|
||||
from .base_activity import Link, Mention
|
||||
from .base_activity import Link, Mention, Hashtag
|
||||
from .base_activity import ActivitySerializerError, resolve_remote_id
|
||||
from .image import Document, Image
|
||||
from .note import Note, GeneratedNote, Article, Comment, Quotation
|
||||
|
|
|
@ -426,3 +426,10 @@ class Mention(Link):
|
|||
"""a subtype of Link for mentioning an actor"""
|
||||
|
||||
type: str = "Mention"
|
||||
|
||||
|
||||
@dataclass(init=False)
class Hashtag(Link):
    """a subtype of Link for mentioning a hashtag (activity type "Hashtag")"""

    type: str = "Hashtag"
|
||||
|
|
53
bookwyrm/migrations/0171_hashtag_support.py
Normal file
53
bookwyrm/migrations/0171_hashtag_support.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
# Generated by Django 3.2.16 on 2022-12-17 19:28
|
||||
|
||||
import bookwyrm.models.fields
|
||||
import django.contrib.postgres.fields.citext
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Schema migration for hashtag support: creates the Hashtag model and adds
    # the Status.mention_hashtags tag field that points at it.

    dependencies = [
        ("bookwyrm", "0170_merge_0168_auto_20221205_2331_0169_auto_20221206_0902"),
    ]

    operations = [
        # 1) create the Hashtag model
        migrations.CreateModel(
            name="Hashtag",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("created_date", models.DateTimeField(auto_now_add=True)),
                ("updated_date", models.DateTimeField(auto_now=True)),
                (
                    "remote_id",
                    bookwyrm.models.fields.RemoteIdField(
                        max_length=255,
                        null=True,
                        validators=[bookwyrm.models.fields.validate_remote_id],
                    ),
                ),
                (
                    # the tag text, stored in a citext (case-insensitive) column
                    # NOTE(review): citext columns need the PostgreSQL "citext"
                    # extension - confirm an earlier migration installs it
                    "name",
                    django.contrib.postgres.fields.citext.CICharField(max_length=256),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # 2) link statuses to the hashtags they mention
        migrations.AddField(
            model_name="status",
            name="mention_hashtags",
            field=bookwyrm.models.fields.TagField(
                related_name="mention_hashtag", to="bookwyrm.Hashtag"
            ),
        ),
    ]
|
|
@ -34,6 +34,8 @@ from .antispam import EmailBlocklist, IPBlocklist, AutoMod, automod_task
|
|||
|
||||
from .notification import Notification
|
||||
|
||||
from .hashtag import Hashtag
|
||||
|
||||
cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
||||
activity_models = {
|
||||
c[1].activity_serializer.__name__: c[1]
|
||||
|
|
|
@ -7,6 +7,7 @@ from urllib.parse import urljoin
|
|||
import dateutil.parser
|
||||
from dateutil.parser import ParserError
|
||||
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
|
||||
from django.contrib.postgres.fields import CICharField as DjangoCICharField
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.db import models
|
||||
from django.forms import ClearableFileInput, ImageField as DjangoImageField
|
||||
|
@ -546,6 +547,10 @@ class CharField(ActivitypubFieldMixin, models.CharField):
|
|||
"""activitypub-aware char field"""
|
||||
|
||||
|
||||
class CICharField(ActivitypubFieldMixin, DjangoCICharField):
    """activitypub-aware case-insensitive char field (postgres CICharField)"""
|
||||
|
||||
|
||||
class URLField(ActivitypubFieldMixin, models.URLField):
|
||||
"""activitypub-aware url field"""
|
||||
|
||||
|
|
19
bookwyrm/models/hashtag.py
Normal file
19
bookwyrm/models/hashtag.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
""" model for tags """
|
||||
from bookwyrm import activitypub
|
||||
from .activitypub_mixin import ActivitypubMixin
|
||||
from .base_model import BookWyrmModel
|
||||
from .fields import CICharField
|
||||
|
||||
|
||||
class Hashtag(ActivitypubMixin, BookWyrmModel):
    """a hashtag which can be used in statuses"""

    # the tag text; a case-insensitive char field exposed to activitypub
    # under the "name" key
    name = CICharField(
        max_length=256, blank=False, null=False, activitypub_field="name"
    )

    name_field = "name"
    activity_serializer = activitypub.Hashtag

    def __repr__(self):
        """debug representation, e.g. <Hashtag id=1 name=#books>"""
        # use the bare class name: interpolating self.__class__ would nest a
        # "<class '...'>" inside the angle brackets
        return f"<{type(self).__name__} id={self.id} name={self.name}>"
|
|
@ -34,6 +34,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
|
|||
raw_content = models.TextField(blank=True, null=True)
|
||||
mention_users = fields.TagField("User", related_name="mention_user")
|
||||
mention_books = fields.TagField("Edition", related_name="mention_book")
|
||||
mention_hashtags = fields.TagField("Hashtag", related_name="mention_hashtag")
|
||||
local = models.BooleanField(default=True)
|
||||
content_warning = fields.CharField(
|
||||
max_length=500, blank=True, null=True, activitypub_field="summary"
|
||||
|
|
|
@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase
|
|||
from django.test.client import RequestFactory
|
||||
|
||||
from bookwyrm import forms, models, views
|
||||
from bookwyrm.views.status import find_mentions
|
||||
from bookwyrm.views.status import find_mentions, find_hashtags
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
from bookwyrm.tests.validate_html import validate_html
|
||||
|
@ -95,6 +95,7 @@ class StatusViews(TestCase):
|
|||
local=True,
|
||||
localname="nutria",
|
||||
)
|
||||
self.existing_hashtag = models.Hashtag.objects.create(name="#existing")
|
||||
with patch("bookwyrm.models.user.set_remote_server"):
|
||||
self.remote_user = models.User.objects.create_user(
|
||||
"rat",
|
||||
|
@ -333,6 +334,60 @@ class StatusViews(TestCase):
|
|||
result = find_mentions(self.local_user, "@beep@beep.com")
|
||||
self.assertEqual(result, {})
|
||||
|
||||
def test_create_status_hashtags(self, *_):
    """#mention a hashtag in a post"""
    view = views.CreateStatus.as_view()
    form = forms.CommentForm(
        {
            "content": "this is an #existing hashtag, this is a #new hashtag",
            "user": self.local_user.id,
            "book": self.book.id,
            "privacy": "public",
        }
    )
    request = self.factory.post("", form.data)
    request.user = self.local_user

    view(request, "comment")
    status = models.Status.objects.get()

    # "#existing" is created by the test setup, "#new" by the view
    hashtags = models.Hashtag.objects.all()
    self.assertEqual(len(hashtags), 2)
    # neither queryset has an explicit ordering here, so compare the members
    # without relying on row order
    self.assertCountEqual(list(status.mention_hashtags.all()), list(hashtags))
    # TODO: assert tag is linked to a page listing all statuses by tag
|
||||
|
||||
def test_find_hashtags(self, *_):
    """detect and look up #hashtags"""
    result = find_hashtags("no hashtag to be found here")
    self.assertEqual(result, {})

    # a stored hashtag is found regardless of its position in the text
    result = find_hashtags("#existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading text #existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading #existing trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    # an unknown hashtag is created on first use; names are stored with the
    # leading "#", so the precondition has to look for "#new" as well
    self.assertIsNone(models.Hashtag.objects.filter(name="#new").first())
    result = find_hashtags("leading #new trailing")
    new_hashtag = models.Hashtag.objects.filter(name="#new").first()
    self.assertIsNotNone(new_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    # several hashtags in one text are all returned
    result = find_hashtags("leading #existing #new trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    # non-ascii hashtags
    result = find_hashtags("#Braunbär")
    hashtag = models.Hashtag.objects.filter(name="#Braunbär").first()
    self.assertEqual(result["#Braunbär"], hashtag)

    result = find_hashtags("#ひぐま")
    hashtag = models.Hashtag.objects.filter(name="#ひぐま").first()
    self.assertEqual(result["#ひぐま"], hashtag)
|
||||
|
||||
def test_format_links_simple_url(self, *_):
|
||||
"""find and format urls into a tags"""
|
||||
url = "http://www.fish.com/"
|
||||
|
|
|
@ -7,5 +7,6 @@ USERNAME = rf"{LOCALNAME}(@{DOMAIN})?"
|
|||
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)"
|
||||
FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"
|
||||
SLUG = r"/s/(?P<slug>[-_a-z0-9]*)"
|
||||
HASHTAG = r"(#[^!@#$%^&*(),.?\":{}|<>\s]+)"
|
||||
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
|
||||
BOOKWYRM_USER_AGENT = r"\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;"
|
||||
|
|
|
@ -115,6 +115,14 @@ class CreateStatus(View):
|
|||
if status.reply_parent:
|
||||
status.mention_users.add(status.reply_parent.user)
|
||||
|
||||
# inspect the text for hashtags
|
||||
for (tag, mention_hashtag) in find_hashtags(content).items():
|
||||
# add them to status mentions fk
|
||||
status.mention_hashtags.add(mention_hashtag)
|
||||
|
||||
# TODO: turn the mention into a link
|
||||
content = content
|
||||
|
||||
# deduplicate mentions
|
||||
status.mention_users.set(set(status.mention_users.all()))
|
||||
|
||||
|
@ -237,6 +245,27 @@ def find_mentions(user, content):
|
|||
return username_dict
|
||||
|
||||
|
||||
def find_hashtags(content):
    """detect #hashtags in raw status content

    returns a dict that maps every hashtag, exactly as it is written in
    ``content``, to its ``models.Hashtag`` object. hashtags that are not stored
    yet are created. an empty or missing ``content`` gives an empty dict.

    hashtag names live in a case-insensitive field, so a stored #BookWyrm is
    re-used for #bookwyrm instead of saving a second, differently-cased row.
    """
    if not content:
        return {}

    # drop repeated tags but keep the order in which they first appear
    found_tags = list(dict.fromkeys(re.findall(regex.HASHTAG, content)))
    if not found_tags:
        return {}

    # index the stored tags by lower-cased name: the name field is
    # case-insensitive, so the query may return a tag whose stored casing
    # differs from the text (NOTE(review): relies on citext matching for
    # name__in - confirm against the database backend in use)
    known_tags = {
        t.name.lower(): t
        for t in models.Hashtag.objects.filter(Q(name__in=found_tags)).distinct()
    }

    hashtag_dict = {}
    for tag_name in found_tags:
        lookup_key = tag_name.lower()
        if lookup_key not in known_tags:
            mention_hashtag = models.Hashtag(name=tag_name)
            mention_hashtag.save()
            # remember it so another casing of the same tag in this text
            # re-uses the new object
            known_tags[lookup_key] = mention_hashtag
        hashtag_dict[tag_name] = known_tags[lookup_key]

    return hashtag_dict
|
||||
|
||||
|
||||
def format_links(content):
|
||||
"""detect and format links"""
|
||||
validator = URLValidator()
|
||||
|
|
Loading…
Reference in a new issue