mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-02-28 00:36:46 +00:00
Parse hashtags and store them in CreateStatus view
This commit is contained in:
parent
c402433587
commit
c68304a99b
10 changed files with 174 additions and 2 deletions
|
@ -3,7 +3,7 @@ import inspect
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .base_activity import ActivityEncoder, Signature, naive_parse
|
from .base_activity import ActivityEncoder, Signature, naive_parse
|
||||||
from .base_activity import Link, Mention
|
from .base_activity import Link, Mention, Hashtag
|
||||||
from .base_activity import ActivitySerializerError, resolve_remote_id
|
from .base_activity import ActivitySerializerError, resolve_remote_id
|
||||||
from .image import Document, Image
|
from .image import Document, Image
|
||||||
from .note import Note, GeneratedNote, Article, Comment, Quotation
|
from .note import Note, GeneratedNote, Article, Comment, Quotation
|
||||||
|
|
|
@ -426,3 +426,10 @@ class Mention(Link):
|
||||||
"""a subtype of Link for mentioning an actor"""
|
"""a subtype of Link for mentioning an actor"""
|
||||||
|
|
||||||
type: str = "Mention"
|
type: str = "Mention"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(init=False)
class Hashtag(Link):
    """a subtype of Link for referencing a hashtag"""

    # activitypub type string carried by this object
    type: str = "Hashtag"
|
||||||
|
|
53
bookwyrm/migrations/0171_hashtag_support.py
Normal file
53
bookwyrm/migrations/0171_hashtag_support.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# Generated by Django 3.2.16 on 2022-12-17 19:28
|
||||||
|
|
||||||
|
import bookwyrm.models.fields
|
||||||
|
import django.contrib.postgres.fields.citext
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """create the Hashtag model and link it to Status via mention_hashtags"""

    dependencies = [
        ("bookwyrm", "0170_merge_0168_auto_20221205_2331_0169_auto_20221206_0902"),
    ]

    operations = [
        # new model holding one row per hashtag
        migrations.CreateModel(
            name="Hashtag",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("created_date", models.DateTimeField(auto_now_add=True)),
                ("updated_date", models.DateTimeField(auto_now=True)),
                (
                    "remote_id",
                    bookwyrm.models.fields.RemoteIdField(
                        max_length=255,
                        null=True,
                        validators=[bookwyrm.models.fields.validate_remote_id],
                    ),
                ),
                (
                    # case-insensitive text column
                    # NOTE(review): citext columns need the postgres citext
                    # extension -- confirm an earlier migration enables it
                    "name",
                    django.contrib.postgres.fields.citext.CICharField(max_length=256),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
        # tag field on status pointing at the hashtags it mentions
        migrations.AddField(
            model_name="status",
            name="mention_hashtags",
            field=bookwyrm.models.fields.TagField(
                related_name="mention_hashtag", to="bookwyrm.Hashtag"
            ),
        ),
    ]
|
|
@ -34,6 +34,8 @@ from .antispam import EmailBlocklist, IPBlocklist, AutoMod, automod_task
|
||||||
|
|
||||||
from .notification import Notification
|
from .notification import Notification
|
||||||
|
|
||||||
|
from .hashtag import Hashtag
|
||||||
|
|
||||||
cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
|
||||||
activity_models = {
|
activity_models = {
|
||||||
c[1].activity_serializer.__name__: c[1]
|
c[1].activity_serializer.__name__: c[1]
|
||||||
|
|
|
@ -7,6 +7,7 @@ from urllib.parse import urljoin
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
from dateutil.parser import ParserError
|
from dateutil.parser import ParserError
|
||||||
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
|
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
|
||||||
|
from django.contrib.postgres.fields import CICharField as DjangoCICharField
|
||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.forms import ClearableFileInput, ImageField as DjangoImageField
|
from django.forms import ClearableFileInput, ImageField as DjangoImageField
|
||||||
|
@ -546,6 +547,10 @@ class CharField(ActivitypubFieldMixin, models.CharField):
|
||||||
"""activitypub-aware char field"""
|
"""activitypub-aware char field"""
|
||||||
|
|
||||||
|
|
||||||
|
class CICharField(ActivitypubFieldMixin, DjangoCICharField):
    """activitypub-aware case-insensitive char field"""
|
||||||
|
|
||||||
|
|
||||||
class URLField(ActivitypubFieldMixin, models.URLField):
|
class URLField(ActivitypubFieldMixin, models.URLField):
|
||||||
"""activitypub-aware url field"""
|
"""activitypub-aware url field"""
|
||||||
|
|
||||||
|
|
19
bookwyrm/models/hashtag.py
Normal file
19
bookwyrm/models/hashtag.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
""" model for tags """
|
||||||
|
from bookwyrm import activitypub
|
||||||
|
from .activitypub_mixin import ActivitypubMixin
|
||||||
|
from .base_model import BookWyrmModel
|
||||||
|
from .fields import CICharField
|
||||||
|
|
||||||
|
|
||||||
|
class Hashtag(ActivitypubMixin, BookWyrmModel):
    """a hashtag which can be used in statuses"""

    # the tag text, kept in a case-insensitive char field
    name = CICharField(
        activitypub_field="name",
        max_length=256,
        null=False,
        blank=False,
    )

    activity_serializer = activitypub.Hashtag
    name_field = "name"

    def __repr__(self):
        """debug representation showing the class, primary key and tag name"""
        return f"<{self.__class__} id={self.id} name={self.name}>"
|
|
@ -34,6 +34,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
|
||||||
raw_content = models.TextField(blank=True, null=True)
|
raw_content = models.TextField(blank=True, null=True)
|
||||||
mention_users = fields.TagField("User", related_name="mention_user")
|
mention_users = fields.TagField("User", related_name="mention_user")
|
||||||
mention_books = fields.TagField("Edition", related_name="mention_book")
|
mention_books = fields.TagField("Edition", related_name="mention_book")
|
||||||
|
mention_hashtags = fields.TagField("Hashtag", related_name="mention_hashtag")
|
||||||
local = models.BooleanField(default=True)
|
local = models.BooleanField(default=True)
|
||||||
content_warning = fields.CharField(
|
content_warning = fields.CharField(
|
||||||
max_length=500, blank=True, null=True, activitypub_field="summary"
|
max_length=500, blank=True, null=True, activitypub_field="summary"
|
||||||
|
|
|
@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase
|
||||||
from django.test.client import RequestFactory
|
from django.test.client import RequestFactory
|
||||||
|
|
||||||
from bookwyrm import forms, models, views
|
from bookwyrm import forms, models, views
|
||||||
from bookwyrm.views.status import find_mentions
|
from bookwyrm.views.status import find_mentions, find_hashtags
|
||||||
from bookwyrm.settings import DOMAIN
|
from bookwyrm.settings import DOMAIN
|
||||||
|
|
||||||
from bookwyrm.tests.validate_html import validate_html
|
from bookwyrm.tests.validate_html import validate_html
|
||||||
|
@ -95,6 +95,7 @@ class StatusViews(TestCase):
|
||||||
local=True,
|
local=True,
|
||||||
localname="nutria",
|
localname="nutria",
|
||||||
)
|
)
|
||||||
|
self.existing_hashtag = models.Hashtag.objects.create(name="#existing")
|
||||||
with patch("bookwyrm.models.user.set_remote_server"):
|
with patch("bookwyrm.models.user.set_remote_server"):
|
||||||
self.remote_user = models.User.objects.create_user(
|
self.remote_user = models.User.objects.create_user(
|
||||||
"rat",
|
"rat",
|
||||||
|
@ -333,6 +334,60 @@ class StatusViews(TestCase):
|
||||||
result = find_mentions(self.local_user, "@beep@beep.com")
|
result = find_mentions(self.local_user, "@beep@beep.com")
|
||||||
self.assertEqual(result, {})
|
self.assertEqual(result, {})
|
||||||
|
|
||||||
|
def test_create_status_hashtags(self, *_):
    """#mention a hashtag in a post"""
    view = views.CreateStatus.as_view()
    form = forms.CommentForm(
        {
            "content": "this is an #existing hashtag, this is a #new hashtag",
            "user": self.local_user.id,
            "book": self.book.id,
            "privacy": "public",
        }
    )
    request = self.factory.post("", form.data)
    request.user = self.local_user

    view(request, "comment")
    status = models.Status.objects.get()

    # one tag already existed, the other must have been created by the view
    hashtags = models.Hashtag.objects.all()
    self.assertEqual(len(hashtags), 2)
    # neither queryset declares an ordering, so compare them as unordered
    # collections instead of relying on the order the rows come back in
    self.assertCountEqual(status.mention_hashtags.all(), hashtags)
    # TODO: assert tag is linked to a page listing all statuses by tag
|
||||||
|
|
||||||
|
def test_find_hashtags(self, *_):
    """detect and look up #hashtags"""
    # text without any hashtag yields an empty mapping
    result = find_hashtags("no hashtag to be found here")
    self.assertEqual(result, {})

    # an already stored hashtag is found wherever it sits in the text
    result = find_hashtags("#existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading text #existing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    result = find_hashtags("leading #existing trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)

    # an unknown hashtag is created on first use; names are stored with the
    # leading "#", so the precondition has to look for "#new" as well
    self.assertIsNone(models.Hashtag.objects.filter(name="#new").first())
    result = find_hashtags("leading #new trailing")
    new_hashtag = models.Hashtag.objects.filter(name="#new").first()
    self.assertIsNotNone(new_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    # several hashtags in the same text are all resolved
    result = find_hashtags("leading #existing #new trailing")
    self.assertEqual(result["#existing"], self.existing_hashtag)
    self.assertEqual(result["#new"], new_hashtag)

    # non-ascii hashtags are supported
    result = find_hashtags("#Braunbär")
    hashtag = models.Hashtag.objects.filter(name="#Braunbär").first()
    self.assertEqual(result["#Braunbär"], hashtag)

    result = find_hashtags("#ひぐま")
    hashtag = models.Hashtag.objects.filter(name="#ひぐま").first()
    self.assertEqual(result["#ひぐま"], hashtag)
|
||||||
|
|
||||||
def test_format_links_simple_url(self, *_):
|
def test_format_links_simple_url(self, *_):
|
||||||
"""find and format urls into a tags"""
|
"""find and format urls into a tags"""
|
||||||
url = "http://www.fish.com/"
|
url = "http://www.fish.com/"
|
||||||
|
|
|
@ -7,5 +7,6 @@ USERNAME = rf"{LOCALNAME}(@{DOMAIN})?"
|
||||||
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)"
|
STRICT_USERNAME = rf"(\B{STRICT_LOCALNAME}(@{DOMAIN})?\b)"
|
||||||
FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"
|
FULL_USERNAME = rf"{LOCALNAME}@{DOMAIN}\b"
|
||||||
SLUG = r"/s/(?P<slug>[-_a-z0-9]*)"
|
SLUG = r"/s/(?P<slug>[-_a-z0-9]*)"
|
||||||
|
# a hashtag is "#" followed by one or more characters that are neither
# whitespace nor one of the punctuation characters listed in the class
# NOTE(review): nothing anchors the start of the match, so a "#" inside a
# word or a url fragment is matched too -- confirm that is intended
HASHTAG = r"(#[^!@#$%^&*(),.?\":{}|<>\s]+)"
|
||||||
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
|
# should match (BookWyrm/1.0.0; or (BookWyrm/99.1.2;
|
||||||
BOOKWYRM_USER_AGENT = r"\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;"
|
BOOKWYRM_USER_AGENT = r"\(BookWyrm/[0-9]+\.[0-9]+\.[0-9]+;"
|
||||||
|
|
|
@ -115,6 +115,14 @@ class CreateStatus(View):
|
||||||
if status.reply_parent:
|
if status.reply_parent:
|
||||||
status.mention_users.add(status.reply_parent.user)
|
status.mention_users.add(status.reply_parent.user)
|
||||||
|
|
||||||
|
# inspect the text for hashtags
|
||||||
|
for (tag, mention_hashtag) in find_hashtags(content).items():
|
||||||
|
# add them to status mentions fk
|
||||||
|
status.mention_hashtags.add(mention_hashtag)
|
||||||
|
|
||||||
|
# TODO: turn the mention into a link
|
||||||
|
content = content
|
||||||
|
|
||||||
# deduplicate mentions
|
# deduplicate mentions
|
||||||
status.mention_users.set(set(status.mention_users.all()))
|
status.mention_users.set(set(status.mention_users.all()))
|
||||||
|
|
||||||
|
@ -237,6 +245,27 @@ def find_mentions(user, content):
|
||||||
return username_dict
|
return username_dict
|
||||||
|
|
||||||
|
|
||||||
|
def find_hashtags(content):
    """detect #hashtags in raw status content

    returns a dict mapping every hashtag, spelled as it appears in
    ``content``, to its Hashtag model instance. hashtags that are not
    stored yet are created. names are matched case-insensitively, so an
    existing #BookWyrm tag is re-used for #bookwyrm instead of storing a
    second tag that only differs in case.
    """
    if not content:
        return {}

    found = re.findall(regex.HASHTAG, content)
    if not found:
        return {}

    # Hashtag.name is a case-insensitive column, so this lookup can return
    # tags whose stored spelling differs from the one used in the text;
    # index them by lowercase name to match them up again
    tags_by_lower_name = {
        tag.name.lower(): tag
        for tag in models.Hashtag.objects.filter(Q(name__in=found)).distinct()
    }

    hashtag_dict = {}
    # walk the tags in text order so new tags are created deterministically
    for tag_name in found:
        lower_name = tag_name.lower()
        if lower_name not in tags_by_lower_name:
            mention_hashtag = models.Hashtag(name=tag_name)
            mention_hashtag.save()
            tags_by_lower_name[lower_name] = mention_hashtag
        hashtag_dict[tag_name] = tags_by_lower_name[lower_name]

    return hashtag_dict
|
||||||
|
|
||||||
|
|
||||||
def format_links(content):
|
def format_links(content):
|
||||||
"""detect and format links"""
|
"""detect and format links"""
|
||||||
validator = URLValidator()
|
validator = URLValidator()
|
||||||
|
|
Loading…
Reference in a new issue