mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-26 19:41:11 +00:00
Enable finding existing hashtags case-insensitively
We should store hashtags case-sensitively, but ensure that an existing hashtag with different case is found and re-used. For example, an existing #BookWyrm hashtag will be found and used even if the status content uses #bookwyrm.
This commit is contained in:
parent
f3334b1550
commit
9ca9883e0b
4 changed files with 41 additions and 27 deletions
|
@ -65,6 +65,7 @@ class Note(ActivityObject):
|
|||
rf'(<a href=")[^"]*(" data-mention="hashtag">{hashtag.name}</a>)',
|
||||
rf"\1{hashtag.remote_id}\2",
|
||||
instance.content,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
if instance.content != updated_content:
|
||||
instance.content = updated_content
|
||||
|
|
|
@ -34,7 +34,7 @@ class Note(TestCase):
|
|||
inReplyToBook=self.book.remote_id,
|
||||
content="<p>This is interesting "
|
||||
+ '<a href="https://test-instance.org/hashtag/2" data-mention="hashtag">'
|
||||
+ "#BookClub</a></p>",
|
||||
+ "#bookclub</a></p>",
|
||||
published="2023-02-17T23:12:59.398030+00:00",
|
||||
to=[],
|
||||
cc=[],
|
||||
|
@ -60,5 +60,5 @@ class Note(TestCase):
|
|||
instance.content,
|
||||
"<p>This is interesting "
|
||||
+ f'<a href="{hashtag.remote_id}" data-mention="hashtag">'
|
||||
+ "#BookClub</a></p>",
|
||||
+ "#bookclub</a></p>",
|
||||
)
|
||||
|
|
|
@ -6,7 +6,7 @@ from django.test import TestCase, TransactionTestCase
|
|||
from django.test.client import RequestFactory
|
||||
|
||||
from bookwyrm import forms, models, views
|
||||
from bookwyrm.views.status import find_mentions, find_hashtags
|
||||
from bookwyrm.views.status import find_mentions, find_or_create_hashtags
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
from bookwyrm.tests.validate_html import validate_html
|
||||
|
@ -339,7 +339,8 @@ class StatusViews(TestCase):
|
|||
view = views.CreateStatus.as_view()
|
||||
form = forms.CommentForm(
|
||||
{
|
||||
"content": "this is an #existing hashtag, this one is #new.",
|
||||
"content": "this is an #EXISTING hashtag but all uppercase, "
|
||||
+ "this one is #NewTag.",
|
||||
"user": self.local_user.id,
|
||||
"book": self.book.id,
|
||||
"privacy": "public",
|
||||
|
@ -356,44 +357,45 @@ class StatusViews(TestCase):
|
|||
self.assertEqual(list(status.mention_hashtags.all()), list(hashtags))
|
||||
|
||||
hashtag_exising = models.Hashtag.objects.filter(name="#existing").first()
|
||||
hashtag_new = models.Hashtag.objects.filter(name="#new").first()
|
||||
hashtag_new = models.Hashtag.objects.filter(name="#NewTag").first()
|
||||
self.assertEqual(
|
||||
status.content,
|
||||
"<p>this is an "
|
||||
+ f'<a href="{hashtag_exising.remote_id}" data-mention="hashtag">'
|
||||
+ "#existing</a> hashtag, this one is "
|
||||
+ f'<a href="{hashtag_new.remote_id}" data-mention="hashtag">#new</a>.</p>',
|
||||
+ "#EXISTING</a> hashtag but all uppercase, this one is "
|
||||
+ f'<a href="{hashtag_new.remote_id}" data-mention="hashtag">'
|
||||
+ "#NewTag</a>.</p>",
|
||||
)
|
||||
|
||||
def test_find_hashtags(self, *_):
|
||||
def test_find_or_create_hashtags(self, *_):
|
||||
"""detect and look up #hashtags"""
|
||||
result = find_hashtags("no hashtag to be found here")
|
||||
result = find_or_create_hashtags("no hashtag to be found here")
|
||||
self.assertEqual(result, {})
|
||||
|
||||
result = find_hashtags("#existing")
|
||||
result = find_or_create_hashtags("#existing")
|
||||
self.assertEqual(result["#existing"], self.existing_hashtag)
|
||||
|
||||
result = find_hashtags("leading text #existing")
|
||||
result = find_or_create_hashtags("leading text #existing")
|
||||
self.assertEqual(result["#existing"], self.existing_hashtag)
|
||||
|
||||
result = find_hashtags("leading #existing trailing")
|
||||
result = find_or_create_hashtags("leading #existing trailing")
|
||||
self.assertEqual(result["#existing"], self.existing_hashtag)
|
||||
|
||||
self.assertIsNone(models.Hashtag.objects.filter(name="new").first())
|
||||
result = find_hashtags("leading #new trailing")
|
||||
result = find_or_create_hashtags("leading #new trailing")
|
||||
new_hashtag = models.Hashtag.objects.filter(name="#new").first()
|
||||
self.assertIsNotNone(new_hashtag)
|
||||
self.assertEqual(result["#new"], new_hashtag)
|
||||
|
||||
result = find_hashtags("leading #existing #new trailing")
|
||||
result = find_or_create_hashtags("leading #existing #new trailing")
|
||||
self.assertEqual(result["#existing"], self.existing_hashtag)
|
||||
self.assertEqual(result["#new"], new_hashtag)
|
||||
|
||||
result = find_hashtags("#Braunbär")
|
||||
result = find_or_create_hashtags("#Braunbär")
|
||||
hashtag = models.Hashtag.objects.filter(name="#Braunbär").first()
|
||||
self.assertEqual(result["#Braunbär"], hashtag)
|
||||
|
||||
result = find_hashtags("#ひぐま")
|
||||
result = find_or_create_hashtags("#ひぐま")
|
||||
hashtag = models.Hashtag.objects.filter(name="#ひぐま").first()
|
||||
self.assertEqual(result["#ひぐま"], hashtag)
|
||||
|
||||
|
|
|
@ -116,7 +116,7 @@ class CreateStatus(View):
|
|||
status.mention_users.add(status.reply_parent.user)
|
||||
|
||||
# inspect the text for hashtags
|
||||
for (mention_text, mention_hashtag) in find_hashtags(content).items():
|
||||
for (mention_text, mention_hashtag) in find_or_create_hashtags(content).items():
|
||||
# add them to status mentions fk
|
||||
status.mention_hashtags.add(mention_hashtag)
|
||||
|
||||
|
@ -250,25 +250,36 @@ def find_mentions(user, content):
|
|||
return username_dict
|
||||
|
||||
|
||||
def find_hashtags(content):
|
||||
"""detect #hashtags in raw status content"""
|
||||
def find_or_create_hashtags(content):
|
||||
"""detect #hashtags in raw status content
|
||||
|
||||
it stores hashtags case-sensitive, but ensures that an existing
|
||||
hashtag with different case is found and re-used. For example,
|
||||
an existing #BookWyrm hashtag will be found and used even if the
|
||||
status content is using #bookwyrm.
|
||||
"""
|
||||
if not content:
|
||||
return {}
|
||||
|
||||
hashtags = re.findall(regex.HASHTAG, content)
|
||||
if len(hashtags) == 0:
|
||||
found_hashtags = {t.lower(): t for t in re.findall(regex.HASHTAG, content)}
|
||||
if len(found_hashtags) == 0:
|
||||
return {}
|
||||
|
||||
known_tags = models.Hashtag.objects.filter(Q(name__in=hashtags)).distinct()
|
||||
hashtag_dict = {t.name: t for t in known_tags}
|
||||
known_hashtags = {
|
||||
t.name.lower(): t
|
||||
for t in models.Hashtag.objects.filter(
|
||||
Q(name__in=found_hashtags.keys())
|
||||
).distinct()
|
||||
}
|
||||
|
||||
not_found = set(hashtags) - set(hashtag_dict.keys())
|
||||
for tag_name in not_found:
|
||||
not_found = found_hashtags.keys() - known_hashtags.keys()
|
||||
for lower_name in not_found:
|
||||
tag_name = found_hashtags[lower_name]
|
||||
mention_hashtag = models.Hashtag(name=tag_name)
|
||||
mention_hashtag.save()
|
||||
hashtag_dict[mention_hashtag.name] = mention_hashtag
|
||||
known_hashtags[lower_name] = mention_hashtag
|
||||
|
||||
return hashtag_dict
|
||||
return {found_hashtags[k]: v for k, v in known_hashtags.items()}
|
||||
|
||||
|
||||
def format_links(content):
|
||||
|
|
Loading…
Reference in a new issue