[feature] Allow partial-word hashtags using non-breaking spaces (#3606)

* [feature] Allow partial-word hashtags using non-breaking spaces

* update docs
This commit is contained in:
tobi 2024-12-08 16:03:00 +01:00 committed by GitHub
parent 642f5230e6
commit 9477fd7eba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 44 additions and 2 deletions

View file

@ -285,6 +285,9 @@ For accessibility reasons, it is considerate to use upper camel case when you're
You can include as many hashtags as you like within a GoToSocial post, and each hashtag has a length limit of 100 characters.
!!! tip
To end a hashtag, you can simply use a space. For example, in the text `this #soup rules`, the hashtag is terminated by a space, so `#soup` becomes the hashtag. However, you can also use a pipe character `|`, or the unicode characters `\u200B` (zero-width space) or `\uFEFF` (zero-width no-break space), to create "partial-word" hashtags. For example, with input text `this #so|up rules`, only the `#so` part becomes the hashtag. Likewise, with the input text `this #so​up rules`, which contains an invisible zero-width space after the o and before the u, only the `#so` part becomes the hashtag. See here for more information on zero-width spaces: https://en.wikipedia.org/wiki/Zero-width_space.
## Input Sanitization
In order not to spread scripts, vulnerabilities, and glitchy HTML all over the place, GoToSocial performs the following types of input sanitization:

View file

@ -36,6 +36,8 @@ const (
moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"
withUTF8Link = "here's a link with utf-8 characters in it: https://example.org/söme_url"
withUTF8LinkExpected = "<p>here's a link with utf-8 characters in it: <a href=\"https://example.org/s%C3%B6me_url\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://example.org/söme_url</a></p>"
withFunkyTags = "#hashtag1 pee #hashtag2\u200Bpee #hashtag3|poo #hashtag4\uFEFFpoo"
withFunkyTagsExpected = "<p><a href=\"http://localhost:8080/tags/hashtag1\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag1</span></a> pee <a href=\"http://localhost:8080/tags/hashtag2\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag2</span></a>\u200bpee <a href=\"http://localhost:8080/tags/hashtag3\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag3</span></a>|poo <a href=\"http://localhost:8080/tags/hashtag4\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag4</span></a>\ufeffpoo</p>"
)
type PlainTestSuite struct {
@ -136,6 +138,17 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
suite.Equal("올빼미", tags[0].Name)
}
// TestFunkyTags checks that hashtags terminated by a plain space, a
// zero-width space (U+200B), a pipe character, or a zero-width no-break
// space (U+FEFF) are all linked correctly in the rendered HTML, and that
// only the part before the terminator is extracted as the tag name.
func (suite *PlainTestSuite) TestFunkyTags() {
	formatted := suite.FromPlain(withFunkyTags)
	suite.Equal(withFunkyTagsExpected, formatted.HTML)

	// Check the tag count before indexing, so that a parsing
	// regression is reported as an ordinary assertion failure
	// rather than an index-out-of-range panic.
	tags := formatted.Tags
	if !suite.Len(tags, 4) {
		return
	}

	suite.Equal("hashtag1", tags[0].Name)
	suite.Equal("hashtag2", tags[1].Name)
	suite.Equal("hashtag3", tags[2].Name)
	suite.Equal("hashtag4", tags[3].Name)
}
func (suite *PlainTestSuite) TestDeriveMultiple() {
statusText := `Another test @foss_satan@fossbros-anonymous.io

View file

@ -38,8 +38,34 @@ func isPermittedInHashtag(r rune) bool {
// is a recognized break character for before
// or after a #hashtag.
//
// Unicode white space and punctuation other than
// underscore always count as boundaries. The zero
// width space (U+200B), the zero width no-break
// space (U+FEFF), and the pipe character are also
// accepted, so that a hashtag can be ended in the
// middle of a word. Those three are checked
// explicitly because neither unicode.IsSpace nor
// unicode.IsPunct reports true for them.
func isHashtagBoundary(r rune) bool {
	switch {

	// Zero width space.
	case r == '\u200B':
		return true

	// Zero width no-break space.
	case r == '\uFEFF':
		return true

	// Pipe character sometimes
	// used as workaround.
	case r == '|':
		return true

	// Standard Unicode white space.
	case unicode.IsSpace(r):
		return true

	// Non-underscore punctuation.
	case unicode.IsPunct(r) && r != '_':
		return true

	// Not recognized
	// hashtag boundary.
	default:
		return false
	}
}
// isMentionBoundary returns true if rune r