mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-12-12 02:06:31 +00:00
[feature] Allow partial-word hashtags using non-breaking spaces (#3606)
* [feature] Allow partial-word hashtags using non-breaking spaces * update docs
This commit is contained in:
parent
642f5230e6
commit
9477fd7eba
3 changed files with 44 additions and 2 deletions
|
@ -285,6 +285,9 @@ For accessibility reasons, it is considerate to use upper camel case when you're
|
||||||
|
|
||||||
You can include as many hashtags as you like within a GoToSocial post, and each hashtag has a length limit of 100 characters.
|
You can include as many hashtags as you like within a GoToSocial post, and each hashtag has a length limit of 100 characters.
|
||||||
|
|
||||||
|
!!! tip
|
||||||
|
To end a hashtag, you can simply use a space. For example, in the text `this #soup rules`, the hashtag is terminated by a space, so `#soup` becomes the hashtag. However, you can also use a pipe character `|`, or the unicode characters `\u200B` (zero-width space) or `\uFEFF` (zero-width no-break space), to create "partial-word" hashtags. For example, with input text `this #so|up rules`, only the `#so` part becomes the hashtag. Likewise, with the input text `this #so​up rules`, which contains an invisible zero-width space after the o and before the u, only the `#so` part becomes the hashtag. See here for more information on zero-width spaces: https://en.wikipedia.org/wiki/Zero-width_space.
|
||||||
|
|
||||||
## Input Sanitization
|
## Input Sanitization
|
||||||
|
|
||||||
In order not to spread scripts, vulnerabilities, and glitchy HTML all over the place, GoToSocial performs the following types of input sanitization:
|
In order not to spread scripts, vulnerabilities, and glitchy HTML all over the place, GoToSocial performs the following types of input sanitization:
|
||||||
|
|
|
@ -36,6 +36,8 @@ const (
|
||||||
moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"
|
moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"
|
||||||
withUTF8Link = "here's a link with utf-8 characters in it: https://example.org/söme_url"
|
withUTF8Link = "here's a link with utf-8 characters in it: https://example.org/söme_url"
|
||||||
withUTF8LinkExpected = "<p>here's a link with utf-8 characters in it: <a href=\"https://example.org/s%C3%B6me_url\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://example.org/söme_url</a></p>"
|
withUTF8LinkExpected = "<p>here's a link with utf-8 characters in it: <a href=\"https://example.org/s%C3%B6me_url\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://example.org/söme_url</a></p>"
|
||||||
|
withFunkyTags = "#hashtag1 pee #hashtag2\u200Bpee #hashtag3|poo #hashtag4\uFEFFpoo"
|
||||||
|
withFunkyTagsExpected = "<p><a href=\"http://localhost:8080/tags/hashtag1\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag1</span></a> pee <a href=\"http://localhost:8080/tags/hashtag2\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag2</span></a>\u200bpee <a href=\"http://localhost:8080/tags/hashtag3\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag3</span></a>|poo <a href=\"http://localhost:8080/tags/hashtag4\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag4</span></a>\ufeffpoo</p>"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PlainTestSuite struct {
|
type PlainTestSuite struct {
|
||||||
|
@ -136,6 +138,17 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
|
||||||
suite.Equal("올빼미", tags[0].Name)
|
suite.Equal("올빼미", tags[0].Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (suite *PlainTestSuite) TestFunkyTags() {
|
||||||
|
formatted := suite.FromPlain(withFunkyTags)
|
||||||
|
suite.Equal(withFunkyTagsExpected, formatted.HTML)
|
||||||
|
|
||||||
|
tags := formatted.Tags
|
||||||
|
suite.Equal("hashtag1", tags[0].Name)
|
||||||
|
suite.Equal("hashtag2", tags[1].Name)
|
||||||
|
suite.Equal("hashtag3", tags[2].Name)
|
||||||
|
suite.Equal("hashtag4", tags[3].Name)
|
||||||
|
}
|
||||||
|
|
||||||
func (suite *PlainTestSuite) TestDeriveMultiple() {
|
func (suite *PlainTestSuite) TestDeriveMultiple() {
|
||||||
statusText := `Another test @foss_satan@fossbros-anonymous.io
|
statusText := `Another test @foss_satan@fossbros-anonymous.io
|
||||||
|
|
||||||
|
|
|
@ -38,8 +38,34 @@ func isPermittedInHashtag(r rune) bool {
|
||||||
// is a recognized break character for before
|
// is a recognized break character for before
|
||||||
// or after a #hashtag.
|
// or after a #hashtag.
|
||||||
func isHashtagBoundary(r rune) bool {
	switch r {
	// Explicit "partial-word" terminators. None of these
	// are matched by the generic checks below: U+200B
	// (zero width space) and U+FEFF (zero width no-break
	// space) are format characters rather than white space,
	// and the pipe is a math symbol rather than punctuation.
	case '\u200B', '\uFEFF', '|':
		return true
	}

	// Standard Unicode white space is always a boundary.
	if unicode.IsSpace(r) {
		return true
	}

	// Any punctuation is a boundary, except for the
	// underscore, which is excluded here so that it
	// does not terminate a hashtag.
	return unicode.IsPunct(r) && r != '_'
}
|
||||||
|
|
||||||
// isMentionBoundary returns true if rune r
|
// isMentionBoundary returns true if rune r
|
||||||
|
|
Loading…
Reference in a new issue