mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-12-28 01:50:30 +00:00
[chore/bugfix] Deinterface text.Formatter, allow underscores in hashtags (#2233)
This commit is contained in:
parent
b6b8f82c87
commit
536d9e482d
18 changed files with 1040 additions and 713 deletions
|
@ -41,7 +41,7 @@ type Processor struct {
|
|||
mediaManager *media.Manager
|
||||
oauthServer oauth.Server
|
||||
filter *visibility.Filter
|
||||
formatter text.Formatter
|
||||
formatter *text.Formatter
|
||||
federator federation.Federator
|
||||
parseMention gtsmodel.ParseMentionFunc
|
||||
}
|
||||
|
|
|
@ -277,7 +277,7 @@ func processLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateFor
|
|||
return nil
|
||||
}
|
||||
|
||||
func processContent(ctx context.Context, dbService db.DB, formatter text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
|
||||
func processContent(ctx context.Context, dbService db.DB, formatter *text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
|
||||
// if there's nothing in the status at all we can just return early
|
||||
if form.Status == "" {
|
||||
status.Content = ""
|
||||
|
|
|
@ -31,7 +31,7 @@ type Processor struct {
|
|||
federator federation.Federator
|
||||
converter *typeutils.Converter
|
||||
filter *visibility.Filter
|
||||
formatter text.Formatter
|
||||
formatter *text.Formatter
|
||||
parseMention gtsmodel.ParseMentionFunc
|
||||
}
|
||||
|
||||
|
|
|
@ -1,70 +0,0 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/renderer/html"
|
||||
"github.com/yuin/goldmark/util"
|
||||
)
|
||||
|
||||
func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
|
||||
result := &FormatResult{
|
||||
Mentions: []*gtsmodel.Mention{},
|
||||
Tags: []*gtsmodel.Tag{},
|
||||
Emojis: []*gtsmodel.Emoji{},
|
||||
}
|
||||
// parse markdown text into html, using custom renderer to add hashtag/mention links
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
),
|
||||
goldmark.WithParser(
|
||||
parser.NewParser(
|
||||
parser.WithBlockParsers(
|
||||
util.Prioritized(newPlaintextParser(), 500),
|
||||
),
|
||||
),
|
||||
),
|
||||
goldmark.WithExtensions(
|
||||
&customRenderer{f, ctx, pmf, authorID, statusID, true, result},
|
||||
),
|
||||
)
|
||||
|
||||
var htmlContentBytes bytes.Buffer
|
||||
err := md.Convert([]byte(plain), &htmlContentBytes)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
|
||||
}
|
||||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// clean anything dangerous out of the HTML
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// shrink ray
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
|
@ -24,29 +24,25 @@ import (
|
|||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
)
|
||||
|
||||
// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
|
||||
// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and
|
||||
// emoji that were found in the text.
|
||||
type Formatter interface {
|
||||
// FromPlain parses an HTML text from a plaintext.
|
||||
FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
|
||||
// FromPlainNoParagraph parses an HTML text from a plaintext, without wrapping the resulting text in <p> tags.
|
||||
FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
|
||||
// FromMarkdown parses an HTML text from a markdown-formatted text.
|
||||
FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult
|
||||
// FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc.
|
||||
FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
|
||||
}
|
||||
// FormatFunc is fulfilled by FromPlain,
|
||||
// FromPlainNoParagraph, and FromMarkdown.
|
||||
type FormatFunc func(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
text string,
|
||||
) *FormatResult
|
||||
|
||||
type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult
|
||||
|
||||
type formatter struct {
|
||||
// Formatter wraps logic and functions for parsing
|
||||
// statuses and other text input into nice html.
|
||||
type Formatter struct {
|
||||
db db.DB
|
||||
}
|
||||
|
||||
// NewFormatter returns a new Formatter interface for parsing statuses and other text input into nice html.
|
||||
func NewFormatter(db db.DB) Formatter {
|
||||
return &formatter{
|
||||
// NewFormatter returns a new Formatter.
|
||||
func NewFormatter(db db.DB) *Formatter {
|
||||
return &Formatter{
|
||||
db: db,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ type TextStandardTestSuite struct {
|
|||
testEmojis map[string]*gtsmodel.Emoji
|
||||
|
||||
// module being tested
|
||||
formatter text.Formatter
|
||||
formatter *text.Formatter
|
||||
}
|
||||
|
||||
func (suite *TextStandardTestSuite) SetupSuite() {
|
||||
|
@ -85,14 +85,32 @@ func (suite *TextStandardTestSuite) TearDownTest() {
|
|||
testrig.StandardDBTeardown(suite.db)
|
||||
}
|
||||
|
||||
func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult {
|
||||
return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
|
||||
func (suite *TextStandardTestSuite) FromMarkdown(input string) *text.FormatResult {
|
||||
return suite.formatter.FromMarkdown(
|
||||
context.Background(),
|
||||
suite.parseMention,
|
||||
suite.testAccounts["local_account_1"].ID,
|
||||
"dummy_status_ID",
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult {
|
||||
return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
|
||||
func (suite *TextStandardTestSuite) FromPlain(input string) *text.FormatResult {
|
||||
return suite.formatter.FromPlain(
|
||||
context.Background(),
|
||||
suite.parseMention,
|
||||
suite.testAccounts["local_account_1"].ID,
|
||||
"dummy_status_ID",
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
func (suite *TextStandardTestSuite) FromPlainNoParagraph(text string) *text.FormatResult {
|
||||
return suite.formatter.FromPlainNoParagraph(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
|
||||
func (suite *TextStandardTestSuite) FromPlainNoParagraph(input string) *text.FormatResult {
|
||||
return suite.formatter.FromPlainNoParagraph(
|
||||
context.Background(),
|
||||
suite.parseMention,
|
||||
suite.testAccounts["local_account_1"].ID,
|
||||
"dummmy_status_ID",
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
|
423
internal/text/goldmark_custom_renderer.go
Normal file
423
internal/text/goldmark_custom_renderer.go
Normal file
|
@ -0,0 +1,423 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/id"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/uris"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
mdutil "github.com/yuin/goldmark/util"
|
||||
)
|
||||
|
||||
// customRenderer fulfils the following goldmark interfaces:
|
||||
//
|
||||
// - renderer.NodeRenderer
|
||||
// - goldmark.Extender.
|
||||
//
|
||||
// It is used as a goldmark extension by FromMarkdown and
|
||||
// (variants of) FromPlain.
|
||||
//
|
||||
// The custom renderer extracts and re-renders mentions, hashtags,
|
||||
// and emojis that are encountered during parsing, writing out valid
|
||||
// HTML representations of these elements.
|
||||
//
|
||||
// The customRenderer has the following side effects:
|
||||
//
|
||||
// - May use its db connection to retrieve existing and/or
|
||||
// store new mentions, hashtags, and emojis.
|
||||
// - May update its *FormatResult to append discovered
|
||||
// mentions, hashtags, and emojis to it.
|
||||
type customRenderer struct {
|
||||
ctx context.Context
|
||||
db db.DB
|
||||
parseMention gtsmodel.ParseMentionFunc
|
||||
accountID string
|
||||
statusID string
|
||||
emojiOnly bool
|
||||
result *FormatResult
|
||||
}
|
||||
|
||||
func (cr *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
||||
reg.Register(kindMention, cr.renderMention)
|
||||
reg.Register(kindHashtag, cr.renderHashtag)
|
||||
reg.Register(kindEmoji, cr.renderEmoji)
|
||||
}
|
||||
|
||||
func (cr *customRenderer) Extend(markdown goldmark.Markdown) {
|
||||
// 1000 is set as the lowest
|
||||
// priority, but it's arbitrary.
|
||||
const prio = 1000
|
||||
|
||||
if cr.emojiOnly {
|
||||
// Parse + render only emojis.
|
||||
markdown.Parser().AddOptions(
|
||||
parser.WithInlineParsers(
|
||||
mdutil.Prioritized(new(emojiParser), prio),
|
||||
),
|
||||
)
|
||||
} else {
|
||||
// Parse + render emojis, mentions, hashtags.
|
||||
markdown.Parser().AddOptions(parser.WithInlineParsers(
|
||||
mdutil.Prioritized(new(emojiParser), prio),
|
||||
mdutil.Prioritized(new(mentionParser), prio),
|
||||
mdutil.Prioritized(new(hashtagParser), prio),
|
||||
))
|
||||
}
|
||||
|
||||
// Add this custom renderer.
|
||||
markdown.Renderer().AddOptions(
|
||||
renderer.WithNodeRenderers(
|
||||
mdutil.Prioritized(cr, prio),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
/*
|
||||
MENTION RENDERING STUFF
|
||||
*/
|
||||
|
||||
// renderMention takes a mention
|
||||
// ast.Node and renders it as HTML.
|
||||
func (cr *customRenderer) renderMention(
|
||||
w mdutil.BufWriter,
|
||||
source []byte,
|
||||
node ast.Node,
|
||||
entering bool,
|
||||
) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// This function is registered
|
||||
// only for kindMention, and
|
||||
// should not be called for
|
||||
// any other node type.
|
||||
n, ok := node.(*mention)
|
||||
if !ok {
|
||||
log.Panic(cr.ctx, "type assertion failed")
|
||||
}
|
||||
|
||||
// Get raw mention string eg., '@someone@domain.org'.
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
// Handle mention and get text to render.
|
||||
text = cr.handleMention(text)
|
||||
|
||||
// Write returned text into HTML.
|
||||
if _, err := w.WriteString(text); err != nil {
|
||||
// We don't have much recourse if this fails.
|
||||
log.Errorf(cr.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// handleMention takes a string in the form '@username@domain.com'
|
||||
// or '@localusername', and does the following:
|
||||
//
|
||||
// - Parse the mention string into a *gtsmodel.Mention.
|
||||
// - Insert mention into database if necessary.
|
||||
// - Add mention to cr.results.Mentions slice.
|
||||
// - Return mention rendered as nice HTML.
|
||||
//
|
||||
// If the mention is invalid or cannot be created,
|
||||
// the unaltered input text will be returned instead.
|
||||
func (cr *customRenderer) handleMention(text string) string {
|
||||
mention, err := cr.parseMention(cr.ctx, text, cr.accountID, cr.statusID)
|
||||
if err != nil {
|
||||
log.Errorf(cr.ctx, "error parsing mention %s from status: %s", text, err)
|
||||
return text
|
||||
}
|
||||
|
||||
if cr.statusID != "" {
|
||||
if err := cr.db.PutMention(cr.ctx, mention); err != nil {
|
||||
log.Errorf(cr.ctx, "error putting mention in db: %s", err)
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// Append mention to result if not done already.
|
||||
//
|
||||
// This prevents multiple occurences of mention
|
||||
// in the same status generating multiple
|
||||
// entries for the same mention in result.
|
||||
func() {
|
||||
for _, m := range cr.result.Mentions {
|
||||
if mention.TargetAccountID == m.TargetAccountID {
|
||||
// Already appended.
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Not appended yet.
|
||||
cr.result.Mentions = append(cr.result.Mentions, mention)
|
||||
}()
|
||||
|
||||
if mention.TargetAccount == nil {
|
||||
// Fetch mention target account if not yet populated.
|
||||
mention.TargetAccount, err = cr.db.GetAccountByID(
|
||||
gtscontext.SetBarebones(cr.ctx),
|
||||
mention.TargetAccountID,
|
||||
)
|
||||
if err != nil {
|
||||
log.Errorf(cr.ctx, "error populating mention target account: %v", err)
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the mention with the formatted mention content,
|
||||
// eg. `@someone@domain.org` becomes:
|
||||
// `<span class="h-card"><a href="https://domain.org/@someone" class="u-url mention">@<span>someone</span></a></span>`
|
||||
var b strings.Builder
|
||||
b.WriteString(`<span class="h-card"><a href="`)
|
||||
b.WriteString(mention.TargetAccount.URL)
|
||||
b.WriteString(`" class="u-url mention">@<span>`)
|
||||
b.WriteString(mention.TargetAccount.Username)
|
||||
b.WriteString(`</span></a></span>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
/*
|
||||
HASHTAG RENDERING STUFF
|
||||
*/
|
||||
|
||||
// renderHashtag takes a hashtag
|
||||
// ast.Node and renders it as HTML.
|
||||
func (cr *customRenderer) renderHashtag(
|
||||
w mdutil.BufWriter,
|
||||
source []byte,
|
||||
node ast.Node,
|
||||
entering bool,
|
||||
) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// This function is registered
|
||||
// only for kindHashtag, and
|
||||
// should not be called for
|
||||
// any other node type.
|
||||
n, ok := node.(*hashtag)
|
||||
if !ok {
|
||||
log.Panic(cr.ctx, "type assertion failed")
|
||||
}
|
||||
|
||||
// Get raw hashtag string eg., '#SomeHashtag'.
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
// Handle hashtag and get text to render.
|
||||
text = cr.handleHashtag(text)
|
||||
|
||||
// Write returned text into HTML.
|
||||
if _, err := w.WriteString(text); err != nil {
|
||||
// We don't have much recourse if this fails.
|
||||
log.Errorf(cr.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// handleHashtag takes a string in the form '#SomeHashtag',
|
||||
// and does the following:
|
||||
//
|
||||
// - Normalize + validate the hashtag.
|
||||
// - Get or create hashtag in the db.
|
||||
// - Add hashtag to cr.results.Tags slice.
|
||||
// - Return hashtag rendered as nice HTML.
|
||||
//
|
||||
// If the hashtag is invalid or cannot be retrieved,
|
||||
// the unaltered input text will be returned instead.
|
||||
func (cr *customRenderer) handleHashtag(text string) string {
|
||||
normalized, ok := NormalizeHashtag(text)
|
||||
if !ok {
|
||||
// Not a valid hashtag.
|
||||
return text
|
||||
}
|
||||
|
||||
getOrCreateHashtag := func(name string) (*gtsmodel.Tag, error) {
|
||||
var (
|
||||
tag *gtsmodel.Tag
|
||||
err error
|
||||
)
|
||||
|
||||
// Check if we have a tag with this name already.
|
||||
tag, err = cr.db.GetTagByName(cr.ctx, name)
|
||||
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
||||
return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
|
||||
}
|
||||
|
||||
if tag != nil {
|
||||
// We had it!
|
||||
return tag, nil
|
||||
}
|
||||
|
||||
// We didn't have a tag with
|
||||
// this name, create one.
|
||||
tag = >smodel.Tag{
|
||||
ID: id.NewULID(),
|
||||
Name: name,
|
||||
}
|
||||
|
||||
if err = cr.db.PutTag(cr.ctx, tag); err != nil {
|
||||
return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
|
||||
}
|
||||
|
||||
return tag, nil
|
||||
}
|
||||
|
||||
tag, err := getOrCreateHashtag(normalized)
|
||||
if err != nil {
|
||||
log.Errorf(cr.ctx, "error generating hashtags from status: %s", err)
|
||||
return text
|
||||
}
|
||||
|
||||
// Append tag to result if not done already.
|
||||
//
|
||||
// This prevents multiple uses of a tag in
|
||||
// the same status generating multiple
|
||||
// entries for the same tag in result.
|
||||
func() {
|
||||
for _, t := range cr.result.Tags {
|
||||
if tag.ID == t.ID {
|
||||
// Already appended.
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Not appended yet.
|
||||
cr.result.Tags = append(cr.result.Tags, tag)
|
||||
}()
|
||||
|
||||
// Replace tag with the formatted tag content, eg. `#SomeHashtag` becomes:
|
||||
// `<a href="https://example.org/tags/somehashtag" class="mention hashtag" rel="tag">#<span>SomeHashtag</span></a>`
|
||||
var b strings.Builder
|
||||
b.WriteString(`<a href="`)
|
||||
b.WriteString(uris.GenerateURIForTag(normalized))
|
||||
b.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
|
||||
b.WriteString(normalized)
|
||||
b.WriteString(`</span></a>`)
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
/*
|
||||
EMOJI RENDERING STUFF
|
||||
*/
|
||||
|
||||
// renderEmoji doesn't actually turn an emoji
|
||||
// ast.Node into HTML, but instead only adds it to
|
||||
// the custom renderer results for later processing.
|
||||
func (cr *customRenderer) renderEmoji(
|
||||
w mdutil.BufWriter,
|
||||
source []byte,
|
||||
node ast.Node,
|
||||
entering bool,
|
||||
) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// This function is registered
|
||||
// only for kindEmoji, and
|
||||
// should not be called for
|
||||
// any other node type.
|
||||
n, ok := node.(*emoji)
|
||||
if !ok {
|
||||
log.Panic(cr.ctx, "type assertion failed")
|
||||
}
|
||||
|
||||
// Get raw emoji string eg., ':boobs:'.
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
// Handle emoji and get text to render.
|
||||
text = cr.handleEmoji(text)
|
||||
|
||||
// Write returned text into HTML.
|
||||
if _, err := w.WriteString(text); err != nil {
|
||||
// We don't have much recourse if this fails.
|
||||
log.Errorf(cr.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// handleEmoji takes a string in the form ':some_emoji:',
|
||||
// and does the following:
|
||||
//
|
||||
// - Try to get emoji from the db.
|
||||
// - Add emoji to cr.results.Emojis slice if found and useable.
|
||||
//
|
||||
// This function will always return the unaltered input
|
||||
// text, since emojification is handled elsewhere.
|
||||
func (cr *customRenderer) handleEmoji(text string) string {
|
||||
// Check if text points to a valid
|
||||
// local emoji by using its shortcode.
|
||||
//
|
||||
// The shortcode is the text
|
||||
// between enclosing ':' chars.
|
||||
shortcode := strings.Trim(text, ":")
|
||||
|
||||
// Try to fetch emoji as a locally stored emoji.
|
||||
emoji, err := cr.db.GetEmojiByShortcodeDomain(cr.ctx, shortcode, "")
|
||||
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
||||
log.Errorf(nil, "db error getting local emoji with shortcode %s: %s", shortcode, err)
|
||||
}
|
||||
|
||||
if emoji == nil {
|
||||
// No emoji found for this
|
||||
// shortcode, oh well!
|
||||
return text
|
||||
}
|
||||
|
||||
if *emoji.Disabled || !*emoji.VisibleInPicker {
|
||||
// Emoji was found but not useable.
|
||||
return text
|
||||
}
|
||||
|
||||
// Emoji was found and useable.
|
||||
// Append to result if not done already.
|
||||
//
|
||||
// This prevents multiple uses of an emoji
|
||||
// in the same status generating multiple
|
||||
// entries for the same emoji in result.
|
||||
func() {
|
||||
for _, e := range cr.result.Emojis {
|
||||
if emoji.Shortcode == e.Shortcode {
|
||||
// Already appended.
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Not appended yet.
|
||||
cr.result.Emojis = append(cr.result.Emojis, emoji)
|
||||
}()
|
||||
|
||||
return text
|
||||
}
|
|
@ -1,313 +0,0 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/util"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
"github.com/yuin/goldmark/text"
|
||||
mdutil "github.com/yuin/goldmark/util"
|
||||
)
|
||||
|
||||
// A goldmark extension that parses potential mentions and hashtags separately from regular
|
||||
// text, so that they stay as one contiguous text fragment in the AST, and then renders
|
||||
// them separately too, to avoid scanning normal text for mentions and tags.
|
||||
|
||||
// mention and hashtag fulfil the goldmark ast.Node interface.
|
||||
type mention struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
type hashtag struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
type emoji struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
var (
|
||||
kindMention = ast.NewNodeKind("Mention")
|
||||
kindHashtag = ast.NewNodeKind("Hashtag")
|
||||
kindEmoji = ast.NewNodeKind("Emoji")
|
||||
)
|
||||
|
||||
func (n *mention) Kind() ast.NodeKind {
|
||||
return kindMention
|
||||
}
|
||||
|
||||
func (n *hashtag) Kind() ast.NodeKind {
|
||||
return kindHashtag
|
||||
}
|
||||
|
||||
func (n *emoji) Kind() ast.NodeKind {
|
||||
return kindEmoji
|
||||
}
|
||||
|
||||
// Dump can be used for debugging.
|
||||
func (n *mention) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
func (n *hashtag) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
func (n *emoji) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
|
||||
// The contained segment is used in rendering.
|
||||
func newMention(s text.Segment) *mention {
|
||||
return &mention{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
func newHashtag(s text.Segment) *hashtag {
|
||||
return &hashtag{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
func newEmoji(s text.Segment) *emoji {
|
||||
return &emoji{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
|
||||
type mentionParser struct{}
|
||||
|
||||
type hashtagParser struct{}
|
||||
|
||||
type emojiParser struct{}
|
||||
|
||||
func (p *mentionParser) Trigger() []byte {
|
||||
return []byte{'@'}
|
||||
}
|
||||
|
||||
func (p *hashtagParser) Trigger() []byte {
|
||||
return []byte{'#'}
|
||||
}
|
||||
|
||||
func (p *emojiParser) Trigger() []byte {
|
||||
return []byte{':'}
|
||||
}
|
||||
|
||||
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
before := block.PrecendingCharacter()
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
if !util.IsMentionOrHashtagBoundary(before) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// unideal for performance but makes use of existing regex
|
||||
loc := regexes.MentionFinder.FindIndex(line)
|
||||
switch {
|
||||
case loc == nil:
|
||||
fallthrough
|
||||
case loc[0] != 0: // fail if not found at start
|
||||
return nil
|
||||
default:
|
||||
block.Advance(loc[1])
|
||||
return newMention(segment.WithStop(segment.Start + loc[1]))
|
||||
}
|
||||
}
|
||||
|
||||
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
before := block.PrecendingCharacter()
|
||||
line, segment := block.PeekLine()
|
||||
s := string(line)
|
||||
|
||||
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, r := range s {
|
||||
switch {
|
||||
case r == '#' && i == 0:
|
||||
// ignore initial #
|
||||
continue
|
||||
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
|
||||
// Fake hashtag, don't trust it
|
||||
return nil
|
||||
case util.IsMentionOrHashtagBoundary(r):
|
||||
if i <= 1 {
|
||||
// empty
|
||||
return nil
|
||||
}
|
||||
// End of hashtag
|
||||
block.Advance(i)
|
||||
return newHashtag(segment.WithStop(segment.Start + i))
|
||||
}
|
||||
}
|
||||
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
|
||||
block.Advance(segment.Len())
|
||||
return newHashtag(segment)
|
||||
}
|
||||
|
||||
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
// unideal for performance but makes use of existing regex
|
||||
loc := regexes.EmojiFinder.FindIndex(line)
|
||||
switch {
|
||||
case loc == nil:
|
||||
fallthrough
|
||||
case loc[0] != 0: // fail if not found at start
|
||||
return nil
|
||||
default:
|
||||
block.Advance(loc[1])
|
||||
return newEmoji(segment.WithStop(segment.Start + loc[1]))
|
||||
}
|
||||
}
|
||||
|
||||
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
|
||||
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
|
||||
// fields are used to report tags and mentions to the caller for use as metadata.
|
||||
type customRenderer struct {
|
||||
f *formatter
|
||||
ctx context.Context
|
||||
parseMention gtsmodel.ParseMentionFunc
|
||||
accountID string
|
||||
statusID string
|
||||
emojiOnly bool
|
||||
result *FormatResult
|
||||
}
|
||||
|
||||
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
||||
reg.Register(kindMention, r.renderMention)
|
||||
reg.Register(kindHashtag, r.renderHashtag)
|
||||
reg.Register(kindEmoji, r.renderEmoji)
|
||||
}
|
||||
|
||||
func (r *customRenderer) Extend(m goldmark.Markdown) {
|
||||
// 1000 is set as the lowest priority, but it's arbitrary
|
||||
m.Parser().AddOptions(parser.WithInlineParsers(
|
||||
mdutil.Prioritized(&emojiParser{}, 1000),
|
||||
))
|
||||
if !r.emojiOnly {
|
||||
m.Parser().AddOptions(parser.WithInlineParsers(
|
||||
mdutil.Prioritized(&mentionParser{}, 1000),
|
||||
mdutil.Prioritized(&hashtagParser{}, 1000),
|
||||
))
|
||||
}
|
||||
m.Renderer().AddOptions(renderer.WithNodeRenderers(
|
||||
mdutil.Prioritized(r, 1000),
|
||||
))
|
||||
}
|
||||
|
||||
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
|
||||
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*mention) // this function is only registered for kindMention
|
||||
if !ok {
|
||||
log.Panic(r.ctx, "type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
html := r.replaceMention(text)
|
||||
|
||||
// we don't have much recourse if this fails
|
||||
if _, err := w.WriteString(html); err != nil {
|
||||
log.Errorf(r.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
|
||||
if !ok {
|
||||
log.Panic(r.ctx, "type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
html := r.replaceHashtag(text)
|
||||
|
||||
_, err := w.WriteString(html)
|
||||
// we don't have much recourse if this fails
|
||||
if err != nil {
|
||||
log.Errorf(r.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
|
||||
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*emoji) // this function is only registered for kindEmoji
|
||||
if !ok {
|
||||
log.Panic(r.ctx, "type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
shortcode := text[1 : len(text)-1]
|
||||
|
||||
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
|
||||
if err != nil {
|
||||
if err != db.ErrNoEntries {
|
||||
log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err)
|
||||
}
|
||||
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
|
||||
listed := false
|
||||
for _, e := range r.result.Emojis {
|
||||
if e.Shortcode == emoji.Shortcode {
|
||||
listed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !listed {
|
||||
r.result.Emojis = append(r.result.Emojis, emoji)
|
||||
}
|
||||
}
|
||||
|
||||
// we don't have much recourse if this fails
|
||||
if _, err := w.WriteString(text); err != nil {
|
||||
log.Errorf(r.ctx, "error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
281
internal/text/goldmark_parsers.go
Normal file
281
internal/text/goldmark_parsers.go
Normal file
|
@ -0,0 +1,281 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
)
|
||||
|
||||
/*
|
||||
MENTION PARSER STUFF
|
||||
*/
|
||||
|
||||
// mention fulfils the goldmark
|
||||
// ast.Node interface.
|
||||
type mention struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
var kindMention = ast.NewNodeKind("Mention")
|
||||
|
||||
func (n *mention) Kind() ast.NodeKind {
|
||||
return kindMention
|
||||
}
|
||||
|
||||
func (n *mention) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
// newMention creates a goldmark ast.Node
|
||||
// from a text.Segment. The contained segment
|
||||
// is used in rendering.
|
||||
func newMention(s text.Segment) *mention {
|
||||
return &mention{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
// mentionParser is a stateless inline parser for
// mentions; it fulfils goldmark's parser.InlineParser
// interface.
type mentionParser struct{}

// Trigger returns the characters that start mention
// parsing: just the `@` symbol, which appears at the
// beginning of a mention.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}
|
||||
|
||||
func (p *mentionParser) Parse(
|
||||
_ ast.Node,
|
||||
block text.Reader,
|
||||
_ parser.Context,
|
||||
) ast.Node {
|
||||
// If preceding character is not a valid boundary
|
||||
// character, then this cannot be a valid mention.
|
||||
if !isMentionBoundary(block.PrecendingCharacter()) {
|
||||
return nil
|
||||
}
|
||||
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
// Ascertain location of mention in the line
|
||||
// that starts with the trigger character.
|
||||
loc := regexes.MentionFinder.FindIndex(line)
|
||||
if loc == nil || loc[0] != 0 {
|
||||
// Noop if not found or
|
||||
// not found at start.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Advance the block to
|
||||
// the end of the mention.
|
||||
block.Advance(loc[1])
|
||||
|
||||
// mention ast.Node spans from the
|
||||
// beginning of this segment up to
|
||||
// the last character of the mention.
|
||||
return newMention(
|
||||
segment.WithStop(
|
||||
segment.Start + loc[1],
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
/*
|
||||
HASHTAG PARSER STUFF
|
||||
*/
|
||||
|
||||
// hashtag fulfils the goldmark
|
||||
// ast.Node interface.
|
||||
type hashtag struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
var kindHashtag = ast.NewNodeKind("Hashtag")
|
||||
|
||||
func (n *hashtag) Kind() ast.NodeKind {
|
||||
return kindHashtag
|
||||
}
|
||||
|
||||
func (n *hashtag) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
// newHashtag creates a goldmark ast.Node
|
||||
// from a text.Segment. The contained segment
|
||||
// is used in rendering.
|
||||
func newHashtag(s text.Segment) *hashtag {
|
||||
return &hashtag{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
// hashtagParser is a stateless
// inline parser for hashtags.
type hashtagParser struct{}

// Trigger returns the characters that start hashtag
// parsing: just the '#' symbol, which appears at the
// beginning of a hashtag.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}
|
||||
|
||||
func (p *hashtagParser) Parse(
|
||||
_ ast.Node,
|
||||
block text.Reader,
|
||||
_ parser.Context,
|
||||
) ast.Node {
|
||||
// If preceding character is not a valid boundary
|
||||
// character, then this cannot be a valid hashtag.
|
||||
if !isHashtagBoundary(block.PrecendingCharacter()) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
line, segment = block.PeekLine()
|
||||
lineStr = string(line)
|
||||
lineStrLen = len(lineStr)
|
||||
)
|
||||
|
||||
if lineStrLen <= 1 {
|
||||
// This is probably just
|
||||
// a lonely '#' char.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Iterate through the runes in the detected
|
||||
// hashtag string until we reach either:
|
||||
// - A weird character (bad).
|
||||
// - The end of the hashtag (ok).
|
||||
// - The end of the string (also ok).
|
||||
for i, r := range lineStr {
|
||||
switch {
|
||||
case r == '#' && i == 0:
|
||||
// Ignore initial '#'.
|
||||
continue
|
||||
|
||||
case !isPlausiblyInHashtag(r) &&
|
||||
!isHashtagBoundary(r):
|
||||
// Weird non-boundary character
|
||||
// in the hashtag. Don't trust it.
|
||||
return nil
|
||||
|
||||
case isHashtagBoundary(r):
|
||||
// Reached closing hashtag
|
||||
// boundary. Advance block
|
||||
// to the end of the hashtag.
|
||||
block.Advance(i)
|
||||
|
||||
// hashtag ast.Node spans from
|
||||
// the beginning of this segment
|
||||
// up to the boundary character.
|
||||
return newHashtag(
|
||||
segment.WithStop(
|
||||
segment.Start + i,
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// No invalid or boundary characters before the
|
||||
// end of the line: it's all hashtag, baby 😎
|
||||
//
|
||||
// Advance block to the end of the segment.
|
||||
block.Advance(segment.Len())
|
||||
|
||||
// hashtag ast.Node spans
|
||||
// the entire segment.
|
||||
return newHashtag(segment)
|
||||
}
|
||||
|
||||
/*
|
||||
EMOJI PARSER STUFF
|
||||
*/
|
||||
|
||||
// emoji fulfils the goldmark
|
||||
// ast.Node interface.
|
||||
type emoji struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
var kindEmoji = ast.NewNodeKind("Emoji")
|
||||
|
||||
func (n *emoji) Kind() ast.NodeKind {
|
||||
return kindEmoji
|
||||
}
|
||||
|
||||
func (n *emoji) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
// newEmoji creates a goldmark ast.Node
|
||||
// from a text.Segment. The contained
|
||||
// segment is used in rendering.
|
||||
func newEmoji(s text.Segment) *emoji {
|
||||
return &emoji{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
// emojiParser is a stateless
// inline parser for emojis.
type emojiParser struct{}

// Trigger returns the characters that start emoji
// parsing: just the ':' char, which appears at the
// start of the emoji.
func (p *emojiParser) Trigger() []byte {
	return []byte(":")
}
|
||||
|
||||
func (p *emojiParser) Parse(
|
||||
_ ast.Node,
|
||||
block text.Reader,
|
||||
_ parser.Context,
|
||||
) ast.Node {
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
// Ascertain location of emoji in the line
|
||||
// that starts with the trigger character.
|
||||
loc := regexes.EmojiFinder.FindIndex(line)
|
||||
if loc == nil || loc[0] != 0 {
|
||||
// Noop if not found or
|
||||
// not found at start.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Advance the block to
|
||||
// the end of the emoji.
|
||||
block.Advance(loc[1])
|
||||
|
||||
// emoji ast.Node spans from the
|
||||
// beginning of this segment up to
|
||||
// the last character of the emoji.
|
||||
return newEmoji(
|
||||
segment.WithStop(
|
||||
segment.Start + loc[1],
|
||||
),
|
||||
)
|
||||
}
|
|
@ -26,7 +26,7 @@ import (
|
|||
// plaintextParser implements goldmark.parser.BlockParser
|
||||
type plaintextParser struct{}
|
||||
|
||||
var defaultPlaintextParser = &plaintextParser{}
|
||||
var defaultPlaintextParser = new(plaintextParser)
|
||||
|
||||
func newPlaintextParser() parser.BlockParser {
|
||||
return defaultPlaintextParser
|
||||
|
@ -64,7 +64,7 @@ func (b *plaintextParser) CanAcceptIndentedLine() bool {
|
|||
// plaintextParserNoParagraph implements goldmark.parser.BlockParser
|
||||
type plaintextParserNoParagraph struct{}
|
||||
|
||||
var defaultPlaintextParserNoParagraph = &plaintextParserNoParagraph{}
|
||||
var defaultPlaintextParserNoParagraph = new(plaintextParserNoParagraph)
|
||||
|
||||
func newPlaintextParserNoParagraph() parser.BlockParser {
|
||||
return defaultPlaintextParserNoParagraph
|
||||
|
|
|
@ -28,38 +28,55 @@ import (
|
|||
"github.com/yuin/goldmark/renderer/html"
|
||||
)
|
||||
|
||||
func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult {
|
||||
result := &FormatResult{
|
||||
Mentions: []*gtsmodel.Mention{},
|
||||
Tags: []*gtsmodel.Tag{},
|
||||
Emojis: []*gtsmodel.Emoji{},
|
||||
}
|
||||
// FromMarkdown fulfils FormatFunc by parsing
|
||||
// the given markdown input into a FormatResult.
|
||||
func (f *Formatter) FromMarkdown(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
result := new(FormatResult)
|
||||
|
||||
// parse markdown text into html, using custom renderer to add hashtag/mention links
|
||||
// Instantiate goldmark parser for
|
||||
// markdown, using custom renderer
|
||||
// to add hashtag/mention links.
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
html.WithUnsafe(), // allows raw HTML
|
||||
// Allows raw HTML. We sanitize
|
||||
// at the end so this is OK.
|
||||
html.WithUnsafe(),
|
||||
),
|
||||
goldmark.WithExtensions(
|
||||
&customRenderer{f, ctx, pmf, authorID, statusID, false, result},
|
||||
extension.Linkify, // turns URLs into links
|
||||
&customRenderer{
|
||||
ctx,
|
||||
f.db,
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
false, // emojiOnly = false.
|
||||
result,
|
||||
},
|
||||
extension.Linkify, // Turns URLs into links.
|
||||
extension.Strikethrough,
|
||||
),
|
||||
)
|
||||
|
||||
var htmlContentBytes bytes.Buffer
|
||||
err := md.Convert([]byte(markdownText), &htmlContentBytes)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "error formatting markdown to HTML: %s", err)
|
||||
// Parse input into HTML.
|
||||
var htmlBytes bytes.Buffer
|
||||
if err := md.Convert(
|
||||
[]byte(input),
|
||||
&htmlBytes,
|
||||
); err != nil {
|
||||
log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
|
||||
}
|
||||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// clean anything dangerous out of the HTML
|
||||
// Clean and shrink HTML.
|
||||
result.HTML = htmlBytes.String()
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// shrink ray
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
|
|
|
@ -76,10 +76,16 @@ const (
|
|||
mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
|
||||
mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
|
||||
mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>"
|
||||
mdItalicHashtag = "_#hashtag_"
|
||||
mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
|
||||
mdItalicHashtags = "_#hashtag #hashtag #hashtag_"
|
||||
mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
|
||||
// Hashtags can be italicized but only with *, not _.
|
||||
mdItalicHashtag = "*#hashtag*"
|
||||
mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
|
||||
mdItalicHashtags = "*#hashtag #hashtag #hashtag*"
|
||||
mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
|
||||
// Hashtags can end with or contain _ but not start with it.
|
||||
mdUnderscorePrefixHashtag = "_#hashtag"
|
||||
mdUnderscorePrefixHashtagExpected = "<p>_#hashtag</p>"
|
||||
mdUnderscoreSuffixHashtag = "#hashtag_"
|
||||
mdUnderscoreSuffixHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hashtag_\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag_</span></a></p>"
|
||||
// BEWARE: sneaky unicode business going on.
|
||||
// the first ö is one rune, the second ö is an o with a combining diacritic.
|
||||
mdUnnormalizedHashtag = "#hellöthere #hellöthere"
|
||||
|
@ -194,6 +200,19 @@ func (suite *MarkdownTestSuite) TestParseItalicHashtags() {
|
|||
suite.Equal(mdItalicHashtagsExpected, formatted.HTML)
|
||||
}
|
||||
|
||||
func (suite *MarkdownTestSuite) TestParseHashtagUnderscorePrefix() {
|
||||
formatted := suite.FromMarkdown(mdUnderscorePrefixHashtag)
|
||||
suite.Equal(mdUnderscorePrefixHashtagExpected, formatted.HTML)
|
||||
suite.Empty(formatted.Tags)
|
||||
}
|
||||
|
||||
func (suite *MarkdownTestSuite) TestParseHashtagUnderscoreSuffix() {
|
||||
formatted := suite.FromMarkdown(mdUnderscoreSuffixHashtag)
|
||||
suite.Equal(mdUnderscoreSuffixHashtagExpected, formatted.HTML)
|
||||
suite.NotEmpty(formatted.Tags)
|
||||
suite.Equal("hashtag_", formatted.Tags[0].Name)
|
||||
}
|
||||
|
||||
func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() {
|
||||
formatted := suite.FromMarkdown(mdUnnormalizedHashtag)
|
||||
suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)
|
||||
|
|
|
@ -20,7 +20,6 @@ package text
|
|||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/util"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
|
@ -36,8 +35,10 @@ const (
|
|||
//
|
||||
// Finally, it will do a check on the normalized string to
|
||||
// ensure that it's below maximumHashtagLength chars, and
|
||||
// contains only unicode letters and numbers. If this passes,
|
||||
// returned bool will be true.
|
||||
// contains only letters, numbers, and underscores (and not
|
||||
// *JUST* underscores).
|
||||
//
|
||||
// If all this passes, returned bool will be true.
|
||||
func NormalizeHashtag(text string) (string, bool) {
|
||||
// This normalization is specifically to avoid cases
|
||||
// where visually-identical hashtags are stored with
|
||||
|
@ -47,14 +48,31 @@ func NormalizeHashtag(text string) (string, bool) {
|
|||
// with parent characters to form regular letter symbols.
|
||||
normalized := norm.NFC.String(strings.TrimPrefix(text, "#"))
|
||||
|
||||
// Validate normalized.
|
||||
ok := true
|
||||
// Validate normalized result.
|
||||
var (
|
||||
notJustUnderscores = false
|
||||
onlyPermittedChars = true
|
||||
lengthOK = true
|
||||
)
|
||||
|
||||
for i, r := range normalized {
|
||||
if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) {
|
||||
ok = false
|
||||
if r != '_' {
|
||||
// This isn't an underscore,
|
||||
// so the whole hashtag isn't
|
||||
// just underscores.
|
||||
notJustUnderscores = true
|
||||
}
|
||||
|
||||
if i >= maximumHashtagLength {
|
||||
lengthOK = false
|
||||
break
|
||||
}
|
||||
|
||||
if !isPermittedInHashtag(r) {
|
||||
onlyPermittedChars = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return normalized, ok
|
||||
return normalized, (lengthOK && onlyPermittedChars && notJustUnderscores)
|
||||
}
|
||||
|
|
|
@ -30,66 +30,150 @@ import (
|
|||
"github.com/yuin/goldmark/util"
|
||||
)
|
||||
|
||||
func (f *formatter) fromPlain(
|
||||
// FromPlain fulfils FormatFunc by parsing
|
||||
// the given plaintext input into a FormatResult.
|
||||
func (f *Formatter) FromPlain(
|
||||
ctx context.Context,
|
||||
ptParser parser.Parser,
|
||||
pmf gtsmodel.ParseMentionFunc,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
plain string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
result := &FormatResult{
|
||||
Mentions: []*gtsmodel.Mention{},
|
||||
Tags: []*gtsmodel.Tag{},
|
||||
Emojis: []*gtsmodel.Emoji{},
|
||||
}
|
||||
|
||||
// Parse markdown into html, using custom renderer
|
||||
// to add hashtag/mention links and emoji images.
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
),
|
||||
goldmark.WithParser(ptParser), // use parser we were passed
|
||||
goldmark.WithExtensions(
|
||||
&customRenderer{f, ctx, pmf, authorID, statusID, false, result},
|
||||
extension.Linkify, // turns URLs into links
|
||||
),
|
||||
)
|
||||
|
||||
var htmlContentBytes bytes.Buffer
|
||||
if err := md.Convert([]byte(plain), &htmlContentBytes); err != nil {
|
||||
log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
|
||||
}
|
||||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// Clean anything dangerous out of resulting HTML.
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// Shrink ray!
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
|
||||
ptParser := parser.NewParser(
|
||||
// Initialize standard block parser
|
||||
// that wraps result in <p> tags.
|
||||
plainTextParser := parser.NewParser(
|
||||
parser.WithBlockParsers(
|
||||
util.Prioritized(newPlaintextParser(), 500),
|
||||
),
|
||||
)
|
||||
|
||||
return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain)
|
||||
return f.fromPlain(
|
||||
ctx,
|
||||
plainTextParser,
|
||||
false, // emojiOnly = false
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
func (f *formatter) FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
|
||||
ptParser := parser.NewParser(
|
||||
// FromPlainNoParagraph fulfils FormatFunc by parsing
|
||||
// the given plaintext input into a FormatResult.
|
||||
//
|
||||
// Unlike FromPlain, it will not wrap the resulting
|
||||
// HTML in <p> tags, making it useful for parsing
|
||||
// short fragments of text that oughtn't be formally
|
||||
// wrapped as a paragraph.
|
||||
func (f *Formatter) FromPlainNoParagraph(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
// Initialize block parser that
|
||||
// doesn't wrap result in <p> tags.
|
||||
plainTextParser := parser.NewParser(
|
||||
parser.WithBlockParsers(
|
||||
// Initialize block parser that doesn't wrap in <p> tags.
|
||||
util.Prioritized(newPlaintextParserNoParagraph(), 500),
|
||||
),
|
||||
)
|
||||
|
||||
return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain)
|
||||
return f.fromPlain(
|
||||
ctx,
|
||||
plainTextParser,
|
||||
false, // emojiOnly = false
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
// FromPlainEmojiOnly fulfils FormatFunc by parsing
|
||||
// the given plaintext input into a FormatResult.
|
||||
//
|
||||
// Unlike FromPlain, it will only parse emojis with
|
||||
// the custom renderer, leaving aside mentions and tags.
|
||||
func (f *Formatter) FromPlainEmojiOnly(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
// Initialize standard block parser
|
||||
// that wraps result in <p> tags.
|
||||
plainTextParser := parser.NewParser(
|
||||
parser.WithBlockParsers(
|
||||
util.Prioritized(newPlaintextParser(), 500),
|
||||
),
|
||||
)
|
||||
|
||||
return f.fromPlain(
|
||||
ctx,
|
||||
plainTextParser,
|
||||
true, // emojiOnly = true
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
// fromPlain parses the given input text
|
||||
// using the given plainTextParser, and
|
||||
// returns the result.
|
||||
func (f *Formatter) fromPlain(
|
||||
ctx context.Context,
|
||||
plainTextParser parser.Parser,
|
||||
emojiOnly bool,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
result := new(FormatResult)
|
||||
|
||||
// Instantiate goldmark parser for
|
||||
// plaintext, using custom renderer
|
||||
// to add hashtag/mention links.
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
),
|
||||
// Use whichever plaintext
|
||||
// parser we were passed.
|
||||
goldmark.WithParser(plainTextParser),
|
||||
goldmark.WithExtensions(
|
||||
&customRenderer{
|
||||
ctx,
|
||||
f.db,
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
emojiOnly,
|
||||
result,
|
||||
},
|
||||
extension.Linkify, // Turns URLs into links.
|
||||
),
|
||||
)
|
||||
|
||||
// Parse input into HTML.
|
||||
var htmlBytes bytes.Buffer
|
||||
if err := md.Convert(
|
||||
[]byte(input),
|
||||
&htmlBytes,
|
||||
); err != nil {
|
||||
log.Errorf(ctx, "error formatting plaintext input to HTML: %s", err)
|
||||
}
|
||||
|
||||
// Clean and shrink HTML.
|
||||
result.HTML = htmlBytes.String()
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ package text_test
|
|||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
|
@ -85,7 +84,7 @@ that link shouldn't come out formatted as a mention!`
|
|||
func (suite *PlainTestSuite) TestDeriveMentionsEmpty() {
|
||||
statusText := ``
|
||||
menchies := suite.FromPlain(statusText).Mentions
|
||||
assert.Len(suite.T(), menchies, 0)
|
||||
suite.Len(menchies, 0)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
|
||||
|
@ -98,7 +97,9 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
|
|||
here's a link with a fragment: https://example.org/whatever#ahhh
|
||||
here's another link with a fragment: https://example.org/whatever/#ahhh
|
||||
|
||||
(#ThisShouldAlsoWork) #this_should_be_split
|
||||
(#ThisShouldAlsoWork) #this_should_not_be_split
|
||||
|
||||
#__ <- just underscores, shouldn't work
|
||||
|
||||
#111111 thisalsoshouldn'twork#### ##
|
||||
|
||||
|
@ -108,24 +109,24 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
|
|||
`
|
||||
|
||||
tags := suite.FromPlain(statusText).Tags
|
||||
assert.Len(suite.T(), tags, 13)
|
||||
assert.Equal(suite.T(), "testing123", tags[0].Name)
|
||||
assert.Equal(suite.T(), "also", tags[1].Name)
|
||||
assert.Equal(suite.T(), "thisshouldwork", tags[2].Name)
|
||||
assert.Equal(suite.T(), "dupe", tags[3].Name)
|
||||
assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name)
|
||||
assert.Equal(suite.T(), "this", tags[5].Name)
|
||||
assert.Equal(suite.T(), "111111", tags[6].Name)
|
||||
assert.Equal(suite.T(), "alimentación", tags[7].Name)
|
||||
assert.Equal(suite.T(), "saúde", tags[8].Name)
|
||||
assert.Equal(suite.T(), "lävistää", tags[9].Name)
|
||||
assert.Equal(suite.T(), "ö", tags[10].Name)
|
||||
assert.Equal(suite.T(), "네", tags[11].Name)
|
||||
assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name)
|
||||
suite.Len(tags, 13)
|
||||
suite.Equal("testing123", tags[0].Name)
|
||||
suite.Equal("also", tags[1].Name)
|
||||
suite.Equal("thisshouldwork", tags[2].Name)
|
||||
suite.Equal("dupe", tags[3].Name)
|
||||
suite.Equal("ThisShouldAlsoWork", tags[4].Name)
|
||||
suite.Equal("this_should_not_be_split", tags[5].Name)
|
||||
suite.Equal("111111", tags[6].Name)
|
||||
suite.Equal("alimentación", tags[7].Name)
|
||||
suite.Equal("saúde", tags[8].Name)
|
||||
suite.Equal("lävistää", tags[9].Name)
|
||||
suite.Equal("ö", tags[10].Name)
|
||||
suite.Equal("네", tags[11].Name)
|
||||
suite.Equal("ThisOneIsThirteyCharactersLong", tags[12].Name)
|
||||
|
||||
statusText = `#올빼미 hej`
|
||||
tags = suite.FromPlain(statusText).Tags
|
||||
assert.Equal(suite.T(), "올빼미", tags[0].Name)
|
||||
suite.Equal("올빼미", tags[0].Name)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestDeriveMultiple() {
|
||||
|
@ -137,20 +138,20 @@ func (suite *PlainTestSuite) TestDeriveMultiple() {
|
|||
|
||||
f := suite.FromPlain(statusText)
|
||||
|
||||
assert.Len(suite.T(), f.Mentions, 1)
|
||||
assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
|
||||
suite.Len(f.Mentions, 1)
|
||||
suite.Equal("@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
|
||||
|
||||
assert.Len(suite.T(), f.Tags, 1)
|
||||
assert.Equal(suite.T(), "hashtag", f.Tags[0].Name)
|
||||
suite.Len(f.Tags, 1)
|
||||
suite.Equal("hashtag", f.Tags[0].Name)
|
||||
|
||||
assert.Len(suite.T(), f.Emojis, 0)
|
||||
suite.Len(f.Emojis, 0)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestZalgoHashtag() {
|
||||
statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?`
|
||||
f := suite.FromPlain(statusText)
|
||||
assert.Len(suite.T(), f.Tags, 1)
|
||||
assert.Equal(suite.T(), "praying", f.Tags[0].Name)
|
||||
suite.Len(f.Tags, 1)
|
||||
suite.Equal("praying", f.Tags[0].Name)
|
||||
}
|
||||
|
||||
func TestPlainTestSuite(t *testing.T) {
|
||||
|
|
|
@ -1,161 +0,0 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/id"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/uris"
|
||||
)
|
||||
|
||||
// replaceMention takes a string in the form @username@domain.com or @localusername
|
||||
func (r *customRenderer) replaceMention(text string) string {
|
||||
mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
|
||||
if err != nil {
|
||||
log.Errorf(r.ctx, "error parsing mention %s from status: %s", text, err)
|
||||
return text
|
||||
}
|
||||
|
||||
if r.statusID != "" {
|
||||
if err := r.f.db.PutMention(r.ctx, mention); err != nil {
|
||||
log.Errorf(r.ctx, "error putting mention in db: %s", err)
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// only append if it's not been listed yet
|
||||
listed := false
|
||||
for _, m := range r.result.Mentions {
|
||||
if mention.ID == m.ID {
|
||||
listed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !listed {
|
||||
r.result.Mentions = append(r.result.Mentions, mention)
|
||||
}
|
||||
|
||||
if mention.TargetAccount == nil {
|
||||
// Fetch mention target account if not yet populated.
|
||||
mention.TargetAccount, err = r.f.db.GetAccountByID(
|
||||
gtscontext.SetBarebones(r.ctx),
|
||||
mention.TargetAccountID,
|
||||
)
|
||||
if err != nil {
|
||||
log.Errorf(r.ctx, "error populating mention target account: %v", err)
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// The mention's target is our target
|
||||
targetAccount := mention.TargetAccount
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
// replace the mention with the formatted mention content
|
||||
// <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span>
|
||||
b.WriteString(`<span class="h-card"><a href="`)
|
||||
b.WriteString(targetAccount.URL)
|
||||
b.WriteString(`" class="u-url mention">@<span>`)
|
||||
b.WriteString(targetAccount.Username)
|
||||
b.WriteString(`</span></a></span>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// replaceHashtag takes a string in the form #SomeHashtag, and will normalize
|
||||
// it before adding it to the db (or just getting it from the db if it already
|
||||
// exists) and turning it into HTML.
|
||||
func (r *customRenderer) replaceHashtag(text string) string {
|
||||
normalized, ok := NormalizeHashtag(text)
|
||||
if !ok {
|
||||
// Not a valid hashtag.
|
||||
return text
|
||||
}
|
||||
|
||||
tag, err := r.getOrCreateHashtag(normalized)
|
||||
if err != nil {
|
||||
log.Errorf(r.ctx, "error generating hashtags from status: %s", err)
|
||||
return text
|
||||
}
|
||||
|
||||
// Append tag to result if not done already.
|
||||
//
|
||||
// This prevents multiple uses of a tag in
|
||||
// the same status generating multiple
|
||||
// entries for the same tag in result.
|
||||
func() {
|
||||
for _, t := range r.result.Tags {
|
||||
if tag.ID == t.ID {
|
||||
// Already appended.
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Not appended yet.
|
||||
r.result.Tags = append(r.result.Tags, tag)
|
||||
}()
|
||||
|
||||
// Replace tag with the formatted tag content, eg. `#SomeHashtag` becomes:
|
||||
// `<a href="https://example.org/tags/somehashtag" class="mention hashtag" rel="tag">#<span>SomeHashtag</span></a>`
|
||||
var b strings.Builder
|
||||
b.WriteString(`<a href="`)
|
||||
b.WriteString(uris.GenerateURIForTag(normalized))
|
||||
b.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
|
||||
b.WriteString(normalized)
|
||||
b.WriteString(`</span></a>`)
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (r *customRenderer) getOrCreateHashtag(name string) (*gtsmodel.Tag, error) {
|
||||
var (
|
||||
tag *gtsmodel.Tag
|
||||
err error
|
||||
)
|
||||
|
||||
// Check if we have a tag with this name already.
|
||||
tag, err = r.f.db.GetTagByName(r.ctx, name)
|
||||
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
||||
return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
|
||||
}
|
||||
|
||||
if tag != nil {
|
||||
// We had it!
|
||||
return tag, nil
|
||||
}
|
||||
|
||||
// We didn't have a tag with
|
||||
// this name, create one.
|
||||
tag = >smodel.Tag{
|
||||
ID: id.NewULID(),
|
||||
Name: name,
|
||||
}
|
||||
|
||||
if err = r.f.db.PutTag(r.ctx, tag); err != nil {
|
||||
return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
|
||||
}
|
||||
|
||||
return tag, nil
|
||||
}
|
51
internal/text/util.go
Normal file
51
internal/text/util.go
Normal file
|
@ -0,0 +1,51 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package text
|
||||
|
||||
import "unicode"
|
||||
|
||||
func isPlausiblyInHashtag(r rune) bool {
|
||||
// Marks are allowed during parsing
|
||||
// prior to normalization, but not after,
|
||||
// since they may be combined into letters
|
||||
// during normalization.
|
||||
return unicode.IsMark(r) ||
|
||||
isPermittedInHashtag(r)
|
||||
}
|
||||
|
||||
// isPermittedInHashtag reports whether rune r is allowed
// in a hashtag: that is, whether it is a letter, a number,
// or an underscore.
func isPermittedInHashtag(r rune) bool {
	switch {
	case unicode.IsLetter(r),
		unicode.IsNumber(r),
		r == '_':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isHashtagBoundary reports whether rune r counts as a
// break character directly before or after a #hashtag:
// any whitespace, or any punctuation except underscore.
func isHashtagBoundary(r rune) bool {
	if unicode.IsSpace(r) {
		return true
	}

	// Underscore is punctuation, but
	// it never acts as a boundary.
	if r == '_' {
		return false
	}

	return unicode.IsPunct(r)
}
|
||||
|
||||
// isMentionBoundary reports whether rune r counts as a
// break character directly before or after a @mention:
// any whitespace or any punctuation.
func isMentionBoundary(r rune) bool {
	switch {
	case unicode.IsSpace(r), unicode.IsPunct(r):
		return true
	}

	return false
}
|
|
@ -1,37 +0,0 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// IsPlausiblyInHashtag reports whether rune r may be part of a hashtag
// while it is being parsed: it must be a letter, a number, or a mark.
func IsPlausiblyInHashtag(r rune) bool {
	// Marks are accepted at this stage only: normalization may combine
	// them into letters, so they're allowed before it but not after.
	if unicode.IsLetter(r) {
		return true
	}
	if unicode.IsNumber(r) {
		return true
	}
	return unicode.IsMark(r)
}
|
||||
|
||||
// IsPermittedInHashtag reports whether rune r is
// allowed in a hashtag: a letter or a number.
func IsPermittedInHashtag(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsNumber(r)
}
|
||||
|
||||
// IsMentionOrHashtagBoundary reports whether rune r counts as a break
// character directly before or after a #hashtag or @mention: any
// whitespace or any punctuation.
func IsMentionOrHashtagBoundary(r rune) bool {
	switch {
	case unicode.IsSpace(r), unicode.IsPunct(r):
		return true
	}
	return false
}
|
Loading…
Reference in a new issue