diff --git a/internal/api/client/account/accountupdate_test.go b/internal/api/client/account/accountupdate_test.go index 8e35c236..c6d07d9d 100644 --- a/internal/api/client/account/accountupdate_test.go +++ b/internal/api/client/account/accountupdate_test.go @@ -75,6 +75,7 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandler() // check the returned api model account // fields should be updated suite.Equal("

this is my new bio read it and weep

", apimodelAccount.Note) + suite.Equal(newBio, apimodelAccount.Source.Note) } func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandlerUnlockLock() { @@ -194,6 +195,7 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandlerGet // check the returned api model account // fields should be updated suite.Equal("

this is my new bio read it and weep

", apimodelAccount.Note) + suite.Equal(newBio, apimodelAccount.Source.Note) } func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandlerTwoFields() { @@ -235,6 +237,7 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandlerTwo // check the returned api model account // fields should be updated suite.Equal("

this is my new bio read it and weep

", apimodelAccount.Note) + suite.Equal(newBio, apimodelAccount.Source.Note) suite.True(apimodelAccount.Locked) } @@ -280,6 +283,7 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateCredentialsPATCHHandlerWit suite.Equal("updated zork display name!!!", apimodelAccount.DisplayName) suite.True(apimodelAccount.Locked) suite.Empty(apimodelAccount.Note) + suite.Empty(apimodelAccount.Source.Note) // header values... // should be set diff --git a/internal/api/client/account/accountverify_test.go b/internal/api/client/account/accountverify_test.go index 2732a618..4d682088 100644 --- a/internal/api/client/account/accountverify_test.go +++ b/internal/api/client/account/accountverify_test.go @@ -86,6 +86,7 @@ func (suite *AccountVerifyTestSuite) TestAccountVerifyGet() { suite.WithinDuration(time.Now(), lastStatusAt, 5*time.Minute) suite.EqualValues(gtsmodel.VisibilityPublic, apimodelAccount.Source.Privacy) suite.Equal(testAccount.Language, apimodelAccount.Source.Language) + suite.Equal(testAccount.NoteRaw, apimodelAccount.Source.Note) } func TestAccountVerifyTestSuite(t *testing.T) { diff --git a/internal/cache/account.go b/internal/cache/account.go index 02ec7150..8dbb0784 100644 --- a/internal/cache/account.go +++ b/internal/cache/account.go @@ -134,6 +134,7 @@ func copyAccount(account *gtsmodel.Account) *gtsmodel.Account { DisplayName: account.DisplayName, Fields: account.Fields, Note: account.Note, + NoteRaw: account.NoteRaw, Memorial: account.Memorial, MovedToAccountID: account.MovedToAccountID, CreatedAt: account.CreatedAt, diff --git a/internal/db/bundb/migrations/20220506110822_add_account_raw_note.go b/internal/db/bundb/migrations/20220506110822_add_account_raw_note.go new file mode 100644 index 00000000..3574c9ce --- /dev/null +++ b/internal/db/bundb/migrations/20220506110822_add_account_raw_note.go @@ -0,0 +1,50 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package migrations + +import ( + "context" + + gtsmodel "github.com/superseriousbusiness/gotosocial/internal/db/bundb/migrations/20211113114307_init" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + // add account raw_note column + expr := tx. + NewAddColumn(). + Model(>smodel.Account{}). + ColumnExpr("note_raw") + _, err := expr.Exec(ctx) + return err + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + return nil + }) + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/gtsmodel/account.go b/internal/gtsmodel/account.go index ecbeed63..13b9d2cf 100644 --- a/internal/gtsmodel/account.go +++ b/internal/gtsmodel/account.go @@ -43,6 +43,7 @@ type Account struct { DisplayName string `validate:"-" bun:""` // DisplayName for this account. Can be empty, then just the Username will be used for display purposes. Fields []Field `validate:"-"` // a key/value map of fields that this account has added to their profile Note string `validate:"-" bun:""` // A note that this account has on their profile (ie., the account's bio/description of themselves) + NoteRaw string `validate:"-" bun:""` // The raw contents of .Note without conversion to HTML, only available when requester = target Memorial bool `validate:"-" bun:",default:false"` // Is this a memorial account, ie., has the user passed away? AlsoKnownAs string `validate:"omitempty,ulid" bun:"type:CHAR(26),nullzero"` // This account is associated with x account id (TODO: migrate to be AlsoKnownAsID) MovedToAccountID string `validate:"omitempty,ulid" bun:"type:CHAR(26),nullzero"` // This account has moved this account id in the database diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go index 738aa8c8..3d6bbae2 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -60,10 +60,17 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form if err := validate.Note(*form.Note); err != nil { return nil, err } + + // Set the raw note before processing + account.NoteRaw = *form.Note + + // Process note to generate a valid HTML representation note, err := p.processNote(ctx, *form.Note, account.ID) if err != nil { return nil, err } + + // Set updated HTML-ified note account.Note = note } diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go index 1e93af16..add8a5bc 100644 --- a/internal/processing/status/create.go +++ b/internal/processing/status/create.go @@ -39,13 +39,11 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli if err != nil { return nil, gtserror.NewErrorInternalError(err) } - thisStatusURI := fmt.Sprintf("%s/%s", accountURIs.StatusesURI, thisStatusID) - thisStatusURL := fmt.Sprintf("%s/%s", accountURIs.StatusesURL, thisStatusID) newStatus := >smodel.Status{ ID: thisStatusID, - URI: thisStatusURI, - URL: thisStatusURL, + URI: accountURIs.StatusesURI + "/" + thisStatusID, + URL: accountURIs.StatusesURL + "/" + thisStatusID, CreatedAt: time.Now(), UpdatedAt: time.Now(), Local: true, diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go index 5de66af8..190d88f1 100644 --- a/internal/processing/status/util.go +++ b/internal/processing/status/util.go @@ -242,11 +242,11 @@ func (p *processor) ProcessTags(ctx context.Context, form *apimodel.AdvancedStat } func (p *processor) ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { - emojis := []string{} gtsEmojis, err := p.db.EmojiStringsToEmojis(ctx, util.DeriveEmojisFromText(form.Status)) if err != nil { return fmt.Errorf("error generating emojis from status: %s", err) } + emojis := make([]string, 0, len(gtsEmojis)) for _, e := range gtsEmojis { emojis = append(emojis, e.ID) } diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 9302b544..f05f9b39 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -19,8 +19,12 @@ package regexes import ( + "bytes" "fmt" "regexp" + "sync" + + "mvdan.cc/xurls/v2" ) const ( @@ -47,6 +51,16 @@ const ( ) var ( + schemes = `(http|https)://` + // LinkScheme captures http/https schemes in URLs. + LinkScheme = func() *regexp.Regexp { + rgx, err := xurls.StrictMatchingScheme(schemes) + if err != nil { + panic(err) + } + return rgx + }() + mentionName = `^@(\w+)(?:@([a-zA-Z0-9_\-\.:]+))?$` // MentionName captures the username and domain part from a mention string // such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols) @@ -58,7 +72,7 @@ var ( MentionFinder = regexp.MustCompile(mentionFinder) // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 - hashtagFinder = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength) + hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength) // HashtagFinder finds possible hashtags in a string. // It returns just the string part of the hashtag, not the # symbol. HashtagFinder = regexp.MustCompile(hashtagFinder) @@ -68,7 +82,7 @@ var ( EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode)) // emoji regex can be played with here: https://regex101.com/r/478XGM/1 - emojiFinderString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcode) + emojiFinderString = fmt.Sprintf(`(?:\b)?:(%s):(?:\b)?`, emojiShortcode) // EmojiFinder extracts emoji strings from a piece of text. EmojiFinder = regexp.MustCompile(emojiFinderString) @@ -134,3 +148,21 @@ var ( // from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH BlockPath = regexp.MustCompile(blockPath) ) + +// bufpool is a memory pool of byte buffers for use in our regex utility functions. +var bufpool = sync.Pool{ + New: func() any { + buf := bytes.NewBuffer(make([]byte, 0, 512)) + return buf + }, +} + +// ReplaceAllStringFunc will call through to .ReplaceAllStringFunc in the provided regex, but provide you a clean byte buffer for optimized string writes. +func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string { + buf := bufpool.Get().(*bytes.Buffer) //nolint + defer bufpool.Put(buf) + return rgx.ReplaceAllStringFunc(src, func(match string) string { + buf.Reset() // reset use + return repl(match, buf) + }) +} diff --git a/internal/text/common.go b/internal/text/common.go index 4148ece1..12c0f1df 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -19,10 +19,11 @@ package text import ( + "bytes" "context" - "fmt" "html" "strings" + "unicode" "github.com/sirupsen/logrus" @@ -63,38 +64,40 @@ func postformat(in string) string { } func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { - return regexes.HashtagFinder.ReplaceAllStringFunc(in, func(match string) string { + return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string { // we have a match matchTrimmed := strings.TrimSpace(match) - tagAsEntered := strings.Split(matchTrimmed, "#")[1] + tagAsEntered := matchTrimmed[1:] // check through the tags to find what we're matching for _, tag := range tags { - - if strings.EqualFold(matchTrimmed, fmt.Sprintf("#%s", tag.Name)) { - // replace the #tag with the formatted tag content - tagContent := fmt.Sprintf(``, tag.URL, tagAsEntered) - - // in case the match picked up any previous space or newlines (thanks to the regex), include them as well - if strings.HasPrefix(match, " ") { - tagContent = " " + tagContent - } else if strings.HasPrefix(match, "\n") { - tagContent = "\n" + tagContent + if strings.EqualFold(tagAsEntered, tag.Name) { + // Add any dropped space from match + if unicode.IsSpace(rune(match[0])) { + buf.WriteByte(match[0]) } - // done - return tagContent + // replace the #tag with the formatted tag content + // ` + buf.WriteString(``) + return buf.String() } } + // the match wasn't in the list of tags for whatever reason, so just return the match as we found it so nothing changes return match }) } func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string { - return regexes.MentionFinder.ReplaceAllStringFunc(in, func(match string) string { - // we have a match + return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, func(match string, buf *bytes.Buffer) string { + // we have a match, trim any spaces matchTrimmed := strings.TrimSpace(match) + // check through mentions to find what we're matching for _, menchie := range mentions { if strings.EqualFold(matchTrimmed, menchie.NameString) { @@ -107,22 +110,26 @@ func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []* } menchie.TargetAccount = a } + + // The mention's target is our target targetAccount := menchie.TargetAccount - // replace the mention with the formatted mention content - mentionContent := fmt.Sprintf(`@%s`, targetAccount.URL, targetAccount.Username) - - // in case the match picked up any previous space or newlines (thanks to the regex), include them as well - if strings.HasPrefix(match, " ") { - mentionContent = " " + mentionContent - } else if strings.HasPrefix(match, "\n") { - mentionContent = "\n" + mentionContent + // Add any dropped space from match + if unicode.IsSpace(rune(match[0])) { + buf.WriteByte(match[0]) } - // done - return mentionContent + // replace the mention with the formatted mention content + // @targetAccount.Username + buf.WriteString(`@`) + buf.WriteString(targetAccount.Username) + buf.WriteString(``) + return buf.String() } } + // the match wasn't in the list of mentions for whatever reason, so just return the match as we found it so nothing changes return match }) diff --git a/internal/text/link.go b/internal/text/link.go index d8d83df6..f72c451f 100644 --- a/internal/text/link.go +++ b/internal/text/link.go @@ -19,34 +19,28 @@ package text import ( + "bytes" "context" - "fmt" "net/url" + "strings" - "mvdan.cc/xurls/v2" + "github.com/superseriousbusiness/gotosocial/internal/regexes" ) -// schemes is the regex for schemes we accept when looking for links. -// Basically, we accept https or http. -var schemes = `(((http|https))://)` - // FindLinks parses the given string looking for recognizable URLs (including scheme). // It returns a list of those URLs, without changing the string, or an error if something goes wrong. // If no URLs are found within the given string, an empty slice and nil will be returned. -func FindLinks(in string) ([]*url.URL, error) { - rxStrict, err := xurls.StrictMatchingScheme(schemes) - if err != nil { - return nil, err - } - - urls := []*url.URL{} +func FindLinks(in string) []*url.URL { + var urls []*url.URL // bail already if we don't find anything - found := rxStrict.FindAllString(in, -1) + found := regexes.LinkScheme.FindAllString(in, -1) if len(found) == 0 { - return urls, nil + return nil } + urlmap := map[string]struct{}{} + // for each string we find, we want to parse it into a URL if we can // if we fail to parse it, just ignore this match and continue for _, f := range found { @@ -54,29 +48,18 @@ func FindLinks(in string) ([]*url.URL, error) { if err != nil { continue } - urls = append(urls, u) - } - // deduplicate the URLs - urlsDeduped := []*url.URL{} + // Calculate string + ustr := u.String() - for _, u := range urls { - if !contains(urlsDeduped, u) { - urlsDeduped = append(urlsDeduped, u) + if _, ok := urlmap[ustr]; !ok { + // Has not been encountered yet + urls = append(urls, u) + urlmap[ustr] = struct{}{} } } - return urlsDeduped, nil -} - -// contains checks if the given url is already within a slice of URLs -func contains(urls []*url.URL, url *url.URL) bool { - for _, u := range urls { - if u.String() == url.String() { - return true - } - } - return false + return urls } // ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents. @@ -84,33 +67,20 @@ func contains(urls []*url.URL, url *url.URL) bool { // href will end up double-formatted, if the text you pass here contains one or more hrefs already. // To avoid this, you should sanitize any HTML out of text before you pass it into this function. func (f *formatter) ReplaceLinks(ctx context.Context, in string) string { - rxStrict, err := xurls.StrictMatchingScheme(schemes) - if err != nil { - panic(err) - } - - replaced := rxStrict.ReplaceAllStringFunc(in, func(urlString string) string { + return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string { thisURL, err := url.Parse(urlString) if err != nil { return urlString // we can't parse it as a URL so don't replace it } - - shortString := thisURL.Hostname() - - if thisURL.Path != "" { - shortString += thisURL.Path - } - - if thisURL.Fragment != "" { - shortString = shortString + "#" + thisURL.Fragment - } - - if thisURL.RawQuery != "" { - shortString = shortString + "?" + thisURL.RawQuery - } - - replacement := fmt.Sprintf(`%s`, urlString, shortString) - return replacement + // urlString + urlString = thisURL.String() + buf.WriteString(``) + urlString = strings.TrimPrefix(urlString, thisURL.Scheme) + urlString = strings.TrimPrefix(urlString, "://") + buf.WriteString(urlString) + buf.WriteString(``) + return buf.String() }) - return replaced } diff --git a/internal/text/link_test.go b/internal/text/link_test.go index e524315e..24484e02 100644 --- a/internal/text/link_test.go +++ b/internal/text/link_test.go @@ -75,9 +75,7 @@ func (suite *LinkTestSuite) TestParseSimple() { } func (suite *LinkTestSuite) TestParseURLsFromText1() { - urls, err := text.FindLinks(text1) - - assert.NoError(suite.T(), err) + urls := text.FindLinks(text1) assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) @@ -86,16 +84,14 @@ func (suite *LinkTestSuite) TestParseURLsFromText1() { } func (suite *LinkTestSuite) TestParseURLsFromText2() { - urls, err := text.FindLinks(text2) - assert.NoError(suite.T(), err) + urls := text.FindLinks(text2) // assert length 1 because the found links will be deduplicated assert.Len(suite.T(), urls, 1) } func (suite *LinkTestSuite) TestParseURLsFromText3() { - urls, err := text.FindLinks(text3) - assert.NoError(suite.T(), err) + urls := text.FindLinks(text3) // assert length 0 because `mailto:` isn't accepted assert.Len(suite.T(), urls, 0) @@ -112,7 +108,7 @@ Here's link number two: example.orghttps//google.com <-- this shouldn't work either, but it does?! OK +example.orghttps://google.com <-- this shouldn't work either, but it does?! OK `, replaced) } diff --git a/internal/text/plain.go b/internal/text/plain.go index 453f4dd3..4ef3b371 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -20,12 +20,17 @@ package text import ( "context" - "fmt" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) +// breakReplacer replaces new-lines with HTML breaks. +var breakReplacer = strings.NewReplacer( + "\r\n", "
", + "\n", "
", +) + func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(plain) @@ -42,10 +47,10 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts content = f.ReplaceMentions(ctx, content, mentions) // replace newlines with breaks - content = strings.ReplaceAll(content, "\n", "
") + content = breakReplacer.Replace(content) // wrap the whole thing in a pee - content = fmt.Sprintf(`

%s

`, content) + content = `

` + content + `

` return postformat(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index d8b7c17d..2b7b50d5 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -53,7 +53,6 @@ func (suite *PlainTestSuite) TestParseSimple() { } func (suite *PlainTestSuite) TestParseWithTag() { - foundTags := []*gtsmodel.Tag{ suite.testTags["welcome"], } @@ -63,7 +62,6 @@ func (suite *PlainTestSuite) TestParseWithTag() { } func (suite *PlainTestSuite) TestParseMoreComplex() { - foundTags := []*gtsmodel.Tag{ suite.testTags["Hashtag"], } diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 39a2861f..55527925 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -58,7 +58,7 @@ func (c *converter) AccountToAPIAccountSensitive(ctx context.Context, a *gtsmode Privacy: c.VisToAPIVis(ctx, a.Privacy), Sensitive: a.Sensitive, Language: a.Language, - Note: a.Note, + Note: a.NoteRaw, Fields: apiAccount.Fields, FollowRequestsCount: frc, } @@ -703,7 +703,6 @@ func (c *converter) NotificationToAPINotification(ctx context.Context, n *gtsmod } func (c *converter) DomainBlockToAPIDomainBlock(ctx context.Context, b *gtsmodel.DomainBlock, export bool) (*model.DomainBlock, error) { - domainBlock := &model.DomainBlock{ Domain: b.Domain, PublicComment: b.PublicComment, diff --git a/testrig/testmodels.go b/testrig/testmodels.go index cc32aa39..8894e562 100644 --- a/testrig/testmodels.go +++ b/testrig/testmodels.go @@ -319,6 +319,7 @@ func NewTestAccounts() map[string]*gtsmodel.Account { DisplayName: "", Fields: []gtsmodel.Field{}, Note: "", + NoteRaw: "", Memorial: false, MovedToAccountID: "", CreatedAt: time.Now().Add(-72 * time.Hour), @@ -356,7 +357,8 @@ func NewTestAccounts() map[string]*gtsmodel.Account { HeaderMediaAttachmentID: "01PFPMWK2FF0D9WMHEJHR07C3Q", DisplayName: "original zork (he/they)", Fields: []gtsmodel.Field{}, - Note: "hey yo this is my profile!", + Note: "

hey yo this is my profile!

", + NoteRaw: "hey yo this is my profile!", Memorial: false, MovedToAccountID: "", CreatedAt: time.Now().Add(-48 * time.Hour), @@ -394,7 +396,8 @@ func NewTestAccounts() map[string]*gtsmodel.Account { HeaderMediaAttachmentID: "", DisplayName: "happy little turtle :3", Fields: []gtsmodel.Field{}, - Note: "i post about things that concern me", + Note: "

i post about things that concern me

", + NoteRaw: "i post about things that concern me", Memorial: false, MovedToAccountID: "", CreatedAt: time.Now().Add(-190 * time.Hour),