diff --git a/internal/api/client/statuses/statuscreate_test.go b/internal/api/client/statuses/statuscreate_test.go
index ab45af35..6802558e 100644
--- a/internal/api/client/statuses/statuscreate_test.go
+++ b/internal/api/client/statuses/statuscreate_test.go
@@ -219,7 +219,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() {
err = json.Unmarshal(b, statusReply)
suite.NoError(err)
- suite.Equal("
#test alright, should be able to post #links with fragments in them now, let's see........
docs.gotosocial.org/en/latest/user_guide/posts/#links
#gotosocial
(tobi remember to pull the docker image challenge)
", statusReply.Content)
+ suite.Equal("#test alright, should be able to post #links with fragments in them now, let's see........
https://docs.gotosocial.org/en/latest/user_guide/posts/#links
#gotosocial
(tobi remember to pull the docker image challenge)
", statusReply.Content)
}
func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
@@ -252,7 +252,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
suite.NoError(err)
suite.Equal("", statusReply.SpoilerText)
- suite.Equal("here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:
", statusReply.Content)
+ suite.Equal("here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:
", statusReply.Content)
suite.Len(statusReply.Emojis, 1)
apiEmoji := statusReply.Emojis[0]
@@ -371,7 +371,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() {
suite.NoError(err)
suite.Equal("", statusResponse.SpoilerText)
- suite.Equal("here's an image attachment
", statusResponse.Content)
+ suite.Equal("here's an image attachment
", statusResponse.Content)
suite.False(statusResponse.Sensitive)
suite.Equal(apimodel.VisibilityPublic, statusResponse.Visibility)
diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go
index 0ab1d1b8..2f7a8a02 100644
--- a/internal/db/bundb/bundb.go
+++ b/internal/db/bundb/bundb.go
@@ -473,43 +473,40 @@ func sqlitePragmas(ctx context.Context, conn *DBConn) error {
CONVERSION FUNCTIONS
*/
-func (dbService *DBService) TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) {
+func (dbService *DBService) TagStringToTag(ctx context.Context, t string, originAccountID string) (*gtsmodel.Tag, error) {
protocol := config.GetProtocol()
host := config.GetHost()
+ now := time.Now()
- newTags := []*gtsmodel.Tag{}
- for _, t := range tags {
- tag := &gtsmodel.Tag{}
- // we can use selectorinsert here to create the new tag if it doesn't exist already
- // inserted will be true if this is a new tag we just created
- if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil {
- if err == sql.ErrNoRows {
- // tag doesn't exist yet so populate it
- newID, err := id.NewRandomULID()
- if err != nil {
- return nil, err
- }
- tag.ID = newID
- tag.URL = fmt.Sprintf("%s://%s/tags/%s", protocol, host, t)
- tag.Name = t
- tag.FirstSeenFromAccountID = originAccountID
- tag.CreatedAt = time.Now()
- tag.UpdatedAt = time.Now()
- useable := true
- tag.Useable = &useable
- listable := true
- tag.Listable = &listable
- } else {
- return nil, fmt.Errorf("error getting tag with name %s: %s", t, err)
- }
- }
-
- // bail already if the tag isn't useable
- if !*tag.Useable {
- continue
- }
- tag.LastStatusAt = time.Now()
- newTags = append(newTags, tag)
+ tag := &gtsmodel.Tag{}
+ // look up an existing tag by case-insensitive name; a missing row is not an error here,
+ // it just leaves tag.ID empty so that a new tag struct gets populated below
+ if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil && err != sql.ErrNoRows {
+ return nil, fmt.Errorf("error getting tag with name %s: %s", t, err)
}
- return newTags, nil
+
+ if tag.ID == "" {
+ // tag doesn't exist yet so populate it
+ newID, err := id.NewRandomULID()
+ if err != nil {
+ return nil, err
+ }
+ tag.ID = newID
+ tag.URL = protocol + "://" + host + "/tags/" + t
+ tag.Name = t
+ tag.FirstSeenFromAccountID = originAccountID
+ tag.CreatedAt = now
+ tag.UpdatedAt = now
+ useable := true
+ tag.Useable = &useable
+ listable := true
+ tag.Listable = &listable
+ }
+
+ // bail already if the tag isn't useable
+ if !*tag.Useable {
+ return nil, fmt.Errorf("tag %s is not useable", t)
+ }
+ tag.LastStatusAt = now
+ return tag, nil
}
diff --git a/internal/db/db.go b/internal/db/db.go
index aa1929da..b66b2114 100644
--- a/internal/db/db.go
+++ b/internal/db/db.go
@@ -52,12 +52,12 @@ type DB interface {
USEFUL CONVERSION FUNCTIONS
*/
- // TagStringsToTags takes a slice of deduplicated, lowercase tags in the form "somehashtag", which have been
+ // TagStringToTag takes a lowercase tag in the form "somehashtag", which has been
// used in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then
- // returns a slice of *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag
+ // returns a *gtsmodel.Tag corresponding to the given tag. If the tag already exists in database, that tag
// will be returned. Otherwise a pointer to a new tag struct will be created and returned.
//
- // Note: this func doesn't/shouldn't do any manipulation of the tags in the DB, it's just for checking
+ // Note: this func doesn't/shouldn't do any manipulation of tags in the DB, it's just for checking
// if they exist in the db already, and conveniently returning them, or creating new tag structs.
- TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error)
+ TagStringToTag(ctx context.Context, tag string, originAccountID string) (*gtsmodel.Tag, error)
}
diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go
index 055d1f0e..c7939034 100644
--- a/internal/processing/account/update.go
+++ b/internal/processing/account/update.go
@@ -27,14 +27,12 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/ap"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
- "github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/text"
- "github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/superseriousbusiness/gotosocial/internal/validate"
)
@@ -47,14 +45,20 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form
account.Bot = form.Bot
}
- var updateEmojis bool
+ account.Emojis = []*gtsmodel.Emoji{}
+ account.EmojiIDs = []string{}
if form.DisplayName != nil {
if err := validate.DisplayName(*form.DisplayName); err != nil {
return nil, gtserror.NewErrorBadRequest(err)
}
account.DisplayName = text.SanitizePlaintext(*form.DisplayName)
- updateEmojis = true
+
+ formatResult := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, account.ID, "", account.DisplayName)
+ for _, emoji := range formatResult.Emojis {
+ account.Emojis = append(account.Emojis, emoji)
+ account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
+ }
}
if form.Note != nil {
@@ -66,36 +70,19 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form
account.NoteRaw = *form.Note
// Process note to generate a valid HTML representation
- note, err := p.processNote(ctx, *form.Note, account)
- if err != nil {
- return nil, gtserror.NewErrorBadRequest(err)
+ var f text.FormatFunc
+ if account.StatusFormat == "markdown" {
+ f = p.formatter.FromMarkdown
+ } else {
+ f = p.formatter.FromPlain
}
+ formatted := f(ctx, p.parseMention, account.ID, "", *form.Note)
// Set updated HTML-ified note
- account.Note = note
- updateEmojis = true
- }
-
- if updateEmojis {
- // account emojis -- treat the sanitized display name and raw
- // note like one long text for the purposes of deriving emojis
- accountEmojiShortcodes := util.DeriveEmojisFromText(account.DisplayName + "\n\n" + account.NoteRaw)
- account.Emojis = make([]*gtsmodel.Emoji, 0, len(accountEmojiShortcodes))
- account.EmojiIDs = make([]string, 0, len(accountEmojiShortcodes))
-
- for _, shortcode := range accountEmojiShortcodes {
- emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "")
- if err != nil {
- if err != db.ErrNoEntries {
- log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
- }
- continue
- }
-
- if *emoji.VisibleInPicker && !*emoji.Disabled {
- account.Emojis = append(account.Emojis, emoji)
- account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
- }
+ account.Note = formatted.HTML
+ for _, emoji := range formatted.Emojis {
+ account.Emojis = append(account.Emojis, emoji)
+ account.EmojiIDs = append(account.EmojiIDs, emoji.ID)
}
}
@@ -240,35 +227,3 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead
return processingMedia.LoadAttachment(ctx)
}
-
-func (p *processor) processNote(ctx context.Context, note string, account *gtsmodel.Account) (string, error) {
- if note == "" {
- return "", nil
- }
-
- tagStrings := util.DeriveHashtagsFromText(note)
- tags, err := p.db.TagStringsToTags(ctx, tagStrings, account.ID)
- if err != nil {
- return "", err
- }
-
- mentionStrings := util.DeriveMentionNamesFromText(note)
- mentions := []*gtsmodel.Mention{}
- for _, mentionString := range mentionStrings {
- mention, err := p.parseMention(ctx, mentionString, account.ID, "")
- if err != nil {
- continue
- }
- mentions = append(mentions, mention)
- }
-
- // TODO: support emojis in account notes
- // emojiStrings := util.DeriveEmojisFromText(note)
- // emojis, err := p.db.EmojiStringsToEmojis(ctx, emojiStrings)
-
- if account.StatusFormat == "markdown" {
- return p.formatter.FromMarkdown(ctx, note, mentions, tags, nil), nil
- }
-
- return p.formatter.FromPlain(ctx, note, mentions, tags), nil
-}
diff --git a/internal/processing/account/update_test.go b/internal/processing/account/update_test.go
index e4b04607..8ebce788 100644
--- a/internal/processing/account/update_test.go
+++ b/internal/processing/account/update_test.go
@@ -76,8 +76,8 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateWithMention() {
var (
locked = true
displayName = "new display name"
- note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!\n"
- noteExpected = "#hello here i am!
go check out @1happyturtle, they have a cool account!
"
+ note = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!"
+ noteExpected = "#hello here i am!
go check out @1happyturtle, they have a cool account!
"
)
form := &apimodel.UpdateCredentialsRequest{
diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go
index 1a6177ef..9e9d24c8 100644
--- a/internal/processing/status/create.go
+++ b/internal/processing/status/create.go
@@ -76,18 +76,6 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli
return nil, gtserror.NewErrorInternalError(err)
}
- if err := p.ProcessMentions(ctx, form, account.ID, newStatus); err != nil {
- return nil, gtserror.NewErrorInternalError(err)
- }
-
- if err := p.ProcessTags(ctx, form, account.ID, newStatus); err != nil {
- return nil, gtserror.NewErrorInternalError(err)
- }
-
- if err := p.ProcessEmojis(ctx, form, account.ID, newStatus); err != nil {
- return nil, gtserror.NewErrorInternalError(err)
- }
-
if err := p.ProcessContent(ctx, form, account.ID, newStatus); err != nil {
return nil, gtserror.NewErrorInternalError(err)
}
diff --git a/internal/processing/status/status.go b/internal/processing/status/status.go
index 379b0661..56b8b23e 100644
--- a/internal/processing/status/status.go
+++ b/internal/processing/status/status.go
@@ -67,9 +67,6 @@ type Processor interface {
ProcessReplyToID(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode
ProcessMediaIDs(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode
ProcessLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultLanguage string, status *gtsmodel.Status) error
- ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
- ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
- ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error
}
diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go
index 2847d1c4..1115219c 100644
--- a/internal/processing/status/util.go
+++ b/internal/processing/status/util.go
@@ -28,8 +28,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/util"
+ "github.com/superseriousbusiness/gotosocial/internal/text"
)
func (p *processor) ProcessVisibility(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultVis gtsmodel.Visibility, status *gtsmodel.Status) error {
@@ -212,80 +211,6 @@ func (p *processor) ProcessLanguage(ctx context.Context, form *apimodel.Advanced
return nil
}
-func (p *processor) ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
- mentionedAccountNames := util.DeriveMentionNamesFromText(form.Status)
- mentions := []*gtsmodel.Mention{}
- mentionIDs := []string{}
-
- for _, mentionedAccountName := range mentionedAccountNames {
- gtsMention, err := p.parseMention(ctx, mentionedAccountName, accountID, status.ID)
- if err != nil {
- log.Errorf("ProcessMentions: error parsing mention %s from status: %s", mentionedAccountName, err)
- continue
- }
-
- if err := p.db.Put(ctx, gtsMention); err != nil {
- log.Errorf("ProcessMentions: error putting mention in db: %s", err)
- }
-
- mentions = append(mentions, gtsMention)
- mentionIDs = append(mentionIDs, gtsMention.ID)
- }
-
- // add full populated gts menchies to the status for passing them around conveniently
- status.Mentions = mentions
- // add just the ids of the mentioned accounts to the status for putting in the db
- status.MentionIDs = mentionIDs
-
- return nil
-}
-
-func (p *processor) ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
- tags := []string{}
- gtsTags, err := p.db.TagStringsToTags(ctx, util.DeriveHashtagsFromText(form.Status), accountID)
- if err != nil {
- return fmt.Errorf("error generating hashtags from status: %s", err)
- }
- for _, tag := range gtsTags {
- if err := p.db.Put(ctx, tag); err != nil {
- if !errors.Is(err, db.ErrAlreadyExists) {
- return fmt.Errorf("error putting tags in db: %s", err)
- }
- }
- tags = append(tags, tag.ID)
- }
- // add full populated gts tags to the status for passing them around conveniently
- status.Tags = gtsTags
- // add just the ids of the used tags to the status for putting in the db
- status.TagIDs = tags
- return nil
-}
-
-func (p *processor) ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
- // for each emoji shortcode in the text, check if it's an enabled
- // emoji on this instance, and if so, add it to the status
- emojiShortcodes := util.DeriveEmojisFromText(form.SpoilerText + "\n\n" + form.Status)
- status.Emojis = make([]*gtsmodel.Emoji, 0, len(emojiShortcodes))
- status.EmojiIDs = make([]string, 0, len(emojiShortcodes))
-
- for _, shortcode := range emojiShortcodes {
- emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "")
- if err != nil {
- if err != db.ErrNoEntries {
- log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
- }
- continue
- }
-
- if *emoji.VisibleInPicker && !*emoji.Disabled {
- status.Emojis = append(status.Emojis, emoji)
- status.EmojiIDs = append(status.EmojiIDs, emoji.ID)
- }
- }
-
- return nil
-}
-
func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
// if there's nothing in the status at all we can just return early
if form.Status == "" {
@@ -311,16 +236,43 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS
}
// parse content out of the status depending on what format has been submitted
- var formatted string
+ var f text.FormatFunc
switch form.Format {
case apimodel.StatusFormatPlain:
- formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags)
+ f = p.formatter.FromPlain
case apimodel.StatusFormatMarkdown:
- formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags, status.Emojis)
+ f = p.formatter.FromMarkdown
default:
return fmt.Errorf("format %s not recognised as a valid status format", form.Format)
}
+ formatted := f(ctx, p.parseMention, accountID, status.ID, form.Status)
- status.Content = formatted
+ // add full populated gts {mentions, tags, emojis} to the status for passing them around conveniently
+ // add just their ids to the status for putting in the db
+ status.Mentions = formatted.Mentions
+ status.MentionIDs = make([]string, 0, len(formatted.Mentions))
+ for _, gtsmention := range formatted.Mentions {
+ status.MentionIDs = append(status.MentionIDs, gtsmention.ID)
+ }
+
+ status.Tags = formatted.Tags
+ status.TagIDs = make([]string, 0, len(formatted.Tags))
+ for _, gtstag := range formatted.Tags {
+ status.TagIDs = append(status.TagIDs, gtstag.ID)
+ }
+
+ status.Emojis = formatted.Emojis
+ status.EmojiIDs = make([]string, 0, len(formatted.Emojis))
+ for _, gtsemoji := range formatted.Emojis {
+ status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID)
+ }
+
+ spoilerformatted := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, accountID, status.ID, form.SpoilerText)
+ for _, gtsemoji := range spoilerformatted.Emojis {
+ status.Emojis = append(status.Emojis, gtsemoji)
+ status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID)
+ }
+
+ status.Content = formatted.HTML
return nil
}
diff --git a/internal/processing/status/util_test.go b/internal/processing/status/util_test.go
index d4be4337..acd82318 100644
--- a/internal/processing/status/util_test.go
+++ b/internal/processing/status/util_test.go
@@ -29,22 +29,23 @@ import (
)
const (
- statusText1 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
- statusText1ExpectedFull = "Another test @foss_satan
#Hashtag
Text
"
- statusText1ExpectedPartial = "Another test @foss_satan
#Hashtag
Text
"
- statusText2 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG"
- status2TextExpectedFull = "Another test @foss_satan
#Hashtag
#hashTAG
"
- status2TextExpectedPartial = "Another test @foss_satan
#Hashtag
#hashTAG
"
+ statusText1 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
+ statusText1Expected = "Another test @foss_satan
#Hashtag
Text
"
+ statusText2 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG"
+ status2TextExpected = "Another test @foss_satan
#Hashtag
#hashTAG
"
)
type UtilTestSuite struct {
StatusStandardTestSuite
}
-func (suite *UtilTestSuite) TestProcessMentions1() {
+func (suite *UtilTestSuite) TestProcessContent1() {
+ /*
+ TEST PREPARATION
+ */
+ // we need to partially process the status first since processContent expects a status with some stuff already set on it
creatingAccount := suite.testAccounts["local_account_1"]
mentionedAccount := suite.testAccounts["remote_account_1"]
-
form := &apimodel.AdvancedStatusCreateForm{
StatusCreateRequest: apimodel.StatusCreateRequest{
Status: statusText1,
@@ -70,8 +71,13 @@ func (suite *UtilTestSuite) TestProcessMentions1() {
ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
}
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
+ /*
+ ACTUAL TEST
+ */
+
+ err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
suite.NoError(err)
+ suite.Equal(statusText1Expected, status.Content)
suite.Len(status.Mentions, 1)
newMention := status.Mentions[0]
@@ -88,102 +94,13 @@ func (suite *UtilTestSuite) TestProcessMentions1() {
suite.Equal(newMention.ID, status.MentionIDs[0])
}
-func (suite *UtilTestSuite) TestProcessContentFull1() {
+func (suite *UtilTestSuite) TestProcessContent2() {
/*
TEST PREPARATION
*/
// we need to partially process the status first since processContent expects a status with some stuff already set on it
- creatingAccount := suite.testAccounts["local_account_1"]
- form := &apimodel.AdvancedStatusCreateForm{
- StatusCreateRequest: apimodel.StatusCreateRequest{
- Status: statusText1,
- MediaIDs: []string{},
- Poll: nil,
- InReplyToID: "",
- Sensitive: false,
- SpoilerText: "",
- Visibility: apimodel.VisibilityPublic,
- ScheduledAt: "",
- Language: "en",
- Format: apimodel.StatusFormatPlain,
- },
- AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{
- Federated: nil,
- Boostable: nil,
- Replyable: nil,
- Likeable: nil,
- },
- }
-
- status := &gtsmodel.Status{
- ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
- }
-
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content) // shouldn't be set yet
-
- err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content) // shouldn't be set yet
-
- /*
- ACTUAL TEST
- */
-
- err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Equal(statusText1ExpectedFull, status.Content)
-}
-
-func (suite *UtilTestSuite) TestProcessContentPartial1() {
- /*
- TEST PREPARATION
- */
- // we need to partially process the status first since processContent expects a status with some stuff already set on it
- creatingAccount := suite.testAccounts["local_account_1"]
- form := &apimodel.AdvancedStatusCreateForm{
- StatusCreateRequest: apimodel.StatusCreateRequest{
- Status: statusText1,
- MediaIDs: []string{},
- Poll: nil,
- InReplyToID: "",
- Sensitive: false,
- SpoilerText: "",
- Visibility: apimodel.VisibilityPublic,
- ScheduledAt: "",
- Language: "en",
- Format: apimodel.StatusFormatPlain,
- },
- AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{
- Federated: nil,
- Boostable: nil,
- Replyable: nil,
- Likeable: nil,
- },
- }
-
- status := &gtsmodel.Status{
- ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
- }
-
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content) // shouldn't be set yet
-
- /*
- ACTUAL TEST
- */
-
- err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Equal(statusText1ExpectedPartial, status.Content)
-}
-
-func (suite *UtilTestSuite) TestProcessMentions2() {
creatingAccount := suite.testAccounts["local_account_1"]
mentionedAccount := suite.testAccounts["remote_account_1"]
-
form := &apimodel.AdvancedStatusCreateForm{
StatusCreateRequest: apimodel.StatusCreateRequest{
Status: statusText2,
@@ -209,9 +126,15 @@ func (suite *UtilTestSuite) TestProcessMentions2() {
ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
}
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
+ /*
+ ACTUAL TEST
+ */
+
+ err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
suite.NoError(err)
+ suite.Equal(status2TextExpected, status.Content)
+
suite.Len(status.Mentions, 1)
newMention := status.Mentions[0]
suite.Equal(mentionedAccount.ID, newMention.TargetAccountID)
@@ -227,96 +150,6 @@ func (suite *UtilTestSuite) TestProcessMentions2() {
suite.Equal(newMention.ID, status.MentionIDs[0])
}
-func (suite *UtilTestSuite) TestProcessContentFull2() {
- /*
- TEST PREPARATION
- */
- // we need to partially process the status first since processContent expects a status with some stuff already set on it
- creatingAccount := suite.testAccounts["local_account_1"]
- form := &apimodel.AdvancedStatusCreateForm{
- StatusCreateRequest: apimodel.StatusCreateRequest{
- Status: statusText2,
- MediaIDs: []string{},
- Poll: nil,
- InReplyToID: "",
- Sensitive: false,
- SpoilerText: "",
- Visibility: apimodel.VisibilityPublic,
- ScheduledAt: "",
- Language: "en",
- Format: apimodel.StatusFormatPlain,
- },
- AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{
- Federated: nil,
- Boostable: nil,
- Replyable: nil,
- Likeable: nil,
- },
- }
-
- status := &gtsmodel.Status{
- ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
- }
-
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content) // shouldn't be set yet
-
- err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content) // shouldn't be set yet
-
- /*
- ACTUAL TEST
- */
-
- err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
-
- suite.Equal(status2TextExpectedFull, status.Content)
-}
-
-func (suite *UtilTestSuite) TestProcessContentPartial2() {
- /*
- TEST PREPARATION
- */
- // we need to partially process the status first since processContent expects a status with some stuff already set on it
- creatingAccount := suite.testAccounts["local_account_1"]
- form := &apimodel.AdvancedStatusCreateForm{
- StatusCreateRequest: apimodel.StatusCreateRequest{
- Status: statusText2,
- MediaIDs: []string{},
- Poll: nil,
- InReplyToID: "",
- Sensitive: false,
- SpoilerText: "",
- Visibility: apimodel.VisibilityPublic,
- ScheduledAt: "",
- Language: "en",
- Format: apimodel.StatusFormatPlain,
- },
- AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{
- Federated: nil,
- Boostable: nil,
- Replyable: nil,
- Likeable: nil,
- },
- }
-
- status := &gtsmodel.Status{
- ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",
- }
-
- err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
- suite.Empty(status.Content)
-
- err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)
- suite.NoError(err)
-
- suite.Equal(status2TextExpectedPartial, status.Content)
-}
-
func TestUtilTestSuite(t *testing.T) {
suite.Run(t, new(UtilTestSuite))
}
diff --git a/internal/text/common.go b/internal/text/common.go
deleted file mode 100644
index 2293ca3f..00000000
--- a/internal/text/common.go
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text
-
-import (
- "bytes"
- "context"
- "strings"
- "unicode"
-
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/regexes"
- "github.com/superseriousbusiness/gotosocial/internal/util"
-)
-
-func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string {
- spans := util.FindHashtagSpansInText(in)
-
- if len(spans) == 0 {
- return in
- }
-
- var b strings.Builder
- i := 0
-
-spans:
- for _, t := range spans {
- b.WriteString(in[i:t.First])
- i = t.Second
- tagAsEntered := in[t.First+1 : t.Second]
-
- for _, tag := range tags {
- if strings.EqualFold(tagAsEntered, tag.Name) {
- // replace the #tag with the formatted tag content
- // `#tagAsEntered
- b.WriteString(`#`)
- b.WriteString(tagAsEntered)
- b.WriteString(``)
- continue spans
- }
- }
-
- b.WriteString(in[t.First:t.Second])
- }
-
- // Get the last bits.
- i = spans[len(spans)-1].Second
- b.WriteString(in[i:])
-
- return b.String()
-}
-
-func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string {
- return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, func(match string, buf *bytes.Buffer) string {
- // we have a match, trim any spaces
- matchTrimmed := strings.TrimSpace(match)
-
- // check through mentions to find what we're matching
- for _, menchie := range mentions {
- if strings.EqualFold(matchTrimmed, menchie.NameString) {
- // make sure we have an account attached to this mention
- if menchie.TargetAccount == nil {
- a, err := f.db.GetAccountByID(ctx, menchie.TargetAccountID)
- if err != nil {
- log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err)
- return match
- }
- menchie.TargetAccount = a
- }
-
- // The mention's target is our target
- targetAccount := menchie.TargetAccount
-
- // Add any dropped space from match
- if unicode.IsSpace(rune(match[0])) {
- buf.WriteByte(match[0])
- }
-
- // replace the mention with the formatted mention content
- // @targetAccount.Username
- buf.WriteString(`@`)
- buf.WriteString(targetAccount.Username)
- buf.WriteString(``)
- return buf.String()
- }
- }
-
- // the match wasn't in the list of mentions for whatever reason, so just return the match as we found it so nothing changes
- return match
- })
-}
diff --git a/internal/text/common_test.go b/internal/text/common_test.go
deleted file mode 100644
index 3949226c..00000000
--- a/internal/text/common_test.go
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see .
-*/
-
-package text_test
-
-import (
- "context"
- "testing"
- "time"
-
- "github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
-)
-
-const (
- replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
- replaceMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText"
- replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
- replaceHashtagsAfterMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText"
- replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
- replaceMentionsWithLinkStringExpected = "Another test @foss_satan\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
- replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
- replaceMemtionsWithLinkSelfExpected = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
-)
-
-type CommonTestSuite struct {
- TextStandardTestSuite
-}
-
-func (suite *CommonTestSuite) TestReplaceMentions() {
- foundMentions := []*gtsmodel.Mention{
- suite.testMentions["zork_mention_foss_satan"],
- }
-
- f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsString, foundMentions)
- suite.Equal(replaceMentionsExpected, f)
-}
-
-func (suite *CommonTestSuite) TestReplaceHashtags() {
- foundTags := []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- }
-
- f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsString, foundTags)
-
- suite.Equal(replaceHashtagsExpected, f)
-}
-
-func (suite *CommonTestSuite) TestReplaceHashtagsAfterReplaceMentions() {
- foundTags := []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- }
-
- f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsExpected, foundTags)
-
- suite.Equal(replaceHashtagsAfterMentionsExpected, f)
-}
-
-func (suite *CommonTestSuite) TestReplaceMentionsWithLink() {
- foundMentions := []*gtsmodel.Mention{
- suite.testMentions["zork_mention_foss_satan"],
- }
-
- f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkString, foundMentions)
- suite.Equal(replaceMentionsWithLinkStringExpected, f)
-}
-
-func (suite *CommonTestSuite) TestReplaceMentionsWithLinkSelf() {
- mentioningAccount := suite.testAccounts["local_account_1"]
-
- foundMentions := []*gtsmodel.Mention{
- {
- ID: "01FGXKN5F815DVFVD53PN9NYM6",
- CreatedAt: time.Now(),
- UpdatedAt: time.Now(),
- StatusID: "01FGXKP0S5THQXFC1D9R141DDR",
- OriginAccountID: mentioningAccount.ID,
- TargetAccountID: mentioningAccount.ID,
- NameString: "@the_mighty_zork",
- TargetAccountURI: mentioningAccount.URI,
- TargetAccountURL: mentioningAccount.URL,
- },
- }
-
- f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkSelfString, foundMentions)
- suite.Equal(replaceMemtionsWithLinkSelfExpected, f)
-}
-
-func TestCommonTestSuite(t *testing.T) {
- suite.Run(t, new(CommonTestSuite))
-}
diff --git a/internal/text/emojionly.go b/internal/text/emojionly.go
new file mode 100644
index 00000000..1a3c0e96
--- /dev/null
+++ b/internal/text/emojionly.go
@@ -0,0 +1,71 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "bytes"
+ "context"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/util"
+)
+
+func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
+ result := &FormatResult{
+ Mentions: []*gtsmodel.Mention{},
+ Tags: []*gtsmodel.Tag{},
+ Emojis: []*gtsmodel.Emoji{},
+ }
+ // parse plaintext into html; the custom renderer is created in emoji-only mode, so emoji are collected but hashtags/mentions are not parsed or linked
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithHardWraps(),
+ ),
+ goldmark.WithParser(
+ parser.NewParser(
+ parser.WithBlockParsers(
+ util.Prioritized(newPlaintextParser(), 500),
+ ),
+ ),
+ ),
+ goldmark.WithExtensions(
+ &customRenderer{f, ctx, pmf, authorID, statusID, true, result},
+ ),
+ )
+
+ var htmlContentBytes bytes.Buffer
+ err := md.Convert([]byte(plain), &htmlContentBytes)
+ if err != nil {
+ log.Errorf("error formatting plaintext to HTML: %s", err)
+ }
+ result.HTML = htmlContentBytes.String()
+
+ // clean anything dangerous out of the HTML
+ result.HTML = SanitizeHTML(result.HTML)
+
+ // minify the sanitized HTML before returning it
+ result.HTML = minifyHTML(result.HTML)
+
+ return result
+}
diff --git a/internal/text/formatter.go b/internal/text/formatter.go
index cb4de402..bdad6c0f 100644
--- a/internal/text/formatter.go
+++ b/internal/text/formatter.go
@@ -26,20 +26,19 @@ import (
)
// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
+// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and
+// emoji that were found in the text.
type Formatter interface {
// FromPlain parses an HTML text from a plaintext.
- FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+ FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
// FromMarkdown parses an HTML text from a markdown-formatted text.
- FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string
-
- // ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs.
- ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string
- // ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs.
- ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string
- // ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs.
- ReplaceLinks(ctx context.Context, in string) string
+ FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult
+ // FromPlainEmojiOnly parses an HTML text from a plaintext, parsing only emojis and leaving mentions and hashtags unparsed.
+ FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
}
+type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult
+
type formatter struct {
db db.DB
}
@@ -50,3 +49,10 @@ func NewFormatter(db db.DB) Formatter {
db: db,
}
}
+
+type FormatResult struct {
+ HTML string
+ Mentions []*gtsmodel.Mention
+ Tags []*gtsmodel.Tag
+ Emojis []*gtsmodel.Emoji
+}
diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go
index 438a69c7..32ae7448 100644
--- a/internal/text/formatter_test.go
+++ b/internal/text/formatter_test.go
@@ -19,9 +19,13 @@
package text_test
import (
+ "context"
"github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/messages"
+ "github.com/superseriousbusiness/gotosocial/internal/processing"
"github.com/superseriousbusiness/gotosocial/internal/text"
"github.com/superseriousbusiness/gotosocial/testrig"
)
@@ -29,7 +33,8 @@ import (
type TextStandardTestSuite struct {
// standard suite interfaces
suite.Suite
- db db.DB
+ db db.DB
+ parseMention gtsmodel.ParseMentionFunc
// standard suite models
testTokens map[string]*gtsmodel.Token
@@ -41,6 +46,7 @@ type TextStandardTestSuite struct {
testStatuses map[string]*gtsmodel.Status
testTags map[string]*gtsmodel.Tag
testMentions map[string]*gtsmodel.Mention
+ testEmojis map[string]*gtsmodel.Emoji
// module being tested
formatter text.Formatter
@@ -56,6 +62,7 @@ func (suite *TextStandardTestSuite) SetupSuite() {
suite.testStatuses = testrig.NewTestStatuses()
suite.testTags = testrig.NewTestTags()
suite.testMentions = testrig.NewTestMentions()
+ suite.testEmojis = testrig.NewTestEmojis()
}
func (suite *TextStandardTestSuite) SetupTest() {
@@ -63,6 +70,11 @@ func (suite *TextStandardTestSuite) SetupTest() {
testrig.InitTestConfig()
suite.db = testrig.NewTestDB()
+
+ fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1)
+ federator := testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, fedWorker), nil, nil, fedWorker)
+ suite.parseMention = processing.GetParseMentionFunc(suite.db, federator)
+
suite.formatter = text.NewFormatter(suite.db)
testrig.StandardDBSetup(suite.db, nil)
@@ -71,3 +83,11 @@ func (suite *TextStandardTestSuite) SetupTest() {
func (suite *TextStandardTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
}
+
+func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult {
+ return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
+}
+
+func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult {
+ return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text)
+}
diff --git a/internal/text/markdownextension.go b/internal/text/goldmark_extension.go
similarity index 57%
rename from internal/text/markdownextension.go
rename to internal/text/goldmark_extension.go
index 2d8eae90..11e4fde2 100644
--- a/internal/text/markdownextension.go
+++ b/internal/text/goldmark_extension.go
@@ -17,8 +17,10 @@ package text
import (
"context"
- "unicode"
+ "fmt"
+ "strings"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
@@ -46,8 +48,14 @@ type hashtag struct {
Segment text.Segment
}
+type emoji struct {
+ ast.BaseInline
+ Segment text.Segment
+}
+
var kindMention = ast.NewNodeKind("Mention")
var kindHashtag = ast.NewNodeKind("Hashtag")
+var kindEmoji = ast.NewNodeKind("Emoji")
func (n *mention) Kind() ast.NodeKind {
return kindMention
@@ -57,14 +65,21 @@ func (n *hashtag) Kind() ast.NodeKind {
return kindHashtag
}
-// Dump is used by goldmark for debugging. It is implemented only minimally because
-// it is not used in our code.
+func (n *emoji) Kind() ast.NodeKind {
+ return kindEmoji
+}
+
+// Dump can be used for debugging.
func (n *mention) Dump(source []byte, level int) {
- ast.DumpHelper(n, source, level, nil, nil)
+ fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
func (n *hashtag) Dump(source []byte, level int) {
- ast.DumpHelper(n, source, level, nil, nil)
+ fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
+}
+
+func (n *emoji) Dump(source []byte, level int) {
+ fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
@@ -83,6 +98,13 @@ func newHashtag(s text.Segment) *hashtag {
}
}
+func newEmoji(s text.Segment) *emoji {
+ return &emoji{
+ BaseInline: ast.BaseInline{},
+ Segment: s,
+ }
+}
+
// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
type mentionParser struct {
}
@@ -90,6 +112,9 @@ type mentionParser struct {
type hashtagParser struct {
}
+type emojiParser struct {
+}
+
func (p *mentionParser) Trigger() []byte {
return []byte{'@'}
}
@@ -98,11 +123,15 @@ func (p *hashtagParser) Trigger() []byte {
return []byte{'#'}
}
+func (p *emojiParser) Trigger() []byte {
+ return []byte{':'}
+}
+
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
before := block.PrecendingCharacter()
line, segment := block.PeekLine()
- if !unicode.IsSpace(before) {
+ if !util.IsMentionOrHashtagBoundary(before) {
return nil
}
@@ -124,59 +153,88 @@ func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
line, segment := block.PeekLine()
s := string(line)
- if !util.IsHashtagBoundary(before) {
+ if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
return nil
}
for i, r := range s {
switch {
case r == '#' && i == 0:
+ // ignore initial #
continue
- case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r):
+ case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
// Fake hashtag, don't trust it
return nil
- case util.IsHashtagBoundary(r):
+ case util.IsMentionOrHashtagBoundary(r):
+ if i <= 1 {
+ // empty
+ return nil
+ }
// End of hashtag
block.Advance(i)
return newHashtag(segment.WithStop(segment.Start + i))
}
}
- // If we don't find invalid characters before the end of the line then it's good
- block.Advance(len(s))
+ // If we don't find invalid characters before the end of the line then it's all hashtag, babey
+ block.Advance(segment.Len())
return newHashtag(segment)
}
+func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+ line, segment := block.PeekLine()
+
+ // not ideal for performance, but this reuses the existing EmojiFinder regex instead of hand-rolling a scanner
+ loc := regexes.EmojiFinder.FindIndex(line)
+ switch {
+ case loc == nil:
+ fallthrough
+ case loc[0] != 0: // fail if there is no match (via fallthrough above) or the match does not begin at the start of the peeked line
+ return nil
+ default:
+ block.Advance(loc[1])
+ return newEmoji(segment.WithStop(segment.Start + loc[1]))
+ }
+}
+
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
-// It is created in FromMarkdown to be used a goldmark extension, and the fields are used
-// when rendering mentions and tags.
+// It is created in FromMarkdown, FromPlain, and FromPlainEmojiOnly to be used as a goldmark extension, and the
+// fields are used to report tags, mentions, and emoji to the caller (via result) for use as metadata.
type customRenderer struct {
- f *formatter
- ctx context.Context
- mentions []*gtsmodel.Mention
- tags []*gtsmodel.Tag
+ f *formatter
+ ctx context.Context
+ parseMention gtsmodel.ParseMentionFunc
+ accountID string
+ statusID string
+ emojiOnly bool
+ result *FormatResult
}
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(kindMention, r.renderMention)
reg.Register(kindHashtag, r.renderHashtag)
+ reg.Register(kindEmoji, r.renderEmoji)
}
func (r *customRenderer) Extend(m goldmark.Markdown) {
+ // 1000 is set as the lowest priority, but it's arbitrary
m.Parser().AddOptions(parser.WithInlineParsers(
- // 500 is pretty arbitrary here, it was copied from example goldmark extension code.
- // https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111
- mdutil.Prioritized(&mentionParser{}, 500),
- mdutil.Prioritized(&hashtagParser{}, 500),
+ mdutil.Prioritized(&emojiParser{}, 1000),
))
+ if !r.emojiOnly {
+ m.Parser().AddOptions(parser.WithInlineParsers(
+ mdutil.Prioritized(&mentionParser{}, 1000),
+ mdutil.Prioritized(&hashtagParser{}, 1000),
+ ))
+ }
m.Renderer().AddOptions(renderer.WithNodeRenderers(
- mdutil.Prioritized(r, 500),
+ mdutil.Prioritized(r, 1000),
))
}
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
- return ast.WalkContinue, nil
+ return ast.WalkSkipChildren, nil
}
n, ok := node.(*mention) // this function is only registered for kindMention
@@ -185,18 +243,18 @@ func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node a
}
text := string(n.Segment.Value(source))
- html := r.f.ReplaceMentions(r.ctx, text, r.mentions)
+ html := r.replaceMention(text)
// we don't have much recourse if this fails
if _, err := w.WriteString(html); err != nil {
- log.Errorf("error outputting markdown text: %s", err)
+ log.Errorf("error writing HTML: %s", err)
}
- return ast.WalkContinue, nil
+ return ast.WalkSkipChildren, nil
}
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
- return ast.WalkContinue, nil
+ return ast.WalkSkipChildren, nil
}
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
@@ -205,11 +263,50 @@ func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node a
}
text := string(n.Segment.Value(source))
- html := r.f.ReplaceTags(r.ctx, text, r.tags)
+ html := r.replaceHashtag(text)
+
+ _, err := w.WriteString(html)
+ // we don't have much recourse if this fails
+ if err != nil {
+ log.Errorf("error writing HTML: %s", err)
+ }
+ return ast.WalkSkipChildren, nil
+}
+
+// renderEmoji writes the emoji shortcode text through unchanged rather than turning it into HTML, and adds the matching local emoji (when found, visible in the picker, and not disabled) to the result metadata, once per shortcode.
+func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+ if !entering {
+ return ast.WalkSkipChildren, nil
+ }
+
+ n, ok := node.(*emoji) // this function is only registered for kindEmoji; NOTE(review): a failed assertion is only logged, so n would be nil when dereferenced below
+ if !ok {
+ log.Errorf("type assertion failed")
+ }
+ text := string(n.Segment.Value(source))
+ shortcode := text[1 : len(text)-1]
+
+ emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
+ if err != nil {
+ if err != db.ErrNoEntries {
+ log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
+ }
+ } else if *emoji.VisibleInPicker && !*emoji.Disabled {
+ listed := false
+ for _, e := range r.result.Emojis {
+ if e.Shortcode == emoji.Shortcode {
+ listed = true
+ break
+ }
+ }
+ if !listed {
+ r.result.Emojis = append(r.result.Emojis, emoji)
+ }
+ }
// we don't have much recourse if this fails
- if _, err := w.WriteString(html); err != nil {
- log.Errorf("error outputting markdown text: %s", err)
+ if _, err := w.WriteString(text); err != nil {
+ log.Errorf("error writing HTML: %s", err)
}
- return ast.WalkContinue, nil
+ return ast.WalkSkipChildren, nil
}
diff --git a/internal/text/goldmark_plaintext.go b/internal/text/goldmark_plaintext.go
new file mode 100644
index 00000000..84916b1d
--- /dev/null
+++ b/internal/text/goldmark_plaintext.go
@@ -0,0 +1,64 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+// plaintextParser implements goldmark.parser.BlockParser
+type plaintextParser struct {
+}
+
+var defaultPlaintextParser = &plaintextParser{}
+
+func newPlaintextParser() parser.BlockParser {
+ return defaultPlaintextParser
+}
+
+func (b *plaintextParser) Trigger() []byte {
+ return nil
+}
+
+func (b *plaintextParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
+ _, segment := reader.PeekLine()
+ node := ast.NewParagraph()
+ node.Lines().Append(segment)
+ reader.Advance(segment.Len() - 1)
+ return node, parser.NoChildren
+}
+
+func (b *plaintextParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
+ _, segment := reader.PeekLine()
+ node.Lines().Append(segment)
+ reader.Advance(segment.Len() - 1)
+ return parser.Continue | parser.NoChildren
+}
+
+func (b *plaintextParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {}
+
+func (b *plaintextParser) CanInterruptParagraph() bool {
+ return false
+}
+
+func (b *plaintextParser) CanAcceptIndentedLine() bool {
+ return true
+}
diff --git a/internal/text/link.go b/internal/text/link.go
deleted file mode 100644
index 2b2b45e7..00000000
--- a/internal/text/link.go
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see .
-*/
-
-package text
-
-import (
- "bytes"
- "context"
- "net/url"
- "strings"
-
- "github.com/superseriousbusiness/gotosocial/internal/regexes"
-)
-
-// FindLinks parses the given string looking for recognizable URLs (including scheme).
-// It returns a list of those URLs, without changing the string, or an error if something goes wrong.
-// If no URLs are found within the given string, an empty slice and nil will be returned.
-func FindLinks(in string) []*url.URL {
- var urls []*url.URL
-
- // bail already if we don't find anything
- found := regexes.LinkScheme.FindAllString(in, -1)
- if len(found) == 0 {
- return nil
- }
-
- urlmap := map[string]struct{}{}
-
- // for each string we find, we want to parse it into a URL if we can
- // if we fail to parse it, just ignore this match and continue
- for _, f := range found {
- u, err := url.Parse(f)
- if err != nil {
- continue
- }
-
- // Calculate string
- ustr := u.String()
-
- if _, ok := urlmap[ustr]; !ok {
- // Has not been encountered yet
- urls = append(urls, u)
- urlmap[ustr] = struct{}{}
- }
- }
-
- return urls
-}
-
-// ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents.
-// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted
-// href will end up double-formatted, if the text you pass here contains one or more hrefs already.
-// To avoid this, you should sanitize any HTML out of text before you pass it into this function.
-func (f *formatter) ReplaceLinks(ctx context.Context, in string) string {
- return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string {
- thisURL, err := url.Parse(urlString)
- if err != nil {
- return urlString // we can't parse it as a URL so don't replace it
- }
- // urlString
- urlString = thisURL.String()
- buf.WriteString(``)
- urlString = strings.TrimPrefix(urlString, thisURL.Scheme)
- urlString = strings.TrimPrefix(urlString, "://")
- buf.WriteString(urlString)
- buf.WriteString(``)
- return buf.String()
- })
-}
diff --git a/internal/text/link_test.go b/internal/text/link_test.go
deleted file mode 100644
index dfb4656b..00000000
--- a/internal/text/link_test.go
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see .
-*/
-
-package text_test
-
-import (
- "context"
- "testing"
-
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/text"
-)
-
-const text1 = `
-This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment
-
-Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh
-
-https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it
-
-really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme
-
-https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK
-`
-
-const text2 = `
-this is one link: https://example.org
-
-this is the same link again: https://example.org
-
-these should be deduplicated
-`
-
-const text3 = `
-here's a mailto link: mailto:whatever@test.org
-`
-
-const text4 = `
-two similar links:
-
-https://example.org
-
-https://example.org/test
-`
-
-const text5 = `
-what happens when we already have a link within an href?
-
-https://example.org
-`
-
-type LinkTestSuite struct {
- TextStandardTestSuite
-}
-
-func (suite *LinkTestSuite) TestParseSimple() {
- f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
- suite.Equal(simpleExpected, f)
-}
-
-func (suite *LinkTestSuite) TestParseURLsFromText1() {
- urls := text.FindLinks(text1)
-
- suite.Equal("https://example.org/link/to/something#fragment", urls[0].String())
- suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String())
- suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String())
- suite.Equal("https://example.orghttps://google.com", urls[3].String())
-}
-
-func (suite *LinkTestSuite) TestParseURLsFromText2() {
- urls := text.FindLinks(text2)
-
- // assert length 1 because the found links will be deduplicated
- assert.Len(suite.T(), urls, 1)
-}
-
-func (suite *LinkTestSuite) TestParseURLsFromText3() {
- urls := text.FindLinks(text3)
-
- // assert length 0 because `mailto:` isn't accepted
- assert.Len(suite.T(), urls, 0)
-}
-
-func (suite *LinkTestSuite) TestReplaceLinksFromText1() {
- replaced := suite.formatter.ReplaceLinks(context.Background(), text1)
- suite.Equal(`
-This is a text with some links in it. Here's link number one: example.org/link/to/something#fragment
-
-Here's link number two: test.example.org?q=bahhhhhhhhhhhh
-
-another.link.example.org/with/a/pretty/long/path/at/the/end/of/it
-
-really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme
-
-example.orghttps://google.com <-- this shouldn't work either, but it does?! OK
-`, replaced)
-}
-
-func (suite *LinkTestSuite) TestReplaceLinksFromText2() {
- replaced := suite.formatter.ReplaceLinks(context.Background(), text2)
- suite.Equal(`
-this is one link: example.org
-
-this is the same link again: example.org
-
-these should be deduplicated
-`, replaced)
-}
-
-func (suite *LinkTestSuite) TestReplaceLinksFromText3() {
- // we know mailto links won't be replaced with hrefs -- we only accept https and http
- replaced := suite.formatter.ReplaceLinks(context.Background(), text3)
- suite.Equal(`
-here's a mailto link: mailto:whatever@test.org
-`, replaced)
-}
-
-func (suite *LinkTestSuite) TestReplaceLinksFromText4() {
- replaced := suite.formatter.ReplaceLinks(context.Background(), text4)
- suite.Equal(`
-two similar links:
-
-example.org
-
-example.org/test
-`, replaced)
-}
-
-func (suite *LinkTestSuite) TestReplaceLinksFromText5() {
- // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function
- replaced := suite.formatter.ReplaceLinks(context.Background(), text5)
- suite.Equal(`
-what happens when we already have a link within an href?
-
-example.org">example.org
-`, replaced)
-}
-
-func TestLinkTestSuite(t *testing.T) {
- suite.Run(t, new(LinkTestSuite))
-}
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
index dbe86d11..232f0f72 100644
--- a/internal/text/markdown.go
+++ b/internal/text/markdown.go
@@ -21,32 +21,19 @@ package text
import (
"bytes"
"context"
- "strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/tdewolff/minify/v2"
- minifyHtml "github.com/tdewolff/minify/v2/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
-var (
- m *minify.M
-)
-
-func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string {
-
- // Temporarily replace all found emoji shortcodes in the markdown text with
- // their ID so that they're not parsed as anything by the markdown parser -
- // this fixes cases where emojis with some underscores in them are parsed as
- // words with emphasis, eg `:_some_emoji:` becomes `:someemoji:`
- //
- // Since the IDs of the emojis are just uppercase letters + numbers they should
- // be safe to pass through the markdown parser without unexpected effects.
- for _, e := range emojis {
- markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":")
+func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult {
+ result := &FormatResult{
+ Mentions: []*gtsmodel.Mention{},
+ Tags: []*gtsmodel.Tag{},
+ Emojis: []*gtsmodel.Emoji{},
}
// parse markdown text into html, using custom renderer to add hashtag/mention links
@@ -57,7 +44,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
html.WithUnsafe(), // allows raw HTML
),
goldmark.WithExtensions(
- &customRenderer{f, ctx, mentions, tags},
+ &customRenderer{f, ctx, pmf, authorID, statusID, false, result},
extension.Linkify, // turns URLs into links
extension.Strikethrough,
),
@@ -66,30 +53,15 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
var htmlContentBytes bytes.Buffer
err := md.Convert([]byte(markdownText), &htmlContentBytes)
if err != nil {
- log.Errorf("error rendering markdown to HTML: %s", err)
+ log.Errorf("error formatting markdown to HTML: %s", err)
}
- htmlContent := htmlContentBytes.String()
+ result.HTML = htmlContentBytes.String()
- // Replace emoji IDs in the parsed html content with their shortcodes again
- for _, e := range emojis {
- htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":")
- }
+ // clean anything dangerous out of the HTML
+ result.HTML = SanitizeHTML(result.HTML)
- // clean anything dangerous out of the html
- htmlContent = SanitizeHTML(htmlContent)
+ // shrink ray
+ result.HTML = minifyHTML(result.HTML)
- if m == nil {
- m = minify.New()
- m.Add("text/html", &minifyHtml.Minifier{
- KeepEndTags: true,
- KeepQuotes: true,
- })
- }
-
- minified, err := m.String("text/html", htmlContent)
- if err != nil {
- log.Errorf("error minifying markdown text: %s", err)
- }
-
- return minified
+ return result
}
diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go
index 384f4389..80547f8b 100644
--- a/internal/text/markdown_test.go
+++ b/internal/text/markdown_test.go
@@ -19,11 +19,9 @@
package text_test
import (
- "context"
"testing"
"github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
var withCodeBlock = `# Title
@@ -77,6 +75,16 @@ const (
mdWithStrikethroughExpected = "I have mdae made an error
"
mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"
mdWithLinkExpected = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial
"
+ mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
+ mdObjectInCodeBlockExpected = "@foss_satan this is how to mention a user
@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n
hope that helps
"
+ mdItalicHashtag = "_#hashtag_"
+ mdItalicHashtagExpected = "#hashtag
"
+ mdItalicHashtags = "_#hashtag #hashtag #hashtag_"
+ mdItalicHashtagsExpected = "#hashtag #hashtag #hashtag
"
+ // BEWARE: sneaky unicode business going on.
+ // the first ö is one rune, the second ö is an o with a combining diacritic.
+ mdUnnormalizedHashtag = "#hellöthere #hellöthere"
+ mdUnnormalizedHashtagExpected = "#hellöthere #hellöthere
"
)
type MarkdownTestSuite struct {
@@ -84,101 +92,112 @@ type MarkdownTestSuite struct {
}
func (suite *MarkdownTestSuite) TestParseSimple() {
- s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil)
- suite.Equal(simpleMarkdownExpected, s)
+ formatted := suite.FromMarkdown(simpleMarkdown)
+ suite.Equal(simpleMarkdownExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithCodeBlock() {
- s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil)
- suite.Equal(withCodeBlockExpected, s)
+ formatted := suite.FromMarkdown(withCodeBlock)
+ suite.Equal(withCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithInlineCode() {
- s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil)
- suite.Equal(withInlineCodeExpected, s)
+ formatted := suite.FromMarkdown(withInlineCode)
+ suite.Equal(withInlineCodeExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithInlineCode2() {
- s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil)
- suite.Equal(withInlineCode2Expected, s)
+ formatted := suite.FromMarkdown(withInlineCode2)
+ suite.Equal(withInlineCode2Expected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHashtag() {
- foundTags := []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- }
-
- s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil)
- suite.Equal(withHashtagExpected, s)
+ formatted := suite.FromMarkdown(withHashtag)
+ suite.Equal(withHashtagExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHTML() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil)
- suite.Equal(mdWithHTMLExpected, s)
+ formatted := suite.FromMarkdown(mdWithHTML)
+ suite.Equal(mdWithHTMLExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil, nil)
- suite.Equal(mdWithCheekyHTMLExpected, s)
+ formatted := suite.FromMarkdown(mdWithCheekyHTML)
+ suite.Equal(mdWithCheekyHTMLExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- suite.testTags["welcome"],
- }, nil)
- suite.Equal(mdWithHashtagInitialExpected, s)
+ formatted := suite.FromMarkdown(mdWithHashtagInitial)
+ suite.Equal(mdWithHashtagInitialExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() {
- s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil)
- suite.Equal(mdCodeBlockWithNewlinesExpected, s)
+ formatted := suite.FromMarkdown(mdCodeBlockWithNewlines)
+ suite.Equal(mdCodeBlockWithNewlinesExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithFootnote() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil)
- suite.Equal(mdWithFootnoteExpected, s)
+ formatted := suite.FromMarkdown(mdWithFootnote)
+ suite.Equal(mdWithFootnoteExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithBlockquote() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil)
- suite.Equal(mdWithBlockQuoteExpected, s)
+ formatted := suite.FromMarkdown(mdWithBlockQuote)
+ suite.Equal(mdWithBlockQuoteExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() {
- s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- }, nil)
- suite.Equal(mdHashtagAndCodeBlockExpected, s)
+ formatted := suite.FromMarkdown(mdHashtagAndCodeBlock)
+ suite.Equal(mdHashtagAndCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() {
- s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{
- suite.testMentions["local_user_2_mention_zork"],
- }, nil, nil)
- suite.Equal(mdMentionAndCodeBlockExpected, s)
+ formatted := suite.FromMarkdown(mdMentionAndCodeBlock)
+ suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseSmartypants() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithSmartypants, []*gtsmodel.Mention{
- suite.testMentions["local_user_2_mention_zork"],
- }, nil, nil)
- suite.Equal(mdWithSmartypantsExpected, s)
+ formatted := suite.FromMarkdown(mdWithSmartypants)
+ suite.Equal(mdWithSmartypantsExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseAsciiHeart() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil)
- suite.Equal(mdWithAsciiHeartExpected, s)
+ formatted := suite.FromMarkdown(mdWithAsciiHeart)
+ suite.Equal(mdWithAsciiHeartExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseStrikethrough() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil)
- suite.Equal(mdWithStrikethroughExpected, s)
+ formatted := suite.FromMarkdown(mdWithStrikethrough)
+ suite.Equal(mdWithStrikethroughExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseLink() {
- s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil)
- suite.Equal(mdWithLinkExpected, s)
+ formatted := suite.FromMarkdown(mdWithLink)
+ suite.Equal(mdWithLinkExpected, formatted.HTML)
+}
+
+func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() {
+ formatted := suite.FromMarkdown(mdObjectInCodeBlock)
+ suite.Equal(mdObjectInCodeBlockExpected, formatted.HTML)
+ suite.Len(formatted.Mentions, 1)
+ suite.Equal("@foss_satan@fossbros-anonymous.io", formatted.Mentions[0].NameString)
+ suite.Empty(formatted.Tags)
+ suite.Empty(formatted.Emojis)
+}
+
+func (suite *MarkdownTestSuite) TestParseItalicHashtag() {
+ formatted := suite.FromMarkdown(mdItalicHashtag)
+ suite.Equal(mdItalicHashtagExpected, formatted.HTML)
+}
+
+func (suite *MarkdownTestSuite) TestParseItalicHashtags() {
+ formatted := suite.FromMarkdown(mdItalicHashtags)
+ suite.Equal(mdItalicHashtagsExpected, formatted.HTML)
+}
+
+func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() {
+ formatted := suite.FromMarkdown(mdUnnormalizedHashtag)
+ suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)
}
func TestMarkdownTestSuite(t *testing.T) {
diff --git a/internal/text/minify.go b/internal/text/minify.go
new file mode 100644
index 00000000..62562c7c
--- /dev/null
+++ b/internal/text/minify.go
@@ -0,0 +1,45 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/tdewolff/minify/v2"
+ "github.com/tdewolff/minify/v2/html"
+)
+
+var (
+ m *minify.M
+)
+
+func minifyHTML(content string) string {
+ if m == nil {
+ m = minify.New()
+ m.Add("text/html", &html.Minifier{
+ KeepEndTags: true,
+ KeepQuotes: true,
+ })
+ }
+
+ minified, err := m.String("text/html", content)
+ if err != nil {
+ log.Errorf("error minifying HTML: %s", err)
+ }
+ return minified
+}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index a64a14f0..3549200c 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -19,40 +19,56 @@
package text
import (
+ "bytes"
"context"
- "html"
- "strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/util"
)
-// breakReplacer replaces new-lines with HTML breaks.
-var breakReplacer = strings.NewReplacer(
- "\r\n", "
",
- "\n", "
",
-)
+func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
+ result := &FormatResult{
+ Mentions: []*gtsmodel.Mention{},
+ Tags: []*gtsmodel.Tag{},
+ Emojis: []*gtsmodel.Emoji{},
+ }
-func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
- // trim any crap
- content := strings.TrimSpace(plain)
+ // parse plain text into html, using custom renderer to add hashtag/mention links
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithHardWraps(),
+ ),
+ goldmark.WithParser(
+ parser.NewParser(
+ parser.WithBlockParsers(
+ util.Prioritized(newPlaintextParser(), 500),
+ ),
+ ),
+ ),
+ goldmark.WithExtensions(
+ &customRenderer{f, ctx, pmf, authorID, statusID, false, result},
+ extension.Linkify, // turns URLs into links
+ ),
+ )
- // clean 'er up
- content = html.EscapeString(content)
+ var htmlContentBytes bytes.Buffer
+ err := md.Convert([]byte(plain), &htmlContentBytes)
+ if err != nil {
+ log.Errorf("error formatting plaintext to HTML: %s", err)
+ }
+ result.HTML = htmlContentBytes.String()
- // format links nicely
- content = f.ReplaceLinks(ctx, content)
+ // clean anything dangerous out of the HTML
+ result.HTML = SanitizeHTML(result.HTML)
- // format tags nicely
- content = f.ReplaceTags(ctx, content, tags)
+ // shrink ray
+ result.HTML = minifyHTML(result.HTML)
- // format mentions nicely
- content = f.ReplaceMentions(ctx, content, mentions)
-
- // replace newlines with breaks
- content = breakReplacer.Replace(content)
-
- // wrap the whole thing in a pee
- content = `` + content + `
`
-
- return SanitizeHTML(content)
+ return result
}
diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
index 6b850cb4..3693ada9 100644
--- a/internal/text/plain_test.go
+++ b/internal/text/plain_test.go
@@ -19,22 +19,21 @@
package text_test
import (
- "context"
"testing"
+ "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
const (
- simple = "this is a plain and simple status"
- simpleExpected = "this is a plain and simple status
"
- withTag = "here's a simple status that uses hashtag #welcome!"
- withTagExpected = "here's a simple status that uses hashtag #welcome!
"
- withHTML = "blah this should just be html escaped blah
"
- withHTMLExpected = "<div>blah this should just be html escaped blah</div>
"
- moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
- moreComplexFull = "Another test @foss_satan
#Hashtag
Text
"
+ simple = "this is a plain and simple status"
+ simpleExpected = "this is a plain and simple status
"
+ withTag = "here's a simple status that uses hashtag #welcome!"
+ withTagExpected = "here's a simple status that uses hashtag #welcome!
"
+ withHTML = "blah this should just be html escaped blah
"
+ withHTMLExpected = "<div>blah this should just be html escaped blah</div>
"
+ moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:"
+ moreComplexExpected = "Another test @foss_satan
#Hashtag
Text
:rainbow:
"
)
type PlainTestSuite struct {
@@ -42,35 +41,105 @@ type PlainTestSuite struct {
}
func (suite *PlainTestSuite) TestParseSimple() {
- f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
- suite.Equal(simpleExpected, f)
+ formatted := suite.FromPlain(simple)
+ suite.Equal(simpleExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseWithTag() {
- foundTags := []*gtsmodel.Tag{
- suite.testTags["welcome"],
- }
-
- f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags)
- suite.Equal(withTagExpected, f)
+ formatted := suite.FromPlain(withTag)
+ suite.Equal(withTagExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseWithHTML() {
- f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil)
- suite.Equal(withHTMLExpected, f)
+ formatted := suite.FromPlain(withHTML)
+ suite.Equal(withHTMLExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseMoreComplex() {
- foundTags := []*gtsmodel.Tag{
- suite.testTags["Hashtag"],
- }
+ formatted := suite.FromPlain(moreComplex)
+ suite.Equal(moreComplexExpected, formatted.HTML)
+}
- foundMentions := []*gtsmodel.Mention{
- suite.testMentions["zork_mention_foss_satan"],
- }
+func (suite *PlainTestSuite) TestLinkNoMention() {
+ statusText := `here's a link to a post by zork
- f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags)
- suite.Equal(moreComplexFull, f)
+https://example.com/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1
+
+that link shouldn't come out formatted as a mention!`
+
+ menchies := suite.FromPlain(statusText).Mentions
+ suite.Empty(menchies)
+}
+
+func (suite *PlainTestSuite) TestDeriveMentionsEmpty() {
+ statusText := ``
+ menchies := suite.FromPlain(statusText).Mentions
+ assert.Len(suite.T(), menchies, 0)
+}
+
+func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
+ statusText := `weeeeeeee #testing123 #also testing
+
+# testing this one shouldn't work
+
+ #thisshouldwork #dupe #dupe!! #dupe
+
+ here's a link with a fragment: https://example.org/whatever#ahhh
+ here's another link with a fragment: https://example.org/whatever/#ahhh
+
+(#ThisShouldAlsoWork) #this_should_be_split
+
+#111111 thisalsoshouldn'twork#### ##
+
+#alimentación, #saúde, #lävistää, #ö, #네
+#ThisOneIsThirtyOneCharactersLon... ...ng
+#ThisOneIsThirteyCharactersLong
+`
+
+ tags := suite.FromPlain(statusText).Tags
+ assert.Len(suite.T(), tags, 13)
+ assert.Equal(suite.T(), "testing123", tags[0].Name)
+ assert.Equal(suite.T(), "also", tags[1].Name)
+ assert.Equal(suite.T(), "thisshouldwork", tags[2].Name)
+ assert.Equal(suite.T(), "dupe", tags[3].Name)
+ assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name)
+ assert.Equal(suite.T(), "this", tags[5].Name)
+ assert.Equal(suite.T(), "111111", tags[6].Name)
+ assert.Equal(suite.T(), "alimentación", tags[7].Name)
+ assert.Equal(suite.T(), "saúde", tags[8].Name)
+ assert.Equal(suite.T(), "lävistää", tags[9].Name)
+ assert.Equal(suite.T(), "ö", tags[10].Name)
+ assert.Equal(suite.T(), "네", tags[11].Name)
+ assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name)
+
+ statusText = `#올빼미 hej`
+ tags = suite.FromPlain(statusText).Tags
+ assert.Equal(suite.T(), "올빼미", tags[0].Name)
+}
+
+func (suite *PlainTestSuite) TestDeriveMultiple() {
+ statusText := `Another test @foss_satan@fossbros-anonymous.io
+
+ #Hashtag
+
+ Text`
+
+ f := suite.FromPlain(statusText)
+
+ assert.Len(suite.T(), f.Mentions, 1)
+ assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
+
+ assert.Len(suite.T(), f.Tags, 1)
+ assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name)
+
+ assert.Len(suite.T(), f.Emojis, 0)
+}
+
+func (suite *PlainTestSuite) TestZalgoHashtag() {
+ statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?`
+ f := suite.FromPlain(statusText)
+ assert.Len(suite.T(), f.Tags, 1)
+ assert.Equal(suite.T(), "praying", f.Tags[0].Name)
}
func TestPlainTestSuite(t *testing.T) {
diff --git a/internal/text/replace.go b/internal/text/replace.go
new file mode 100644
index 00000000..5deab5d4
--- /dev/null
+++ b/internal/text/replace.go
@@ -0,0 +1,141 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "errors"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/util"
+ "golang.org/x/text/unicode/norm"
+ "strings"
+)
+
+const (
+ maximumHashtagLength = 30
+)
+
+// given a mention or a hashtag string, the methods in this file will attempt to parse it,
+// add it to the database, and render it as HTML. If any of these steps fails, the method
+// will just return the original string and log an error.
+
+// replaceMention takes a string in the form @username@domain.com or @localusername, and returns the formatted mention (or the original string if any step fails).
+func (r *customRenderer) replaceMention(text string) string {
+ menchie, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
+ if err != nil {
+ log.Errorf("error parsing mention %s from status: %s", text, err)
+ return text
+ }
+
+ if r.statusID != "" {
+ if err := r.f.db.Put(r.ctx, menchie); err != nil {
+ log.Errorf("error putting mention in db: %s", err)
+ return text
+ }
+ }
+
+ // only append if it's not been listed yet
+ listed := false
+ for _, m := range r.result.Mentions {
+ if menchie.ID == m.ID {
+ listed = true
+ break
+ }
+ }
+ if !listed {
+ r.result.Mentions = append(r.result.Mentions, menchie)
+ }
+
+ // make sure we have an account attached to this mention
+ if menchie.TargetAccount == nil {
+ a, err := r.f.db.GetAccountByID(r.ctx, menchie.TargetAccountID)
+ if err != nil {
+ log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err)
+ return text
+ }
+ menchie.TargetAccount = a
+ }
+
+ // The mention's target is our target
+ targetAccount := menchie.TargetAccount
+
+ var b strings.Builder
+
+ // replace the mention with the formatted mention content
+ // @targetAccount.Username
+ b.WriteString(`@`)
+ b.WriteString(targetAccount.Username)
+ b.WriteString(``)
+ return b.String()
+}
+
+// replaceHashtag takes a string in the form #HashedTag, and will normalize it before
+// adding it to the db and turning it into HTML.
+func (r *customRenderer) replaceHashtag(text string) string {
+ // this normalization is specifically to avoid cases where visually-identical
+ // hashtags are stored with different unicode representations (e.g. with combining
+ // diacritics). It allows a tasteful number of combining diacritics to be used,
+ // as long as they can be combined with parent characters to form regular letter
+ // symbols.
+ normalized := norm.NFC.String(text[1:])
+
+ for i, r := range normalized {
+ if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) {
+ return text
+ }
+ }
+
+ tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID)
+ if err != nil {
+ log.Errorf("error generating hashtags from status: %s", err)
+ return text
+ }
+
+ // only append if it's not been listed yet
+ listed := false
+ for _, t := range r.result.Tags {
+ if tag.ID == t.ID {
+ listed = true
+ break
+ }
+ }
+ if !listed {
+ err = r.f.db.Put(r.ctx, tag)
+ if err != nil {
+ if !errors.Is(err, db.ErrAlreadyExists) {
+ log.Errorf("error putting tags in db: %s", err)
+ return text
+ }
+ }
+ r.result.Tags = append(r.result.Tags, tag)
+ }
+
+ var b strings.Builder
+ // replace the #tag with the formatted tag content
+ // (the tag text written out is the normalized form, not the text as entered)
+ b.WriteString(`#`)
+ b.WriteString(normalized)
+ b.WriteString(``)
+
+ return b.String()
+}
diff --git a/internal/util/statustools.go b/internal/util/statustools.go
index 80a09162..a4bb1500 100644
--- a/internal/util/statustools.go
+++ b/internal/util/statustools.go
@@ -20,115 +20,19 @@ package util
import (
"unicode"
- "unicode/utf8"
-
- "github.com/superseriousbusiness/gotosocial/internal/regexes"
)
-const (
- maximumHashtagLength = 30
-)
-
-// DeriveMentionNamesFromText takes a plaintext (ie., not html-formatted) text,
-// and applies a regex to it to return a deduplicated list of account names
-// mentioned in that text, in the format "@user@example.org" or "@username" for
-// local users.
-func DeriveMentionNamesFromText(text string) []string {
- mentionedAccounts := []string{}
- for _, m := range regexes.MentionFinder.FindAllStringSubmatch(text, -1) {
- mentionedAccounts = append(mentionedAccounts, m[1])
- }
- return UniqueStrings(mentionedAccounts)
-}
-
-type Pair[A, B any] struct {
- First A
- Second B
-}
-
-// Byte index in original string
-// `First` includes `#`.
-type Span = Pair[int, int]
-
-// Takes a plaintext (ie., not HTML-formatted) text,
-// and returns a slice of unique hashtags.
-func DeriveHashtagsFromText(text string) []string {
- tagsMap := make(map[string]bool)
- tags := []string{}
-
- for _, v := range FindHashtagSpansInText(text) {
- t := text[v.First+1 : v.Second]
- if _, value := tagsMap[t]; !value {
- tagsMap[t] = true
- tags = append(tags, t)
- }
- }
-
- return tags
-}
-
-// Takes a plaintext (ie., not HTML-formatted) text,
-// and returns a list of pairs of indices into the original string, where
-// hashtags are located.
-func FindHashtagSpansInText(text string) []Span {
- tags := []Span{}
- start := 0
- // Keep one rune of lookbehind.
- prev := ' '
- inTag := false
-
- for i, r := range text {
- if r == '#' && IsHashtagBoundary(prev) {
- // Start of hashtag.
- inTag = true
- start = i
- } else if inTag && !IsPermittedInHashtag(r) && !IsHashtagBoundary(r) {
- // Inside the hashtag, but it was a phoney, gottem.
- inTag = false
- } else if inTag && IsHashtagBoundary(r) {
- // End of hashtag.
- inTag = false
- appendTag(&tags, text, start, i)
- } else if irl := i + utf8.RuneLen(r); inTag && irl == len(text) {
- // End of text.
- appendTag(&tags, text, start, irl)
- }
-
- prev = r
- }
-
- return tags
-}
-
-func appendTag(tags *[]Span, text string, start int, end int) {
- l := end - start - 1
- // This check could be moved out into the parsing loop if necessary!
- if 0 < l && l <= maximumHashtagLength {
- *tags = append(*tags, Span{First: start, Second: end})
- }
-}
-
-// DeriveEmojisFromText takes a plaintext (ie., not html-formatted) text,
-// and applies a regex to it to return a deduplicated list of emojis
-// used in that text, without the surrounding `::`
-func DeriveEmojisFromText(text string) []string {
- emojis := []string{}
- for _, m := range regexes.EmojiFinder.FindAllStringSubmatch(text, -1) {
- emojis = append(emojis, m[1])
- }
- return UniqueStrings(emojis)
+func IsPlausiblyInHashtag(r rune) bool {
+ // Marks are allowed during parsing, prior to normalization, but not after,
+ // since they may be combined into letters during normalization.
+ return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsMark(r)
}
func IsPermittedInHashtag(r rune) bool {
return unicode.IsLetter(r) || unicode.IsNumber(r)
}
-// Decides where to break before or after a hashtag.
-func IsHashtagBoundary(r rune) bool {
- return r == '#' || // `###lol` should work
- unicode.IsSpace(r) || // All kinds of Unicode whitespace.
- unicode.IsControl(r) || // All kinds of control characters, like tab.
- // Most kinds of punctuation except "Pc" ("Punctuation, connecting", like `_`).
- // But `someurl/#fragment` should not match, neither should HTML entities like `#`.
- ('/' != r && '&' != r && !unicode.Is(unicode.Categories["Pc"], r) && unicode.IsPunct(r))
+// IsMentionOrHashtagBoundary reports whether r is whitespace or punctuation, i.e. a rune at which a #hashtag or @mention may begin or end.
+func IsMentionOrHashtagBoundary(r rune) bool {
+ return unicode.IsSpace(r) || unicode.IsPunct(r)
}
diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go
deleted file mode 100644
index bb01557c..00000000
--- a/internal/util/statustools_test.go
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package util_test
-
-import (
- "testing"
-
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/util"
-)
-
-type StatusTestSuite struct {
- suite.Suite
-}
-
-func (suite *StatusTestSuite) TestLinkNoMention() {
- statusText := `here's a link to a post by zork:
-
-https://localhost:8080/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1
-
-that link shouldn't come out formatted as a mention!`
-
- menchies := util.DeriveMentionNamesFromText(statusText)
- suite.Empty(menchies)
-}
-
-func (suite *StatusTestSuite) TestDeriveMentionsOK() {
- statusText := `@dumpsterqueer@example.org testing testing
-
- is this thing on?
-
- @someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt
-
- @thisisalocaluser!
-
- here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt
-
- @account1@whatever.com @account2@whatever.com
-
- `
-
- menchies := util.DeriveMentionNamesFromText(statusText)
- assert.Len(suite.T(), menchies, 6)
- assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
- assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
- assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2])
- assert.Equal(suite.T(), "@thisisalocaluser", menchies[3])
- assert.Equal(suite.T(), "@account1@whatever.com", menchies[4])
- assert.Equal(suite.T(), "@account2@whatever.com", menchies[5])
-}
-
-func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
- statusText := ``
- menchies := util.DeriveMentionNamesFromText(statusText)
- assert.Len(suite.T(), menchies, 0)
-}
-
-func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
- statusText := `weeeeeeee #testing123 #also testing
-
-# testing this one shouldn't work
-
- #thisshouldwork #dupe #dupe!! #dupe
-
- here's a link with a fragment: https://example.org/whatever#ahhh
- here's another link with a fragment: https://example.org/whatever/#ahhh
-
-(#ThisShouldAlsoWork) #not_this_though
-
-#111111 thisalsoshouldn'twork#### ##
-
-#alimentación, #saúde, #lävistää, #ö, #네
-#ThisOneIsThirtyOneCharactersLon... ...ng
-#ThisOneIsThirteyCharactersLong
-`
-
- tags := util.DeriveHashtagsFromText(statusText)
- assert.Len(suite.T(), tags, 12)
- assert.Equal(suite.T(), "testing123", tags[0])
- assert.Equal(suite.T(), "also", tags[1])
- assert.Equal(suite.T(), "thisshouldwork", tags[2])
- assert.Equal(suite.T(), "dupe", tags[3])
- assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4])
- assert.Equal(suite.T(), "111111", tags[5])
- assert.Equal(suite.T(), "alimentación", tags[6])
- assert.Equal(suite.T(), "saúde", tags[7])
- assert.Equal(suite.T(), "lävistää", tags[8])
- assert.Equal(suite.T(), "ö", tags[9])
- assert.Equal(suite.T(), "네", tags[10])
- assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[11])
-
- statusText = `#올빼미 hej`
- tags = util.DeriveHashtagsFromText(statusText)
- assert.Equal(suite.T(), "올빼미", tags[0])
-}
-
-func (suite *StatusTestSuite) TestHashtagSpansOK() {
- statusText := `#0 #3 #8aa`
-
- spans := util.FindHashtagSpansInText(statusText)
- assert.Equal(suite.T(), 0, spans[0].First)
- assert.Equal(suite.T(), 2, spans[0].Second)
- assert.Equal(suite.T(), 3, spans[1].First)
- assert.Equal(suite.T(), 5, spans[1].Second)
- assert.Equal(suite.T(), 8, spans[2].First)
- assert.Equal(suite.T(), 12, spans[2].Second)
-}
-
-func (suite *StatusTestSuite) TestDeriveEmojiOK() {
- statusText := `:test: :another:
-
-Here's some normal text with an :emoji: at the end
-
-:spaces shouldnt work:
-
-:emoji1::emoji2:
-
-:anotheremoji:emoji2:
-:anotheremoji::anotheremoji::anotheremoji::anotheremoji:
-:underscores_ok_too:
-`
-
- tags := util.DeriveEmojisFromText(statusText)
- assert.Len(suite.T(), tags, 7)
- assert.Equal(suite.T(), "test", tags[0])
- assert.Equal(suite.T(), "another", tags[1])
- assert.Equal(suite.T(), "emoji", tags[2])
- assert.Equal(suite.T(), "emoji1", tags[3])
- assert.Equal(suite.T(), "emoji2", tags[4])
- assert.Equal(suite.T(), "anotheremoji", tags[5])
- assert.Equal(suite.T(), "underscores_ok_too", tags[6])
-}
-
-func (suite *StatusTestSuite) TestDeriveMultiple() {
- statusText := `Another test @foss_satan@fossbros-anonymous.io
-
- #HashTag
-
- Text`
-
- ms := util.DeriveMentionNamesFromText(statusText)
- hs := util.DeriveHashtagsFromText(statusText)
- es := util.DeriveEmojisFromText(statusText)
-
- assert.Len(suite.T(), ms, 1)
- assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", ms[0])
-
- assert.Len(suite.T(), hs, 1)
- assert.Contains(suite.T(), hs, "HashTag")
-
- assert.Len(suite.T(), es, 0)
-}
-
-func TestStatusTestSuite(t *testing.T) {
- suite.Run(t, new(StatusTestSuite))
-}