From 5668ce1ec701ed12eb099020e8a322de08e6f810 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 26 May 2022 11:37:13 +0200 Subject: [PATCH] [bugfix] Fix HTML escaping in instance title (#607) * move caption sanitization -> sanitize.go * use sanitizeplaintext rather than removehtml * rename sanitizecaption to sanitizeplaintext * avoid removing html twice from statuses * unexport remoteHTML it's no longer used outside the text package so this makes it less confusing * test instance PATCH --- internal/api/client/instance/instance_test.go | 126 +++++++++++++++++ .../api/client/instance/instancepatch_test.go | 130 ++++++++++++++++++ internal/processing/account/create.go | 2 +- internal/processing/account/update.go | 2 +- .../processing/admin/createdomainblock.go | 4 +- internal/processing/instance.go | 2 +- internal/processing/media/update.go | 2 +- internal/processing/status/create.go | 2 +- internal/processing/status/util.go | 8 +- internal/text/caption.go | 29 ---- internal/text/caption_test.go | 82 ----------- internal/text/plain.go | 2 +- internal/text/removehtml_test.go | 57 ++++++++ internal/text/sanitize.go | 16 ++- internal/text/sanitize_test.go | 68 ++++++--- 15 files changed, 381 insertions(+), 151 deletions(-) create mode 100644 internal/api/client/instance/instance_test.go create mode 100644 internal/api/client/instance/instancepatch_test.go delete mode 100644 internal/text/caption.go delete mode 100644 internal/text/caption_test.go create mode 100644 internal/text/removehtml_test.go diff --git a/internal/api/client/instance/instance_test.go b/internal/api/client/instance/instance_test.go new file mode 100644 index 000000000..a1fe1f17c --- /dev/null +++ b/internal/api/client/instance/instance_test.go @@ -0,0 +1,126 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero 
General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package instance_test + +import ( + "bytes" + "fmt" + "net/http" + "net/http/httptest" + + "codeberg.org/gruf/go-store/kv" + "github.com/gin-gonic/gin" + "github.com/spf13/viper" + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/api/client/instance" + "github.com/superseriousbusiness/gotosocial/internal/concurrency" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/email" + "github.com/superseriousbusiness/gotosocial/internal/federation" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/messages" + "github.com/superseriousbusiness/gotosocial/internal/oauth" + "github.com/superseriousbusiness/gotosocial/internal/processing" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type InstanceStandardTestSuite struct { + // standard suite interfaces + suite.Suite + db db.DB + storage *kv.KVStore + mediaManager media.Manager + federator federation.Federator + processor processing.Processor + emailSender email.Sender + sentEmails map[string]string + + // standard suite models + testTokens map[string]*gtsmodel.Token + testClients map[string]*gtsmodel.Client + testApplications map[string]*gtsmodel.Application + testUsers map[string]*gtsmodel.User + 
testAccounts map[string]*gtsmodel.Account + testAttachments map[string]*gtsmodel.MediaAttachment + testStatuses map[string]*gtsmodel.Status + + // module being tested + instanceModule *instance.Module +} + +func (suite *InstanceStandardTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() +} + +func (suite *InstanceStandardTestSuite) SetupTest() { + testrig.InitTestConfig() + testrig.InitTestLog() + + fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1) + clientWorker := concurrency.NewWorkerPool[messages.FromClientAPI](-1, -1) + + suite.db = testrig.NewTestDB() + suite.storage = testrig.NewTestStorage() + suite.mediaManager = testrig.NewTestMediaManager(suite.db, suite.storage) + suite.federator = testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil), suite.db, fedWorker), suite.storage, suite.mediaManager, fedWorker) + suite.sentEmails = make(map[string]string) + suite.emailSender = testrig.NewEmailSender("../../../../web/template/", suite.sentEmails) + suite.processor = testrig.NewTestProcessor(suite.db, suite.storage, suite.federator, suite.emailSender, suite.mediaManager, clientWorker, fedWorker) + suite.instanceModule = instance.New(suite.processor).(*instance.Module) + testrig.StandardDBSetup(suite.db, nil) + testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media") +} + +func (suite *InstanceStandardTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) + testrig.StandardStorageTeardown(suite.storage) +} + +func (suite *InstanceStandardTestSuite) newContext(recorder *httptest.ResponseRecorder, requestMethod string, requestBody []byte, requestPath string, 
bodyContentType string) *gin.Context { + ctx, _ := gin.CreateTestContext(recorder) + + ctx.Set(oauth.SessionAuthorizedAccount, suite.testAccounts["admin_account"]) + ctx.Set(oauth.SessionAuthorizedToken, oauth.DBTokenToToken(suite.testTokens["admin_account"])) + ctx.Set(oauth.SessionAuthorizedApplication, suite.testApplications["admin_account"]) + ctx.Set(oauth.SessionAuthorizedUser, suite.testUsers["admin_account"]) + + protocol := viper.GetString(config.Keys.Protocol) + host := viper.GetString(config.Keys.Host) + + baseURI := fmt.Sprintf("%s://%s", protocol, host) + requestURI := fmt.Sprintf("%s/%s", baseURI, requestPath) + + ctx.Request = httptest.NewRequest(http.MethodPatch, requestURI, bytes.NewReader(requestBody)) // the endpoint we're hitting + + if bodyContentType != "" { + ctx.Request.Header.Set("Content-Type", bodyContentType) + } + + ctx.Request.Header.Set("accept", "application/json") + + return ctx +} diff --git a/internal/api/client/instance/instancepatch_test.go b/internal/api/client/instance/instancepatch_test.go new file mode 100644 index 000000000..5577cbbcc --- /dev/null +++ b/internal/api/client/instance/instancepatch_test.go @@ -0,0 +1,130 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
+*/ + +package instance_test + +import ( + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/api/client/instance" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type InstancePatchTestSuite struct { + InstanceStandardTestSuite +} + +func (suite *InstancePatchTestSuite) TestInstancePatch1() { + requestBody, w, err := testrig.CreateMultipartFormData( + "", "", + map[string]string{ + "title": "Example Instance", + "contact_username": "admin", + "contact_email": "someone@example.org", + }) + if err != nil { + panic(err) + } + bodyBytes := requestBody.Bytes() + + // set up the request + recorder := httptest.NewRecorder() + ctx := suite.newContext(recorder, http.MethodPatch, bodyBytes, instance.InstanceInformationPath, w.FormDataContentType()) + + // call the handler + suite.instanceModule.InstanceUpdatePATCHHandler(ctx) + + // we should have OK because our request was valid + suite.Equal(http.StatusOK, recorder.Code) + + result := recorder.Result() + defer result.Body.Close() + + b, err := io.ReadAll(result.Body) + suite.NoError(err) + + suite.Equal(`{"uri":"http://localhost:8080","title":"Example Instance","description":"","short_description":"","email":"someone@example.org","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","contact_account":{"id":"01F8MH17FWEB39HZJ76B6VXSKF","username":"admin","acct":"admin","display_name":"","locked":false,"bot":false,"created_at":"2022-05-17T13:10:59Z","note":"","url":"http://localhost:8080/@admin","avatar":"","avatar_static":"","header":"","header_static":"","followers_count":1,"following_count":1,"statuses_count":4,"last_status_at":"2021-10-20T10:41:37Z","emojis":[],"fields":[]},"max_toot_chars":5000}`, string(b)) +} + +func (suite *InstancePatchTestSuite) 
TestInstancePatch2() { + requestBody, w, err := testrig.CreateMultipartFormData( + "", "", + map[string]string{ + "title": "

<p>Geoff's Instance</p>

", + }) + if err != nil { + panic(err) + } + bodyBytes := requestBody.Bytes() + + // set up the request + recorder := httptest.NewRecorder() + ctx := suite.newContext(recorder, http.MethodPatch, bodyBytes, instance.InstanceInformationPath, w.FormDataContentType()) + + // call the handler + suite.instanceModule.InstanceUpdatePATCHHandler(ctx) + + // we should have OK because our request was valid + suite.Equal(http.StatusOK, recorder.Code) + + result := recorder.Result() + defer result.Body.Close() + + b, err := io.ReadAll(result.Body) + suite.NoError(err) + + suite.Equal(`{"uri":"http://localhost:8080","title":"Geoff's Instance","description":"","short_description":"","email":"","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","max_toot_chars":5000}`, string(b)) +} + +func (suite *InstancePatchTestSuite) TestInstancePatch3() { + requestBody, w, err := testrig.CreateMultipartFormData( + "", "", + map[string]string{ + "short_description": "

<p>This is some html, which is <em>allowed</em> in short descriptions.</p>

", + }) + if err != nil { + panic(err) + } + bodyBytes := requestBody.Bytes() + + // set up the request + recorder := httptest.NewRecorder() + ctx := suite.newContext(recorder, http.MethodPatch, bodyBytes, instance.InstanceInformationPath, w.FormDataContentType()) + + // call the handler + suite.instanceModule.InstanceUpdatePATCHHandler(ctx) + + // we should have OK because our request was valid + suite.Equal(http.StatusOK, recorder.Code) + + result := recorder.Result() + defer result.Body.Close() + + b, err := io.ReadAll(result.Body) + suite.NoError(err) + + suite.Equal(`{"uri":"http://localhost:8080","title":"localhost:8080","description":"","short_description":"\u003cp\u003eThis is some html, which is \u003cem\u003eallowed\u003c/em\u003e in short descriptions.\u003c/p\u003e","email":"","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","max_toot_chars":5000}`, string(b)) +} + +func TestInstancePatchTestSuite(t *testing.T) { + suite.Run(t, &InstancePatchTestSuite{}) +} diff --git a/internal/processing/account/create.go b/internal/processing/account/create.go index bbca11fae..61c4f95ef 100644 --- a/internal/processing/account/create.go +++ b/internal/processing/account/create.go @@ -64,7 +64,7 @@ func (p *processor) Create(ctx context.Context, applicationToken oauth2.TokenInf } l.Trace("creating new username and account") - user, err := p.db.NewSignup(ctx, form.Username, text.RemoveHTML(reason), approvalRequired, form.Email, form.Password, form.IP, form.Locale, application.ID, false, false) + user, err := p.db.NewSignup(ctx, form.Username, text.SanitizePlaintext(reason), approvalRequired, form.Email, form.Password, form.IP, form.Locale, application.ID, false, false) if err != nil { return nil, fmt.Errorf("error creating new signup in the database: %s", err) } diff --git a/internal/processing/account/update.go 
b/internal/processing/account/update.go index 3d6bbae2a..5fae6e73b 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -53,7 +53,7 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form if err := validate.DisplayName(*form.DisplayName); err != nil { return nil, err } - account.DisplayName = text.RemoveHTML(*form.DisplayName) + account.DisplayName = text.SanitizePlaintext(*form.DisplayName) } if form.Note != nil { diff --git a/internal/processing/admin/createdomainblock.go b/internal/processing/admin/createdomainblock.go index 3cfaabce0..1c641950c 100644 --- a/internal/processing/admin/createdomainblock.go +++ b/internal/processing/admin/createdomainblock.go @@ -59,8 +59,8 @@ func (p *processor) DomainBlockCreate(ctx context.Context, account *gtsmodel.Acc ID: blockID, Domain: domain, CreatedByAccountID: account.ID, - PrivateComment: text.RemoveHTML(privateComment), - PublicComment: text.RemoveHTML(publicComment), + PrivateComment: text.SanitizePlaintext(privateComment), + PublicComment: text.SanitizePlaintext(publicComment), Obfuscate: obfuscate, SubscriptionID: subscriptionID, } diff --git a/internal/processing/instance.go b/internal/processing/instance.go index 11f966adb..f4fe2ca79 100644 --- a/internal/processing/instance.go +++ b/internal/processing/instance.go @@ -65,7 +65,7 @@ func (p *processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe if err := validate.SiteTitle(*form.Title); err != nil { return nil, gtserror.NewErrorBadRequest(err, fmt.Sprintf("site title invalid: %s", err)) } - i.Title = text.RemoveHTML(*form.Title) // don't allow html in site title + i.Title = text.SanitizePlaintext(*form.Title) // don't allow html in site title } // validate & update site contact account if it's set on the form diff --git a/internal/processing/media/update.go b/internal/processing/media/update.go index b275b9ffe..116588a48 100644 --- a/internal/processing/media/update.go +++ 
b/internal/processing/media/update.go @@ -45,7 +45,7 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, media } if form.Description != nil { - attachment.Description = text.SanitizeCaption(*form.Description) + attachment.Description = text.SanitizePlaintext(*form.Description) if err := p.db.UpdateByPrimaryKey(ctx, attachment); err != nil { return nil, gtserror.NewErrorInternalError(fmt.Errorf("database error updating description: %s", err)) } diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go index add8a5bc6..e5f6e9647 100644 --- a/internal/processing/status/create.go +++ b/internal/processing/status/create.go @@ -49,7 +49,7 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli Local: true, AccountID: account.ID, AccountURI: account.URI, - ContentWarning: text.SanitizeCaption(form.SpoilerText), + ContentWarning: text.SanitizePlaintext(form.SpoilerText), ActivityStreamsType: ap.ObjectNote, Sensitive: form.Sensitive, Language: form.Language, diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go index 190d88f1b..df645189e 100644 --- a/internal/processing/status/util.go +++ b/internal/processing/status/util.go @@ -27,7 +27,6 @@ import ( apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/text" "github.com/superseriousbusiness/gotosocial/internal/util" ) @@ -269,16 +268,13 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS form.Format = apimodel.StatusFormatDefault } - // remove any existing html from the status - content := text.RemoveHTML(form.Status) - // parse content out of the status depending on what format has been submitted var formatted string switch form.Format { case apimodel.StatusFormatPlain: - formatted = 
p.formatter.FromPlain(ctx, content, status.Mentions, status.Tags) + formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags) case apimodel.StatusFormatMarkdown: - formatted = p.formatter.FromMarkdown(ctx, content, status.Mentions, status.Tags) + formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags) default: return fmt.Errorf("format %s not recognised as a valid status format", form.Format) } diff --git a/internal/text/caption.go b/internal/text/caption.go deleted file mode 100644 index c3c86b0b1..000000000 --- a/internal/text/caption.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -package text - -// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization. -// It returns plain text rather than HTML, in contrast to other functions in this package. 
-func SanitizeCaption(in string) string { - content := preformat(in) - - content = RemoveHTML(content) - - return postformat(content) -} diff --git a/internal/text/caption_test.go b/internal/text/caption_test.go deleted file mode 100644 index f1337df09..000000000 --- a/internal/text/caption_test.go +++ /dev/null @@ -1,82 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . 
-*/ - -package text_test - -import ( - "testing" - - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/text" -) - -type CaptionTestSuite struct { - suite.Suite -} - -func (suite *CaptionTestSuite) TestSanitizeCaption1() { - dodgyCaption := "this is just a normal caption ;)" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("this is just a normal caption ;)", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption2() { - dodgyCaption := "here's a LOUD caption" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here's a LOUD caption", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption3() { - dodgyCaption := "" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption4() { - dodgyCaption := ` - - -here is -a multi line -caption -with some newlines - - - -` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption5() { - // html-escaped: " hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption6() { - // html-encoded: " hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func TestCaptionTestSuite(t *testing.T) { - suite.Run(t, new(CaptionTestSuite)) -} diff --git a/internal/text/plain.go b/internal/text/plain.go index 4ef3b3715..bc10d1b67 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts content := preformat(plain) // sanitize any html elements - content = 
RemoveHTML(content) + content = removeHTML(content) // format links nicely content = f.ReplaceLinks(ctx, content) diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go new file mode 100644 index 000000000..0029b45a5 --- /dev/null +++ b/internal/text/removehtml_test.go @@ -0,0 +1,57 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +const ( + test_removeHTML = `

Another test @foss_satan

#Hashtag

Text

` + test_removedHTML = `Another test @foss_satan#HashtagText` + test_withEscapedLiteral = `it\u0026amp;#39;s its it is` + test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` + test_withEscaped = "it\u0026amp;#39;s its it is" + test_withEscapedExpected = "it&#39;s its it is" +) + +type RemoveHTMLTestSuite struct { + suite.Suite +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() { + s := removeHTML(test_withEscapedLiteral) + suite.Equal(test_withEscapedLiteralExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() { + s := removeHTML(test_withEscaped) + suite.Equal(test_withEscapedExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestRemoveHTML() { + s := removeHTML(test_removeHTML) + suite.Equal(test_removedHTML, s) +} + +func TestRemoveHTMLTestSuite(t *testing.T) { + suite.Run(t, &RemoveHTMLTestSuite{}) +} diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 897dea34d..d4faabbb1 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). // Source: https://github.com/microcosm-cc/bluemonday#usage var strict *bluemonday.Policy = bluemonday.StrictPolicy() -// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements. +// removeHTML strictly removes *all* recognized HTML elements from the given string. +func removeHTML(in string) string { + return strict.Sanitize(in) +} + +// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. func SanitizeHTML(in string) string { return regular.Sanitize(in) } -// RemoveHTML removes all HTML from the given string. -func RemoveHTML(in string) string { - return strict.Sanitize(in) +// SanitizePlaintext runs text through basic sanitization. This removes +// any html elements that were in the string, and returns clean plaintext. 
+func SanitizePlaintext(in string) string { + content := preformat(in) + content = removeHTML(content) + return postformat(content) } diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go index 4270e2602..eea5daadb 100644 --- a/internal/text/sanitize_test.go +++ b/internal/text/sanitize_test.go @@ -26,17 +26,8 @@ import ( ) const ( - removeHTML = `

Another test @foss_satan

#Hashtag

Text

` - removedHTML = `Another test @foss_satan#HashtagText` - - sanitizeHTML = `here's some naughty html: !!!` - sanitizedHTML = `here's some naughty html: !!!` - - withEscapedLiteral = `it\u0026amp;#39;s its it is` - withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` - withEscaped = "it\u0026amp;#39;s its it is" - withEscapedExpected = "it&#39;s its it is" - + sanitizeHTML = `here's some naughty html: !!!` + sanitizedHTML = `here's some naughty html: !!!` sanitizeOutgoing = `

gotta test some fucking ''''''''' marks

` sanitizedOutgoing = `

gotta test some fucking ''''''''' marks

` ) @@ -45,11 +36,6 @@ type SanitizeTestSuite struct { suite.Suite } -func (suite *SanitizeTestSuite) TestRemoveHTML() { - s := text.RemoveHTML(removeHTML) - suite.Equal(removedHTML, s) -} - func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { s := text.SanitizeHTML(sanitizeOutgoing) suite.Equal(sanitizedOutgoing, s) @@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() { suite.Equal(sanitizedHTML, s) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { - s := text.RemoveHTML(withEscapedLiteral) - suite.Equal(withEscapedLiteralExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption1() { + dodgyCaption := "this is just a normal caption ;)" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("this is just a normal caption ;)", sanitized) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { - s := text.RemoveHTML(withEscaped) - suite.Equal(withEscapedExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption2() { + dodgyCaption := "here's a LOUD caption" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here's a LOUD caption", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption3() { + dodgyCaption := "" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption4() { + dodgyCaption := ` + + +here is +a multi line +caption +with some newlines + + + +` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption5() { + // html-escaped: " hello world" + dodgyCaption := `<script>console.log('aha!')</script> hello world` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption6() { + // html-encoded: " hello world" + dodgyCaption := 
`<script>console.log('aha!')</script> hello world` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) } func TestSanitizeTestSuite(t *testing.T) {