From ce190d867ca126001a1c0417b00810fc03c0b3ba Mon Sep 17 00:00:00 2001 From: Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 16 Aug 2021 19:17:56 +0200 Subject: [PATCH] Text/status parsing fixes (#141) * aaaaaa * vendor minify * update + test markdown parsing --- CONTRIBUTING.md | 4 +- README.md | 1 + dockerbuild.sh | 2 +- dockerpush.sh | 2 +- go.mod | 1 + go.sum | 10 + .../api/client/status/statuscreate_test.go | 6 +- internal/processing/status/util_test.go | 6 +- internal/text/common.go | 33 +- internal/text/markdown.go | 11 +- internal/text/markdown_test.go | 116 ++ internal/text/minify.go | 39 + internal/text/plain.go | 7 + internal/text/plain_test.go | 8 +- internal/text/sanitize.go | 17 +- internal/text/sanitize_test.go | 75 + .../tdewolff/minify/v2/.gitattributes | 2 + .../github.com/tdewolff/minify/v2/.gitignore | 14 + .../tdewolff/minify/v2/.golangci.yml | 16 + .../github.com/tdewolff/minify/v2/Dockerfile | 14 + vendor/github.com/tdewolff/minify/v2/LICENSE | 22 + vendor/github.com/tdewolff/minify/v2/Makefile | 56 + .../github.com/tdewolff/minify/v2/README.md | 693 +++++++++ .../github.com/tdewolff/minify/v2/common.go | 515 +++++++ vendor/github.com/tdewolff/minify/v2/go.mod | 14 + vendor/github.com/tdewolff/minify/v2/go.sum | 17 + .../tdewolff/minify/v2/html/buffer.go | 137 ++ .../tdewolff/minify/v2/html/hash.go | 543 +++++++ .../tdewolff/minify/v2/html/html.go | 511 +++++++ .../tdewolff/minify/v2/html/table.go | 1346 +++++++++++++++++ .../github.com/tdewolff/minify/v2/minify.go | 345 +++++ .../tdewolff/parse/v2/.gitattributes | 1 + .../github.com/tdewolff/parse/v2/.gitignore | 5 + .../tdewolff/parse/v2/.golangci.yml | 16 + .../github.com/tdewolff/parse/v2/LICENSE.md | 22 + vendor/github.com/tdewolff/parse/v2/README.md | 64 + .../tdewolff/parse/v2/buffer/buffer.go | 12 + .../tdewolff/parse/v2/buffer/lexer.go | 164 ++ .../tdewolff/parse/v2/buffer/reader.go | 44 + .../tdewolff/parse/v2/buffer/streamlexer.go | 223 +++ 
.../tdewolff/parse/v2/buffer/writer.go | 41 + vendor/github.com/tdewolff/parse/v2/common.go | 237 +++ vendor/github.com/tdewolff/parse/v2/error.go | 47 + vendor/github.com/tdewolff/parse/v2/go.mod | 5 + vendor/github.com/tdewolff/parse/v2/go.sum | 2 + .../tdewolff/parse/v2/html/README.md | 98 ++ .../github.com/tdewolff/parse/v2/html/hash.go | 81 + .../github.com/tdewolff/parse/v2/html/lex.go | 493 ++++++ .../github.com/tdewolff/parse/v2/html/util.go | 103 ++ vendor/github.com/tdewolff/parse/v2/input.go | 173 +++ .../github.com/tdewolff/parse/v2/position.go | 95 ++ .../tdewolff/parse/v2/strconv/float.go | 257 ++++ .../tdewolff/parse/v2/strconv/int.go | 88 ++ .../tdewolff/parse/v2/strconv/price.go | 83 + vendor/github.com/tdewolff/parse/v2/util.go | 489 ++++++ vendor/modules.txt | 9 + 56 files changed, 7390 insertions(+), 45 deletions(-) create mode 100644 internal/text/markdown_test.go create mode 100644 internal/text/minify.go create mode 100644 internal/text/sanitize_test.go create mode 100644 vendor/github.com/tdewolff/minify/v2/.gitattributes create mode 100644 vendor/github.com/tdewolff/minify/v2/.gitignore create mode 100644 vendor/github.com/tdewolff/minify/v2/.golangci.yml create mode 100644 vendor/github.com/tdewolff/minify/v2/Dockerfile create mode 100644 vendor/github.com/tdewolff/minify/v2/LICENSE create mode 100644 vendor/github.com/tdewolff/minify/v2/Makefile create mode 100644 vendor/github.com/tdewolff/minify/v2/README.md create mode 100644 vendor/github.com/tdewolff/minify/v2/common.go create mode 100644 vendor/github.com/tdewolff/minify/v2/go.mod create mode 100644 vendor/github.com/tdewolff/minify/v2/go.sum create mode 100644 vendor/github.com/tdewolff/minify/v2/html/buffer.go create mode 100644 vendor/github.com/tdewolff/minify/v2/html/hash.go create mode 100644 vendor/github.com/tdewolff/minify/v2/html/html.go create mode 100644 vendor/github.com/tdewolff/minify/v2/html/table.go create mode 100644 vendor/github.com/tdewolff/minify/v2/minify.go 
create mode 100644 vendor/github.com/tdewolff/parse/v2/.gitattributes create mode 100644 vendor/github.com/tdewolff/parse/v2/.gitignore create mode 100644 vendor/github.com/tdewolff/parse/v2/.golangci.yml create mode 100644 vendor/github.com/tdewolff/parse/v2/LICENSE.md create mode 100644 vendor/github.com/tdewolff/parse/v2/README.md create mode 100644 vendor/github.com/tdewolff/parse/v2/buffer/buffer.go create mode 100644 vendor/github.com/tdewolff/parse/v2/buffer/lexer.go create mode 100644 vendor/github.com/tdewolff/parse/v2/buffer/reader.go create mode 100644 vendor/github.com/tdewolff/parse/v2/buffer/streamlexer.go create mode 100644 vendor/github.com/tdewolff/parse/v2/buffer/writer.go create mode 100644 vendor/github.com/tdewolff/parse/v2/common.go create mode 100644 vendor/github.com/tdewolff/parse/v2/error.go create mode 100644 vendor/github.com/tdewolff/parse/v2/go.mod create mode 100644 vendor/github.com/tdewolff/parse/v2/go.sum create mode 100644 vendor/github.com/tdewolff/parse/v2/html/README.md create mode 100644 vendor/github.com/tdewolff/parse/v2/html/hash.go create mode 100644 vendor/github.com/tdewolff/parse/v2/html/lex.go create mode 100644 vendor/github.com/tdewolff/parse/v2/html/util.go create mode 100644 vendor/github.com/tdewolff/parse/v2/input.go create mode 100644 vendor/github.com/tdewolff/parse/v2/position.go create mode 100644 vendor/github.com/tdewolff/parse/v2/strconv/float.go create mode 100644 vendor/github.com/tdewolff/parse/v2/strconv/int.go create mode 100644 vendor/github.com/tdewolff/parse/v2/strconv/price.go create mode 100644 vendor/github.com/tdewolff/parse/v2/util.go diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 97220f221..8ccd7e7dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -139,7 +139,7 @@ We use [golangci-lint](https://golangci-lint.run/) for linting. 
To run this loca Then, you can run the linter with: ```bash -golangci-lint run +golangci-lint run --tests=false ``` Note that this linter also runs as a job on the Github repo, so if you make a PR that doesn't pass the linter, it will be rejected. As such, it's good practice to run the linter locally before pushing or opening a PR. @@ -155,7 +155,7 @@ go get -u github.com/golang/lint/golint To run the linter, use: ```bash -golint ./... +golint ./internal/... ``` Then make sure to run `go fmt ./...` to update whitespace and other opinionated formatting. diff --git a/README.md b/README.md index f336b440a..5a64dfa4f 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ The following libraries and frameworks are used by GoToSocial, with gratitude * [superseriousbusiness/exifremove](https://github.com/superseriousbusiness/exifremove) forked from [scottleedavis/go-exif-remove](https://github.com/scottleedavis/go-exif-remove); EXIF data removal. [MIT License](https://spdx.org/licenses/MIT.html). * [superseriousbusiness/oauth2](https://github.com/superseriousbusiness/oauth2) forked from [go-oauth2/oauth2](https://github.com/go-oauth2/oauth2); oauth server framework and token handling. [MIT License](https://spdx.org/licenses/MIT.html). * [go-swagger/go-swagger](https://github.com/go-swagger/go-swagger); Swagger OpenAPI spec generation. [Apache-2.0 License](https://spdx.org/licenses/Apache-2.0.html). +* [tdewolff/minify](https://github.com/tdewolff/minify); HTML minification. [MIT License](https://spdx.org/licenses/MIT.html). * [urfave/cli](https://github.com/urfave/cli); command-line interface framework. [MIT License](https://spdx.org/licenses/MIT.html). * [wagslane/go-password-validator](https://github.com/wagslane/go-password-validator); password strength validation. [MIT License](https://spdx.org/licenses/MIT.html). 
diff --git a/dockerbuild.sh b/dockerbuild.sh index 87893c65c..e2628ce55 100755 --- a/dockerbuild.sh +++ b/dockerbuild.sh @@ -1,3 +1,3 @@ #!/bin/bash -docker build -t "superseriousbusiness/gotosocial:$(cat version)" . +docker build -t "superseriousbusiness/gotosocial:$(git rev-parse --abbrev-ref HEAD)" . diff --git a/dockerpush.sh b/dockerpush.sh index 8377f8e4a..188b3cae0 100755 --- a/dockerpush.sh +++ b/dockerpush.sh @@ -1,3 +1,3 @@ #!/bin/bash -docker push "superseriousbusiness/gotosocial:$(cat version)" +docker push "superseriousbusiness/gotosocial:$(git rev-parse --abbrev-ref HEAD)" diff --git a/go.mod b/go.mod index 48febd4a6..10597a06b 100644 --- a/go.mod +++ b/go.mod @@ -44,6 +44,7 @@ require ( github.com/stretchr/testify v1.7.0 github.com/superseriousbusiness/exifremove v0.0.0-20210330092427-6acd27eac203 github.com/superseriousbusiness/oauth2/v4 v4.3.0-SSB + github.com/tdewolff/minify/v2 v2.9.21 github.com/tidwall/buntdb v1.2.4 // indirect github.com/urfave/cli/v2 v2.3.0 github.com/vmihailenco/msgpack/v5 v5.3.4 // indirect diff --git a/go.sum b/go.sum index e3599fa9d..4d6968ada 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,7 @@ github.com/bradleypeabody/gorilla-sessions-memcache v0.0.0-20181103040241-659414 github.com/buckket/go-blurhash v1.1.0 h1:X5M6r0LIvwdvKiUtiNcRL2YlmOfMzYobI3VCKCZc9Do= github.com/buckket/go-blurhash v1.1.0/go.mod h1:aT2iqo5W9vu9GpyoLErKfTHwgODsZp3bQfXjXJUxNb8= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -92,6 +93,7 @@ 
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3 github.com/dsoprea/go-utility v0.0.0-20200717064901-2fccff4aa15e h1:ojqYA1mU6LuRm8XzrVOvyfb000y59cbUcu6Wt8sFSAs= github.com/dsoprea/go-utility v0.0.0-20200717064901-2fccff4aa15e/go.mod h1:KVK+/Hul09ujXAGq+42UBgCTnXkiJZRnLYdURGjQUwo= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -266,6 +268,7 @@ github.com/leodido/go-urn v1.1.0/go.mod h1:+cyI34gQWZcE1eQU7NVgKkkzdXDQHr1dBMtdA github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= +github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs= github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -323,6 +326,7 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykE github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= github.com/smartystreets/goconvey v1.6.4/go.mod 
h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= @@ -335,6 +339,12 @@ github.com/superseriousbusiness/exifremove v0.0.0-20210330092427-6acd27eac203 h1 github.com/superseriousbusiness/exifremove v0.0.0-20210330092427-6acd27eac203/go.mod h1:0Xw5cYMOYpgaWs+OOSx41ugycl2qvKTi9tlMMcZhFyY= github.com/superseriousbusiness/oauth2/v4 v4.3.0-SSB h1:dzMVC+oPTxFL5cv29egBrftlqIWPXQ6/VzkuoySwgm4= github.com/superseriousbusiness/oauth2/v4 v4.3.0-SSB/go.mod h1:8p0a/BEN9hhsGzE3tPaFFlIZgxAaLyLN5KY0bPg9ZBc= +github.com/tdewolff/minify/v2 v2.9.21 h1:nO4s1PEMy7aRjlIlbr3Jgr+bJby8QYuifa2Vs2f9lh4= +github.com/tdewolff/minify/v2 v2.9.21/go.mod h1:PoDBts2L7sCwUT28vTAlozGeD6qxjrrihtin4bR/RMM= +github.com/tdewolff/parse/v2 v2.5.19 h1:Kjaj3KQOx/4elIxlBSglus4E2oMfdROphvbq2b+OBZ0= +github.com/tdewolff/parse/v2 v2.5.19/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho= +github.com/tdewolff/test v1.0.6 h1:76mzYJQ83Op284kMT+63iCNCI7NEERsIN8dLM+RiKr4= +github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8= github.com/tidwall/btree v0.5.0 h1:IBfCtOj4uOMQcodv3wzYVo0zPqSJObm71mE039/dlXY= github.com/tidwall/btree v0.5.0/go.mod h1:TzIRzen6yHbibdSfK6t8QimqbUnoxUSrZfeW7Uob0q4= diff --git a/internal/api/client/status/statuscreate_test.go b/internal/api/client/status/statuscreate_test.go index c175a54ec..33912397e 100644 --- a/internal/api/client/status/statuscreate_test.go +++ b/internal/api/client/status/statuscreate_test.go @@ -165,7 +165,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() { err = 
json.Unmarshal(b, statusReply) assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "\u003cp\u003e\u003ca href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003etest\u003c/span\u003e\u003c/a\u003e alright, should be able to post \u003ca href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003elinks\u003c/span\u003e\u003c/a\u003e with fragments in them now, let\u0026#39;s see........\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\"\u003edocs.gotosocial.org/en/latest/user_guide/posts/#links\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003egotosocial\u003c/span\u003e\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e(tobi remember to pull the docker image challenge)\u003c/p\u003e", statusReply.Content) + assert.Equal(suite.T(), "

#test alright, should be able to post #links with fragments in them now, let's see........

docs.gotosocial.org/en/latest/user_guide/posts/#links

#gotosocial

(tobi remember to pull the docker image challenge)

", statusReply.Content) } func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { @@ -198,7 +198,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "", statusReply.SpoilerText) - assert.Equal(suite.T(), "

here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:

", statusReply.Content) + assert.Equal(suite.T(), "

here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:

", statusReply.Content) assert.Len(suite.T(), statusReply.Emojis, 1) mastoEmoji := statusReply.Emojis[0] @@ -314,7 +314,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "", statusResponse.SpoilerText) - assert.Equal(suite.T(), "

here's an image attachment

", statusResponse.Content) + assert.Equal(suite.T(), "

here's an image attachment

", statusResponse.Content) assert.False(suite.T(), statusResponse.Sensitive) assert.Equal(suite.T(), model.VisibilityPublic, statusResponse.Visibility) diff --git a/internal/processing/status/util_test.go b/internal/processing/status/util_test.go index 9a4bd6515..4bf508848 100644 --- a/internal/processing/status/util_test.go +++ b/internal/processing/status/util_test.go @@ -17,8 +17,8 @@ const statusText1 = `Another test @foss_satan@fossbros-anonymous.io #Hashtag Text` -const statusText1ExpectedFull = `

Another test @foss_satan

#Hashtag

Text

` -const statusText1ExpectedPartial = `

Another test @foss_satan

#Hashtag

Text

` +const statusText1ExpectedFull = "

Another test @foss_satan

#Hashtag

Text

" +const statusText1ExpectedPartial = "

Another test @foss_satan

#Hashtag

Text

" const statusText2 = `Another test @foss_satan@fossbros-anonymous.io @@ -26,7 +26,7 @@ const statusText2 = `Another test @foss_satan@fossbros-anonymous.io #hashTAG` -const status2TextExpectedFull = `

Another test @foss_satan

#Hashtag

#hashTAG

` +const status2TextExpectedFull = "

Another test @foss_satan

#Hashtag

#hashTAG

" type UtilTestSuite struct { StatusStandardTestSuite diff --git a/internal/text/common.go b/internal/text/common.go index 4f0bad9dc..f6a5ca5f5 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -20,6 +20,7 @@ package text import ( "fmt" + "html" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -29,23 +30,33 @@ import ( // preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. func preformat(in string) string { // do some preformatting of the text - // 1. Trim all the whitespace - s := strings.TrimSpace(in) + + // 1. unescape everything that might be html escaped + s := html.UnescapeString(in) + + // 2. trim leading or trailing whitespace + s = strings.TrimSpace(s) return s } // postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace func postformat(in string) string { // do some postformatting of the text - // 1. sanitize html to remove any dodgy scripts or other disallowed elements - s := SanitizeOutgoing(in) - // 2. wrap the whole thing in a paragraph - s = fmt.Sprintf(`

%s

`, s) - // 3. remove any cheeky newlines - s = strings.ReplaceAll(s, "\n", "") - // 4. remove any whitespace added as a result of the formatting - s = strings.TrimSpace(s) - return s + + // 1. sanitize html to remove potentially dangerous elements + s := SanitizeHTML(in) + + // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again + s = html.UnescapeString(s) + + // 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc + mini, err := minifyHTML(s) + if err != nil { + // if the minify failed, just return what we have + return s + } + // return minified version of the html + return mini } func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string { diff --git a/internal/text/markdown.go b/internal/text/markdown.go index f9d12209a..5a7603615 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -23,21 +23,14 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) -var bfExtensions = blackfriday.NoIntraEmphasis | - blackfriday.FencedCode | - blackfriday.Autolink | - blackfriday.Strikethrough | - blackfriday.SpaceHeadings | - blackfriday.BackslashLineBreak - func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(md) // do the markdown parsing *first* - content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions))) + contentBytes := blackfriday.Run([]byte(content)) // format tags nicely - content = f.ReplaceTags(content, tags) + content = f.ReplaceTags(string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go new file mode 100644 index 000000000..432e9a4ec --- /dev/null +++ b/internal/text/markdown_test.go @@ -0,0 +1,116 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors 
admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/text" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +const ( + simpleMarkdown = `# Title + +Here's a simple text in markdown. + +Here's a [link](https://example.org).` + + simpleMarkdownExpected = "

Title

Here’s a simple text in markdown.

Here’s a link.

" + + withCodeBlockExpected = "

Title

Below is some JSON.

{\n  \"key\": \"value\",\n  \"another_key\": [\n    \"value1\",\n    \"value2\"\n  ]\n}\n

that was some JSON :)

" + + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "

Title

here’s a simple status that uses hashtag #Hashtag!

" +) + +var ( + withCodeBlock = `# Title + +Below is some JSON. + +` + "```" + `json +{ + "key": "value", + "another_key": [ + "value1", + "value2" + ] +} +` + "```" + ` + +that was some JSON :) +` +) + +type MarkdownTestSuite struct { + TextStandardTestSuite +} + +func (suite *MarkdownTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() + suite.testTags = testrig.NewTestTags() + suite.testMentions = testrig.NewTestMentions() +} + +func (suite *MarkdownTestSuite) SetupTest() { + suite.config = testrig.NewTestConfig() + suite.db = testrig.NewTestDB() + suite.log = testrig.NewTestLog() + suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log) + + testrig.StandardDBSetup(suite.db, suite.testAccounts) +} + +func (suite *MarkdownTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) +} + +func (suite *MarkdownTestSuite) TestParseSimple() { + s := suite.formatter.FromMarkdown(simpleMarkdown, nil, nil) + suite.Equal(simpleMarkdownExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { + fmt.Println(withCodeBlock) + s := suite.formatter.FromMarkdown(withCodeBlock, nil, nil) + suite.Equal(withCodeBlockExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithHashtag() { + foundTags := []*gtsmodel.Tag{ + suite.testTags["Hashtag"], + } + + s := suite.formatter.FromMarkdown(withHashtag, nil, foundTags) + suite.Equal(withHashtagExpected, s) +} + +func TestMarkdownTestSuite(t *testing.T) { + suite.Run(t, new(MarkdownTestSuite)) +} diff --git a/internal/text/minify.go b/internal/text/minify.go new file mode 100644 index 000000000..c6d7b9bc1 --- /dev/null +++ b/internal/text/minify.go @@ -0,0 +1,39 @@ +/* + 
GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text + +import ( + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" +) + +var m *minify.M + +// minifyHTML runs html through a minifier, reducing it in size. +func minifyHTML(in string) (string, error) { + if m == nil { + m = minify.New() + m.Add("text/html", &html.Minifier{ + KeepQuotes: true, + KeepEndTags: true, + KeepDocumentTags: true, + }) + } + return m.String("text/html", in) +} diff --git a/internal/text/plain.go b/internal/text/plain.go index 40fb6412f..a44e02c80 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -19,6 +19,7 @@ package text import ( + "fmt" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -27,6 +28,9 @@ import ( func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(plain) + // sanitize any html elements + content = RemoveHTML(content) + // format links nicely content = f.ReplaceLinks(content) @@ -39,5 +43,8 @@ func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags [ // replace newlines with breaks content = strings.ReplaceAll(content, "\n", "
") + // wrap the whole thing in a pee + content = fmt.Sprintf(`

%s

`, content) + return postformat(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 2f9eb3a29..33c95234c 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -33,15 +33,15 @@ const ( simple = "this is a plain and simple status" simpleExpected = "

this is a plain and simple status

" - withTag = "this is a simple status that uses hashtag #welcome!" - withTagExpected = "

this is a simple status that uses hashtag #welcome!

" + withTag = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "

here's a simple status that uses hashtag #welcome!

" moreComplex = `Another test @foss_satan@fossbros-anonymous.io #Hashtag Text` - moreComplexExpected = `

Another test @foss_satan

#Hashtag

Text

` + moreComplexFull = "

Another test @foss_satan

#Hashtag

Text

" ) type PlainTestSuite struct { @@ -102,7 +102,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() { fmt.Println(f) - assert.Equal(suite.T(), moreComplexExpected, f) + assert.Equal(suite.T(), moreComplexFull, f) } func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 365875d46..e1bc73559 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -19,6 +19,8 @@ package text import ( + "regexp" + "github.com/microcosm-cc/bluemonday" ) @@ -31,12 +33,11 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). RequireNoReferrerOnLinks(true). RequireNoFollowOnLinks(true). RequireCrossOriginAnonymous(true). - AddTargetBlankToFullyQualifiedLinks(true) - -// outgoing policy should be used on statuses we've already parsed and added our own elements etc to. It is less strict than regular. -var outgoing *bluemonday.Policy = regular. + AddTargetBlankToFullyQualifiedLinks(true). AllowAttrs("class", "href", "rel").OnElements("a"). - AllowAttrs("class").OnElements("span") + AllowAttrs("class").OnElements("span"). + AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code"). + SkipElementsContent("code", "pre") // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. // An example usage scenario would be blog post titles where HTML tags are not expected at all @@ -54,9 +55,3 @@ func SanitizeHTML(in string) string { func RemoveHTML(in string) string { return strict.Sanitize(in) } - -// SanitizeOutgoing cleans up HTML in the given string, allowing through only safe elements and elements that were added during the parsing process. -// This should be used on text that we've already converted into HTML, just to catch any weirdness. 
-func SanitizeOutgoing(in string) string { - return outgoing.Sanitize(in) -} diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go new file mode 100644 index 000000000..19a5f6a06 --- /dev/null +++ b/internal/text/sanitize_test.go @@ -0,0 +1,75 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text_test + +import ( + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/text" +) + +const ( + removeHTML = `

Another test @foss_satan

#Hashtag

Text

` + removedHTML = `Another test @foss_satan#HashtagText` + + sanitizeHTML = `here's some naughty html: !!!` + sanitizedHTML = `here's some naughty html: !!!` + + withEscapedLiteral = `it\u0026amp;#39;s its it is` + withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` + withEscaped = "it\u0026amp;#39;s its it is" + withEscapedExpected = "it&#39;s its it is" + + sanitizeOutgoing = `

gotta test some fucking ''''''''' marks

` + sanitizedOutgoing = `

gotta test some fucking ''''''''' marks

` +) + +type SanitizeTestSuite struct { + suite.Suite +} + +func (suite *SanitizeTestSuite) TestRemoveHTML() { + s := text.RemoveHTML(removeHTML) + suite.Equal(removedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { + s := text.SanitizeHTML(sanitizeOutgoing) + suite.Equal(sanitizedOutgoing, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeHTML() { + s := text.SanitizeHTML(sanitizeHTML) + suite.Equal(sanitizedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { + s := text.RemoveHTML(withEscapedLiteral) + suite.Equal(withEscapedLiteralExpected, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { + s := text.RemoveHTML(withEscaped) + suite.Equal(withEscapedExpected, s) +} + +func TestSanitizeTestSuite(t *testing.T) { + suite.Run(t, new(SanitizeTestSuite)) +} diff --git a/vendor/github.com/tdewolff/minify/v2/.gitattributes b/vendor/github.com/tdewolff/minify/v2/.gitattributes new file mode 100644 index 000000000..16a3a8b06 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/.gitattributes @@ -0,0 +1,2 @@ +benchmarks/sample_* linguist-generated +tests/*/corpus/* linguist-generated diff --git a/vendor/github.com/tdewolff/minify/v2/.gitignore b/vendor/github.com/tdewolff/minify/v2/.gitignore new file mode 100644 index 000000000..8653de91d --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/.gitignore @@ -0,0 +1,14 @@ +dist/ +benchmarks/* +!benchmarks/*.go +!benchmarks/sample_* +tests/*/fuzz-fuzz.zip +tests/*/crashers +tests/*/suppressions +tests/*/corpus/* +!tests/*/corpus/*.* +parse/tests/*/fuzz-fuzz.zip +parse/tests/*/crashers +parse/tests/*/suppressions +parse/tests/*/corpus/* +!parse/tests/*/corpus/*.* diff --git a/vendor/github.com/tdewolff/minify/v2/.golangci.yml b/vendor/github.com/tdewolff/minify/v2/.golangci.yml new file mode 100644 index 000000000..7009f9201 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/.golangci.yml @@ -0,0 +1,16 @@ +linters: + enable: + - depguard + 
- dogsled + - gofmt + - goimports + - golint + - gosec + - govet + - megacheck + - misspell + - nakedret + - prealloc + - unconvert + - unparam + - wastedassign diff --git a/vendor/github.com/tdewolff/minify/v2/Dockerfile b/vendor/github.com/tdewolff/minify/v2/Dockerfile new file mode 100644 index 000000000..6cc2de9cc --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/Dockerfile @@ -0,0 +1,14 @@ +# Use this image to build the executable +FROM golang:1.16-alpine AS compiler + +WORKDIR $GOPATH/src/minify +COPY . . + +RUN apk add --update --update-cache --no-cache git ca-certificates && \ + GO111MODULES=on CGO_ENABLED=0 go build -ldflags "-s -w" -trimpath -o /bin/minify ./cmd/minify + + +# Final image containing the executable from the previous step +FROM alpine:3 + +COPY --from=compiler /bin/minify /bin/minify diff --git a/vendor/github.com/tdewolff/minify/v2/LICENSE b/vendor/github.com/tdewolff/minify/v2/LICENSE new file mode 100644 index 000000000..41677de41 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015 Taco de Wolff + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/vendor/github.com/tdewolff/minify/v2/Makefile b/vendor/github.com/tdewolff/minify/v2/Makefile new file mode 100644 index 000000000..22e448a3c --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/Makefile @@ -0,0 +1,56 @@ +NAME=minify +CMD=./cmd/minify +TARGETS=linux_amd64 darwin_amd64 freebsd_amd64 netbsd_amd64 openbsd_amd64 windows_amd64 +VERSION=`git describe --tags` +FLAGS=-ldflags "-s -w -X 'main.Version=${VERSION}'" -trimpath +ENVS=GO111MODULES=on CGO_ENABLED=0 + +all: install + +install: + echo "Installing ${VERSION}" + ${ENVS} go install ${FLAGS} ./cmd/minify + . cmd/minify/bash_completion + +release: + TAG=$(shell git describe --tags --exact-match 2> /dev/null); + if [ "${.SHELLSTATUS}" -eq 0 ]; then \ + echo "Releasing ${VERSION}"; \ + else \ + echo "WARNING: commit is not tagged with a version"; \ + echo ""; \ + fi + rm -rf dist + mkdir -p dist + for t in ${TARGETS}; do \ + echo Building $$t...; \ + mkdir dist/$$t; \ + os=$$(echo $$t | cut -f1 -d_); \ + arch=$$(echo $$t | cut -f2 -d_); \ + ${ENVS} GOOS=$$os GOARCH=$$arch go build ${FLAGS} -o dist/$$t/${NAME} ${CMD}; \ + \ + cp LICENSE dist/$$t/.; \ + cp cmd/minify/README.md dist/$$t/.; \ + if [ "$$os" == "windows" ]; then \ + mv dist/$$t/${NAME} dist/$$t/${NAME}.exe; \ + zip -jq dist/${NAME}_$$t.zip dist/$$t/*; \ + cd dist; \ + sha256sum ${NAME}_$$t.zip >> checksums.txt; \ + cd ..; \ + else \ + cp cmd/minify/bash_completion dist/$$t/.; \ + cd dist/$$t; \ + tar -cf - * | gzip -9 > ../${NAME}_$$t.tar.gz; \ + cd ..; \ + sha256sum ${NAME}_$$t.tar.gz >> checksums.txt; \ + cd ..; \ + fi; \ + rm -rf dist/$$t; \ + done + +clean: + echo "Cleaning dist/" + rm -rf dist + +.PHONY: install release clean 
+.SILENT: install release clean diff --git a/vendor/github.com/tdewolff/minify/v2/README.md b/vendor/github.com/tdewolff/minify/v2/README.md new file mode 100644 index 000000000..0dfcb06c2 --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/README.md @@ -0,0 +1,693 @@ +# Minify [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/minify/v2?tab=doc) [![Go Report Card](https://goreportcard.com/badge/github.com/tdewolff/minify)](https://goreportcard.com/report/github.com/tdewolff/minify) [![codecov](https://codecov.io/gh/tdewolff/minify/branch/master/graph/badge.svg?token=Cr7r2EKPj2)](https://codecov.io/gh/tdewolff/minify) [![Donate](https://img.shields.io/badge/patreon-donate-DFB317)](https://www.patreon.com/tdewolff) + +**[Online demo](https://go.tacodewolff.nl/minify) if you need to minify files *now*.** + +**[Command line tool](https://github.com/tdewolff/minify/tree/master/cmd/minify) that minifies concurrently and watches file changes.** + +**[Releases](https://github.com/tdewolff/minify/releases) of CLI for various platforms.** See [CLI](https://github.com/tdewolff/minify/tree/master/cmd/minify) for more installation instructions. + +**[Parse](https://github.com/tdewolff/minify/tree/master/parse) subpackage on which minify depends.** + +--- + +*Did you know that the shortest valid piece of HTML5 is `<!doctype html><title>x</title>`? See for yourself at the [W3C Validator](http://validator.w3.org/)!* + +Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are designed for high performance. 
+ +The core functionality associates mimetypes with minification functions, allowing embedded resources (like CSS or JS within HTML files) to be minified as well. Users can add new implementations that are triggered based on a mimetype (or pattern), or redirect to an external command (like ClosureCompiler, UglifyCSS, ...). + +### Sponsors + +[![SiteGround](https://www.siteground.com/img/downloads/siteground-logo-black-transparent-vector.svg)](https://www.siteground.com/) + +Please see https://www.patreon.com/tdewolff for ways to contribute, otherwise please contact me directly! + +#### Table of Contents + +- [Minify](#minify) + - [Prologue](#prologue) + - [Installation](#installation) + - [API stability](#api-stability) + - [Testing](#testing) + - [Performance](#performance) + - [HTML](#html) + - [Whitespace removal](#whitespace-removal) + - [CSS](#css) + - [JS](#js) + - [Comparison with other tools](#comparison-with-other-tools) + - [Compression ratio (lower is better)](#compression-ratio-lower-is-better) + - [Time (lower is better)](#time-lower-is-better) + - [JSON](#json) + - [SVG](#svg) + - [XML](#xml) + - [Usage](#usage) + - [New](#new) + - [From reader](#from-reader) + - [From bytes](#from-bytes) + - [From string](#from-string) + - [To reader](#to-reader) + - [To writer](#to-writer) + - [Middleware](#middleware) + - [Custom minifier](#custom-minifier) + - [Mediatypes](#mediatypes) + - [Examples](#examples) + - [Common minifiers](#common-minifiers) + - [External minifiers](#external-minifiers) + - [Closure Compiler](#closure-compiler) + - [UglifyJS](#uglifyjs) + - [esbuild](#esbuild) + - [Custom minifier](#custom-minifier-example) + - [ResponseWriter](#responsewriter) + - [Templates](#templates) + - [License](#license) + +### Roadmap + +- [ ] Use ASM/SSE to further speed-up core parts of the parsers/minifiers +- [x] Improve JS minifiers by shortening variables and proper semicolon omission +- [ ] Speed-up SVG minifier, it is very slow +- [x] Proper parser 
error reporting and line number + column information +- [ ] Generation of source maps (uncertain, might slow down parsers too much if it cannot run separately nicely) +- [ ] Create a cmd to pack webfiles (much like webpack), ie. merging CSS and JS files, inlining small external files, minification and gzipping. This would work on HTML files. + +## Prologue +Minifiers or bindings to minifiers exist in almost all programming languages. Some implementations are merely using several regular expressions to trim whitespace and comments (even though regex for parsing HTML/XML is ill-advised, for a good read see [Regular Expressions: Now You Have Two Problems](http://blog.codinghorror.com/regular-expressions-now-you-have-two-problems/)). Some implementations are much more profound, such as the [YUI Compressor](http://yui.github.io/yuicompressor/) and [Google Closure Compiler](https://github.com/google/closure-compiler) for JS. As most existing implementations either use JavaScript, use regexes, and don't focus on performance, they are pretty slow. + +This minifier proves to be that fast and extensive minifier that can handle HTML and any other filetype it may contain (CSS, JS, ...). It is usually orders of magnitude faster than existing minifiers. + +## Installation +Make sure you have [Git](https://git-scm.com/) and [Go](https://golang.org/dl/) (1.13 or higher) installed, run +``` +mkdir Project +cd Project +go mod init +go get -u github.com/tdewolff/minify/v2 +``` + +Then add the following imports to be able to use the various minifiers +``` go +import ( + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/css" + "github.com/tdewolff/minify/v2/html" + "github.com/tdewolff/minify/v2/js" + "github.com/tdewolff/minify/v2/json" + "github.com/tdewolff/minify/v2/svg" + "github.com/tdewolff/minify/v2/xml" +) +``` + +You can optionally run `go mod tidy` to clean up the `go.mod` and `go.sum` files. 
+ +See [CLI tool](https://github.com/tdewolff/minify/tree/master/cmd/minify) for installation instructions of the binary. + +### Docker + +If you want to use Docker, please see https://hub.docker.com/r/tdewolff/minify. + +## API stability +There is no guarantee for absolute stability, but I take issues and bugs seriously and don't take API changes lightly. The library will be maintained in a compatible way unless vital bugs prevent me from doing so. There has been one API change after v1 which added options support and I took the opportunity to push through some more API clean up as well. There are no plans whatsoever for future API changes. + +## Testing +For all subpackages and the imported `parse` package, test coverage of 100% is pursued. Besides full coverage, the minifiers are [fuzz tested](https://github.com/tdewolff/fuzz) using [github.com/dvyukov/go-fuzz](http://www.github.com/dvyukov/go-fuzz), see [the wiki](https://github.com/tdewolff/minify/wiki) for the most important bugs found by fuzz testing. These tests ensure that everything works as intended and that the code does not crash (whatever the input). If you still encounter a bug, please file a [bug report](https://github.com/tdewolff/minify/issues)! + +## Performance +The benchmarks directory contains a number of standardized samples used to compare performance between changes. To give an indication of the speed of this library, I've ran the tests on my Thinkpad T460 (i5-6300U quad-core 2.4GHz running Arch Linux) using Go 1.15. 
+ +``` +name time/op +CSS/sample_bootstrap.css-4 2.70ms ± 0% +CSS/sample_gumby.css-4 3.57ms ± 0% +CSS/sample_fontawesome.css-4 767µs ± 0% +CSS/sample_normalize.css-4 85.5µs ± 0% +HTML/sample_amazon.html-4 15.2ms ± 0% +HTML/sample_bbc.html-4 3.90ms ± 0% +HTML/sample_blogpost.html-4 420µs ± 0% +HTML/sample_es6.html-4 15.6ms ± 0% +HTML/sample_stackoverflow.html-4 3.73ms ± 0% +HTML/sample_wikipedia.html-4 6.60ms ± 0% +JS/sample_ace.js-4 28.7ms ± 0% +JS/sample_dot.js-4 357µs ± 0% +JS/sample_jquery.js-4 10.0ms ± 0% +JS/sample_jqueryui.js-4 20.4ms ± 0% +JS/sample_moment.js-4 3.47ms ± 0% +JSON/sample_large.json-4 3.25ms ± 0% +JSON/sample_testsuite.json-4 1.74ms ± 0% +JSON/sample_twitter.json-4 24.2µs ± 0% +SVG/sample_arctic.svg-4 34.7ms ± 0% +SVG/sample_gopher.svg-4 307µs ± 0% +SVG/sample_usa.svg-4 57.4ms ± 0% +SVG/sample_car.svg-4 18.0ms ± 0% +SVG/sample_tiger.svg-4 5.61ms ± 0% +XML/sample_books.xml-4 54.7µs ± 0% +XML/sample_catalog.xml-4 33.0µs ± 0% +XML/sample_omg.xml-4 7.17ms ± 0% + +name speed +CSS/sample_bootstrap.css-4 50.7MB/s ± 0% +CSS/sample_gumby.css-4 52.1MB/s ± 0% +CSS/sample_fontawesome.css-4 61.2MB/s ± 0% +CSS/sample_normalize.css-4 70.8MB/s ± 0% +HTML/sample_amazon.html-4 31.1MB/s ± 0% +HTML/sample_bbc.html-4 29.5MB/s ± 0% +HTML/sample_blogpost.html-4 49.8MB/s ± 0% +HTML/sample_es6.html-4 65.6MB/s ± 0% +HTML/sample_stackoverflow.html-4 55.0MB/s ± 0% +HTML/sample_wikipedia.html-4 67.5MB/s ± 0% +JS/sample_ace.js-4 22.4MB/s ± 0% +JS/sample_dot.js-4 14.5MB/s ± 0% +JS/sample_jquery.js-4 24.8MB/s ± 0% +JS/sample_jqueryui.js-4 23.0MB/s ± 0% +JS/sample_moment.js-4 28.6MB/s ± 0% +JSON/sample_large.json-4 234MB/s ± 0% +JSON/sample_testsuite.json-4 394MB/s ± 0% +JSON/sample_twitter.json-4 63.0MB/s ± 0% +SVG/sample_arctic.svg-4 42.4MB/s ± 0% +SVG/sample_gopher.svg-4 19.0MB/s ± 0% +SVG/sample_usa.svg-4 17.8MB/s ± 0% +SVG/sample_car.svg-4 29.3MB/s ± 0% +SVG/sample_tiger.svg-4 12.2MB/s ± 0% +XML/sample_books.xml-4 81.0MB/s ± 0% +XML/sample_catalog.xml-4 58.6MB/s ± 0% 
+XML/sample_omg.xml-4 159MB/s ± 0% +``` + +## HTML + +HTML (with JS and CSS) minification typically shaves off about 10%. + +The HTML5 minifier uses these minifications: + +- strip unnecessary whitespace and otherwise collapse it to one space (or newline if it originally contained a newline) +- strip superfluous quotes, or uses single/double quotes whichever requires fewer escapes +- strip default attribute values and attribute boolean values +- strip some empty attributes +- strip unrequired tags (`html`, `head`, `body`, ...) +- strip unrequired end tags (`tr`, `td`, `li`, ... and often `p`) +- strip default protocols (`http:`, `https:` and `javascript:`) +- strip all comments (including conditional comments, old IE versions are not supported anymore by Microsoft) +- shorten `doctype` and `meta` charset +- lowercase tags, attributes and some values to enhance gzip compression + +Options: + +- `KeepConditionalComments` preserve all IE conditional comments such as `<!--[if IE 6]><![endif]-->` and `<![if IE 6]><![endif]>`, see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax +- `KeepDefaultAttrVals` preserve default attribute values such as `