[chore]: Bump github.com/minio/minio-go/v7 from 7.0.48 to 7.0.49 (#1567)

Bumps [github.com/minio/minio-go/v7](https://github.com/minio/minio-go) from 7.0.48 to 7.0.49.
- [Release notes](https://github.com/minio/minio-go/releases)
- [Commits](https://github.com/minio/minio-go/compare/v7.0.48...v7.0.49)

---
updated-dependencies:
- dependency-name: github.com/minio/minio-go/v7
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
dependabot[bot] 2023-02-27 10:21:58 +01:00 committed by GitHub
parent e1b704e06e
commit 752c38b0d5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
38 changed files with 1696 additions and 1023 deletions

8
go.mod
View file

@ -35,7 +35,7 @@ require (
github.com/jackc/pgx/v4 v4.17.2 github.com/jackc/pgx/v4 v4.17.2
github.com/microcosm-cc/bluemonday v1.0.22 github.com/microcosm-cc/bluemonday v1.0.22
github.com/miekg/dns v1.1.50 github.com/miekg/dns v1.1.50
github.com/minio/minio-go/v7 v7.0.48 github.com/minio/minio-go/v7 v7.0.49
github.com/mitchellh/mapstructure v1.5.0 github.com/mitchellh/mapstructure v1.5.0
github.com/oklog/ulid v1.3.1 github.com/oklog/ulid v1.3.1
github.com/spf13/cobra v1.6.1 github.com/spf13/cobra v1.6.1
@ -89,7 +89,7 @@ require (
github.com/dsoprea/go-photoshop-info-format v0.0.0-20200610045659-121dd752914d // indirect github.com/dsoprea/go-photoshop-info-format v0.0.0-20200610045659-121dd752914d // indirect
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d // indirect github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d // indirect
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
github.com/dustin/go-humanize v1.0.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-errors/errors v1.4.1 // indirect github.com/go-errors/errors v1.4.1 // indirect
@ -118,8 +118,8 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
github.com/klauspost/compress v1.15.9 // indirect github.com/klauspost/compress v1.15.15 // indirect
github.com/klauspost/cpuid/v2 v2.1.1 // indirect github.com/klauspost/cpuid/v2 v2.2.3 // indirect
github.com/leodido/go-urn v1.2.1 // indirect github.com/leodido/go-urn v1.2.1 // indirect
github.com/magiconair/properties v1.8.7 // indirect github.com/magiconair/properties v1.8.7 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect github.com/mattn/go-isatty v0.0.17 // indirect

15
go.sum
View file

@ -161,8 +161,9 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@ -398,13 +399,13 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.1.1 h1:t0wUqjowdm8ezddV5k0tLWVklVuvLJpoHeb4WBdydm0= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU=
github.com/klauspost/cpuid/v2 v2.1.1/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
@ -443,8 +444,8 @@ github.com/miekg/dns v1.1.50 h1:DQUfb9uc6smULcREF09Uc+/Gd46YWqJd5DbpPE9xkcA=
github.com/miekg/dns v1.1.50/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= github.com/miekg/dns v1.1.50/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.48 h1:VQtYB/2xHW2SlxqhjRlDpvSiSOfGlyFlXZF1EHARPHM= github.com/minio/minio-go/v7 v7.0.49 h1:dE5DfOtnXMXCjr/HWI6zN9vCrY6Sv666qhhiwUMvGV4=
github.com/minio/minio-go/v7 v7.0.48/go.mod h1:nCrRzjoSUQh8hgKKtu3Y708OLvRLtuASMg2/nvmbarw= github.com/minio/minio-go/v7 v7.0.49/go.mod h1:UI34MvQEiob3Cf/gGExGMmzugkM/tNgbFypNDy5LMVc=
github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g=
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=

View file

@ -1,12 +1,12 @@
sudo: false sudo: false
language: go language: go
go_import_path: github.com/dustin/go-humanize
go: go:
- 1.3.x - 1.13.x
- 1.5.x - 1.14.x
- 1.6.x - 1.15.x
- 1.7.x - 1.16.x
- 1.8.x - stable
- 1.9.x
- master - master
matrix: matrix:
allow_failures: allow_failures:
@ -15,7 +15,7 @@ matrix:
install: install:
- # Do nothing. This is needed to prevent default install action "go get -t -v ./..." from happening here (we want it to happen inside script step). - # Do nothing. This is needed to prevent default install action "go get -t -v ./..." from happening here (we want it to happen inside script step).
script: script:
- go get -t -v ./...
- diff -u <(echo -n) <(gofmt -d -s .) - diff -u <(echo -n) <(gofmt -d -s .)
- go tool vet . - go vet .
- go install -v -race ./...
- go test -v -race ./... - go test -v -race ./...

View file

@ -5,7 +5,7 @@ Just a few functions for helping humanize times and sizes.
`go get` it as `github.com/dustin/go-humanize`, import it as `go get` it as `github.com/dustin/go-humanize`, import it as
`"github.com/dustin/go-humanize"`, use it as `humanize`. `"github.com/dustin/go-humanize"`, use it as `humanize`.
See [godoc](https://godoc.org/github.com/dustin/go-humanize) for See [godoc](https://pkg.go.dev/github.com/dustin/go-humanize) for
complete documentation. complete documentation.
## Sizes ## Sizes

View file

@ -28,6 +28,10 @@ var (
BigZiByte = (&big.Int{}).Mul(BigEiByte, bigIECExp) BigZiByte = (&big.Int{}).Mul(BigEiByte, bigIECExp)
// BigYiByte is 1,024 z bytes in bit.Ints // BigYiByte is 1,024 z bytes in bit.Ints
BigYiByte = (&big.Int{}).Mul(BigZiByte, bigIECExp) BigYiByte = (&big.Int{}).Mul(BigZiByte, bigIECExp)
// BigRiByte is 1,024 y bytes in bit.Ints
BigRiByte = (&big.Int{}).Mul(BigYiByte, bigIECExp)
// BigQiByte is 1,024 r bytes in bit.Ints
BigQiByte = (&big.Int{}).Mul(BigRiByte, bigIECExp)
) )
var ( var (
@ -51,6 +55,10 @@ var (
BigZByte = (&big.Int{}).Mul(BigEByte, bigSIExp) BigZByte = (&big.Int{}).Mul(BigEByte, bigSIExp)
// BigYByte is 1,000 SI z bytes in big.Ints // BigYByte is 1,000 SI z bytes in big.Ints
BigYByte = (&big.Int{}).Mul(BigZByte, bigSIExp) BigYByte = (&big.Int{}).Mul(BigZByte, bigSIExp)
// BigRByte is 1,000 SI y bytes in big.Ints
BigRByte = (&big.Int{}).Mul(BigYByte, bigSIExp)
// BigQByte is 1,000 SI r bytes in big.Ints
BigQByte = (&big.Int{}).Mul(BigRByte, bigSIExp)
) )
var bigBytesSizeTable = map[string]*big.Int{ var bigBytesSizeTable = map[string]*big.Int{
@ -71,6 +79,10 @@ var bigBytesSizeTable = map[string]*big.Int{
"zb": BigZByte, "zb": BigZByte,
"yib": BigYiByte, "yib": BigYiByte,
"yb": BigYByte, "yb": BigYByte,
"rib": BigRiByte,
"rb": BigRByte,
"qib": BigQiByte,
"qb": BigQByte,
// Without suffix // Without suffix
"": BigByte, "": BigByte,
"ki": BigKiByte, "ki": BigKiByte,
@ -89,6 +101,10 @@ var bigBytesSizeTable = map[string]*big.Int{
"zi": BigZiByte, "zi": BigZiByte,
"y": BigYByte, "y": BigYByte,
"yi": BigYiByte, "yi": BigYiByte,
"r": BigRByte,
"ri": BigRiByte,
"q": BigQByte,
"qi": BigQiByte,
} }
var ten = big.NewInt(10) var ten = big.NewInt(10)
@ -115,7 +131,7 @@ func humanateBigBytes(s, base *big.Int, sizes []string) string {
// //
// BigBytes(82854982) -> 83 MB // BigBytes(82854982) -> 83 MB
func BigBytes(s *big.Int) string { func BigBytes(s *big.Int) string {
sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "RB", "QB"}
return humanateBigBytes(s, bigSIExp, sizes) return humanateBigBytes(s, bigSIExp, sizes)
} }
@ -125,7 +141,7 @@ func BigBytes(s *big.Int) string {
// //
// BigIBytes(82854982) -> 79 MiB // BigIBytes(82854982) -> 79 MiB
func BigIBytes(s *big.Int) string { func BigIBytes(s *big.Int) string {
sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", "RiB", "QiB"}
return humanateBigBytes(s, bigIECExp, sizes) return humanateBigBytes(s, bigIECExp, sizes)
} }

View file

@ -1,3 +1,4 @@
//go:build go1.6
// +build go1.6 // +build go1.6
package humanize package humanize

View file

@ -6,6 +6,9 @@ import (
) )
func stripTrailingZeros(s string) string { func stripTrailingZeros(s string) string {
if !strings.ContainsRune(s, '.') {
return s
}
offset := len(s) - 1 offset := len(s) - 1
for offset > 0 { for offset > 0 {
if s[offset] == '.' { if s[offset] == '.' {

View file

@ -73,7 +73,7 @@ func FormatFloat(format string, n float64) string {
if n > math.MaxFloat64 { if n > math.MaxFloat64 {
return "Infinity" return "Infinity"
} }
if n < -math.MaxFloat64 { if n < (0.0 - math.MaxFloat64) {
return "-Infinity" return "-Infinity"
} }

View file

@ -8,6 +8,8 @@ import (
) )
var siPrefixTable = map[float64]string{ var siPrefixTable = map[float64]string{
-30: "q", // quecto
-27: "r", // ronto
-24: "y", // yocto -24: "y", // yocto
-21: "z", // zepto -21: "z", // zepto
-18: "a", // atto -18: "a", // atto
@ -25,6 +27,8 @@ var siPrefixTable = map[float64]string{
18: "E", // exa 18: "E", // exa
21: "Z", // zetta 21: "Z", // zetta
24: "Y", // yotta 24: "Y", // yotta
27: "R", // ronna
30: "Q", // quetta
} }
var revSIPrefixTable = revfmap(siPrefixTable) var revSIPrefixTable = revfmap(siPrefixTable)

View file

@ -131,7 +131,8 @@ func (d *compressor) fillDeflate(b []byte) int {
s := d.state s := d.state
if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize // shift the window by windowSize
copy(d.window[:], d.window[windowSize:2*windowSize]) //copy(d.window[:], d.window[windowSize:2*windowSize])
*(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
s.index -= windowSize s.index -= windowSize
d.windowEnd -= windowSize d.windowEnd -= windowSize
if d.blockStart >= windowSize { if d.blockStart >= windowSize {
@ -293,7 +294,6 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
} }
offset = 0 offset = 0
cGain := 0
if d.chain < 100 { if d.chain < 100 {
for i := prevHead; tries > 0; tries-- { for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] { if wEnd == win[i+length] {
@ -321,10 +321,14 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
return return
} }
// Minimum gain to accept a match.
cGain := 4
// Some like it higher (CSV), some like it lower (JSON) // Some like it higher (CSV), some like it lower (JSON)
const baseCost = 6 const baseCost = 3
// Base is 4 bytes at with an additional cost. // Base is 4 bytes at with an additional cost.
// Matches must be better than this. // Matches must be better than this.
for i := prevHead; tries > 0; tries-- { for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] { if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos) n := matchLen(win[i:i+minMatchLook], wPos)
@ -332,7 +336,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
// Calculate gain. Estimate // Calculate gain. Estimate
newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])
//fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
if newGain > cGain { if newGain > cGain {
length = n length = n
offset = pos - i offset = pos - i
@ -373,6 +377,12 @@ func hash4(b []byte) uint32 {
return hash4u(binary.LittleEndian.Uint32(b), hashBits) return hash4u(binary.LittleEndian.Uint32(b), hashBits)
} }
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> (32 - h)
}
// bulkHash4 will compute hashes using the same // bulkHash4 will compute hashes using the same
// algorithm as hash4 // algorithm as hash4
func bulkHash4(b []byte, dst []uint32) { func bulkHash4(b []byte, dst []uint32) {
@ -483,27 +493,103 @@ func (d *compressor) deflateLazy() {
} }
if prevLength >= minMatchLength && s.length <= prevLength { if prevLength >= minMatchLength && s.length <= prevLength {
// Check for better match at end... // No better match, but check for better match at end...
// //
// checkOff must be >=2 since we otherwise risk checking s.index // Skip forward a number of bytes.
// Offset of 2 seems to yield best results. // Offset of 2 seems to yield best results. 3 is sometimes better.
const checkOff = 2 const checkOff = 2
prevIndex := s.index - 1
if prevIndex+prevLength+checkOff < s.maxInsertIndex { // Check all, except full length
end := lookahead if prevLength < maxMatchLength-checkOff {
if lookahead > maxMatchLength { prevIndex := s.index - 1
end = maxMatchLength if prevIndex+prevLength < s.maxInsertIndex {
} end := lookahead
end += prevIndex if lookahead > maxMatchLength+checkOff {
idx := prevIndex + prevLength - (4 - checkOff) end = maxMatchLength + checkOff
h := hash4(d.window[idx:]) }
ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff) end += prevIndex
if ch2 > minIndex {
length := matchLen(d.window[prevIndex:end], d.window[ch2:]) // Hash at match end.
// It seems like a pure length metric is best. h := hash4(d.window[prevIndex+prevLength:])
if length > prevLength { ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
prevLength = length if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
prevOffset = prevIndex - ch2 length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
// It seems like a pure length metric is best.
if length > prevLength {
prevLength = length
prevOffset = prevIndex - ch2
// Extend back...
for i := checkOff - 1; i >= 0; i-- {
if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
// Emit tokens we "owe"
for j := 0; j <= i; j++ {
d.tokens.AddLiteral(d.window[prevIndex+j])
if d.tokens.n == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
if s.index < s.maxInsertIndex {
h := hash4(d.window[s.index:])
ch := s.hashHead[h]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[h] = uint32(s.index + s.hashOffset)
}
}
break
} else {
prevLength++
}
}
} else if false {
// Check one further ahead.
// Only rarely better, disabled for now.
prevIndex++
h := hash4(d.window[prevIndex+prevLength:])
ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
// It seems like a pure length metric is best.
if length > prevLength+checkOff {
prevLength = length
prevOffset = prevIndex - ch2
prevIndex--
// Extend back...
for i := checkOff; i >= 0; i-- {
if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
// Emit tokens we "owe"
for j := 0; j <= i; j++ {
d.tokens.AddLiteral(d.window[prevIndex+j])
if d.tokens.n == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
if s.index < s.maxInsertIndex {
h := hash4(d.window[s.index:])
ch := s.hashHead[h]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[h] = uint32(s.index + s.hashOffset)
}
}
break
} else {
prevLength++
}
}
}
}
}
} }
} }
} }

View file

@ -7,19 +7,19 @@ package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression. // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands: // LZ77 decompresses data through sequences of two forms of commands:
// //
// * Literal insertions: Runs of one or more symbols are inserted into the data // - Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a // stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols. // single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary // Any valid stream must start with a literal insertion if no preset dictionary
// is used. // is used.
// //
// * Backward copies: Runs of one or more symbols are copied from previously // - Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist // emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how // determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than // many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to // the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols. // perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command. // The writeCopy and tryWriteCopy are used to implement this command.
// //
// For performance reasons, this implementation performs little to no sanity // For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each // checks about the arguments. As such, the invariants documented for each

View file

@ -58,17 +58,6 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463 prime8bytes = 0xcf1bbcdcb7a56463
) )
func load32(b []byte, i int) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
func load3232(b []byte, i int32) uint32 { func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:]) return binary.LittleEndian.Uint32(b[i:])
} }
@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:]) return binary.LittleEndian.Uint64(b[i:])
} }
func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}
type tableEntry struct { type tableEntry struct {
offset int32 offset int32
} }
@ -104,7 +89,8 @@ func (e *fastGen) addBlock(src []byte) int32 {
} }
// Move down // Move down
offset := int32(len(e.hist)) - maxMatchOffset offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:]) // copy(e.hist[0:maxMatchOffset], e.hist[offset:])
*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
e.cur += offset e.cur += offset
e.hist = e.hist[:maxMatchOffset] e.hist = e.hist[:maxMatchOffset]
} }
@ -114,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
return s return s
} }
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> (32 - h)
}
type tableEntryPrev struct { type tableEntryPrev struct {
Cur tableEntry Cur tableEntry
Prev tableEntry Prev tableEntry
} }
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64. // Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 { func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
} }
// hash8 returns the hash of u to fit in a hash table with h bits. // hashLen returns a hash of the lowest mls bytes of with length output bits.
// Preferably h should be a constant and should always be <64. // mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
func hash8(u uint64, h uint8) uint32 { // length should always be < 32.
return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64)) // Preferably length and mls should be a constant for inlining.
} func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. case 3:
// Preferably h should be a constant and should always be <64. return (uint32(u<<8) * prime3bytes) >> (32 - length)
func hash6(u uint64, h uint8) uint32 { case 5:
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64)) return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return uint32((u * prime8bytes) >> (64 - length))
default:
return (uint32(u) * prime4bytes) >> (32 - length)
}
} }
// matchlen will return the match length between offsets and t in src. // matchlen will return the match length between offsets and t in src.

View file

@ -265,9 +265,9 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) {
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker // information. Code badCode is an end marker
// //
// numLiterals The number of literals in literalEncoding // numLiterals The number of literals in literalEncoding
// numOffsets The number of offsets in offsetEncoding // numOffsets The number of offsets in offsetEncoding
// litenc, offenc The literal and offset encoder to use // litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq { for i := range w.codegenFreq {
w.codegenFreq[i] = 0 w.codegenFreq[i] = 0
@ -460,9 +460,9 @@ func (w *huffmanBitWriter) writeOutBits() {
// Write the header of a dynamic Huffman block to the output stream. // Write the header of a dynamic Huffman block to the output stream.
// //
// numLiterals The number of literals specified in codegen // numLiterals The number of literals specified in codegen
// numOffsets The number of offsets specified in codegen // numOffsets The number of offsets specified in codegen
// numCodegens The number of codegens used in codegen // numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil { if w.err != nil {
return return
@ -790,9 +790,11 @@ func (w *huffmanBitWriter) fillTokens() {
// and offsetEncoding. // and offsetEncoding.
// The number of literal and offset tokens is returned. // The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
copy(w.literalFreq[:], t.litHist[:]) //copy(w.literalFreq[:], t.litHist[:])
copy(w.literalFreq[256:], t.extraHist[:]) *(*[256]uint16)(w.literalFreq[:]) = t.litHist
copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) //copy(w.literalFreq[256:], t.extraHist[:])
*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
w.offsetFreq = t.offHist
if t.n == 0 { if t.n == 0 {
return return

View file

@ -168,13 +168,18 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
// The cases of 0, 1, and 2 literals are handled by special case code. // The cases of 0, 1, and 2 literals are handled by special case code.
// //
// list An array of the literals with non-zero frequencies // list An array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing //
// frequency, and has as its last element a special element with frequency // and their associated frequencies. The array is in order of increasing
// MaxInt32 // frequency, and has as its last element a special element with frequency
// MaxInt32
//
// maxBits The maximum number of bits that should be used to encode any literal. // maxBits The maximum number of bits that should be used to encode any literal.
// Must be less than 16. //
// Must be less than 16.
//
// return An integer array in which array[i] indicates the number of literals // return An integer array in which array[i] indicates the number of literals
// that should be encoded in i bits. //
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit { if maxBits >= maxBitsLimit {
panic("flate: maxBits too large") panic("flate: maxBits too large")

View file

@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur)) panic(fmt.Sprint("e.cur < 0: ", e.cur))
@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin) sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from. // nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s) cv := load6432(src, s)
for { for {
const skipLog = 5 const skipLog = 5
@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS := s nextS := s
var candidate tableEntry var candidate tableEntry
for { for {
nextHash := hash(cv) nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash] candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit { if nextS > sLimit {
@ -86,16 +87,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
now := load6432(src, nextS) now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur} e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash(uint32(now)) nextHash = hashLen(now, tableBits, hashBytes)
offset := s - (candidate.offset - e.cur) offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur} e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break break
} }
// Do one right away... // Do one right away...
cv = uint32(now) cv = now
s = nextS s = nextS
nextS++ nextS++
candidate = e.table[nextHash] candidate = e.table[nextHash]
@ -103,11 +104,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur} e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur) offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur} e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break break
} }
cv = uint32(now) cv = now
s = nextS s = nextS
} }
@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
} }
if s >= sLimit { if s >= sLimit {
// Index first pair after match end. // Index first pair after match end.
if int(s+l+4) < len(src) { if int(s+l+8) < len(src) {
cv := load3232(src, s) cv := load6432(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur} e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
} }
goto emitRemainder goto emitRemainder
} }
@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// three load32 calls. // three load32 calls.
x := load6432(src, s-2) x := load6432(src, s-2)
o := e.cur + s - 2 o := e.cur + s - 2
prevHash := hash(uint32(x)) prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o} e.table[prevHash] = tableEntry{offset: o}
x >>= 16 x >>= 16
currHash := hash(uint32(x)) currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash] candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2} e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur) offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 8) cv = x >> 8
s++ s++
break break
} }

View file

@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin) sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from. // nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s) cv := load6432(src, s)
for { for {
// When should we start skipping if we haven't found matches in a long while. // When should we start skipping if we haven't found matches in a long while.
const skipLog = 5 const skipLog = 5
@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS := s nextS := s
var candidate tableEntry var candidate tableEntry
for { for {
nextHash := hash4u(cv, bTableBits) nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit { if nextS > sLimit {
@ -84,16 +85,16 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
candidate = e.table[nextHash] candidate = e.table[nextHash]
now := load6432(src, nextS) now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur} e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash4u(uint32(now), bTableBits) nextHash = hashLen(now, bTableBits, hashBytes)
offset := s - (candidate.offset - e.cur) offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur} e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break break
} }
// Do one right away... // Do one right away...
cv = uint32(now) cv = now
s = nextS s = nextS
nextS++ nextS++
candidate = e.table[nextHash] candidate = e.table[nextHash]
@ -101,10 +102,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur} e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur) offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break break
} }
cv = uint32(now) cv = now
} }
// A 4-byte match has been found. We'll later see if more than 4 bytes // A 4-byte match has been found. We'll later see if more than 4 bytes
@ -154,9 +155,9 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
if s >= sLimit { if s >= sLimit {
// Index first pair after match end. // Index first pair after match end.
if int(s+l+4) < len(src) { if int(s+l+8) < len(src) {
cv := load3232(src, s) cv := load6432(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
} }
goto emitRemainder goto emitRemainder
} }
@ -164,15 +165,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// Store every second hash in-between, but offset by 1. // Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 { for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i) x := load6432(src, i)
nextHash := hash4u(uint32(x), bTableBits) nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i} e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one // Skip one
x >>= 16 x >>= 16
nextHash = hash4u(uint32(x), bTableBits) nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2} e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one // Skip one
x >>= 16 x >>= 16
nextHash = hash4u(uint32(x), bTableBits) nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4} e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
} }
@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// three load32 calls. // three load32 calls.
x := load6432(src, s-2) x := load6432(src, s-2)
o := e.cur + s - 2 o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits) prevHash := hashLen(x, bTableBits, hashBytes)
prevHash2 := hash4u(uint32(x>>8), bTableBits) prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o} e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1} e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hash4u(uint32(x>>16), bTableBits) currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash] candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2} e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur) offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 24) cv = x >> 24
s++ s++
break break
} }

View file

@ -11,10 +11,11 @@ type fastEncL3 struct {
// Encode uses a similar algorithm to level 2, will check up to two candidates. // Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) { func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 8 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
tableBits = 16 tableBits = 16
tableSize = 1 << tableBits tableSize = 1 << tableBits
hashBytes = 5
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
@ -69,20 +70,20 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin) sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from. // nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s) cv := load6432(src, s)
for { for {
const skipLog = 6 const skipLog = 7
nextS := s nextS := s
var candidate tableEntry var candidate tableEntry
for { for {
nextHash := hash4u(cv, tableBits) nextHash := hashLen(cv, tableBits, hashBytes)
s = nextS s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit { if nextS > sLimit {
goto emitRemainder goto emitRemainder
} }
candidates := e.table[nextHash] candidates := e.table[nextHash]
now := load3232(src, nextS) now := load6432(src, nextS)
// Safe offset distance until s + 4... // Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4) minOffset := e.cur + s - (maxMatchOffset - 4)
@ -96,8 +97,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
continue continue
} }
if cv == load3232(src, candidate.offset-e.cur) { if uint32(cv) == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
break break
} }
// Both match and are valid, pick longest. // Both match and are valid, pick longest.
@ -112,7 +113,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We only check if value mismatches. // We only check if value mismatches.
// Offset will always be invalid in other cases. // Offset will always be invalid in other cases.
candidate = candidates.Prev candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break break
} }
} }
@ -164,9 +165,9 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
if s >= sLimit { if s >= sLimit {
t += l t += l
// Index first pair after match end. // Index first pair after match end.
if int(t+4) < len(src) && t > 0 { if int(t+8) < len(src) && t > 0 {
cv := load3232(src, t) cv = load6432(src, t)
nextHash := hash4u(cv, tableBits) nextHash := hashLen(cv, tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{ e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur, Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t}, Cur: tableEntry{offset: e.cur + t},
@ -176,8 +177,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
} }
// Store every 5th hash in-between. // Store every 5th hash in-between.
for i := s - l + 2; i < s-5; i += 5 { for i := s - l + 2; i < s-5; i += 6 {
nextHash := hash4u(load3232(src, i), tableBits) nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{ e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur, Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}} Cur: tableEntry{offset: e.cur + i}}
@ -185,23 +186,23 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We could immediately start working at s now, but to improve // We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s. // compression we first update the hash table at s-2 to s.
x := load6432(src, s-2) x := load6432(src, s-2)
prevHash := hash4u(uint32(x), tableBits) prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{ e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur, Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2}, Cur: tableEntry{offset: e.cur + s - 2},
} }
x >>= 8 x >>= 8
prevHash = hash4u(uint32(x), tableBits) prevHash = hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{ e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur, Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1}, Cur: tableEntry{offset: e.cur + s - 1},
} }
x >>= 8 x >>= 8
currHash := hash4u(uint32(x), tableBits) currHash := hashLen(x, tableBits, hashBytes)
candidates := e.table[currHash] candidates := e.table[currHash]
cv = uint32(x) cv = x
e.table[currHash] = tableEntryPrev{ e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur, Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur}, Cur: tableEntry{offset: s + e.cur},
@ -212,17 +213,17 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
minOffset := e.cur + s - (maxMatchOffset - 4) minOffset := e.cur + s - (maxMatchOffset - 4)
if candidate.offset > minOffset { if candidate.offset > minOffset {
if cv == load3232(src, candidate.offset-e.cur) { if uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Found a match... // Found a match...
continue continue
} }
candidate = candidates.Prev candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Match at prev... // Match at prev...
continue continue
} }
} }
cv = uint32(x >> 8) cv = x >> 8
s++ s++
break break
} }

View file

@ -12,6 +12,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur)) panic(fmt.Sprint("e.cur < 0: ", e.cur))
@ -80,7 +81,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
nextS := s nextS := s
var t int32 var t int32
for { for {
nextHashS := hash4x64(cv, tableBits) nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits) nextHashL := hash7(cv, tableBits)
s = nextS s = nextS
@ -168,7 +169,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// Index first pair after match end. // Index first pair after match end.
if int(s+8) < len(src) { if int(s+8) < len(src) {
cv := load6432(src, s) cv := load6432(src, s)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
} }
goto emitRemainder goto emitRemainder
@ -183,7 +184,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1} t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2 e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(uint32(cv>>8), tableBits)] = t2 e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
i += 3 i += 3
for ; i < s-1; i += 3 { for ; i < s-1; i += 3 {
@ -192,7 +193,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1} t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2 e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(uint32(cv>>8), tableBits)] = t2 e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
} }
} }
} }
@ -201,7 +202,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s. // compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1) x := load6432(src, s-1)
o := e.cur + s - 1 o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits) prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits) prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o} e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o} e.bTable[prevHashL] = tableEntry{offset: o}

View file

@ -12,6 +12,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur)) panic(fmt.Sprint("e.cur < 0: ", e.cur))
@ -88,7 +89,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
var l int32 var l int32
var t int32 var t int32
for { for {
nextHashS := hash4x64(cv, tableBits) nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits) nextHashL := hash7(cv, tableBits)
s = nextS s = nextS
@ -105,7 +106,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hash4x64(next, tableBits) nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits) nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur t = lCandidate.Cur.offset - e.cur
@ -191,14 +192,21 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// Try to locate a better match by checking the end of best match... // Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit { if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
// Test current t2 := eLong - e.cur - l + skipBeginning
t2 := eLong - e.cur - l s2 := s + skipBeginning
off := s - t2 off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 { if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l { if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2 t = t2
l = l2 l = l2
s = s2
} }
} }
} }
@ -250,7 +258,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
if i < s-1 { if i < s-1 {
cv := load6432(src, i) cv := load6432(src, i)
t := tableEntry{offset: i + e.cur} t := tableEntry{offset: i + e.cur}
e.table[hash4x64(cv, tableBits)] = t e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
@ -263,7 +271,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// We only have enough bits for a short entry at i+2 // We only have enough bits for a short entry at i+2
cv >>= 8 cv >>= 8
t = tableEntry{offset: t.offset + 1} t = tableEntry{offset: t.offset + 1}
e.table[hash4x64(cv, tableBits)] = t e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's' // Skip one - otherwise we risk hitting 's'
i += 4 i += 4
@ -273,7 +281,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1} t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hash4u(uint32(cv>>8), tableBits)] = t2 e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
} }
} }
} }
@ -282,7 +290,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s. // compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1) x := load6432(src, s-1)
o := e.cur + s - 1 o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits) prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits) prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o} e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL] eLong := &e.bTable[prevHashL]

View file

@ -12,6 +12,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const ( const (
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
) )
if debugDeflate && e.cur < 0 { if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur)) panic(fmt.Sprint("e.cur < 0: ", e.cur))
@ -90,7 +91,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
var l int32 var l int32
var t int32 var t int32
for { for {
nextHashS := hash4x64(cv, tableBits) nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits) nextHashL := hash7(cv, tableBits)
s = nextS s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog nextS = s + doEvery + (s-nextEmit)>>skipLog
@ -107,7 +108,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
eLong.Cur, eLong.Prev = entry, eLong.Cur eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next' // Calculate hashes of 'next'
nextHashS = hash4x64(next, tableBits) nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits) nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur t = lCandidate.Cur.offset - e.cur
@ -213,24 +214,33 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Try to locate a better match by checking the end-of-match... // Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit { if sAt := s + l; sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current // Test current
t2 := eLong.Cur.offset - e.cur - l t2 := eLong.Cur.offset - e.cur - l + skipBeginning
off := s - t2 s2 := s + skipBeginning
off := s2 - t2
if off < maxMatchOffset { if off < maxMatchOffset {
if off > 0 && t2 >= 0 { if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l { if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2 t = t2
l = l2 l = l2
s = s2
} }
} }
// Test next: // Test next:
t2 = eLong.Prev.offset - e.cur - l t2 = eLong.Prev.offset - e.cur - l + skipBeginning
off := s - t2 off := s2 - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 { if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l { if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2 t = t2
l = l2 l = l2
s = s2
} }
} }
} }
@ -277,7 +287,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Index after match end. // Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 { for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i) cv := load6432(src, i)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
} }
@ -292,7 +302,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1} t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hash4x64(cv, tableBits)] = t e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
} }

View file

@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
dict = dict[len(dict)-maxStatelessDict:] dict = dict[len(dict)-maxStatelessDict:]
} }
// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
var inDict []byte
for len(in) > 0 { for len(in) > 0 {
todo := in todo := in
if len(todo) > maxStatelessBlock-len(dict) { if len(inDict) > 0 {
if len(todo) > maxStatelessBlock-maxStatelessDict {
todo = todo[:maxStatelessBlock-maxStatelessDict]
}
} else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)] todo = todo[:maxStatelessBlock-len(dict)]
} }
inOrg := in
in = in[len(todo):] in = in[len(todo):]
uncompressed := todo uncompressed := todo
if len(dict) > 0 { if len(dict) > 0 {
@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
todo = combined todo = combined
} }
// Compress // Compress
statelessEnc(&dst, todo, int16(len(dict))) if len(inDict) == 0 {
statelessEnc(&dst, todo, int16(len(dict)))
} else {
statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
}
isEof := eof && len(in) == 0 isEof := eof && len(in) == 0
if dst.n == 0 { if dst.n == 0 {
@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
} }
if len(in) > 0 { if len(in) > 0 {
// Retain a dict if we have more // Retain a dict if we have more
dict = todo[len(todo)-maxStatelessDict:] inDict = inOrg[len(uncompressed)-maxStatelessDict:]
dict = nil
dst.Reset() dst.Reset()
} }
if bw.err != nil { if bw.err != nil {

View file

@ -325,35 +325,35 @@ The content compressed in this mode is fully compatible with the standard decode
Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU): Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU):
| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller | | File | S2 Speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller |
|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------| |---------------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------|
| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% | | [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 16.33x | 10556 MB/s | 8.0% | 6.04x | 5252 MB/s | 14.7% |
| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - | | (1 CPU) | 1.08x | 940 MB/s | - | 0.46x | 400 MB/s | - |
| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% | | [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 16.51x | 15224 MB/s | 31.70% | 9.47x | 8734 MB/s | 37.71% |
| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - | | (1 CPU) | 1.26x | 1157 MB/s | - | 0.60x | 556 MB/s | - |
| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% | | [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12598 MB/s | -5.76% | 6.23x | 5675 MB/s | 3.62% |
| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - | | (1 CPU) | 1.02x | 932 MB/s | - | 0.47x | 432 MB/s | - |
| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% | | [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 11.21x | 12116 MB/s | 15.95% | 3.24x | 3500 MB/s | 18.00% |
| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - | | (1 CPU) | 1.05x | 1135 MB/s | - | 0.27x | 292 MB/s | - |
| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% | | [apache.log](https://files.klauspost.com/compress/apache.log.zst) | 8.55x | 16673 MB/s | 20.54% | 5.85x | 11420 MB/s | 24.97% |
| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - | | (1 CPU) | 1.91x | 1771 MB/s | - | 0.53x | 1041 MB/s | - |
| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% | | [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 15.76x | 14357 MB/s | 24.01% | 8.67x | 7891 MB/s | 33.68% |
| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - | | (1 CPU) | 1.17x | 1064 MB/s | - | 0.65x | 595 MB/s | - |
| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% | | [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9835 MB/s | 2.34% | 6.85x | 4863 MB/s | 9.96% |
| (1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - | | (1 CPU) | 0.97x | 689 MB/s | - | 0.55x | 387 MB/s | - |
| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% | | sharnd.out.2gb | 9.11x | 13213 MB/s | 0.01% | 1.49x | 9184 MB/s | 0.01% |
| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - | | (1 CPU) | 0.88x | 5418 MB/s | - | 0.77x | 5417 MB/s | - |
| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% | | [sofia-air-quality-dataset csv](https://files.klauspost.com/compress/sofia-air-quality-dataset.tar.zst) | 22.00x | 11477 MB/s | 18.73% | 11.15x | 5817 MB/s | 27.88% |
| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - | | (1 CPU) | 1.23x | 642 MB/s | - | 0.71x | 642 MB/s | - |
| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% | | [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 11.23x | 6520 MB/s | 5.9% | 5.35x | 3109 MB/s | 15.88% |
| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - | | (1 CPU) | 1.05x | 607 MB/s | - | 0.52x | 304 MB/s | - |
| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% | | [enwik9](https://files.klauspost.com/compress/enwik9.zst) | 19.28x | 8440 MB/s | 4.04% | 9.31x | 4076 MB/s | 18.04% |
| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - | | (1 CPU) | 1.12x | 488 MB/s | - | 0.57x | 250 MB/s | - |
### Legend ### Legend
* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core. * `S2 Speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core.
* `S2 throughput`: Throughput of S2 in MB/s. * `S2 Throughput`: Throughput of S2 in MB/s.
* `S2 % smaller`: How many percent of the Snappy output size is S2 better. * `S2 % smaller`: How many percent of the Snappy output size is S2 better.
* `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy. * `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy.
* `"better" throughput`: Speed when enabling "better" compression mode in S2 compared to Snappy. * `"better" throughput`: Speed when enabling "better" compression mode in S2 compared to Snappy.
@ -361,7 +361,7 @@ Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all th
There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads. There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads.
Machine generated data gets by far the biggest compression boost, with size being being reduced by up to 45% of Snappy size. Machine generated data gets by far the biggest compression boost, with size being reduced by up to 35% of Snappy size.
The "better" compression mode sees a good improvement in all cases, but usually at a performance cost. The "better" compression mode sees a good improvement in all cases, but usually at a performance cost.
@ -404,15 +404,15 @@ The "better" compression mode will actively look for shorter matches, which is w
Without assembly decompression is also very fast; single goroutine decompression speed. No assembly: Without assembly decompression is also very fast; single goroutine decompression speed. No assembly:
| File | S2 Throughput | S2 throughput | | File | S2 Throughput | S2 throughput |
|--------------------------------|--------------|---------------| |--------------------------------|---------------|---------------|
| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s | | consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s |
| 10gb.tar.s2 | 1.30x | 867.07 MB/s | | 10gb.tar.s2 | 1.30x | 867.07 MB/s |
| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s | | rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s |
| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s | | github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s |
| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s | | github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s |
| enwik9.s2 | 1.67x | 681.53 MB/s | | enwik9.s2 | 1.67x | 681.53 MB/s |
| adresser.json.s2 | 3.41x | 4230.53 MB/s | | adresser.json.s2 | 3.41x | 4230.53 MB/s |
| silesia.tar.s2 | 1.52x | 811.58 | | silesia.tar.s2 | 1.52x | 811.58 |
Even though S2 typically compresses better than Snappy, decompression speed is always better. Even though S2 typically compresses better than Snappy, decompression speed is always better.
@ -450,14 +450,14 @@ The most reliable is a wide dataset.
For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
53927 files, total input size: 4,014,735,833 bytes. Single goroutine used. 53927 files, total input size: 4,014,735,833 bytes. Single goroutine used.
| * | Input | Output | Reduction | MB/s | | * | Input | Output | Reduction | MB/s |
|-------------------|------------|------------|-----------|--------| |-------------------|------------|------------|------------|------------|
| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** | | S2 | 4014735833 | 1059723369 | 73.60% | **936.73** |
| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 | | S2 Better | 4014735833 | 961580539 | 76.05% | 451.10 |
| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 | | S2 Best | 4014735833 | 899182886 | **77.60%** | 46.84 |
| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 | | Snappy | 4014735833 | 1128706759 | 71.89% | 790.15 |
| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 | | S2, Snappy Output | 4014735833 | 1093823291 | 72.75% | 936.60 |
| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 | | LZ4 | 4014735833 | 1063768713 | 73.50% | 452.02 |
S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best". S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best".
"Better" mode provides the same compression speed as LZ4 with better compression ratio. "Better" mode provides the same compression speed as LZ4 with better compression ratio.
@ -489,43 +489,24 @@ AMD64 assembly is use for both S2 and Snappy.
| Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec | | Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec |
|-----------------------|-------------|---------|--------------|-------------|-------------|-------------| |-----------------------|-------------|---------|--------------|-------------|-------------|-------------|
| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s | | html | 22843 | 20868 | 16246 MB/s | 18617 MB/s | 40972 MB/s | 49263 MB/s |
| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s | | urls.10K | 335492 | 286541 | 7943 MB/s | 10201 MB/s | 22523 MB/s | 26484 MB/s |
| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s | | fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 303228 MB/s | 718321 MB/s | 827552 MB/s |
| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s | | fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 20180 MB/s | 33691 MB/s | 52421 MB/s |
| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s | | paper-100k.pdf | 85304 | 84202 | 167546 MB/s | 112988 MB/s | 326905 MB/s | 291944 MB/s |
| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s | | html_x_4 | 92234 | 20870 | 15194 MB/s | 54457 MB/s | 30843 MB/s | 32217 MB/s |
| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s | | alice29.txt | 88034 | 85934 | 5936 MB/s | 6540 MB/s | 12882 MB/s | 20044 MB/s |
| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s | | asyoulik.txt | 77503 | 79575 | 5517 MB/s | 6657 MB/s | 12735 MB/s | 22806 MB/s |
| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s | | lcet10.txt | 234661 | 220383 | 6235 MB/s | 6303 MB/s | 14519 MB/s | 18697 MB/s |
| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s | | plrabn12.txt | 319267 | 318196 | 5159 MB/s | 6074 MB/s | 11923 MB/s | 19901 MB/s |
| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s | | geo.protodata | 23335 | 18606 | 21220 MB/s | 25432 MB/s | 56271 MB/s | 62540 MB/s |
| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s | | kppkn.gtb | 69526 | 65019 | 9732 MB/s | 8905 MB/s | 18491 MB/s | 18969 MB/s |
| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s | | alice29.txt (128B) | 80 | 82 | 6691 MB/s | 17179 MB/s | 31883 MB/s | 38874 MB/s |
| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s | | alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13273 MB/s | 48056 MB/s | 52341 MB/s |
| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s | | alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12824 MB/s | 32378 MB/s | 46322 MB/s |
| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s | | alice29.txt (20000B) | 12686 | 13516 | 7733 MB/s | 12160 MB/s | 30566 MB/s | 58969 MB/s |
| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed |
|-----------------------|-------------|------------------|----------|--------------|
| html | 22.31% | 7.58% | 1.07x | 1.20x |
| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x |
| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x |
| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x |
| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 0.89x |
| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x |
| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x |
| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x |
| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x |
| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x |
| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x |
| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x |
| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x |
| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x |
| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x |
| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x |
Speed is generally at or above Snappy. Small blocks gets a significant speedup, although at the expense of size. Speed is generally at or above Snappy. Small blocks gets a significant speedup, although at the expense of size.
Decompression speed is better than Snappy, except in one case. Decompression speed is better than Snappy, except in one case.
@ -543,43 +524,24 @@ So individual benchmarks should only be seen as a guideline and the overall pict
| Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec | | Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec |
|-----------------------|-------------|-------------|--------------|--------------|-------------|-------------| |-----------------------|-------------|-------------|--------------|--------------|-------------|-------------|
| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s | | html | 22843 | 18972 | 16246 MB/s | 8621 MB/s | 40972 MB/s | 40292 MB/s |
| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s | | urls.10K | 335492 | 248079 | 7943 MB/s | 5104 MB/s | 22523 MB/s | 20981 MB/s |
| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s | | fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 84429 MB/s | 718321 MB/s | 823698 MB/s |
| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s | | fireworks.jpeg (200B) | 146 | 149 | 8869 MB/s | 7125 MB/s | 33691 MB/s | 30101 MB/s |
| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s | | paper-100k.pdf | 85304 | 82887 | 167546 MB/s | 11087 MB/s | 326905 MB/s | 198869 MB/s |
| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s | | html_x_4 | 92234 | 18982 | 15194 MB/s | 29316 MB/s | 30843 MB/s | 30937 MB/s |
| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s | | alice29.txt | 88034 | 71611 | 5936 MB/s | 3709 MB/s | 12882 MB/s | 16611 MB/s |
| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s | | asyoulik.txt | 77503 | 65941 | 5517 MB/s | 3380 MB/s | 12735 MB/s | 14975 MB/s |
| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s | | lcet10.txt | 234661 | 184939 | 6235 MB/s | 3537 MB/s | 14519 MB/s | 16634 MB/s |
| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s | | plrabn12.txt | 319267 | 264990 | 5159 MB/s | 2960 MB/s | 11923 MB/s | 13382 MB/s |
| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s | | geo.protodata | 23335 | 17689 | 21220 MB/s | 10859 MB/s | 56271 MB/s | 57961 MB/s |
| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s | | kppkn.gtb | 69526 | 55398 | 9732 MB/s | 5206 MB/s | 18491 MB/s | 16524 MB/s |
| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s | | alice29.txt (128B) | 80 | 78 | 6691 MB/s | 7422 MB/s | 31883 MB/s | 34225 MB/s |
| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s | | alice29.txt (1000B) | 774 | 746 | 12204 MB/s | 5734 MB/s | 48056 MB/s | 42068 MB/s |
| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s | | alice29.txt (10000B) | 6648 | 6218 | 10044 MB/s | 6055 MB/s | 32378 MB/s | 28813 MB/s |
| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s | | alice29.txt (20000B) | 12686 | 11492 | 7733 MB/s | 3143 MB/s | 30566 MB/s | 27315 MB/s |
| Relative Perf | Snappy size | Better size | Better Speed | Better dec |
|-----------------------|-------------|-------------|--------------|------------|
| html | 22.31% | 13.18% | 0.48x | 0.98x |
| urls.10K | 47.78% | 24.43% | 0.50x | 0.93x |
| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x |
| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x |
| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x |
| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x |
| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x |
| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x |
| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x |
| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x |
| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x |
| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x |
| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x |
| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x |
| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x |
| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x |
Except for the mostly incompressible JPEG image compression is better and usually in the Except for the mostly incompressible JPEG image compression is better and usually in the
double digits in terms of percentage reduction over Snappy. double digits in terms of percentage reduction over Snappy.
@ -605,29 +567,29 @@ Some examples compared on 16 core CPU, amd64 assembly used:
``` ```
* enwik10 * enwik10
Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s Default... 10000000000 -> 4759950115 [47.60%]; 1.03s, 9263.0MB/s
Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s Better... 10000000000 -> 4084706676 [40.85%]; 2.16s, 4415.4MB/s
Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s Best... 10000000000 -> 3615520079 [36.16%]; 42.259s, 225.7MB/s
* github-june-2days-2019.json * github-june-2days-2019.json
Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s Default... 6273951764 -> 1041700255 [16.60%]; 431ms, 13882.3MB/s
Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s Better... 6273951764 -> 945841238 [15.08%]; 547ms, 10938.4MB/s
Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s Best... 6273951764 -> 826392576 [13.17%]; 9.455s, 632.8MB/s
* nyc-taxi-data-10M.csv * nyc-taxi-data-10M.csv
Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s Default... 3325605752 -> 1093516949 [32.88%]; 324ms, 9788.7MB/s
Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s Better... 3325605752 -> 885394158 [26.62%]; 491ms, 6459.4MB/s
Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s Best... 3325605752 -> 773681257 [23.26%]; 8.29s, 412.0MB/s
* 10gb.tar * 10gb.tar
Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s Default... 10065157632 -> 5915541066 [58.77%]; 1.028s, 9337.4MB/s
Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s Better... 10065157632 -> 5453844650 [54.19%]; 1.597s, 4862.7MB/s
Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/ Best... 10065157632 -> 5192495021 [51.59%]; 32.78s, 308.2MB/
* consensus.db.10gb * consensus.db.10gb
Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s Default... 10737418240 -> 4549762344 [42.37%]; 882ms, 12118.4MB/s
Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s Better... 10737418240 -> 4438535064 [41.34%]; 1.533s, 3500.9MB/s
Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s
``` ```
Decompression speed should be around the same as using the 'better' compression mode. Decompression speed should be around the same as using the 'better' compression mode.
@ -648,10 +610,10 @@ If you would like more control, you can use the s2 package as described below:
Snappy compatible blocks can be generated with the S2 encoder. Snappy compatible blocks can be generated with the S2 encoder.
Compression and speed is typically a bit better `MaxEncodedLen` is also smaller for smaller memory usage. Replace Compression and speed is typically a bit better `MaxEncodedLen` is also smaller for smaller memory usage. Replace
| Snappy | S2 replacement | | Snappy | S2 replacement |
|----------------------------|-------------------------| |---------------------------|-----------------------|
| snappy.Encode(...) | s2.EncodeSnappy(...) | | snappy.Encode(...) | s2.EncodeSnappy(...) |
| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) | | snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) |
`s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output. `s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output.
@ -660,12 +622,12 @@ Compression and speed is typically a bit better `MaxEncodedLen` is also smaller
Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used: 53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used:
| Encoder | Size | MB/s | Reduction | | Encoder | Size | MB/s | Reduction |
|-----------------------|------------|------------|------------ |-----------------------|------------|------------|------------|
| snappy.Encode | 1128706759 | 725.59 | 71.89% | | snappy.Encode | 1128706759 | 725.59 | 71.89% |
| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% | | s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% |
| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% | | s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% |
| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**| | s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%** |
## Streams ## Streams
@ -835,6 +797,13 @@ This is done using the regular "Skip" function:
This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset. This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset.
# Compact storage
For compact storage [RemoveIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RemoveIndexHeaders) can be used to remove any redundant info from
a serialized index. If you remove the header it must be restored before [Loading](https://pkg.go.dev/github.com/klauspost/compress/s2#Index.Load).
This is expected to save 20 bytes. These can be restored using [RestoreIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RestoreIndexHeaders). This removes a layer of security, but is the most compact representation. Returns nil if headers contains errors.
## Index Format: ## Index Format:
Each block is structured as a snappy skippable block, with the chunk ID 0x99. Each block is structured as a snappy skippable block, with the chunk ID 0x99.
@ -844,20 +813,20 @@ The block can be read from the front, but contains information so it can be read
Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding), Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding),
with un-encoded value length of 64 bits, unless other limits are specified. with un-encoded value length of 64 bits, unless other limits are specified.
| Content | Format | | Content | Format |
|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| |--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|
| ID, `[1]byte` | Always 0x99. | | ID, `[1]byte` | Always 0x99. |
| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. | | Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. |
| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". | | Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". |
| UncompressedSize, Varint | Total Uncompressed size. | | UncompressedSize, Varint | Total Uncompressed size. |
| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. | | CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. |
| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. | | EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. |
| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. | | Entries, Varint | Number of Entries in index, must be < 65536 and >=0. |
| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. | | HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. |
| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. | | UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. |
| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. | | CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. |
| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. | | Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. |
| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. | | Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. |
For regular streams the uncompressed offsets are fully predictable, For regular streams the uncompressed offsets are fully predictable,
so `HasUncompressedOffsets` allows to specify that compressed blocks all have so `HasUncompressedOffsets` allows to specify that compressed blocks all have
@ -929,6 +898,7 @@ To decode from any given uncompressed offset `(wantOffset)`:
See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface. See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface.
# Format Extensions # Format Extensions
* Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`. * Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.
@ -951,10 +921,11 @@ The length is specified by reading the 3-bit length specified in the tag and dec
| 7 | 65540 + read 3 bytes | | 7 | 65540 + read 3 bytes |
This allows any repeat offset + length to be represented by 2 to 5 bytes. This allows any repeat offset + length to be represented by 2 to 5 bytes.
It also allows to emit matches longer than 64 bytes with one copy + one repeat instead of several 64 byte copies.
Lengths are stored as little endian values. Lengths are stored as little endian values.
The first copy of a block cannot be a repeat offset and the offset is not carried across blocks in streams. The first copy of a block cannot be a repeat offset and the offset is reset on every block in streams.
Default streaming block size is 1MB. Default streaming block size is 1MB.

View file

@ -952,7 +952,11 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
// Seek allows seeking in compressed data. // Seek allows seeking in compressed data.
func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) { func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
if r.err != nil { if r.err != nil {
return 0, r.err if !errors.Is(r.err, io.EOF) {
return 0, r.err
}
// Reset on EOF
r.err = nil
} }
if offset == 0 && whence == io.SeekCurrent { if offset == 0 && whence == io.SeekCurrent {
return r.blockStart + int64(r.i), nil return r.blockStart + int64(r.i), nil

View file

@ -28,6 +28,9 @@ func s2Decode(dst, src []byte) int {
// As long as we can read at least 5 bytes... // As long as we can read at least 5 bytes...
for s < len(src)-5 { for s < len(src)-5 {
// Removing bounds checks is SLOWER, when if doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 { switch src[s] & 0x03 {
case tagLiteral: case tagLiteral:
x := uint32(src[s] >> 2) x := uint32(src[s] >> 2)
@ -38,14 +41,19 @@ func s2Decode(dst, src []byte) int {
s += 2 s += 2
x = uint32(src[s-1]) x = uint32(src[s-1])
case x == 61: case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3 s += 3
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62: case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4 s += 4
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63: case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5 s += 5
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
} }
length = int(x) + 1 length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) { if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
@ -62,8 +70,8 @@ func s2Decode(dst, src []byte) int {
case tagCopy1: case tagCopy1:
s += 2 s += 2
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 { if toffset == 0 {
if debug { if debug {
fmt.Print("(repeat) ") fmt.Print("(repeat) ")
@ -71,14 +79,16 @@ func s2Decode(dst, src []byte) int {
// keep last offset // keep last offset
switch length { switch length {
case 5: case 5:
length = int(src[s]) + 4
s += 1 s += 1
length = int(uint32(src[s-1])) + 4
case 6: case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2 s += 2
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7: case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3 s += 3
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4 default: // 0-> 4
} }
} else { } else {
@ -86,14 +96,16 @@ func s2Decode(dst, src []byte) int {
} }
length += 4 length += 4
case tagCopy2: case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3 s += 3
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4: case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5 s += 5
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
} }
if offset <= 0 || d < offset || length > len(dst)-d { if offset <= 0 || d < offset || length > len(dst)-d {

View file

@ -58,8 +58,9 @@ func encodeGo(dst, src []byte) []byte {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockGo(dst, src []byte) (d int) { func encodeBlockGo(dst, src []byte) (d int) {
// Initialize the hash table. // Initialize the hash table.
const ( const (

View file

@ -8,8 +8,9 @@ package s2
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) { func encodeBlock(dst, src []byte) (d int) {
const ( const (
// Use 12 bit table when less than... // Use 12 bit table when less than...
@ -43,8 +44,9 @@ func encodeBlock(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetter(dst, src []byte) (d int) { func encodeBlockBetter(dst, src []byte) (d int) {
const ( const (
// Use 12 bit table when less than... // Use 12 bit table when less than...
@ -78,8 +80,9 @@ func encodeBlockBetter(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockSnappy(dst, src []byte) (d int) { func encodeBlockSnappy(dst, src []byte) (d int) {
const ( const (
// Use 12 bit table when less than... // Use 12 bit table when less than...
@ -112,8 +115,9 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappy(dst, src []byte) (d int) { func encodeBlockBetterSnappy(dst, src []byte) (d int) {
const ( const (
// Use 12 bit table when less than... // Use 12 bit table when less than...

View file

@ -15,8 +15,9 @@ import (
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) { func encodeBlockBest(dst, src []byte) (d int) {
// Initialize the hash tables. // Initialize the hash tables.
const ( const (
@ -176,14 +177,21 @@ func encodeBlockBest(dst, src []byte) (d int) {
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
} }
// Search for a match at best match end, see if that is better. // Search for a match at best match end, see if that is better.
if sAt := best.s + best.length; sAt < sLimit { // Allow some bytes at the beginning to mismatch.
sBack := best.s // Sweet spot is around 1-2 bytes, but depends on input.
backL := best.length // The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
const skipEnd = 1
if sAt := best.s + best.length - skipEnd; sAt < sLimit {
sBack := best.s + skipBeginning - skipEnd
backL := best.length - skipBeginning
// Load initial values // Load initial values
cv = load64(src, sBack) cv = load64(src, sBack)
// Search for mismatch
// Grab candidates...
next := lTable[hash8(load64(src, sAt), lTableBits)] next := lTable[hash8(load64(src, sAt), lTableBits)]
//next := sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 { if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
@ -191,6 +199,16 @@ func encodeBlockBest(dst, src []byte) (d int) {
if checkAt := getPrev(next) - backL; checkAt > 0 { if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
} }
// Disabled: Extremely small gain
if false {
next = sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
}
} }
} }
} }
@ -288,8 +306,9 @@ emitRemainder:
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) { func encodeBlockBestSnappy(dst, src []byte) (d int) {
// Initialize the hash tables. // Initialize the hash tables.
const ( const (
@ -546,6 +565,7 @@ emitRemainder:
// emitCopySize returns the size to encode the offset+length // emitCopySize returns the size to encode the offset+length
// //
// It assumes that: // It assumes that:
//
// 1 <= offset && offset <= math.MaxUint32 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24 // 4 <= length && length <= 1 << 24
func emitCopySize(offset, length int) int { func emitCopySize(offset, length int) int {
@ -584,6 +604,7 @@ func emitCopySize(offset, length int) int {
// emitCopyNoRepeatSize returns the size to encode the offset+length // emitCopyNoRepeatSize returns the size to encode the offset+length
// //
// It assumes that: // It assumes that:
//
// 1 <= offset && offset <= math.MaxUint32 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24 // 4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int { func emitCopyNoRepeatSize(offset, length int) int {

View file

@ -42,8 +42,9 @@ func hash8(u uint64, h uint8) uint32 {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) { func encodeBlockBetterGo(dst, src []byte) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin // sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are // lets us use a fast path for emitLiteral in the main loop, while we are
@ -56,7 +57,7 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
// Initialize the hash tables. // Initialize the hash tables.
const ( const (
// Long hash matches. // Long hash matches.
lTableBits = 16 lTableBits = 17
maxLTableSize = 1 << lTableBits maxLTableSize = 1 << lTableBits
// Short hash matches. // Short hash matches.
@ -97,9 +98,26 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
lTable[hashL] = uint32(s) lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s) sTable[hashS] = uint32(s)
valLong := load64(src, candidateL)
valShort := load64(src, candidateS)
// If long matches at least 8 bytes, use that.
if cv == valLong {
break
}
if cv == valShort {
candidateL = candidateS
break
}
// Check repeat at offset checkRep. // Check repeat at offset checkRep.
const checkRep = 1 const checkRep = 1
if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { // Minimum length of a repeat. Tested with various values.
// While 4-5 offers improvements in some, 6 reduces
// regressions significantly.
const wantRepeatBytes = 6
const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
base := s + checkRep base := s + checkRep
// Extend back // Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
@ -109,8 +127,8 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
d += emitLiteral(dst[d:], src[nextEmit:base]) d += emitLiteral(dst[d:], src[nextEmit:base])
// Extend forward // Extend forward
candidate := s - repeat + 4 + checkRep candidate := s - repeat + wantRepeatBytes + checkRep
s += 4 + checkRep s += wantRepeatBytes + checkRep
for s < len(src) { for s < len(src) {
if len(src)-s < 8 { if len(src)-s < 8 {
if src[s] == src[candidate] { if src[s] == src[candidate] {
@ -127,28 +145,40 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
s += 8 s += 8
candidate += 8 candidate += 8
} }
if nextEmit > 0 { // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. d += emitRepeat(dst[d:], repeat, s-base)
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
nextEmit = s nextEmit = s
if s >= sLimit { if s >= sLimit {
goto emitRemainder goto emitRemainder
} }
// Index in-between
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 2
index1 -= 2
}
cv = load64(src, s) cv = load64(src, s)
continue continue
} }
if uint32(cv) == load32(src, candidateL) { // Long likely matches 7, so take that.
if uint32(cv) == uint32(valLong) {
break break
} }
// Check our short candidate // Check our short candidate
if uint32(cv) == load32(src, candidateS) { if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1 // Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits) hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL]) candidateL = int(lTable[hashL])
@ -227,21 +257,29 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
// Do we have space for more, if not bail. // Do we have space for more, if not bail.
return 0 return 0
} }
// Index match start+1 (long) and start+2 (short)
// Index short & long
index0 := base + 1 index0 := base + 1
// Index match end-2 (long) and end-1 (short)
index1 := s - 2 index1 := s - 2
cv0 := load64(src, index0) cv0 := load64(src, index0)
cv1 := load64(src, index1) cv1 := load64(src, index1)
cv = load64(src, s)
lTable[hash7(cv0, lTableBits)] = uint32(index0) lTable[hash7(cv0, lTableBits)] = uint32(index0)
lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
} }
emitRemainder: emitRemainder:
@ -260,8 +298,9 @@ emitRemainder:
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) && // len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) { func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin // sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are // lets us use a fast path for emitLiteral in the main loop, while we are
@ -402,21 +441,29 @@ func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
// Do we have space for more, if not bail. // Do we have space for more, if not bail.
return 0 return 0
} }
// Index match start+1 (long) and start+2 (short)
// Index short & long
index0 := base + 1 index0 := base + 1
// Index match end-2 (long) and end-1 (short)
index1 := s - 2 index1 := s - 2
cv0 := load64(src, index0) cv0 := load64(src, index0)
cv1 := load64(src, index1) cv1 := load64(src, index1)
cv = load64(src, s)
lTable[hash7(cv0, lTableBits)] = uint32(index0) lTable[hash7(cv0, lTableBits)] = uint32(index0)
lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
} }
emitRemainder: emitRemainder:

View file

@ -12,6 +12,7 @@ import (
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) // len(dst) >= MaxEncodedLen(len(src))
func encodeBlock(dst, src []byte) (d int) { func encodeBlock(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize { if len(src) < minNonLiteralBlockSize {
@ -25,6 +26,7 @@ func encodeBlock(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) // len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetter(dst, src []byte) (d int) { func encodeBlockBetter(dst, src []byte) (d int) {
return encodeBlockBetterGo(dst, src) return encodeBlockBetterGo(dst, src)
@ -35,6 +37,7 @@ func encodeBlockBetter(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) // len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetterSnappy(dst, src []byte) (d int) { func encodeBlockBetterSnappy(dst, src []byte) (d int) {
return encodeBlockBetterSnappyGo(dst, src) return encodeBlockBetterSnappyGo(dst, src)
@ -45,6 +48,7 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) {
// been written. // been written.
// //
// It also assumes that: // It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) // len(dst) >= MaxEncodedLen(len(src))
func encodeBlockSnappy(dst, src []byte) (d int) { func encodeBlockSnappy(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize { if len(src) < minNonLiteralBlockSize {
@ -56,6 +60,7 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
// emitLiteral writes a literal chunk and returns the number of bytes written. // emitLiteral writes a literal chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
//
// dst is long enough to hold the encoded bytes // dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32 // 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteral(dst, lit []byte) int { func emitLiteral(dst, lit []byte) int {
@ -146,6 +151,7 @@ func emitRepeat(dst []byte, offset, length int) int {
// emitCopy writes a copy chunk and returns the number of bytes written. // emitCopy writes a copy chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
//
// dst is long enough to hold the encoded bytes // dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24 // 4 <= length && length <= 1 << 24
@ -214,6 +220,7 @@ func emitCopy(dst []byte, offset, length int) int {
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written. // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
//
// dst is long enough to hold the encoded bytes // dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24 // 4 <= length && length <= 1 << 24
@ -273,8 +280,8 @@ func emitCopyNoRepeat(dst []byte, offset, length int) int {
// matchLen returns how many bytes match in a and b // matchLen returns how many bytes match in a and b
// //
// It assumes that: // It assumes that:
// len(a) <= len(b)
// //
// len(a) <= len(b)
func matchLen(a []byte, b []byte) int { func matchLen(a []byte, b []byte) int {
b = b[:len(a)] b = b[:len(a)]
var checked int var checked int

View file

@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm //go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm
package s2 package s2
@ -150,8 +149,9 @@ func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written. // emitLiteral writes a literal chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
// dst is long enough to hold the encoded bytes with margin of 0 bytes //
// 0 <= len(lit) && len(lit) <= math.MaxUint32 // dst is long enough to hold the encoded bytes with margin of 0 bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
// //
//go:noescape //go:noescape
func emitLiteral(dst []byte, lit []byte) int func emitLiteral(dst []byte, lit []byte) int
@ -165,9 +165,10 @@ func emitRepeat(dst []byte, offset int, length int) int
// emitCopy writes a copy chunk and returns the number of bytes written. // emitCopy writes a copy chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
// dst is long enough to hold the encoded bytes //
// 1 <= offset && offset <= math.MaxUint32 // dst is long enough to hold the encoded bytes
// 4 <= length && length <= 1 << 24 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
// //
//go:noescape //go:noescape
func emitCopy(dst []byte, offset int, length int) int func emitCopy(dst []byte, offset int, length int) int
@ -175,9 +176,10 @@ func emitCopy(dst []byte, offset int, length int) int
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written. // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
// //
// It assumes that: // It assumes that:
// dst is long enough to hold the encoded bytes //
// 1 <= offset && offset <= math.MaxUint32 // dst is long enough to hold the encoded bytes
// 4 <= length && length <= 1 << 24 // 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
// //
//go:noescape //go:noescape
func emitCopyNoRepeat(dst []byte, offset int, length int) int func emitCopyNoRepeat(dst []byte, offset int, length int) int
@ -185,7 +187,8 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int
// matchLen returns how many bytes match in a and b // matchLen returns how many bytes match in a and b
// //
// It assumes that: // It assumes that:
// len(a) <= len(b) //
// len(a) <= len(b)
// //
//go:noescape //go:noescape
func matchLen(a []byte, b []byte) int func matchLen(a []byte, b []byte) int

File diff suppressed because it is too large Load diff

View file

@ -16,10 +16,17 @@ Package home: https://github.com/klauspost/cpuid
## installing ## installing
`go get -u github.com/klauspost/cpuid/v2` using modules. `go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others. Drop `v2` for others.
### Homebrew
For macOS/Linux users, you can install via [brew](https://brew.sh/)
```sh
$ brew install cpuid
```
## example ## example
```Go ```Go
@ -77,10 +84,14 @@ We have Streaming SIMD 2 Extensions
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
This can be using with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
Note that for some cpu/os combinations some features will not be detected. Note that for some cpu/os combinations some features will not be detected.
`amd64` has rather good support and should work reliably on all platforms. `amd64` has rather good support and should work reliably on all platforms.
Note that hypervisors may not pass through all CPU features. Note that hypervisors may not pass through all CPU features through to the guest OS,
so even if your host supports a feature it may not be visible on guests.
## arm64 feature detection ## arm64 feature detection
@ -253,6 +264,218 @@ Exit Code 0
Exit Code 1 Exit Code 1
``` ```
## Available flags
### x86 & amd64
| Feature Flag | Description |
|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) |
| AESNI | Advanced Encryption Standard New Instructions |
| AMD3DNOW | AMD 3DNOW |
| AMD3DNOWEXT | AMD 3DNowExt |
| AMXBF16 | Tile computational operations on BFLOAT16 numbers |
| AMXINT8 | Tile computational operations on 8-bit integers |
| AMXFP16 | Tile computational operations on FP16 numbers |
| AMXTILE | Tile architecture |
| AVX | AVX functions |
| AVX2 | AVX2 functions |
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
| AVX512BITALG | AVX-512 Bit Algorithms |
| AVX512BW | AVX-512 Byte and Word Instructions |
| AVX512CD | AVX-512 Conflict Detection Instructions |
| AVX512DQ | AVX-512 Doubleword and Quadword Instructions |
| AVX512ER | AVX-512 Exponential and Reciprocal Instructions |
| AVX512F | AVX-512 Foundation |
| AVX512FP16 | AVX-512 FP16 Instructions |
| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions |
| AVX512PF | AVX-512 Prefetch Instructions |
| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions |
| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 |
| AVX512VL | AVX-512 Vector Length Extensions |
| AVX512VNNI | AVX-512 Vector Neural Network Instructions |
| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q |
| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword |
| AVXIFMA | AVX-IFMA instructions |
| AVXNECONVERT | AVX-NE-CONVERT instructions |
| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one |
| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions |
| AVXVNNIINT8 | AVX-VNNI-INT8 instructions |
| BMI1 | Bit Manipulation Instruction Set 1 |
| BMI2 | Bit Manipulation Instruction Set 2 |
| CETIBT | Intel CET Indirect Branch Tracking |
| CETSS | Intel CET Shadow Stack |
| CLDEMOTE | Cache Line Demote |
| CLMUL | Carry-less Multiplication |
| CLZERO | CLZERO instruction supported |
| CMOV | i686 CMOV |
| CMPCCXADD | CMPCCXADD instructions |
| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB |
| CMPXCHG8 | CMPXCHG8 instruction |
| CPBOOST | Core Performance Boost |
| CPPC | AMD: Collaborative Processor Performance Control |
| CX16 | CMPXCHG16B Instruction |
| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ |
| ENQCMD | Enqueue Command |
| ERMS | Enhanced REP MOVSB/STOSB |
| F16C | Half-precision floating-point conversion |
| FLUSH_L1D | Flush L1D cache |
| FMA3 | Intel FMA 3. Does not imply AVX. |
| FMA4 | Bulldozer FMA4 functions |
| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide |
| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide |
| FSRM | Fast Short Rep Mov |
| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 |
| FXSROPT | FXSAVE/FXRSTOR optimizations |
| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. |
| HLE | Hardware Lock Elision |
| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR |
| HTT | Hyperthreading (enabled) |
| HWA | Hardware assert supported. Indicates support for MSRC001_10 |
| HYBRID_CPU | This part has CPUs of more than one type. |
| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors |
| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) |
| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR |
| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) |
| IBRS | AMD: Indirect Branch Restricted Speculation |
| IBRS_PREFERRED | AMD: IBRS is preferred over software solution |
| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection |
| IBS | Instruction Based Sampling (AMD) |
| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) |
| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) |
| IBSFFV | Instruction Based Sampling Feature (AMD) |
| IBSOPCNT | Instruction Based Sampling Feature (AMD) |
| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) |
| IBSOPSAM | Instruction Based Sampling Feature (AMD) |
| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) |
| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) |
| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported |
| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported |
| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse |
| IBS_PREVENTHOST | Disallowing IBS use by the host supported |
| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 |
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
| INVLPGB | NVLPGB and TLBSYNC instruction supported |
| LAHF | LAHF/SAHF in long mode |
| LAM | If set, CPU supports Linear Address Masking |
| LBRVIRT | LBR virtualization |
| LZCNT | LZCNT instruction |
| MCAOVERFLOW | MCA overflow recovery support. |
| MCDT_NO | Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. |
| MCOMMIT | MCOMMIT instruction supported |
| MD_CLEAR | VERW clears CPU buffers |
| MMX | standard MMX |
| MMXEXT | SSE integer functions or AMD MMX ext |
| MOVBE | MOVBE instruction (big-endian) |
| MOVDIR64B | Move 64 Bytes as Direct Store |
| MOVDIRI | Move Doubleword as Direct Store |
| MOVSB_ZL | Fast Zero-Length MOVSB |
| MPX | Intel MPX (Memory Protection Extensions) |
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
| MSRIRC | Instruction Retired Counter MSR available |
| MSR_PAGEFLUSH | Page Flush MSR available |
| NRIPS | Indicates support for NRIP save on VMEXIT |
| NX | NX (No-Execute) bit |
| OSXSAVE | XSAVE enabled by OS |
| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption |
| POPCNT | POPCNT instruction |
| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled |
| PREFETCHI | PREFETCHIT0/1 instructions |
| PSFD | AMD: Predictive Store Forward Disable |
| RDPRU | RDPRU instruction supported |
| RDRAND | RDRAND instruction is available |
| RDSEED | RDSEED instruction is available |
| RDTSCP | RDTSCP Instruction |
| RTM | Restricted Transactional Memory |
| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. |
| SERIALIZE | Serialize Instruction Execution |
| SEV | AMD Secure Encrypted Virtualization supported |
| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host |
| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported |
| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests |
| SEV_ES | AMD SEV Encrypted State supported |
| SEV_RESTRICTED | AMD SEV Restricted Injection supported |
| SEV_SNP | AMD SEV Secure Nested Paging supported |
| SGX | Software Guard Extensions |
| SGXLC | Software Guard Extensions Launch Control |
| SHA | Intel SHA Extensions |
| SME | AMD Secure Memory Encryption supported |
| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced |
| SPEC_CTRL_SSBD | Speculative Store Bypass Disable |
| SRBDS_CTRL | SRBDS mitigation MSR available |
| SSE | SSE functions |
| SSE2 | P4 SSE functions |
| SSE3 | Prescott SSE3 functions |
| SSE4 | Penryn SSE4.1 functions |
| SSE42 | Nehalem SSE4.2 functions |
| SSE4A | AMD Barcelona microarchitecture SSE4a instructions |
| SSSE3 | Conroe SSSE3 functions |
| STIBP | Single Thread Indirect Branch Predictors |
| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On |
| STOSB_SHORT | Fast short STOSB |
| SUCCOR | Software uncorrectable error containment and recovery capability. |
| SVM | AMD Secure Virtual Machine |
| SVMDA | Indicates support for the SVM decode assists. |
| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
| SVML | AMD SVM lock. Indicates support for SVM-Lock. |
| SVMNP | AMD SVM nested paging |
| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter |
| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold |
| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
| SYSEE | SYSENTER and SYSEXIT instructions |
| TBM | AMD Trailing Bit Manipulation |
| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 |
| TSXLDTRK | Intel TSX Suspend Load Address Tracking |
| VAES | Vector AES. AVX(512) versions requires additional checks. |
| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. |
| VMPL | AMD VM Permission Levels supported |
| VMSA_REGPROT | AMD VMSA Register Protection supported |
| VMX | Virtual Machine Extensions |
| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. |
| VTE | AMD Virtual Transparent Encryption supported |
| WAITPKG | TPAUSE, UMONITOR, UMWAIT |
| WBNOINVD | Write Back and Do Not Invalidate Cache |
| X87 | FPU |
| XGETBV1 | Supports XGETBV with ECX = 1 |
| XOP | Bulldozer XOP functions |
| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV |
| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. |
| XSAVEOPT | XSAVEOPT available |
| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS |
# ARM features:
| Feature Flag | Description |
|--------------|------------------------------------------------------------------|
| AESARM | AES instructions |
| ARMCPUID | Some CPU ID registers readable at user-level |
| ASIMD | Advanced SIMD |
| ASIMDDP | SIMD Dot Product |
| ASIMDHP | Advanced SIMD half-precision floating point |
| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
| ATOMICS | Large System Extensions (LSE) |
| CRC32 | CRC32/CRC32C instructions |
| DCPOP | Data cache clean to Point of Persistence (DC CVAP) |
| EVTSTRM | Generic timer |
| FCMA | Floatin point complex number addition and multiplication |
| FP | Single-precision and double-precision floating point |
| FPHP | Half-precision floating point |
| GPA | Generic Pointer Authentication |
| JSCVT | Javascript-style double->int convert (FJCVTZS) |
| LRCPC | Weaker release consistency (LDAPR, etc) |
| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) |
| SHA1 | SHA-1 instructions (SHA1C, etc) |
| SHA2 | SHA-2 instructions (SHA256H, etc) |
| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) |
| SHA512 | SHA512 instructions |
| SM3 | SM3 instructions |
| SM4 | SM4 instructions |
| SVE | Scalable Vector Extension |
# license # license
This code is published under an MIT license. See LICENSE file for more information. This code is published under an MIT license. See LICENSE file for more information.

View file

@ -73,6 +73,7 @@ const (
AMD3DNOW // AMD 3DNOW AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers AMXBF16 // Tile computational operations on BFLOAT16 numbers
AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture AMXTILE // Tile architecture
AVX // AVX functions AVX // AVX functions
@ -93,8 +94,11 @@ const (
AVX512VNNI // AVX-512 Vector Neural Network Instructions AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
AVXIFMA // AVX-IFMA instructions
AVXNECONVERT // AVX-NE-CONVERT instructions
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BMI1 // Bit Manipulation Instruction Set 1 BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2 BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking CETIBT // Intel CET Indirect Branch Tracking
@ -103,15 +107,22 @@ const (
CLMUL // Carry-less Multiplication CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported CLZERO // CLZERO instruction supported
CMOV // i686 CMOV CMOV // i686 CMOV
CMPCCXADD // CMPCCXADD instructions
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost CPBOOST // Core Performance Boost
CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction CX16 // CMPXCHG16B Instruction
EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
ENQCMD // Enqueue Command ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion F16C // Half-precision floating-point conversion
FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX. FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions FMA4 // Bulldozer FMA4 functions
FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations FXSROPT // FXSAVE/FXRSTOR optimizations
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
@ -119,8 +130,14 @@ const (
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled) HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10 HWA // Hardware assert supported. Indicates support for MSRC001_10
HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
IBRS // AMD: Indirect Branch Restricted Speculation
IBRS_PREFERRED // AMD: IBRS is preferred over software solution
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD) IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD) IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD) IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
@ -130,7 +147,11 @@ const (
IBSOPSAM // Instruction Based Sampling Feature (AMD) IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
IBS_PREVENTHOST // Disallowing IBS use by the host supported IBS_PREVENTHOST // Disallowing IBS use by the host supported
IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
INT_WBINVD // WBINVD/WBNOINVD are interruptible. INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // NVLPGB and TLBSYNC instruction supported INVLPGB // NVLPGB and TLBSYNC instruction supported
LAHF // LAHF/SAHF in long mode LAHF // LAHF/SAHF in long mode
@ -138,13 +159,16 @@ const (
LBRVIRT // LBR virtualization LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support. MCAOVERFLOW // MCA overflow recovery support.
MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported MCOMMIT // MCOMMIT instruction supported
MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian) MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store MOVDIRI // Move Doubleword as Direct Store
MOVSB_ZL // Fast Zero-Length MOVSB MOVSB_ZL // Fast Zero-Length MOVSB
MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions) MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available MSRIRC // Instruction Retired Counter MSR available
MSR_PAGEFLUSH // Page Flush MSR available MSR_PAGEFLUSH // Page Flush MSR available
@ -153,6 +177,9 @@ const (
OSXSAVE // XSAVE enabled by OS OSXSAVE // XSAVE enabled by OS
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction POPCNT // POPCNT instruction
PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
PREFETCHI // PREFETCHIT0/1 instructions
PSFD // AMD: Predictive Store Forward Disable
RDPRU // RDPRU instruction supported RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available RDSEED // RDSEED instruction is available
@ -172,6 +199,8 @@ const (
SHA // Intel SHA Extensions SHA // Intel SHA Extensions
SME // AMD Secure Memory Encryption supported SME // AMD Secure Memory Encryption supported
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
SPEC_CTRL_SSBD // Speculative Store Bypass Disable
SRBDS_CTRL // SRBDS mitigation MSR available
SSE // SSE functions SSE // SSE functions
SSE2 // P4 SSE functions SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions SSE3 // Prescott SSE3 functions
@ -180,6 +209,7 @@ const (
SSE4A // AMD Barcelona microarchitecture SSE4a instructions SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors STIBP // Single Thread Indirect Branch Predictors
STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
STOSB_SHORT // Fast short STOSB STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability. SUCCOR // Software uncorrectable error containment and recovery capability.
SVM // AMD Secure Virtual Machine SVM // AMD Secure Virtual Machine
@ -192,8 +222,9 @@ const (
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation TBM // AMD Trailing Bit Manipulation
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking TSXLDTRK // Intel TSX Suspend Load Address Tracking
VAES // Vector AES. AVX(512) versions requires additional checks. VAES // Vector AES. AVX(512) versions requires additional checks.
@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {
// Has allows for checking a single feature. // Has allows for checking a single feature.
// Should be inlined by the compiler. // Should be inlined by the compiler.
func (c CPUInfo) Has(id FeatureID) bool { func (c *CPUInfo) Has(id FeatureID) bool {
return c.featureSet.inSet(id) return c.featureSet.inSet(id)
} }
@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
return false return false
} }
// Features contains several features combined for a fast check using
// CpuInfo.HasAll
type Features *flagSet
// CombineFeatures allows to combine several features for a close to constant time lookup.
func CombineFeatures(ids ...FeatureID) Features {
var v flagSet
for _, id := range ids {
v.set(id)
}
return &v
}
func (c *CPUInfo) HasAll(f Features) bool {
return c.featureSet.hasSetP(f)
}
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2) var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
// X64Level returns the microarchitecture level detected on the CPU. // X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned. // If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
func (c CPUInfo) X64Level() int { func (c CPUInfo) X64Level() int {
if c.featureSet.hasSet(level4Features) { if !c.featureSet.hasOneOf(oneOfLevel) {
return 0
}
if c.featureSet.hasSetP(level4Features) {
return 4 return 4
} }
if c.featureSet.hasSet(level3Features) { if c.featureSet.hasSetP(level3Features) {
return 3 return 3
} }
if c.featureSet.hasSet(level2Features) { if c.featureSet.hasSetP(level2Features) {
return 2 return 2
} }
if c.featureSet.hasSet(level1Features) { if c.featureSet.hasSetP(level1Features) {
return 1 return 1
} }
return 0 return 0
@ -555,7 +607,7 @@ const flagMask = flagBits - 1
// flagSet contains detected cpu features and characteristics in an array of flags // flagSet contains detected cpu features and characteristics in an array of flags
type flagSet [(lastID + flagMask) / flagBits]flags type flagSet [(lastID + flagMask) / flagBits]flags
func (s flagSet) inSet(feat FeatureID) bool { func (s *flagSet) inSet(feat FeatureID) bool {
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
} }
@ -585,7 +637,7 @@ func (s *flagSet) or(other flagSet) {
} }
// hasSet returns whether all features are present. // hasSet returns whether all features are present.
func (s flagSet) hasSet(other flagSet) bool { func (s *flagSet) hasSet(other flagSet) bool {
for i, v := range other[:] { for i, v := range other[:] {
if s[i]&v != v { if s[i]&v != v {
return false return false
@ -594,8 +646,28 @@ func (s flagSet) hasSet(other flagSet) bool {
return true return true
} }
// hasSet returns whether all features are present.
func (s *flagSet) hasSetP(other *flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
}
}
return true
}
// hasOneOf returns whether one or more features are present.
func (s *flagSet) hasOneOf(other *flagSet) bool {
for i, v := range other[:] {
if s[i]&v != 0 {
return true
}
}
return false
}
// nEnabled will return the number of enabled flags. // nEnabled will return the number of enabled flags.
func (s flagSet) nEnabled() (n int) { func (s *flagSet) nEnabled() (n int) {
for _, v := range s[:] { for _, v := range s[:] {
n += bits.OnesCount64(uint64(v)) n += bits.OnesCount64(uint64(v))
} }
@ -1093,21 +1165,36 @@ func support() flagSet {
fs.setIf(ecx&(1<<30) != 0, SGXLC) fs.setIf(ecx&(1<<30) != 0, SGXLC)
// CPUID.(EAX=7, ECX=0).EDX // CPUID.(EAX=7, ECX=0).EDX
fs.setIf(edx&(1<<4) != 0, FSRM)
fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
fs.setIf(edx&(1<<14) != 0, SERIALIZE) fs.setIf(edx&(1<<14) != 0, SERIALIZE)
fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
fs.setIf(edx&(1<<16) != 0, TSXLDTRK) fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
fs.setIf(edx&(1<<18) != 0, PCONFIG) fs.setIf(edx&(1<<18) != 0, PCONFIG)
fs.setIf(edx&(1<<20) != 0, CETIBT) fs.setIf(edx&(1<<20) != 0, CETIBT)
fs.setIf(edx&(1<<26) != 0, IBPB) fs.setIf(edx&(1<<26) != 0, IBPB)
fs.setIf(edx&(1<<27) != 0, STIBP) fs.setIf(edx&(1<<27) != 0, STIBP)
fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
// CPUID.(EAX=7, ECX=1) // CPUID.(EAX=7, ECX=1).EDX
fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
fs.setIf(edx&(1<<14) != 0, PREFETCHI)
// CPUID.(EAX=7, ECX=1).EAX
eax1, _, _, _ := cpuidex(7, 1) eax1, _, _, _ := cpuidex(7, 1)
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
fs.setIf(eax1&(1<<22) != 0, HRESET) fs.setIf(eax1&(1<<22) != 0, HRESET)
fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
fs.setIf(eax1&(1<<26) != 0, LAM) fs.setIf(eax1&(1<<26) != 0, LAM)
// Only detect AVX-512 features if XGETBV is supported // Only detect AVX-512 features if XGETBV is supported
@ -1145,9 +1232,15 @@ func support() flagSet {
fs.setIf(edx&(1<<25) != 0, AMXINT8) fs.setIf(edx&(1<<25) != 0, AMXINT8)
// eax1 = CPUID.(EAX=7, ECX=1).EAX // eax1 = CPUID.(EAX=7, ECX=1).EAX
fs.setIf(eax1&(1<<5) != 0, AVX512BF16) fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
fs.setIf(eax1&(1<<21) != 0, AMXFP16)
} }
} }
// CPUID.(EAX=7, ECX=2)
_, _, _, edx = cpuidex(7, 2)
fs.setIf(edx&(1<<5) != 0, MCDT_NO)
} }
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
// EAX // EAX
// Bit 00: XSAVEOPT is available. // Bit 00: XSAVEOPT is available.
@ -1212,9 +1305,21 @@ func support() flagSet {
if maxExtendedFunction() >= 0x80000008 { if maxExtendedFunction() >= 0x80000008 {
_, b, _, _ := cpuid(0x80000008) _, b, _, _ := cpuid(0x80000008)
fs.setIf(b&(1<<28) != 0, PSFD)
fs.setIf(b&(1<<27) != 0, CPPC)
fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
fs.setIf(b&(1<<23) != 0, PPIN)
fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
fs.setIf(b&(1<<15) != 0, STIBP)
fs.setIf(b&(1<<14) != 0, IBRS)
fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
fs.setIf(b&(1<<12) != 0, IBPB)
fs.setIf((b&(1<<9)) != 0, WBNOINVD) fs.setIf((b&(1<<9)) != 0, WBNOINVD)
fs.setIf((b&(1<<8)) != 0, MCOMMIT) fs.setIf((b&(1<<8)) != 0, MCOMMIT)
fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
fs.setIf((b&(1<<4)) != 0, RDPRU) fs.setIf((b&(1<<4)) != 0, RDPRU)
fs.setIf((b&(1<<3)) != 0, INVLPGB) fs.setIf((b&(1<<3)) != 0, INVLPGB)
fs.setIf((b&(1<<1)) != 0, MSRIRC) fs.setIf((b&(1<<1)) != 0, MSRIRC)
@ -1235,6 +1340,13 @@ func support() flagSet {
fs.setIf((edx>>12)&1 == 1, SVMPFT) fs.setIf((edx>>12)&1 == 1, SVMPFT)
} }
if maxExtendedFunction() >= 0x8000001a {
eax, _, _, _ := cpuid(0x8000001a)
fs.setIf((eax>>0)&1 == 1, FP128)
fs.setIf((eax>>1)&1 == 1, MOVU)
fs.setIf((eax>>2)&1 == 1, FP256)
}
if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
eax, _, _, _ := cpuid(0x8000001b) eax, _, _, _ := cpuid(0x8000001b)
fs.setIf((eax>>0)&1 == 1, IBSFFV) fs.setIf((eax>>0)&1 == 1, IBSFFV)
@ -1245,6 +1357,10 @@ func support() flagSet {
fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
} }
if maxExtendedFunction() >= 0x8000001f && vend == AMD { if maxExtendedFunction() >= 0x8000001f && vend == AMD {

View file

@ -13,176 +13,207 @@ func _() {
_ = x[AMD3DNOW-3] _ = x[AMD3DNOW-3]
_ = x[AMD3DNOWEXT-4] _ = x[AMD3DNOWEXT-4]
_ = x[AMXBF16-5] _ = x[AMXBF16-5]
_ = x[AMXINT8-6] _ = x[AMXFP16-6]
_ = x[AMXTILE-7] _ = x[AMXINT8-7]
_ = x[AVX-8] _ = x[AMXTILE-8]
_ = x[AVX2-9] _ = x[AVX-9]
_ = x[AVX512BF16-10] _ = x[AVX2-10]
_ = x[AVX512BITALG-11] _ = x[AVX512BF16-11]
_ = x[AVX512BW-12] _ = x[AVX512BITALG-12]
_ = x[AVX512CD-13] _ = x[AVX512BW-13]
_ = x[AVX512DQ-14] _ = x[AVX512CD-14]
_ = x[AVX512ER-15] _ = x[AVX512DQ-15]
_ = x[AVX512F-16] _ = x[AVX512ER-16]
_ = x[AVX512FP16-17] _ = x[AVX512F-17]
_ = x[AVX512IFMA-18] _ = x[AVX512FP16-18]
_ = x[AVX512PF-19] _ = x[AVX512IFMA-19]
_ = x[AVX512VBMI-20] _ = x[AVX512PF-20]
_ = x[AVX512VBMI2-21] _ = x[AVX512VBMI-21]
_ = x[AVX512VL-22] _ = x[AVX512VBMI2-22]
_ = x[AVX512VNNI-23] _ = x[AVX512VL-23]
_ = x[AVX512VP2INTERSECT-24] _ = x[AVX512VNNI-24]
_ = x[AVX512VPOPCNTDQ-25] _ = x[AVX512VP2INTERSECT-25]
_ = x[AVXSLOW-26] _ = x[AVX512VPOPCNTDQ-26]
_ = x[AVXVNNI-27] _ = x[AVXIFMA-27]
_ = x[BMI1-28] _ = x[AVXNECONVERT-28]
_ = x[BMI2-29] _ = x[AVXSLOW-29]
_ = x[CETIBT-30] _ = x[AVXVNNI-30]
_ = x[CETSS-31] _ = x[AVXVNNIINT8-31]
_ = x[CLDEMOTE-32] _ = x[BMI1-32]
_ = x[CLMUL-33] _ = x[BMI2-33]
_ = x[CLZERO-34] _ = x[CETIBT-34]
_ = x[CMOV-35] _ = x[CETSS-35]
_ = x[CMPSB_SCADBS_SHORT-36] _ = x[CLDEMOTE-36]
_ = x[CMPXCHG8-37] _ = x[CLMUL-37]
_ = x[CPBOOST-38] _ = x[CLZERO-38]
_ = x[CX16-39] _ = x[CMOV-39]
_ = x[ENQCMD-40] _ = x[CMPCCXADD-40]
_ = x[ERMS-41] _ = x[CMPSB_SCADBS_SHORT-41]
_ = x[F16C-42] _ = x[CMPXCHG8-42]
_ = x[FMA3-43] _ = x[CPBOOST-43]
_ = x[FMA4-44] _ = x[CPPC-44]
_ = x[FXSR-45] _ = x[CX16-45]
_ = x[FXSROPT-46] _ = x[EFER_LMSLE_UNS-46]
_ = x[GFNI-47] _ = x[ENQCMD-47]
_ = x[HLE-48] _ = x[ERMS-48]
_ = x[HRESET-49] _ = x[F16C-49]
_ = x[HTT-50] _ = x[FLUSH_L1D-50]
_ = x[HWA-51] _ = x[FMA3-51]
_ = x[HYPERVISOR-52] _ = x[FMA4-52]
_ = x[IBPB-53] _ = x[FP128-53]
_ = x[IBS-54] _ = x[FP256-54]
_ = x[IBSBRNTRGT-55] _ = x[FSRM-55]
_ = x[IBSFETCHSAM-56] _ = x[FXSR-56]
_ = x[IBSFFV-57] _ = x[FXSROPT-57]
_ = x[IBSOPCNT-58] _ = x[GFNI-58]
_ = x[IBSOPCNTEXT-59] _ = x[HLE-59]
_ = x[IBSOPSAM-60] _ = x[HRESET-60]
_ = x[IBSRDWROPCNT-61] _ = x[HTT-61]
_ = x[IBSRIPINVALIDCHK-62] _ = x[HWA-62]
_ = x[IBS_PREVENTHOST-63] _ = x[HYBRID_CPU-63]
_ = x[INT_WBINVD-64] _ = x[HYPERVISOR-64]
_ = x[INVLPGB-65] _ = x[IA32_ARCH_CAP-65]
_ = x[LAHF-66] _ = x[IA32_CORE_CAP-66]
_ = x[LAM-67] _ = x[IBPB-67]
_ = x[LBRVIRT-68] _ = x[IBRS-68]
_ = x[LZCNT-69] _ = x[IBRS_PREFERRED-69]
_ = x[MCAOVERFLOW-70] _ = x[IBRS_PROVIDES_SMP-70]
_ = x[MCOMMIT-71] _ = x[IBS-71]
_ = x[MMX-72] _ = x[IBSBRNTRGT-72]
_ = x[MMXEXT-73] _ = x[IBSFETCHSAM-73]
_ = x[MOVBE-74] _ = x[IBSFFV-74]
_ = x[MOVDIR64B-75] _ = x[IBSOPCNT-75]
_ = x[MOVDIRI-76] _ = x[IBSOPCNTEXT-76]
_ = x[MOVSB_ZL-77] _ = x[IBSOPSAM-77]
_ = x[MPX-78] _ = x[IBSRDWROPCNT-78]
_ = x[MSRIRC-79] _ = x[IBSRIPINVALIDCHK-79]
_ = x[MSR_PAGEFLUSH-80] _ = x[IBS_FETCH_CTLX-80]
_ = x[NRIPS-81] _ = x[IBS_OPDATA4-81]
_ = x[NX-82] _ = x[IBS_OPFUSE-82]
_ = x[OSXSAVE-83] _ = x[IBS_PREVENTHOST-83]
_ = x[PCONFIG-84] _ = x[IBS_ZEN4-84]
_ = x[POPCNT-85] _ = x[INT_WBINVD-85]
_ = x[RDPRU-86] _ = x[INVLPGB-86]
_ = x[RDRAND-87] _ = x[LAHF-87]
_ = x[RDSEED-88] _ = x[LAM-88]
_ = x[RDTSCP-89] _ = x[LBRVIRT-89]
_ = x[RTM-90] _ = x[LZCNT-90]
_ = x[RTM_ALWAYS_ABORT-91] _ = x[MCAOVERFLOW-91]
_ = x[SERIALIZE-92] _ = x[MCDT_NO-92]
_ = x[SEV-93] _ = x[MCOMMIT-93]
_ = x[SEV_64BIT-94] _ = x[MD_CLEAR-94]
_ = x[SEV_ALTERNATIVE-95] _ = x[MMX-95]
_ = x[SEV_DEBUGSWAP-96] _ = x[MMXEXT-96]
_ = x[SEV_ES-97] _ = x[MOVBE-97]
_ = x[SEV_RESTRICTED-98] _ = x[MOVDIR64B-98]
_ = x[SEV_SNP-99] _ = x[MOVDIRI-99]
_ = x[SGX-100] _ = x[MOVSB_ZL-100]
_ = x[SGXLC-101] _ = x[MOVU-101]
_ = x[SHA-102] _ = x[MPX-102]
_ = x[SME-103] _ = x[MSRIRC-103]
_ = x[SME_COHERENT-104] _ = x[MSR_PAGEFLUSH-104]
_ = x[SSE-105] _ = x[NRIPS-105]
_ = x[SSE2-106] _ = x[NX-106]
_ = x[SSE3-107] _ = x[OSXSAVE-107]
_ = x[SSE4-108] _ = x[PCONFIG-108]
_ = x[SSE42-109] _ = x[POPCNT-109]
_ = x[SSE4A-110] _ = x[PPIN-110]
_ = x[SSSE3-111] _ = x[PREFETCHI-111]
_ = x[STIBP-112] _ = x[PSFD-112]
_ = x[STOSB_SHORT-113] _ = x[RDPRU-113]
_ = x[SUCCOR-114] _ = x[RDRAND-114]
_ = x[SVM-115] _ = x[RDSEED-115]
_ = x[SVMDA-116] _ = x[RDTSCP-116]
_ = x[SVMFBASID-117] _ = x[RTM-117]
_ = x[SVML-118] _ = x[RTM_ALWAYS_ABORT-118]
_ = x[SVMNP-119] _ = x[SERIALIZE-119]
_ = x[SVMPF-120] _ = x[SEV-120]
_ = x[SVMPFT-121] _ = x[SEV_64BIT-121]
_ = x[SYSCALL-122] _ = x[SEV_ALTERNATIVE-122]
_ = x[SYSEE-123] _ = x[SEV_DEBUGSWAP-123]
_ = x[TBM-124] _ = x[SEV_ES-124]
_ = x[TOPEXT-125] _ = x[SEV_RESTRICTED-125]
_ = x[TME-126] _ = x[SEV_SNP-126]
_ = x[TSCRATEMSR-127] _ = x[SGX-127]
_ = x[TSXLDTRK-128] _ = x[SGXLC-128]
_ = x[VAES-129] _ = x[SHA-129]
_ = x[VMCBCLEAN-130] _ = x[SME-130]
_ = x[VMPL-131] _ = x[SME_COHERENT-131]
_ = x[VMSA_REGPROT-132] _ = x[SPEC_CTRL_SSBD-132]
_ = x[VMX-133] _ = x[SRBDS_CTRL-133]
_ = x[VPCLMULQDQ-134] _ = x[SSE-134]
_ = x[VTE-135] _ = x[SSE2-135]
_ = x[WAITPKG-136] _ = x[SSE3-136]
_ = x[WBNOINVD-137] _ = x[SSE4-137]
_ = x[X87-138] _ = x[SSE42-138]
_ = x[XGETBV1-139] _ = x[SSE4A-139]
_ = x[XOP-140] _ = x[SSSE3-140]
_ = x[XSAVE-141] _ = x[STIBP-141]
_ = x[XSAVEC-142] _ = x[STIBP_ALWAYSON-142]
_ = x[XSAVEOPT-143] _ = x[STOSB_SHORT-143]
_ = x[XSAVES-144] _ = x[SUCCOR-144]
_ = x[AESARM-145] _ = x[SVM-145]
_ = x[ARMCPUID-146] _ = x[SVMDA-146]
_ = x[ASIMD-147] _ = x[SVMFBASID-147]
_ = x[ASIMDDP-148] _ = x[SVML-148]
_ = x[ASIMDHP-149] _ = x[SVMNP-149]
_ = x[ASIMDRDM-150] _ = x[SVMPF-150]
_ = x[ATOMICS-151] _ = x[SVMPFT-151]
_ = x[CRC32-152] _ = x[SYSCALL-152]
_ = x[DCPOP-153] _ = x[SYSEE-153]
_ = x[EVTSTRM-154] _ = x[TBM-154]
_ = x[FCMA-155] _ = x[TLB_FLUSH_NESTED-155]
_ = x[FP-156] _ = x[TME-156]
_ = x[FPHP-157] _ = x[TOPEXT-157]
_ = x[GPA-158] _ = x[TSCRATEMSR-158]
_ = x[JSCVT-159] _ = x[TSXLDTRK-159]
_ = x[LRCPC-160] _ = x[VAES-160]
_ = x[PMULL-161] _ = x[VMCBCLEAN-161]
_ = x[SHA1-162] _ = x[VMPL-162]
_ = x[SHA2-163] _ = x[VMSA_REGPROT-163]
_ = x[SHA3-164] _ = x[VMX-164]
_ = x[SHA512-165] _ = x[VPCLMULQDQ-165]
_ = x[SM3-166] _ = x[VTE-166]
_ = x[SM4-167] _ = x[WAITPKG-167]
_ = x[SVE-168] _ = x[WBNOINVD-168]
_ = x[lastID-169] _ = x[X87-169]
_ = x[XGETBV1-170]
_ = x[XOP-171]
_ = x[XSAVE-172]
_ = x[XSAVEC-173]
_ = x[XSAVEOPT-174]
_ = x[XSAVES-175]
_ = x[AESARM-176]
_ = x[ARMCPUID-177]
_ = x[ASIMD-178]
_ = x[ASIMDDP-179]
_ = x[ASIMDHP-180]
_ = x[ASIMDRDM-181]
_ = x[ATOMICS-182]
_ = x[CRC32-183]
_ = x[DCPOP-184]
_ = x[EVTSTRM-185]
_ = x[FCMA-186]
_ = x[FP-187]
_ = x[FPHP-188]
_ = x[GPA-189]
_ = x[JSCVT-190]
_ = x[LRCPC-191]
_ = x[PMULL-192]
_ = x[SHA1-193]
_ = x[SHA2-194]
_ = x[SHA3-195]
_ = x[SHA512-196]
_ = x[SM3-197]
_ = x[SM4-198]
_ = x[SVE-199]
_ = x[lastID-200]
_ = x[firstID-0] _ = x[firstID-0]
} }
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122} var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413}
func (i FeatureID) String() string { func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {

View file

@ -83,7 +83,7 @@ func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
c.Model = sysctlGetInt(0, "machdep.cpu.model") c.Model = sysctlGetInt(0, "machdep.cpu.model")
c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize") c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize") c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize") c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize") c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize") c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")

View file

@ -93,6 +93,28 @@ type PutObjectOptions struct {
// This can be used for faster uploads on non-seekable or slow-to-seek input. // This can be used for faster uploads on non-seekable or slow-to-seek input.
ConcurrentStreamParts bool ConcurrentStreamParts bool
Internal AdvancedPutOptions Internal AdvancedPutOptions
customHeaders http.Header
}
// SetMatchETag if etag matches while PUT MinIO returns an error
// this is a MinIO specific extension to support optimistic locking
// semantics.
func (opts *PutObjectOptions) SetMatchETag(etag string) {
if opts.customHeaders == nil {
opts.customHeaders = http.Header{}
}
opts.customHeaders.Set("If-Match", "\""+etag+"\"")
}
// SetMatchETagExcept if etag does not match while PUT MinIO returns an
// error this is a MinIO specific extension to support optimistic locking
// semantics.
func (opts *PutObjectOptions) SetMatchETagExcept(etag string) {
if opts.customHeaders == nil {
opts.customHeaders = http.Header{}
}
opts.customHeaders.Set("If-None-Match", "\""+etag+"\"")
} }
// getNumThreads - gets the number of threads to be used in the multipart // getNumThreads - gets the number of threads to be used in the multipart
@ -187,6 +209,12 @@ func (opts PutObjectOptions) Header() (header http.Header) {
header.Set("x-amz-meta-"+k, v) header.Set("x-amz-meta-"+k, v)
} }
} }
// set any other additional custom headers.
for k, v := range opts.customHeaders {
header[k] = v
}
return return
} }

View file

@ -118,7 +118,7 @@ type Options struct {
// Global constants. // Global constants.
const ( const (
libraryName = "minio-go" libraryName = "minio-go"
libraryVersion = "v7.0.48" libraryVersion = "v7.0.49"
) )
// User Agent should always following the below style. // User Agent should always following the below style.

12
vendor/modules.txt vendored
View file

@ -163,8 +163,8 @@ github.com/dsoprea/go-png-image-structure/v2
## explicit; go 1.12 ## explicit; go 1.12
github.com/dsoprea/go-utility/v2/filesystem github.com/dsoprea/go-utility/v2/filesystem
github.com/dsoprea/go-utility/v2/image github.com/dsoprea/go-utility/v2/image
# github.com/dustin/go-humanize v1.0.0 # github.com/dustin/go-humanize v1.0.1
## explicit ## explicit; go 1.16
github.com/dustin/go-humanize github.com/dustin/go-humanize
# github.com/fsnotify/fsnotify v1.6.0 # github.com/fsnotify/fsnotify v1.6.0
## explicit; go 1.16 ## explicit; go 1.16
@ -324,14 +324,14 @@ github.com/json-iterator/go
# github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 # github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
## explicit ## explicit
github.com/kballard/go-shellquote github.com/kballard/go-shellquote
# github.com/klauspost/compress v1.15.9 # github.com/klauspost/compress v1.15.15
## explicit; go 1.16 ## explicit; go 1.17
github.com/klauspost/compress/flate github.com/klauspost/compress/flate
github.com/klauspost/compress/gzip github.com/klauspost/compress/gzip
github.com/klauspost/compress/s2 github.com/klauspost/compress/s2
github.com/klauspost/compress/snappy github.com/klauspost/compress/snappy
github.com/klauspost/compress/zlib github.com/klauspost/compress/zlib
# github.com/klauspost/cpuid/v2 v2.1.1 # github.com/klauspost/cpuid/v2 v2.2.3
## explicit; go 1.15 ## explicit; go 1.15
github.com/klauspost/cpuid/v2 github.com/klauspost/cpuid/v2
# github.com/leodido/go-urn v1.2.1 # github.com/leodido/go-urn v1.2.1
@ -353,7 +353,7 @@ github.com/miekg/dns
# github.com/minio/md5-simd v1.1.2 # github.com/minio/md5-simd v1.1.2
## explicit; go 1.14 ## explicit; go 1.14
github.com/minio/md5-simd github.com/minio/md5-simd
# github.com/minio/minio-go/v7 v7.0.48 # github.com/minio/minio-go/v7 v7.0.49
## explicit; go 1.17 ## explicit; go 1.17
github.com/minio/minio-go/v7 github.com/minio/minio-go/v7
github.com/minio/minio-go/v7/pkg/credentials github.com/minio/minio-go/v7/pkg/credentials