diff --git a/README.md b/README.md index 8fb6b9d55..c25e21cb9 100644 --- a/README.md +++ b/README.md @@ -275,15 +275,16 @@ The following open source libraries, frameworks, and tools are used by GoToSocia - [go-swagger/go-swagger](https://github.com/go-swagger/go-swagger); Swagger OpenAPI spec generation. [Apache-2.0 License](https://spdx.org/licenses/Apache-2.0.html). - gruf: - [gruf/go-bytesize](https://codeberg.org/gruf/go-bytesize); byte size parsing / formatting. [MIT License](https://spdx.org/licenses/MIT.html). - - [gruf/go-cache](https://codeberg.org/gruf/go-cache); object & result caching. [MIT License](https://spdx.org/licenses/MIT.html). + - [gruf/go-cache](https://codeberg.org/gruf/go-cache); LRU and TTL caches. [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-debug](https://codeberg.org/gruf/go-debug); debug build tag. [MIT License](https://spdx.org/licenses/MIT.html). - - [gruf/go-errors](https://codeberg.org/gruf/go-errors); performant multi-error checking [MIT License](https://spdx.org/licenses/MIT.html). + - [gruf/go-errors](https://codeberg.org/gruf/go-errors); context-like error w/ value wrapping [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-fastcopy](https://codeberg.org/gruf/go-fastcopy); performant pooled I/O copying [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-kv](https://codeberg.org/gruf/go-kv); log field formatting. [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-mutexes](https://codeberg.org/gruf/go-mutexes); safemutex & mutex map. [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-runners](https://codeberg.org/gruf/go-runners); workerpools and synchronization. [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-sched](https://codeberg.org/gruf/go-sched); task scheduler. [MIT License](https://spdx.org/licenses/MIT.html). - [gruf/go-store](https://codeberg.org/gruf/go-store); file storage backend (local & s3). [MIT License](https://spdx.org/licenses/MIT.html). 
+ - [gruf/go-structr](https://codeberg.org/gruf/go-structr); struct caching w/ automated multiple indexing. [MIT License](https://spdx.org/licenses/MIT.html). - [h2non/filetype](https://github.com/h2non/filetype); filetype checking. [MIT License](https://spdx.org/licenses/MIT.html). - jackc: - [jackc/pgx](https://github.com/jackc/pgconn); Postgres driver. [MIT License](https://spdx.org/licenses/MIT.html). diff --git a/go.mod b/go.mod index c4e3ab25a..5f7cd8e63 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( codeberg.org/gruf/go-runners v1.6.2 codeberg.org/gruf/go-sched v1.2.3 codeberg.org/gruf/go-store/v2 v2.2.4 - codeberg.org/gruf/go-structr v0.1.1 + codeberg.org/gruf/go-structr v0.2.0 codeberg.org/superseriousbusiness/exif-terminator v0.7.0 github.com/DmitriyVTitov/size v1.5.0 github.com/KimMachineGun/automemlimit v0.5.0 @@ -82,7 +82,6 @@ require ( codeberg.org/gruf/go-bitutil v1.1.0 // indirect codeberg.org/gruf/go-bytes v1.0.2 // indirect codeberg.org/gruf/go-fastpath/v2 v2.0.0 // indirect - codeberg.org/gruf/go-mangler v1.2.3 // indirect codeberg.org/gruf/go-maps v1.0.3 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -170,6 +169,7 @@ require ( github.com/uptrace/opentelemetry-go-extra/otelsql v0.2.3 // indirect github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect go.opentelemetry.io/otel/metric v1.20.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect diff --git a/go.sum b/go.sum index 2f2bc4e73..2f79c5853 100644 --- a/go.sum +++ b/go.sum @@ -58,8 +58,6 @@ codeberg.org/gruf/go-kv v1.6.4 h1:3NZiW8HVdBM3kpOiLb7XfRiihnzZWMAixdCznguhILk= codeberg.org/gruf/go-kv v1.6.4/go.mod h1:O/YkSvKiS9XsRolM3rqCd9YJmND7dAXu9z+PrlYO4bc= codeberg.org/gruf/go-logger/v2 v2.2.1 h1:RP2u059EQKTBFV3cN8X6xDxNk2RkzqdgXGKflKqB7Oc= 
codeberg.org/gruf/go-logger/v2 v2.2.1/go.mod h1:m/vBfG5jNUmYXI8Hg9aVSk7Pn8YgEBITQB/B/CzdRss= -codeberg.org/gruf/go-mangler v1.2.3 h1:sj0dey2lF5GRQL7fXmCY0wPNaI5JrROiThb0VDbzF8A= -codeberg.org/gruf/go-mangler v1.2.3/go.mod h1:X/7URkFhLBAVKkTxmqF11Oxw3A6pSSxgPeHssQaiq28= codeberg.org/gruf/go-maps v1.0.3 h1:VDwhnnaVNUIy5O93CvkcE2IZXnMB1+IJjzfop9V12es= codeberg.org/gruf/go-maps v1.0.3/go.mod h1:D5LNDxlC9rsDuVQVM6JObaVGAdHB6g2dTdOdkh1aXWA= codeberg.org/gruf/go-mutexes v1.4.0 h1:53H6bFDRcG6rjk3iOTuGaStT/VTFdU5Uw8Dszy88a8g= @@ -70,8 +68,8 @@ codeberg.org/gruf/go-sched v1.2.3 h1:H5ViDxxzOBR3uIyGBCf0eH8b1L8wMybOXcdtUUTXZHk codeberg.org/gruf/go-sched v1.2.3/go.mod h1:vT9uB6KWFIIwnG9vcPY2a0alYNoqdL1mSzRM8I+PK7A= codeberg.org/gruf/go-store/v2 v2.2.4 h1:8HO1Jh2gg7boQKA3hsDAIXd9zwieu5uXwDXEcTOD9js= codeberg.org/gruf/go-store/v2 v2.2.4/go.mod h1:zI4VWe5CpXAktYMtaBMrgA5QmO0sQH53LBRvfn1huys= -codeberg.org/gruf/go-structr v0.1.1 h1:nR6EcZjXn+oby2nH1Mi6i8S5GWhyjUknkQMXsjbbK0g= -codeberg.org/gruf/go-structr v0.1.1/go.mod h1:OBajB6wcz0BbX0Ns88w2rdUF52rgIej471NJgV0GCW4= +codeberg.org/gruf/go-structr v0.2.0 h1:9U9uWae4j//HxpztDjw4z07WJi+8F8gMrRPLDBZ/rw4= +codeberg.org/gruf/go-structr v0.2.0/go.mod h1:iTMx2Jw2yekHdg4VVY9Clz5u8Suf9veGdk3sWwNmM4M= codeberg.org/superseriousbusiness/exif-terminator v0.7.0 h1:Y6VApSXhKqExG0H2hZ2JelRK4xmWdjDQjn13CpEfzko= codeberg.org/superseriousbusiness/exif-terminator v0.7.0/go.mod h1:gCWKduudUWFzsnixoMzu0FYVdxHWG+AbXnZ50DqxsUE= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= @@ -116,8 +114,6 @@ github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cnf/structhash 
v0.0.0-20201127153200-e1b16c1ebc08 h1:ox2F0PSMlrAAiAdknSRMDrAr8mfxPCfSZolH+/qQnyQ= -github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08/go.mod h1:pCxVEbcm3AMg7ejXyorUXi6HQCzOIBf7zEDVPtw0/U4= github.com/containerd/cgroups/v3 v3.0.1 h1:4hfGvu8rfGIwVIDd+nLzn/B9ZXx4BcCjzt5ToenJRaE= github.com/containerd/cgroups/v3 v3.0.1/go.mod h1:/vtwk1VXrtoa5AaZLkypuOJgA/6DyPMZHJPGQNtlHnw= github.com/coreos/go-oidc/v3 v3.9.0 h1:0J/ogVOd4y8P0f0xUh8l9t07xRP/d8tccvjHl2dcsSo= @@ -168,8 +164,6 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/fxamacker/cbor v1.5.1 h1:XjQWBgdmQyqimslUh5r4tUGmoqzHmBFQOImkWGi2awg= -github.com/fxamacker/cbor v1.5.1/go.mod h1:3aPGItF174ni7dDzd6JZ206H8cmr4GDNBGpPa971zsU= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gavv/httpexpect v2.0.0+incompatible h1:1X9kcRshkSKEjNJJxX9Y9mQ5BRfbxU5kORdjhlA1yX8= @@ -393,8 +387,6 @@ github.com/minio/minio-go/v7 v7.0.66 h1:bnTOXOHjOqv/gcMuiVbN9o2ngRItvqE774dG9nq0 github.com/minio/minio-go/v7 v7.0.66/go.mod h1:DHAgmyQEGdW3Cif0UooKOyrT3Vxs82zNdV6tkKhRtbs= github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= -github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= -github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod 
h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -560,8 +552,6 @@ github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAh github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/wagslane/go-password-validator v0.3.0 h1:vfxOPzGHkz5S146HDpavl0cw1DSVP061Ry2PX0/ON6I= github.com/wagslane/go-password-validator v0.3.0/go.mod h1:TI1XJ6T5fRdRnHqHt14pvy1tNVnrwe7m3/f1f2fDphQ= -github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= -github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= @@ -581,6 +571,10 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.6.0 h1:boZcn2GTjpsynOsC0iJHnBWa4Bi0qzfJjthwauItG68= github.com/yuin/goldmark v1.6.0/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod 
h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/vendor/codeberg.org/gruf/go-mangler/LICENSE b/vendor/codeberg.org/gruf/go-mangler/LICENSE deleted file mode 100644 index dffbdf0c9..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/LICENSE +++ /dev/null @@ -1,9 +0,0 @@ -MIT License - -Copyright (c) 2023 gruf - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/codeberg.org/gruf/go-mangler/README.md b/vendor/codeberg.org/gruf/go-mangler/README.md deleted file mode 100644 index d0de88a43..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# go-mangler - -[Documentation](https://pkg.go.dev/codeberg.org/gruf/go-mangler). - -To put it simply is a bit of an odd library. It aims to provide incredibly fast, unique string outputs for all default supported input data types during a given runtime instance. - -It is useful, for example, for use as part of larger abstractions involving hashmaps. That was my particular usecase anyways... 
- -This package does make liberal use of the "unsafe" package. - -Benchmarks are below. Those with missing values panicked during our set of benchmarks, usually a case of not handling nil values elegantly. Please note the more important thing to notice here is the relative difference in benchmark scores, the actual `ns/op`,`B/op`,`allocs/op` accounts for running through over 80 possible test cases, including some not-ideal situations. - -The choice of libraries in the benchmark are just a selection of libraries that could be used in a similar manner to this one, i.e. serializing in some manner. - -``` -goos: linux -goarch: amd64 -pkg: codeberg.org/gruf/go-mangler -cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz -BenchmarkMangle -BenchmarkMangle-8 533011 2003 ns/op 1168 B/op 120 allocs/op -BenchmarkMangleKnown -BenchmarkMangleKnown-8 817060 1458 ns/op 1168 B/op 120 allocs/op -BenchmarkJSON -BenchmarkJSON-8 188637 5899 ns/op 4211 B/op 142 allocs/op -BenchmarkFmt -BenchmarkFmt-8 162735 7053 ns/op 2257 B/op 161 allocs/op -BenchmarkFxmackerCbor -BenchmarkFxmackerCbor-8 362403 3336 ns/op 1496 B/op 122 allocs/op -BenchmarkMitchellhHashStructure -BenchmarkMitchellhHashStructure-8 113982 10079 ns/op 8443 B/op 961 allocs/op -BenchmarkCnfStructhash -BenchmarkCnfStructhash-8 7162 167613 ns/op 288619 B/op 5841 allocs/op -PASS -ok codeberg.org/gruf/go-mangler 11.352s -``` \ No newline at end of file diff --git a/vendor/codeberg.org/gruf/go-mangler/helpers.go b/vendor/codeberg.org/gruf/go-mangler/helpers.go deleted file mode 100644 index 6658fbef4..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/helpers.go +++ /dev/null @@ -1,144 +0,0 @@ -package mangler - -import ( - "reflect" - "unsafe" -) - -type ( - // serializing interfacing types. 
- stringer interface{ String() string } - binarymarshaler interface{ MarshalBinary() ([]byte, error) } - textmarshaler interface{ MarshalText() ([]byte, error) } - jsonmarshaler interface{ MarshalJSON() ([]byte, error) } -) - -func append_uint16(b []byte, u uint16) []byte { - return append(b, // LE - byte(u), - byte(u>>8), - ) -} - -func append_uint32(b []byte, u uint32) []byte { - return append(b, // LE - byte(u), - byte(u>>8), - byte(u>>16), - byte(u>>24), - ) -} - -func append_uint64(b []byte, u uint64) []byte { - return append(b, // LE - byte(u), - byte(u>>8), - byte(u>>16), - byte(u>>24), - byte(u>>32), - byte(u>>40), - byte(u>>48), - byte(u>>56), - ) -} - -func deref_ptr_mangler(mangle Mangler, count int) rMangler { - return func(buf []byte, v reflect.Value) []byte { - for i := 0; i < count; i++ { - // Check for nil - if v.IsNil() { - buf = append(buf, '0') - return buf - } - - // Further deref ptr - buf = append(buf, '1') - v = v.Elem() - } - - // Mangle fully deref'd ptr - return mangle(buf, v.Interface()) - } -} - -func deref_ptr_rmangler(mangle rMangler, count int) rMangler { - return func(buf []byte, v reflect.Value) []byte { - for i := 0; i < count; i++ { - // Check for nil - if v.IsNil() { - buf = append(buf, '0') - return buf - } - - // Further deref ptr - buf = append(buf, '1') - v = v.Elem() - } - - // Mangle fully deref'd ptr - return mangle(buf, v) - } -} - -func array_to_slice_mangler(mangle Mangler) rMangler { - return func(buf []byte, v reflect.Value) []byte { - // Get slice of whole array - v = v.Slice(0, v.Len()) - - // Mangle as known slice type - return mangle(buf, v.Interface()) - } -} - -func iter_array_mangler(mangle Mangler) rMangler { - return func(buf []byte, v reflect.Value) []byte { - n := v.Len() - for i := 0; i < n; i++ { - buf = mangle(buf, v.Index(i).Interface()) - buf = append(buf, ',') - } - if n > 0 { - buf = buf[:len(buf)-1] - } - return buf - } -} - -func iter_array_rmangler(mangle rMangler) rMangler { - return func(buf 
[]byte, v reflect.Value) []byte { - n := v.Len() - for i := 0; i < n; i++ { - buf = mangle(buf, v.Index(i)) - buf = append(buf, ',') - } - if n > 0 { - buf = buf[:len(buf)-1] - } - return buf - } -} - -func iter_map_rmangler(kMangle, vMangle rMangler) rMangler { - return func(buf []byte, v reflect.Value) []byte { - r := v.MapRange() - for r.Next() { - buf = kMangle(buf, r.Key()) - buf = append(buf, ':') - buf = vMangle(buf, r.Value()) - buf = append(buf, ',') - } - if v.Len() > 0 { - buf = buf[:len(buf)-1] - } - return buf - } -} - -// iface_value returns the raw value ptr for input boxed within interface{} type. -func iface_value(a any) unsafe.Pointer { - type eface struct { - Type unsafe.Pointer - Value unsafe.Pointer - } - return (*eface)(unsafe.Pointer(&a)).Value -} diff --git a/vendor/codeberg.org/gruf/go-mangler/init.go b/vendor/codeberg.org/gruf/go-mangler/init.go deleted file mode 100644 index 2c20e6761..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/init.go +++ /dev/null @@ -1,119 +0,0 @@ -package mangler - -import ( - "net" - "net/netip" - "net/url" - "reflect" - "time" - _ "unsafe" -) - -func init() { - // Register standard library performant manglers. 
- Register(reflect.TypeOf(net.IPAddr{}), mangle_ipaddr) - Register(reflect.TypeOf(&net.IPAddr{}), mangle_ipaddr_ptr) - Register(reflect.TypeOf(netip.Addr{}), mangle_addr) - Register(reflect.TypeOf(&netip.Addr{}), mangle_addr_ptr) - Register(reflect.TypeOf(netip.AddrPort{}), mangle_addrport) - Register(reflect.TypeOf(&netip.AddrPort{}), mangle_addrport_ptr) - Register(reflect.TypeOf(time.Time{}), mangle_time) - Register(reflect.TypeOf(&time.Time{}), mangle_time_ptr) - Register(reflect.TypeOf(url.URL{}), mangle_url) - Register(reflect.TypeOf(&url.URL{}), mangle_url_ptr) -} - -//go:linkname time_sec time.(*Time).sec -func time_sec(*time.Time) int64 - -//go:linkname time_nsec time.(*Time).nsec -func time_nsec(*time.Time) int32 - -//go:linkname time_stripMono time.(*Time).stripMono -func time_stripMono(*time.Time) - -func mangle_url(buf []byte, a any) []byte { - u := (*url.URL)(iface_value(a)) - return append(buf, u.String()...) -} - -func mangle_url_ptr(buf []byte, a any) []byte { - if ptr := (*url.URL)(iface_value(a)); ptr != nil { - s := ptr.String() - buf = append(buf, '1') - return append(buf, s...) - } - buf = append(buf, '0') - return buf -} - -func mangle_time(buf []byte, a any) []byte { - t := *(*time.Time)(iface_value(a)) - time_stripMono(&t) // force non-monotonic time value. - buf = append_uint64(buf, uint64(time_sec(&t))) - buf = append_uint32(buf, uint32(time_nsec(&t))) - return buf -} - -func mangle_time_ptr(buf []byte, a any) []byte { - if ptr := (*time.Time)(iface_value(a)); ptr != nil { - t := *ptr - buf = append(buf, '1') - time_stripMono(&t) // force non-monotonic time value. - buf = append_uint64(buf, uint64(time_sec(&t))) - buf = append_uint32(buf, uint32(time_nsec(&t))) - return buf - } - buf = append(buf, '0') - return buf -} - -func mangle_ipaddr(buf []byte, a any) []byte { - i := *(*net.IPAddr)(iface_value(a)) - buf = append(buf, i.IP...) - buf = append(buf, i.Zone...) 
- return buf -} - -func mangle_ipaddr_ptr(buf []byte, a any) []byte { - if ptr := (*net.IPAddr)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - buf = append(buf, ptr.IP...) - buf = append(buf, ptr.Zone...) - return buf - } - buf = append(buf, '0') - return buf -} - -func mangle_addr(buf []byte, a any) []byte { - i := (*netip.Addr)(iface_value(a)) - b, _ := i.MarshalBinary() - return append(buf, b...) -} - -func mangle_addr_ptr(buf []byte, a any) []byte { - if ptr := (*netip.Addr)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - b, _ := ptr.MarshalBinary() - return append(buf, b...) - } - buf = append(buf, '0') - return buf -} - -func mangle_addrport(buf []byte, a any) []byte { - i := (*netip.AddrPort)(iface_value(a)) - b, _ := i.MarshalBinary() - return append(buf, b...) -} - -func mangle_addrport_ptr(buf []byte, a any) []byte { - if ptr := (*netip.AddrPort)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - b, _ := ptr.MarshalBinary() - return append(buf, b...) - } - buf = append(buf, '0') - return buf -} diff --git a/vendor/codeberg.org/gruf/go-mangler/load.go b/vendor/codeberg.org/gruf/go-mangler/load.go deleted file mode 100644 index 752e5d337..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/load.go +++ /dev/null @@ -1,333 +0,0 @@ -package mangler - -import ( - "reflect" -) - -// loadMangler is the top-most Mangler load function. It guarantees that a Mangler -// function will be returned for given value interface{} and reflected type. Else panics. -func loadMangler(a any, t reflect.Type) Mangler { - // Load mangler function - mng, rmng := load(a, t) - - if mng != nil { - // Use preferred mangler. - return mng - } - - if rmng != nil { - // Wrap reflect mangler to handle iface - return func(buf []byte, a any) []byte { - return rmng(buf, reflect.ValueOf(a)) - } - } - - // No mangler function could be determined - panic("cannot mangle type: " + t.String()) -} - -// load will load a Mangler or reflect Mangler for given type and iface 'a'. 
-// Note: allocates new interface value if nil provided, i.e. if coming via reflection. -func load(a any, t reflect.Type) (Mangler, rMangler) { - if t == nil { - // There is no reflect type to search by - panic("cannot mangle nil interface{} type") - } - - if a == nil { - // Alloc new iface instance - v := reflect.New(t).Elem() - a = v.Interface() - } - - // Check for Mangled implementation. - if _, ok := a.(Mangled); ok { - return mangle_mangled, nil - } - - // Search mangler by reflection. - mng, rmng := loadReflect(t) - if mng != nil { - return mng, nil - } - - // Prefer iface mangler, else, reflected. - return loadIface(a), rmng -} - -// loadIface is used as a near-last-resort interface{} type switch -// loader for types implementating other known (slower) functions. -func loadIface(a any) Mangler { - switch a.(type) { - case binarymarshaler: - return mangle_binary - case stringer: - return mangle_stringer - case textmarshaler: - return mangle_text - case jsonmarshaler: - return mangle_json - default: - return nil - } -} - -// loadReflect will load a Mangler (or rMangler) function for the given reflected type info. -// NOTE: this is used as the top level load function for nested reflective searches. 
-func loadReflect(t reflect.Type) (Mangler, rMangler) { - switch t.Kind() { - case reflect.Pointer: - return loadReflectPtr(t.Elem()) - - case reflect.String: - return mangle_string, nil - - case reflect.Array: - return nil, loadReflectArray(t.Elem()) - - case reflect.Slice: - // Element type - et := t.Elem() - - // Preferably look for known slice mangler func - if mng := loadReflectKnownSlice(et); mng != nil { - return mng, nil - } - - // Else handle as array elements - return nil, loadReflectArray(et) - - case reflect.Map: - return nil, loadReflectMap(t.Key(), t.Elem()) - - case reflect.Bool: - return mangle_bool, nil - - case reflect.Int, - reflect.Uint, - reflect.Uintptr: - return mangle_platform_int, nil - - case reflect.Int8, reflect.Uint8: - return mangle_8bit, nil - - case reflect.Int16, reflect.Uint16: - return mangle_16bit, nil - - case reflect.Int32, reflect.Uint32: - return mangle_32bit, nil - - case reflect.Int64, reflect.Uint64: - return mangle_64bit, nil - - case reflect.Float32: - return mangle_32bit, nil - - case reflect.Float64: - return mangle_64bit, nil - - case reflect.Complex64: - return mangle_64bit, nil - - case reflect.Complex128: - return mangle_128bit, nil - - default: - return nil, nil - } -} - -// loadReflectPtr loads a Mangler (or rMangler) function for a ptr's element type. -// This also handles further dereferencing of any further ptr indrections (e.g. ***int). 
-func loadReflectPtr(et reflect.Type) (Mangler, rMangler) { - count := 1 - - // Iteratively dereference ptrs - for et.Kind() == reflect.Pointer { - et = et.Elem() - count++ - } - - if et.Kind() == reflect.Array { - // Array elem type - at := et.Elem() - - // Special case of addressable (sliceable) array - if mng := loadReflectKnownSlice(at); mng != nil { - rmng := array_to_slice_mangler(mng) - return nil, deref_ptr_rmangler(rmng, count) - } - - // Look for an array mangler function, this will - // access elements by index using reflect.Value and - // pass each one to a separate mangler function. - if rmng := loadReflectArray(at); rmng != nil { - return nil, deref_ptr_rmangler(rmng, count) - } - - return nil, nil - } - - // Try remove a layer of derefs by loading a mangler - // for a known ptr kind. The less reflection the better! - if mng := loadReflectKnownPtr(et); mng != nil { - if count == 1 { - return mng, nil - } - return nil, deref_ptr_mangler(mng, count-1) - } - - // Search for ptr elemn type mangler - if mng, rmng := load(nil, et); mng != nil { - return nil, deref_ptr_mangler(mng, count) - } else if rmng != nil { - return nil, deref_ptr_rmangler(rmng, count) - } - - return nil, nil -} - -// loadReflectKnownPtr loads a Mangler function for a known ptr-of-element type (in this case, primtive ptrs). 
-func loadReflectKnownPtr(et reflect.Type) Mangler { - switch et.Kind() { - case reflect.String: - return mangle_string_ptr - - case reflect.Bool: - return mangle_bool_ptr - - case reflect.Int, - reflect.Uint, - reflect.Uintptr: - return mangle_platform_int_ptr - - case reflect.Int8, reflect.Uint8: - return mangle_8bit_ptr - - case reflect.Int16, reflect.Uint16: - return mangle_16bit_ptr - - case reflect.Int32, reflect.Uint32: - return mangle_32bit_ptr - - case reflect.Int64, reflect.Uint64: - return mangle_64bit_ptr - - case reflect.Float32: - return mangle_32bit_ptr - - case reflect.Float64: - return mangle_64bit_ptr - - case reflect.Complex64: - return mangle_64bit_ptr - - case reflect.Complex128: - return mangle_128bit_ptr - - default: - return nil - } -} - -// loadReflectKnownSlice loads a Mangler function for a known slice-of-element type (in this case, primtives). -func loadReflectKnownSlice(et reflect.Type) Mangler { - switch et.Kind() { - case reflect.String: - return mangle_string_slice - - case reflect.Bool: - return mangle_bool_slice - - case reflect.Int, - reflect.Uint, - reflect.Uintptr: - return mangle_platform_int_slice - - case reflect.Int8, reflect.Uint8: - return mangle_8bit_slice - - case reflect.Int16, reflect.Uint16: - return mangle_16bit_slice - - case reflect.Int32, reflect.Uint32: - return mangle_32bit_slice - - case reflect.Int64, reflect.Uint64: - return mangle_64bit_slice - - case reflect.Float32: - return mangle_32bit_slice - - case reflect.Float64: - return mangle_64bit_slice - - case reflect.Complex64: - return mangle_64bit_slice - - case reflect.Complex128: - return mangle_128bit_slice - - default: - return nil - } -} - -// loadReflectArray loads an rMangler function for an array (or slice) or given element type. 
-func loadReflectArray(et reflect.Type) rMangler { - // Search via reflected array element type - if mng, rmng := load(nil, et); mng != nil { - return iter_array_mangler(mng) - } else if rmng != nil { - return iter_array_rmangler(rmng) - } - return nil -} - -// loadReflectMap loads an rMangler function for a map of given key and value types. -func loadReflectMap(kt, vt reflect.Type) rMangler { - var kmng, vmng rMangler - - // Search for mangler for key type - mng, rmng := load(nil, kt) - - switch { - // Wrap key mangler to reflect - case mng != nil: - mng := mng // take our own ptr - kmng = func(buf []byte, v reflect.Value) []byte { - return mng(buf, v.Interface()) - } - - // Use reflect key mangler as-is - case rmng != nil: - kmng = rmng - - // No mangler found - default: - return nil - } - - // Search for mangler for value type - mng, rmng = load(nil, vt) - - switch { - // Wrap value mangler to reflect - case mng != nil: - mng := mng // take our own ptr - vmng = func(buf []byte, v reflect.Value) []byte { - return mng(buf, v.Interface()) - } - - // Use reflect value mangler as-is - case rmng != nil: - vmng = rmng - - // No mangler found - default: - return nil - } - - // Wrap key/value manglers in map iter - return iter_map_rmangler(kmng, vmng) -} diff --git a/vendor/codeberg.org/gruf/go-mangler/mangle.go b/vendor/codeberg.org/gruf/go-mangler/mangle.go deleted file mode 100644 index e12748e67..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/mangle.go +++ /dev/null @@ -1,154 +0,0 @@ -package mangler - -import ( - "reflect" - "sync" - "unsafe" -) - -// manglers is a map of runtime type ptrs => Mangler functions. -var manglers sync.Map - -// Mangled is an interface that allows any type to implement a custom -// Mangler function to improve performance when mangling this type. 
-type Mangled interface { - Mangle(buf []byte) []byte -} - -// Mangler is a function that will take an input interface value of known -// type, and append it in mangled serialized form to the given byte buffer. -// While the value type is an interface, the Mangler functions are accessed -// by the value's runtime type pointer, allowing the input value type to be known. -type Mangler func(buf []byte, value any) []byte - -// rMangler is functionally the same as a Mangler function, but it -// takes the value input in reflected form. By specifying these differences -// in mangler function types, it allows us to cut back on new calls to -// `reflect.ValueOf()` and instead pass by existing reflected values. -type rMangler func(buf []byte, value reflect.Value) []byte - -// Get will fetch the Mangler function for given runtime type. -// Note that the returned mangler will be a no-op in the case -// that an incorrect type is passed as the value argument. -func Get(t reflect.Type) Mangler { - var mng Mangler - - // Get raw runtime type ptr - uptr := uintptr(iface_value(t)) - - // Look for a cached mangler - v, ok := manglers.Load(uptr) - - if !ok { - // Load mangler function - mng = loadMangler(nil, t) - } else { - // cast cached value - mng = v.(Mangler) - } - - return func(buf []byte, value any) []byte { - // Type check passed value against original arg type. - if vt := reflect.TypeOf(value); vt != t { - return buf - } - - // First write the type ptr (this adds - // a unique prefix for each runtime type). - buf = mangle_platform_int(buf, uptr) - - // Finally, mangle value - return mng(buf, value) - } -} - -// Register will register the given Mangler function for use with vars of given runtime type. This allows -// registering performant manglers for existing types not implementing Mangled (e.g. std library types). -// NOTE: panics if there already exists a Mangler function for given type. Register on init(). 
-func Register(t reflect.Type, m Mangler) { - if t == nil { - // Nil interface{} types cannot be searched by, do not accept - panic("cannot register mangler for nil interface{} type") - } - - // Get raw runtime type ptr - uptr := uintptr(iface_value(t)) - - // Ensure this is a unique encoder - if _, ok := manglers.Load(uptr); ok { - panic("already registered mangler for type: " + t.String()) - } - - // Cache this encoder func - manglers.Store(uptr, m) -} - -// Append will append the mangled form of input value 'a' to buffer 'b'. -// See mangler.String() for more information on mangled output. -func Append(b []byte, a any) []byte { - var mng Mangler - - // Get reflect type of 'a' - t := reflect.TypeOf(a) - - // Get raw runtime type ptr - uptr := uintptr(iface_value(t)) - - // Look for a cached mangler - v, ok := manglers.Load(uptr) - - if !ok { - // Load mangler into cache - mng = loadMangler(nil, t) - manglers.Store(uptr, mng) - } else { - // cast cached value - mng = v.(Mangler) - } - - // First write the type ptr (this adds - // a unique prefix for each runtime type). - b = mangle_platform_int(b, uptr) - - // Finally, mangle value - return mng(b, a) -} - -// String will return the mangled format of input value 'a'. This -// mangled output will be unique for all default supported input types -// during a single runtime instance. Uniqueness cannot be guaranteed -// between separate runtime instances (whether running concurrently, or -// the same application running at different times). -// -// The exact formatting of the output data should not be relied upon, -// only that it is unique given the above constraints. Generally though, -// the mangled output is the binary formatted text of given input data. -// -// Uniqueness is guaranteed for similar input data of differing types -// (e.g. string("hello world") vs. []byte("hello world")) by prefixing -// mangled output with the input data's runtime type pointer. 
-// -// Default supported types include: -// - string -// - bool -// - int,int8,int16,int32,int64 -// - uint,uint8,uint16,uint32,uint64,uintptr -// - float32,float64 -// - complex64,complex128 -// - all type aliases of above -// - time.Time{} -// - url.URL{} -// - net.IPAddr{} -// - netip.Addr{}, netip.AddrPort{} -// - mangler.Mangled{} -// - fmt.Stringer{} -// - json.Marshaler{} -// - encoding.BinaryMarshaler{} -// - encoding.TextMarshaler{} -// - all pointers to the above -// - all slices / arrays of the above -// - all map keys / values of the above -func String(a any) string { - b := Append(make([]byte, 0, 32), a) - return *(*string)(unsafe.Pointer(&b)) -} diff --git a/vendor/codeberg.org/gruf/go-mangler/manglers.go b/vendor/codeberg.org/gruf/go-mangler/manglers.go deleted file mode 100644 index b9ba81705..000000000 --- a/vendor/codeberg.org/gruf/go-mangler/manglers.go +++ /dev/null @@ -1,265 +0,0 @@ -package mangler - -import ( - "math/bits" - _ "unsafe" -) - -// Notes: -// the use of unsafe conversion from the direct interface values to -// the chosen types in each of the below functions allows us to convert -// not only those types directly, but anything type-aliased to those -// types. e.g. `time.Duration` directly as int64. - -func mangle_string(buf []byte, a any) []byte { - return append(buf, *(*string)(iface_value(a))...) -} - -func mangle_string_ptr(buf []byte, a any) []byte { - if ptr := (*string)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - return append(buf, *ptr...) - } - buf = append(buf, '0') - return buf -} - -func mangle_string_slice(buf []byte, a any) []byte { - s := *(*[]string)(iface_value(a)) - for _, s := range s { - buf = append(buf, s...) 
- buf = append(buf, ',') - } - if len(s) > 0 { - buf = buf[:len(buf)-1] - } - return buf -} - -func mangle_bool(buf []byte, a any) []byte { - if *(*bool)(iface_value(a)) { - return append(buf, '1') - } - return append(buf, '0') -} - -func mangle_bool_ptr(buf []byte, a any) []byte { - if ptr := (*bool)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - if *ptr { - return append(buf, '1') - } - return append(buf, '0') - } - buf = append(buf, '0') - return buf -} - -func mangle_bool_slice(buf []byte, a any) []byte { - for _, b := range *(*[]bool)(iface_value(a)) { - if b { - buf = append(buf, '1') - } else { - buf = append(buf, '0') - } - } - return buf -} - -func mangle_8bit(buf []byte, a any) []byte { - return append(buf, *(*uint8)(iface_value(a))) -} - -func mangle_8bit_ptr(buf []byte, a any) []byte { - if ptr := (*uint8)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - return append(buf, *ptr) - } - buf = append(buf, '0') - return buf -} - -func mangle_8bit_slice(buf []byte, a any) []byte { - return append(buf, *(*[]uint8)(iface_value(a))...) 
-} - -func mangle_16bit(buf []byte, a any) []byte { - return append_uint16(buf, *(*uint16)(iface_value(a))) -} - -func mangle_16bit_ptr(buf []byte, a any) []byte { - if ptr := (*uint16)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - return append_uint16(buf, *ptr) - } - buf = append(buf, '0') - return buf -} - -func mangle_16bit_slice(buf []byte, a any) []byte { - for _, u := range *(*[]uint16)(iface_value(a)) { - buf = append_uint16(buf, u) - } - return buf -} - -func mangle_32bit(buf []byte, a any) []byte { - return append_uint32(buf, *(*uint32)(iface_value(a))) -} - -func mangle_32bit_ptr(buf []byte, a any) []byte { - if ptr := (*uint32)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - return append_uint32(buf, *ptr) - } - buf = append(buf, '0') - return buf -} - -func mangle_32bit_slice(buf []byte, a any) []byte { - for _, u := range *(*[]uint32)(iface_value(a)) { - buf = append_uint32(buf, u) - } - return buf -} - -func mangle_64bit(buf []byte, a any) []byte { - return append_uint64(buf, *(*uint64)(iface_value(a))) -} - -func mangle_64bit_ptr(buf []byte, a any) []byte { - if ptr := (*uint64)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - return append_uint64(buf, *ptr) - } - buf = append(buf, '0') - return buf -} - -func mangle_64bit_slice(buf []byte, a any) []byte { - for _, u := range *(*[]uint64)(iface_value(a)) { - buf = append_uint64(buf, u) - } - return buf -} - -// mangle_platform_int contains the correct iface mangler on runtime for platform int size. -var mangle_platform_int = func() Mangler { - switch bits.UintSize { - case 32: - return mangle_32bit - case 64: - return mangle_64bit - default: - panic("unexpected platform int size") - } -}() - -// mangle_platform_int_ptr contains the correct iface mangler on runtime for platform int size. 
-var mangle_platform_int_ptr = func() Mangler { - switch bits.UintSize { - case 32: - return mangle_32bit_ptr - case 64: - return mangle_64bit_ptr - default: - panic("unexpected platform int size") - } -}() - -// mangle_platform_int_slice contains the correct iface mangler on runtime for platform int size. -var mangle_platform_int_slice = func() Mangler { - switch bits.UintSize { - case 32: - return mangle_32bit_slice - case 64: - return mangle_64bit_slice - default: - panic("unexpected platform int size") - } -}() - -// uint128 provides an easily mangleable data type for 128bit data types to be cast into. -type uint128 [2]uint64 - -func mangle_128bit(buf []byte, a any) []byte { - u2 := *(*uint128)(iface_value(a)) - buf = append_uint64(buf, u2[0]) - buf = append_uint64(buf, u2[1]) - return buf -} - -func mangle_128bit_ptr(buf []byte, a any) []byte { - if ptr := (*uint128)(iface_value(a)); ptr != nil { - buf = append(buf, '1') - buf = append_uint64(buf, (*ptr)[0]) - buf = append_uint64(buf, (*ptr)[1]) - } - buf = append(buf, '0') - return buf -} - -func mangle_128bit_slice(buf []byte, a any) []byte { - for _, u2 := range *(*[]uint128)(iface_value(a)) { - buf = append_uint64(buf, u2[0]) - buf = append_uint64(buf, u2[1]) - } - return buf -} - -func mangle_mangled(buf []byte, a any) []byte { - if v := a.(Mangled); v != nil { - buf = append(buf, '1') - return v.Mangle(buf) - } - buf = append(buf, '0') - return buf -} - -func mangle_binary(buf []byte, a any) []byte { - if v := a.(binarymarshaler); v != nil { - b, err := v.MarshalBinary() - if err != nil { - panic("mangle_binary: " + err.Error()) - } - buf = append(buf, '1') - return append(buf, b...) - } - buf = append(buf, '0') - return buf -} - -func mangle_stringer(buf []byte, a any) []byte { - if v := a.(stringer); v != nil { - buf = append(buf, '1') - return append(buf, v.String()...) 
- } - buf = append(buf, '0') - return buf -} - -func mangle_text(buf []byte, a any) []byte { - if v := a.(textmarshaler); v != nil { - b, err := v.MarshalText() - if err != nil { - panic("mangle_text: " + err.Error()) - } - buf = append(buf, '1') - return append(buf, b...) - } - buf = append(buf, '0') - return buf -} - -func mangle_json(buf []byte, a any) []byte { - if v := a.(jsonmarshaler); v != nil { - b, err := v.MarshalJSON() - if err != nil { - panic("mangle_json: " + err.Error()) - } - buf = append(buf, '1') - return append(buf, b...) - } - buf = append(buf, '0') - return buf -} diff --git a/vendor/codeberg.org/gruf/go-structr/README.md b/vendor/codeberg.org/gruf/go-structr/README.md index e2a9bdc15..125b20090 100644 --- a/vendor/codeberg.org/gruf/go-structr/README.md +++ b/vendor/codeberg.org/gruf/go-structr/README.md @@ -2,4 +2,74 @@ A performant struct caching library with automated indexing by arbitrary combinations of fields, including support for negative results (errors!). An example use case is in database lookups. +Some example code of how you can use `go-structr` in your application: +```golang +type Cached struct { + Username string + Domain string + URL string + CountryCode int +} + +var c structr.Cache[*Cached] + +c.Init(structr.Config[*Cached]{ + + // Fields this cached struct type + // will be indexed and stored under. + Indices: []structr.IndexConfig{ + {Fields: "Username,Domain", AllowZero: true}, + {Fields: "URL"}, + {Fields: "CountryCode", Multiple: true}, + }, + + // Maximum LRU cache size before + // new entries cause evictions. + MaxSize: 1000, + + // User provided value copy function to + // reduce need for reflection + ensure + // concurrency safety for returned values. + CopyValue: func(c *Cached) *Cached { + c2 := new(Cached) + *c2 = *c + return c2 + }, + + // User defined invalidation hook. 
+ Invalidate: func(c *Cached) { + log.Println("invalidated:", c) + }, +}) + +var url string + +// Load value from cache, with callback function to hydrate +// cache if value cannot be found under index name with key. +// Negative (error) results are also cached, with user definable +// errors to ignore from caching (e.g. context deadline errs). +value, err := c.LoadOne("URL", func() (*Cached, error) { + return dbType.SelectByURL(url) +}, url) +if err != nil { + return nil, err +} + +// Store value in cache, only if provided callback +// function returns without error. Passes value through +// invalidation hook regardless of error return value. +// +// On success value will be automatically added to and +// accessible under all initially configured indices. +if err := c.Store(value, func() error { + return dbType.Insert(value) +}); err != nil { + return nil, err +} + +// Invalidate all cached results stored under +// provided index name with given field value(s). +c.Invalidate("CountryCode", 42) +``` + This is a core underpinning of [GoToSocial](https://github.com/superseriousbusiness/gotosocial)'s performance. \ No newline at end of file diff --git a/vendor/codeberg.org/gruf/go-structr/cache.go b/vendor/codeberg.org/gruf/go-structr/cache.go index b958fdfdb..fb52f0d8d 100644 --- a/vendor/codeberg.org/gruf/go-structr/cache.go +++ b/vendor/codeberg.org/gruf/go-structr/cache.go @@ -111,8 +111,8 @@ func (c *Cache[T]) Init(config Config[T]) { // provided config. c.mutex.Lock() c.indices = make([]Index[T], len(config.Indices)) - for i, config := range config.Indices { - c.indices[i].init(config) + for i, cfg := range config.Indices { + c.indices[i].init(cfg, config.MaxSize) } c.ignore = config.IgnoreErr c.copy = config.CopyValue @@ -138,7 +138,7 @@ func (c *Cache[T]) GetOne(index string, keyParts ...any) (T, bool) { idx := c.Index(index) // Generate index key from provided parts. - key, ok := idx.keygen.FromParts(keyParts...) + key, ok := idx.hasher.FromParts(keyParts...) 
if !ok { var zero T return zero, false @@ -149,7 +149,7 @@ func (c *Cache[T]) GetOne(index string, keyParts ...any) (T, bool) { } // GetOneBy fetches value from cache stored under index, using precalculated index key. -func (c *Cache[T]) GetOneBy(index *Index[T], key string) (T, bool) { +func (c *Cache[T]) GetOneBy(index *Index[T], key uint64) (T, bool) { if index == nil { panic("no index given") } else if !index.unique { @@ -170,37 +170,33 @@ func (c *Cache[T]) Get(index string, keysParts ...[]any) []T { idx := c.Index(index) // Preallocate expected keys slice length. - keys := make([]string, 0, len(keysParts)) + keys := make([]uint64, 0, len(keysParts)) - // Acquire buf. - buf := getBuf() + // Acquire hasher. + h := getHasher() for _, parts := range keysParts { - // Reset buf. - buf.Reset() + h.Reset() // Generate key from provided parts into buffer. - if !idx.keygen.AppendFromParts(buf, parts...) { + key, ok := idx.hasher.fromParts(h, parts...) + if !ok { continue } - // Get string copy of - // genarated idx key. - key := string(buf.B) - - // Append key to keys. + // Append hash sum to keys. keys = append(keys, key) } - // Done with buf. - putBuf(buf) + // Done with h. + putHasher(h) // Continue fetching values. return c.GetBy(idx, keys...) } // GetBy fetches values from the cache stored under index, using precalculated index keys. -func (c *Cache[T]) GetBy(index *Index[T], keys ...string) []T { +func (c *Cache[T]) GetBy(index *Index[T], keys ...uint64) []T { if index == nil { panic("no index given") } @@ -265,7 +261,7 @@ func (c *Cache[T]) Put(values ...T) { // Store all the passed values. for _, value := range values { - c.store(nil, "", value, nil) + c.store(nil, 0, value, nil) } // Done with lock. @@ -288,7 +284,7 @@ func (c *Cache[T]) LoadOne(index string, load func() (T, error), keyParts ...any idx := c.Index(index) // Generate cache from from provided parts. - key, _ := idx.keygen.FromParts(keyParts...) + key, _ := idx.hasher.FromParts(keyParts...) 
// Continue loading this result. return c.LoadOneBy(idx, load, key) @@ -296,7 +292,7 @@ func (c *Cache[T]) LoadOne(index string, load func() (T, error), keyParts ...any // LoadOneBy fetches one result from the cache stored under index, using precalculated index key. // In the case that no result is found, provided load callback will be used to hydrate the cache. -func (c *Cache[T]) LoadOneBy(index *Index[T], load func() (T, error), key string) (T, error) { +func (c *Cache[T]) LoadOneBy(index *Index[T], load func() (T, error), key uint64) (T, error) { if index == nil { panic("no index given") } else if !index.unique { @@ -421,26 +417,21 @@ func (c *Cache[T]) LoadBy(index *Index[T], get func(load func(keyParts ...any) b } }() - // Acquire buf. - buf := getBuf() + // Acquire hasher. + h := getHasher() // Pass cache check to user func. get(func(keyParts ...any) bool { - - // Reset buf. - buf.Reset() + h.Reset() // Generate index key from provided key parts. - if !index.keygen.AppendFromParts(buf, keyParts...) { + key, ok := index.hasher.fromParts(h, keyParts...) + if !ok { return false } - // Get temp generated key str, - // (not needed after return). - keyStr := buf.String() - // Get all indexed results. - list := index.data[keyStr] + list := index.data[key] if list != nil && list.len > 0 { // Value length before @@ -471,8 +462,8 @@ func (c *Cache[T]) LoadBy(index *Index[T], get func(load func(keyParts ...any) b return false }) - // Done with buf. - putBuf(buf) + // Done with h. + putHasher(h) // Done with lock. c.mutex.Unlock() @@ -528,7 +519,7 @@ func (c *Cache[T]) Invalidate(index string, keyParts ...any) { idx := c.Index(index) // Generate cache from from provided parts. - key, ok := idx.keygen.FromParts(keyParts...) + key, ok := idx.hasher.FromParts(keyParts...) if !ok { return } @@ -538,7 +529,7 @@ func (c *Cache[T]) Invalidate(index string, keyParts ...any) { } // InvalidateBy invalidates all results stored under index key. 
-func (c *Cache[T]) InvalidateBy(index *Index[T], key string) { +func (c *Cache[T]) InvalidateBy(index *Index[T], key uint64) { if index == nil { panic("no index given") } @@ -639,7 +630,7 @@ func (c *Cache[T]) Cap() int { // store will store the given value / error result in the cache, storing it under the // already provided index + key if provided, else generating keys from provided value. -func (c *Cache[T]) store(index *Index[T], key string, value T, err error) { +func (c *Cache[T]) store(index *Index[T], key uint64, value T, err error) { // Acquire new result. res := result_acquire(c) @@ -671,8 +662,8 @@ func (c *Cache[T]) store(index *Index[T], key string, value T, err error) { // value, used during cache key gen. rvalue := reflect.ValueOf(value) - // Acquire buf. - buf := getBuf() + // Acquire hasher. + h := getHasher() for i := range c.indices { // Get current index ptr. @@ -684,22 +675,20 @@ func (c *Cache[T]) store(index *Index[T], key string, value T, err error) { continue } - // Generate key from reflect value, + // Generate hash from reflect value, // (this ignores zero value keys). - buf.Reset() // reset buf first - if !idx.keygen.appendFromRValue(buf, rvalue) { + h.Reset() // reset buf first + key, ok := idx.hasher.fromRValue(h, rvalue) + if !ok { continue } - // Alloc key copy. - key := string(buf.B) - // Append result to index at key. index_append(c, idx, key, res) } - // Done with buf. - putBuf(buf) + // Done with h. 
+ putHasher(h) } if c.lruList.len > c.maxSize { diff --git a/vendor/codeberg.org/gruf/go-structr/hash.go b/vendor/codeberg.org/gruf/go-structr/hash.go new file mode 100644 index 000000000..84f0e62fc --- /dev/null +++ b/vendor/codeberg.org/gruf/go-structr/hash.go @@ -0,0 +1,370 @@ +package structr + +import ( + "reflect" + "unsafe" + + "github.com/zeebo/xxh3" +) + +func hasher(t reflect.Type) func(*xxh3.Hasher, any) bool { + switch t.Kind() { + case reflect.Int, + reflect.Uint, + reflect.Uintptr: + switch unsafe.Sizeof(int(0)) { + case 4: + return hash32bit + case 8: + return hash64bit + default: + panic("unexpected platform int size") + } + + case reflect.Int8, + reflect.Uint8: + return hash8bit + + case reflect.Int16, + reflect.Uint16: + return hash16bit + + case reflect.Int32, + reflect.Uint32, + reflect.Float32: + return hash32bit + + case reflect.Int64, + reflect.Uint64, + reflect.Float64, + reflect.Complex64: + return hash64bit + + case reflect.String: + return hashstring + + case reflect.Pointer: + switch t.Elem().Kind() { + case reflect.Int, + reflect.Uint, + reflect.Uintptr: + switch unsafe.Sizeof(int(0)) { + case 4: + return hash32bitptr + case 8: + return hash64bitptr + default: + panic("unexpected platform int size") + } + + case reflect.Int8, + reflect.Uint8: + return hash8bitptr + + case reflect.Int16, + reflect.Uint16: + return hash16bitptr + + case reflect.Int32, + reflect.Uint32, + reflect.Float32: + return hash32bitptr + + case reflect.Int64, + reflect.Uint64, + reflect.Float64, + reflect.Complex64: + return hash64bitptr + + case reflect.String: + return hashstringptr + } + + case reflect.Slice: + switch t.Elem().Kind() { + case reflect.Int, + reflect.Uint, + reflect.Uintptr: + switch unsafe.Sizeof(int(0)) { + case 4: + return hash32bitslice + case 8: + return hash64bitslice + default: + panic("unexpected platform int size") + } + + case reflect.Int8, + reflect.Uint8: + return hash8bitslice + + case reflect.Int16, + reflect.Uint16: + return 
hash16bitslice + + case reflect.Int32, + reflect.Uint32, + reflect.Float32: + return hash32bitslice + + case reflect.Int64, + reflect.Uint64, + reflect.Float64, + reflect.Complex64: + return hash64bitslice + + case reflect.String: + return hashstringslice + } + } + switch { + case t.Implements(reflect.TypeOf((*interface{ MarshalBinary() ([]byte, error) })(nil)).Elem()): + return hashbinarymarshaler + + case t.Implements(reflect.TypeOf((*interface{ Bytes() []byte })(nil)).Elem()): + return hashbytesmethod + + case t.Implements(reflect.TypeOf((*interface{ String() string })(nil)).Elem()): + return hashstringmethod + + case t.Implements(reflect.TypeOf((*interface{ MarshalText() ([]byte, error) })(nil)).Elem()): + return hashtextmarshaler + + case t.Implements(reflect.TypeOf((*interface{ MarshalJSON() ([]byte, error) })(nil)).Elem()): + return hashjsonmarshaler + } + panic("unhashable type") +} + +func hash8bit(h *xxh3.Hasher, a any) bool { + u := *(*uint8)(iface_value(a)) + _, _ = h.Write([]byte{u}) + return u == 0 +} + +func hash8bitptr(h *xxh3.Hasher, a any) bool { + u := (*uint8)(iface_value(a)) + if u == nil { + _, _ = h.Write([]byte{ + 0, + }) + return true + } else { + _, _ = h.Write([]byte{ + 1, + byte(*u), + }) + return false + } +} + +func hash8bitslice(h *xxh3.Hasher, a any) bool { + b := *(*[]byte)(iface_value(a)) + _, _ = h.Write(b) + return b == nil +} + +func hash16bit(h *xxh3.Hasher, a any) bool { + u := *(*uint16)(iface_value(a)) + _, _ = h.Write([]byte{ + byte(u), + byte(u >> 8), + }) + return u == 0 +} + +func hash16bitptr(h *xxh3.Hasher, a any) bool { + u := (*uint16)(iface_value(a)) + if u == nil { + _, _ = h.Write([]byte{ + 0, + }) + return true + } else { + _, _ = h.Write([]byte{ + 1, + byte(*u), + byte(*u >> 8), + }) + return false + } +} + +func hash16bitslice(h *xxh3.Hasher, a any) bool { + u := *(*[]uint16)(iface_value(a)) + for i := range u { + _, _ = h.Write([]byte{ + byte(u[i]), + byte(u[i] >> 8), + }) + } + return u == nil +} + +func 
hash32bit(h *xxh3.Hasher, a any) bool { + u := *(*uint32)(iface_value(a)) + _, _ = h.Write([]byte{ + byte(u), + byte(u >> 8), + byte(u >> 16), + byte(u >> 24), + }) + return u == 0 +} + +func hash32bitptr(h *xxh3.Hasher, a any) bool { + u := (*uint32)(iface_value(a)) + if u == nil { + _, _ = h.Write([]byte{ + 0, + }) + return true + } else { + _, _ = h.Write([]byte{ + 1, + byte(*u), + byte(*u >> 8), + byte(*u >> 16), + byte(*u >> 24), + }) + return false + } +} + +func hash32bitslice(h *xxh3.Hasher, a any) bool { + u := *(*[]uint32)(iface_value(a)) + for i := range u { + _, _ = h.Write([]byte{ + byte(u[i]), + byte(u[i] >> 8), + byte(u[i] >> 16), + byte(u[i] >> 24), + }) + } + return u == nil +} + +func hash64bit(h *xxh3.Hasher, a any) bool { + u := *(*uint64)(iface_value(a)) + _, _ = h.Write([]byte{ + byte(u), + byte(u >> 8), + byte(u >> 16), + byte(u >> 24), + byte(u >> 32), + byte(u >> 40), + byte(u >> 48), + byte(u >> 56), + }) + return u == 0 +} + +func hash64bitptr(h *xxh3.Hasher, a any) bool { + u := (*uint64)(iface_value(a)) + if u == nil { + _, _ = h.Write([]byte{ + 0, + }) + return true + } else { + _, _ = h.Write([]byte{ + 1, + byte(*u), + byte(*u >> 8), + byte(*u >> 16), + byte(*u >> 24), + byte(*u >> 32), + byte(*u >> 40), + byte(*u >> 48), + byte(*u >> 56), + }) + return false + } +} + +func hash64bitslice(h *xxh3.Hasher, a any) bool { + u := *(*[]uint64)(iface_value(a)) + for i := range u { + _, _ = h.Write([]byte{ + byte(u[i]), + byte(u[i] >> 8), + byte(u[i] >> 16), + byte(u[i] >> 24), + byte(u[i] >> 32), + byte(u[i] >> 40), + byte(u[i] >> 48), + byte(u[i] >> 56), + }) + } + return u == nil +} + +func hashstring(h *xxh3.Hasher, a any) bool { + s := *(*string)(iface_value(a)) + _, _ = h.WriteString(s) + return s == "" +} + +func hashstringptr(h *xxh3.Hasher, a any) bool { + s := (*string)(iface_value(a)) + if s == nil { + _, _ = h.Write([]byte{ + 0, + }) + return true + } else { + _, _ = h.Write([]byte{ + 1, + }) + _, _ = h.WriteString(*s) + return 
false + } +} + +func hashstringslice(h *xxh3.Hasher, a any) bool { + s := *(*[]string)(iface_value(a)) + for i := range s { + _, _ = h.WriteString(s[i]) + } + return s == nil +} + +func hashbinarymarshaler(h *xxh3.Hasher, a any) bool { + i := a.(interface{ MarshalBinary() ([]byte, error) }) + b, _ := i.MarshalBinary() + _, _ = h.Write(b) + return b == nil +} + +func hashbytesmethod(h *xxh3.Hasher, a any) bool { + i := a.(interface{ Bytes() []byte }) + b := i.Bytes() + _, _ = h.Write(b) + return b == nil +} + +func hashstringmethod(h *xxh3.Hasher, a any) bool { + i := a.(interface{ String() string }) + s := i.String() + _, _ = h.WriteString(s) + return s == "" +} + +func hashtextmarshaler(h *xxh3.Hasher, a any) bool { + i := a.(interface{ MarshalText() ([]byte, error) }) + b, _ := i.MarshalText() + _, _ = h.Write(b) + return b == nil +} + +func hashjsonmarshaler(h *xxh3.Hasher, a any) bool { + i := a.(interface{ MarshalJSON() ([]byte, error) }) + b, _ := i.MarshalJSON() + _, _ = h.Write(b) + return b == nil +} + +func iface_value(a any) unsafe.Pointer { + type eface struct{ _, v unsafe.Pointer } + return (*eface)(unsafe.Pointer(&a)).v +} diff --git a/vendor/codeberg.org/gruf/go-structr/hasher.go b/vendor/codeberg.org/gruf/go-structr/hasher.go new file mode 100644 index 000000000..77b8a0991 --- /dev/null +++ b/vendor/codeberg.org/gruf/go-structr/hasher.go @@ -0,0 +1,176 @@ +package structr + +import ( + "reflect" + "strings" + + "github.com/zeebo/xxh3" +) + +// Hasher provides hash checksumming for a configured +// index, based on an arbitrary combination of generic +// parameter struct type's fields. This provides hashing +// both by input of the fields separately, or passing +// an instance of the generic parameter struct type. 
+// +// Supported field types by the hasher include: +// - ~int +// - ~int8 +// - ~int16 +// - ~int32 +// - ~int64 +// - ~float32 +// - ~float64 +// - ~string +// - slices / ptrs of the above +type Hasher[StructType any] struct { + + // fields contains our representation + // of struct fields contained in the + // creation of sums by this hasher. + fields []structfield + + // zero specifies whether zero + // value fields are permitted. + zero bool +} + +// NewHasher returns a new initialized Hasher for the receiving generic +// parameter type, comprising of the given field strings, and whether to +// allow zero values to be included within generated hash checksum values. +func NewHasher[T any](fields []string, allowZero bool) Hasher[T] { + var h Hasher[T] + + // Preallocate expected struct field slice. + h.fields = make([]structfield, len(fields)) + + // Get the reflected struct ptr type. + t := reflect.TypeOf((*T)(nil)).Elem() + + for i, fieldName := range fields { + // Split name to account for nesting. + names := strings.Split(fieldName, ".") + + // Look for a usable struct field from type. + sfield, ok := findField(t, names, allowZero) + if !ok { + panicf("failed finding field: %s", fieldName) + } + + // Set parsed struct field. + h.fields[i] = sfield + } + + // Set config flags. + h.zero = allowZero + + return h +} + +// FromParts generates hash checksum (used as index key) from individual key parts. +func (h *Hasher[T]) FromParts(parts ...any) (sum uint64, ok bool) { + hh := getHasher() + sum, ok = h.fromParts(hh, parts...) + putHasher(hh) + return + +} + +func (h *Hasher[T]) fromParts(hh *xxh3.Hasher, parts ...any) (sum uint64, ok bool) { + if len(parts) != len(h.fields) { + // User must provide correct number of parts for key. + panicf("incorrect number key parts: want=%d received=%d", + len(h.fields), + len(parts), + ) + } + + if h.zero { + // Zero values are permitted, + // mangle all values and ignore + // zero value return booleans. 
+ for i, part := range parts { + + // Write mangled part to hasher. + _ = h.fields[i].hasher(hh, part) + } + } else { + // Zero values are NOT permitted. + for i, part := range parts { + + // Write mangled field to hasher. + z := h.fields[i].hasher(hh, part) + + if z { + // The value was zero for + // this type, return early. + return 0, false + } + } + } + + return hh.Sum64(), true +} + +// FromValue generates hash checksum (used as index key) from a value, via reflection. +func (h *Hasher[T]) FromValue(value T) (sum uint64, ok bool) { + rvalue := reflect.ValueOf(value) + hh := getHasher() + sum, ok = h.fromRValue(hh, rvalue) + putHasher(hh) + return +} + +func (h *Hasher[T]) fromRValue(hh *xxh3.Hasher, rvalue reflect.Value) (uint64, bool) { + // Follow any ptrs leading to value. + for rvalue.Kind() == reflect.Pointer { + rvalue = rvalue.Elem() + } + + if h.zero { + // Zero values are permitted, + // mangle all values and ignore + // zero value return booleans. + for i := range h.fields { + + // Get the reflect value's field at idx. + fv := rvalue.FieldByIndex(h.fields[i].index) + fi := fv.Interface() + + // Write mangled field to hasher. + _ = h.fields[i].hasher(hh, fi) + } + } else { + // Zero values are NOT permitted. + for i := range h.fields { + + // Get the reflect value's field at idx. + fv := rvalue.FieldByIndex(h.fields[i].index) + fi := fv.Interface() + + // Write mangled field to hasher. + z := h.fields[i].hasher(hh, fi) + + if z { + // The value was zero for + // this type, return early. + return 0, false + } + } + } + + return hh.Sum64(), true +} + +type structfield struct { + // index is the reflected index + // of this field (this takes into + // account struct nesting). + index []int + + // hasher is the relevant function + // for hashing value of structfield + // into the supplied hashbuf, where + // return value indicates if zero. 
+ hasher func(*xxh3.Hasher, any) bool +} diff --git a/vendor/codeberg.org/gruf/go-structr/index.go b/vendor/codeberg.org/gruf/go-structr/index.go index bacf6142e..4999249f5 100644 --- a/vendor/codeberg.org/gruf/go-structr/index.go +++ b/vendor/codeberg.org/gruf/go-structr/index.go @@ -45,12 +45,12 @@ type Index[StructType any] struct { // string value of contained fields. name string - // struct field key serializer. - keygen KeyGen[StructType] + // struct field key hasher. + hasher Hasher[StructType] // backing in-memory data store of // generated index keys to result lists. - data map[string]*list[*result[StructType]] + data map[uint64]*list[*result[StructType]] // whether to allow // multiple results @@ -59,20 +59,20 @@ type Index[StructType any] struct { } // init initializes this index with the given configuration. -func (i *Index[T]) init(config IndexConfig) { +func (i *Index[T]) init(config IndexConfig, max int) { fields := strings.Split(config.Fields, ",") i.name = config.Fields - i.keygen = NewKeyGen[T](fields, config.AllowZero) + i.hasher = NewHasher[T](fields, config.AllowZero) i.unique = !config.Multiple - i.data = make(map[string]*list[*result[T]]) + i.data = make(map[uint64]*list[*result[T]], max+1) } -// KeyGen returns the key generator associated with this index. -func (i *Index[T]) KeyGen() *KeyGen[T] { - return &i.keygen +// Hasher returns the hash checksummer associated with this index. +func (i *Index[T]) Hasher() *Hasher[T] { + return &i.hasher } -func index_append[T any](c *Cache[T], i *Index[T], key string, res *result[T]) { +func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) { // Acquire + setup indexkey. 
ikey := indexkey_acquire(c) ikey.entry.Value = res @@ -138,7 +138,7 @@ func index_deleteOne[T any](c *Cache[T], i *Index[T], ikey *indexkey[T]) { } } -func index_delete[T any](c *Cache[T], i *Index[T], key string, fn func(*result[T])) { +func index_delete[T any](c *Cache[T], i *Index[T], key uint64, fn func(*result[T])) { if fn == nil { panic("nil fn") } @@ -180,7 +180,7 @@ type indexkey[T any] struct { // key is the generated index key // the related result is indexed // under, in the below index. - key string + key uint64 // index is the index that the // related result is indexed in. @@ -205,7 +205,7 @@ func indexkey_acquire[T any](c *Cache[T]) *indexkey[T] { func indexkey_release[T any](c *Cache[T], ikey *indexkey[T]) { // Reset indexkey. ikey.entry.Value = nil - ikey.key = "" + ikey.key = 0 ikey.index = nil // Release indexkey to memory pool. diff --git a/vendor/codeberg.org/gruf/go-structr/key.go b/vendor/codeberg.org/gruf/go-structr/key.go deleted file mode 100644 index 557a5f033..000000000 --- a/vendor/codeberg.org/gruf/go-structr/key.go +++ /dev/null @@ -1,204 +0,0 @@ -package structr - -import ( - "reflect" - "strings" - - "codeberg.org/gruf/go-byteutil" - "codeberg.org/gruf/go-mangler" -) - -// KeyGen is the underlying index key generator -// used within Index, and therefore Cache itself. -type KeyGen[StructType any] struct { - - // fields contains our representation of - // the struct fields contained in the - // creation of keys by this generator. - fields []structfield - - // zero specifies whether zero - // value fields are permitted. - zero bool -} - -// NewKeyGen returns a new initialized KeyGen for the receiving generic -// parameter type, comprising of the given field strings, and whether to -// allow zero values to be included within generated output strings. -func NewKeyGen[T any](fields []string, allowZero bool) KeyGen[T] { - var kgen KeyGen[T] - - // Preallocate expected struct field slice. 
- kgen.fields = make([]structfield, len(fields)) - - // Get the reflected struct ptr type. - t := reflect.TypeOf((*T)(nil)).Elem() - - for i, fieldName := range fields { - // Split name to account for nesting. - names := strings.Split(fieldName, ".") - - // Look for a usable struct field from type. - sfield, ok := findField(t, names, allowZero) - if !ok { - panicf("failed finding field: %s", fieldName) - } - - // Set parsed struct field. - kgen.fields[i] = sfield - } - - // Set config flags. - kgen.zero = allowZero - - return kgen -} - -// FromParts generates key string from individual key parts. -func (kgen *KeyGen[T]) FromParts(parts ...any) (key string, ok bool) { - buf := getBuf() - if ok = kgen.AppendFromParts(buf, parts...); ok { - key = string(buf.B) - } - putBuf(buf) - return -} - -// FromValue generates key string from a value, via reflection. -func (kgen *KeyGen[T]) FromValue(value T) (key string, ok bool) { - buf := getBuf() - rvalue := reflect.ValueOf(value) - if ok = kgen.appendFromRValue(buf, rvalue); ok { - key = string(buf.B) - } - putBuf(buf) - return -} - -// AppendFromParts generates key string into provided buffer, from individual key parts. -func (kgen *KeyGen[T]) AppendFromParts(buf *byteutil.Buffer, parts ...any) bool { - if len(parts) != len(kgen.fields) { - // User must provide correct number of parts for key. - panicf("incorrect number key parts: want=%d received=%d", - len(parts), - len(kgen.fields), - ) - } - - if kgen.zero { - // Zero values are permitted, - // mangle all values and ignore - // zero value return booleans. - for i, part := range parts { - - // Mangle this value into buffer. - _ = kgen.fields[i].Mangle(buf, part) - - // Append part separator. - buf.B = append(buf.B, '.') - } - } else { - // Zero values are NOT permitted. - for i, part := range parts { - - // Mangle this value into buffer. - z := kgen.fields[i].Mangle(buf, part) - - if z { - // The value was zero for - // this type, return early. 
- return false - } - - // Append part separator. - buf.B = append(buf.B, '.') - } - } - - // Drop the last separator. - buf.B = buf.B[:len(buf.B)-1] - - return true -} - -// AppendFromValue generates key string into provided buffer, from a value via reflection. -func (kgen *KeyGen[T]) AppendFromValue(buf *byteutil.Buffer, value T) bool { - return kgen.appendFromRValue(buf, reflect.ValueOf(value)) -} - -// appendFromRValue is the underlying generator function for the exported ___FromValue() functions, -// accepting a reflected input. We do not expose this as the reflected value is EXPECTED to be right. -func (kgen *KeyGen[T]) appendFromRValue(buf *byteutil.Buffer, rvalue reflect.Value) bool { - // Follow any ptrs leading to value. - for rvalue.Kind() == reflect.Pointer { - rvalue = rvalue.Elem() - } - - if kgen.zero { - // Zero values are permitted, - // mangle all values and ignore - // zero value return booleans. - for i := range kgen.fields { - - // Get the reflect value's field at idx. - fv := rvalue.FieldByIndex(kgen.fields[i].index) - fi := fv.Interface() - - // Mangle this value into buffer. - _ = kgen.fields[i].Mangle(buf, fi) - - // Append part separator. - buf.B = append(buf.B, '.') - } - } else { - // Zero values are NOT permitted. - for i := range kgen.fields { - - // Get the reflect value's field at idx. - fv := rvalue.FieldByIndex(kgen.fields[i].index) - fi := fv.Interface() - - // Mangle this value into buffer. - z := kgen.fields[i].Mangle(buf, fi) - - if z { - // The value was zero for - // this type, return early. - return false - } - - // Append part separator. - buf.B = append(buf.B, '.') - } - } - - // Drop the last separator. - buf.B = buf.B[:len(buf.B)-1] - - return true -} - -type structfield struct { - // index is the reflected index - // of this field (this takes into - // account struct nesting). - index []int - - // zero is the possible mangled - // zero value for this field. 
- zero string - - // mangler is the mangler function for - // serializing values of this field. - mangler mangler.Mangler -} - -// Mangle mangles the given value, using the determined type-appropriate -// field's type. The returned boolean indicates whether this is a zero value. -func (f *structfield) Mangle(buf *byteutil.Buffer, value any) (isZero bool) { - s := len(buf.B) // start pos. - buf.B = f.mangler(buf.B, value) - e := len(buf.B) // end pos. - isZero = (f.zero == string(buf.B[s:e])) - return -} diff --git a/vendor/codeberg.org/gruf/go-structr/util.go b/vendor/codeberg.org/gruf/go-structr/util.go index d8f227baf..01ac06cf1 100644 --- a/vendor/codeberg.org/gruf/go-structr/util.go +++ b/vendor/codeberg.org/gruf/go-structr/util.go @@ -7,8 +7,7 @@ import ( "unicode" "unicode/utf8" - "codeberg.org/gruf/go-byteutil" - "codeberg.org/gruf/go-mangler" + "github.com/zeebo/xxh3" ) // findField will search for a struct field with given set of names, where names is a len > 0 slice of names account for nesting. @@ -68,22 +67,8 @@ func findField(t reflect.Type, names []string, allowZero bool) (sfield structfie t = field.Type } - // Get final type mangler func. - sfield.mangler = mangler.Get(t) - - if allowZero { - var buf []byte - - // Allocate field instance. - v := reflect.New(field.Type) - v = v.Elem() - - // Serialize this zero value into buf. - buf = sfield.mangler(buf, v.Interface()) - - // Set zero value str. - sfield.zero = string(buf) - } + // Get final type hash func. + sfield.hasher = hasher(t) return } @@ -93,26 +78,21 @@ func panicf(format string, args ...any) { panic(fmt.Sprintf(format, args...)) } -// bufpool provides a memory pool of byte -// buffers used when encoding key types. -var bufPool sync.Pool +// hashPool provides a memory pool of xxh3 +// hasher objects used indexing field vals. +var hashPool sync.Pool -// getBuf fetches buffer from memory pool. 
-func getBuf() *byteutil.Buffer { - v := bufPool.Get() +// gethashbuf fetches hasher from memory pool. +func getHasher() *xxh3.Hasher { + v := hashPool.Get() if v == nil { - buf := new(byteutil.Buffer) - buf.B = make([]byte, 0, 512) - v = buf + v = new(xxh3.Hasher) } - return v.(*byteutil.Buffer) + return v.(*xxh3.Hasher) } -// putBuf replaces buffer in memory pool. -func putBuf(buf *byteutil.Buffer) { - if buf.Cap() > int(^uint16(0)) { - return // drop large bufs - } - buf.Reset() - bufPool.Put(buf) +// putHasher replaces hasher in memory pool. +func putHasher(h *xxh3.Hasher) { + h.Reset() + hashPool.Put(h) } diff --git a/vendor/github.com/zeebo/xxh3/.gitignore b/vendor/github.com/zeebo/xxh3/.gitignore new file mode 100644 index 000000000..928e12f53 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/.gitignore @@ -0,0 +1,6 @@ +upstream +*.pprof +xxh3.test +.vscode +*.txt +_compat diff --git a/vendor/github.com/zeebo/xxh3/LICENSE b/vendor/github.com/zeebo/xxh3/LICENSE new file mode 100644 index 000000000..477f8e5e1 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/LICENSE @@ -0,0 +1,25 @@ +xxHash Library +Copyright (c) 2012-2014, Yann Collet +Copyright (c) 2019, Jeff Wendling +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/zeebo/xxh3/Makefile b/vendor/github.com/zeebo/xxh3/Makefile new file mode 100644 index 000000000..8bd78c482 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/Makefile @@ -0,0 +1,27 @@ +.PHONY: all vet +all: genasm _compat + +genasm: avo/avx.go avo/sse.go + cd ./avo; go generate gen.go + +clean: + rm accum_vector_avx_amd64.s + rm accum_vector_sse_amd64.s + rm _compat + +upstream/xxhash.o: upstream/xxhash.h + ( cd upstream && make ) + +_compat: _compat.c upstream/xxhash.o + gcc -o _compat _compat.c ./upstream/xxhash.o + +vet: + GOOS=linux GOARCH=386 GO386=softfloat go vet ./... + GOOS=windows GOARCH=386 GO386=softfloat go vet ./... + GOOS=linux GOARCH=amd64 go vet ./... + GOOS=windows GOARCH=amd64 go vet ./... + GOOS=darwin GOARCH=amd64 go vet ./... + GOOS=linux GOARCH=arm go vet ./... + GOOS=linux GOARCH=arm64 go vet ./... + GOOS=windows GOARCH=arm64 go vet ./... + GOOS=darwin GOARCH=arm64 go vet ./... 
\ No newline at end of file diff --git a/vendor/github.com/zeebo/xxh3/README.md b/vendor/github.com/zeebo/xxh3/README.md new file mode 100644 index 000000000..4633fc03a --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/README.md @@ -0,0 +1,38 @@ +# XXH3 +[![GoDoc](https://godoc.org/github.com/zeebo/xxh3?status.svg)](https://godoc.org/github.com/zeebo/xxh3) +[![Sourcegraph](https://sourcegraph.com/github.com/zeebo/xxh3/-/badge.svg)](https://sourcegraph.com/github.com/zeebo/xxh3?badge) +[![Go Report Card](https://goreportcard.com/badge/github.com/zeebo/xxh3)](https://goreportcard.com/report/github.com/zeebo/xxh3) + +This package is a port of the [xxh3](https://github.com/Cyan4973/xxHash) library to Go. + +Upstream has fixed the output as of v0.8.0, and this package matches that. + +--- + +# Benchmarks + +Run on my `i7-8850H CPU @ 2.60GHz` + +## Small Sizes + +| Bytes | Rate | +|-----------|--------------------------------------| +|` 0 ` |` 0.74 ns/op ` | +|` 1-3 ` |` 4.19 ns/op (0.24 GB/s - 0.71 GB/s) `| +|` 4-8 ` |` 4.16 ns/op (0.97 GB/s - 1.98 GB/s) `| +|` 9-16 ` |` 4.46 ns/op (2.02 GB/s - 3.58 GB/s) `| +|` 17-32 ` |` 6.22 ns/op (2.76 GB/s - 5.15 GB/s) `| +|` 33-64 ` |` 8.00 ns/op (4.13 GB/s - 8.13 GB/s) `| +|` 65-96 ` |` 11.0 ns/op (5.91 GB/s - 8.84 GB/s) `| +|` 97-128 ` |` 12.8 ns/op (7.68 GB/s - 10.0 GB/s) `| + +## Large Sizes + +| Bytes | Rate | SSE2 Rate | AVX2 Rate | +|---------|--------------------------|--------------------------|--------------------------| +|` 129 ` |` 13.6 ns/op (9.45 GB/s) `| | | +|` 240 ` |` 23.8 ns/op (10.1 GB/s) `| | | +|` 241 ` |` 40.5 ns/op (5.97 GB/s) `|` 23.3 ns/op (10.4 GB/s) `|` 20.1 ns/op (12.0 GB/s) `| +|` 512 ` |` 69.8 ns/op (7.34 GB/s) `|` 30.4 ns/op (16.9 GB/s) `|` 24.7 ns/op (20.7 GB/s) `| +|` 1024 ` |` 132 ns/op (7.77 GB/s) `|` 48.9 ns/op (20.9 GB/s) `|` 37.7 ns/op (27.2 GB/s) `| +|` 100KB `|` 13.0 us/op (7.88 GB/s) `|` 4.05 us/op (25.3 GB/s) `|` 2.31 us/op (44.3 GB/s) `| diff --git a/vendor/github.com/zeebo/xxh3/_compat.c 
b/vendor/github.com/zeebo/xxh3/_compat.c new file mode 100644 index 000000000..fda9f36ff --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/_compat.c @@ -0,0 +1,39 @@ +#include "upstream/xxhash.h" +#include <stdio.h> + +int main() { + unsigned char buf[4096]; + for (int i = 0; i < 4096; i++) { + buf[i] = (unsigned char)((i+1)%251); + } + + printf("var testVecs64 = []uint64{\n"); + for (int i = 0; i < 4096; i++) { + if (i % 4 == 0) { + printf("\t"); + } + + uint64_t h = XXH3_64bits(buf, (size_t)i); + printf("0x%lx, ", h); + + if (i % 4 == 3) { + printf("\n\t"); + } + } + printf("}\n\n"); + + printf("var testVecs128 = [][2]uint64{\n"); + for (int i = 0; i < 4096; i++) { + if (i % 4 == 0) { + printf("\t"); + } + + XXH128_hash_t h = XXH3_128bits(buf, (size_t)i); + printf("{0x%lx, 0x%lx}, ", h.high64, h.low64); + + if (i % 4 == 3) { + printf("\n"); + } + } + printf("}\n\n"); +} diff --git a/vendor/github.com/zeebo/xxh3/accum_generic.go b/vendor/github.com/zeebo/xxh3/accum_generic.go new file mode 100644 index 000000000..b1be78507 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_generic.go @@ -0,0 +1,542 @@ +package xxh3 + +// avx512Switch is the size at which the avx512 code is used. +// Bigger blocks benefit more. 
+const avx512Switch = 1 << 10 + +func accumScalar(accs *[8]u64, p, secret ptr, l u64) { + if secret != key { + accumScalarSeed(accs, p, secret, l) + return + } + for l > _block { + k := secret + + // accs + for i := 0; i < 16; i++ { + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ readU64(k, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(k, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(k, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(k, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(k, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(k, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(k, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(k, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + l -= _stripe + if l > 0 { + p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) + } + } + + // scramble accs + accs[0] ^= accs[0] >> 47 + accs[0] ^= key64_128 + accs[0] *= prime32_1 + + accs[1] ^= accs[1] >> 47 + accs[1] ^= key64_136 + accs[1] *= prime32_1 + + accs[2] ^= accs[2] >> 47 + accs[2] ^= key64_144 + accs[2] *= prime32_1 + + accs[3] ^= accs[3] >> 47 + accs[3] ^= key64_152 + accs[3] *= prime32_1 + + accs[4] ^= accs[4] >> 47 + accs[4] ^= key64_160 + accs[4] *= prime32_1 + + accs[5] ^= accs[5] >> 47 + accs[5] ^= key64_168 + accs[5] *= prime32_1 + + accs[6] ^= accs[6] >> 47 + accs[6] ^= key64_176 + accs[6] *= prime32_1 + + accs[7] ^= accs[7] >> 47 + accs[7] ^= key64_184 + accs[7] *= prime32_1 + } + + if l > 0 { + t, k := (l-1)/_stripe, secret + + for i := u64(0); i < t; i++ { + dv0 := 
readU64(p, 8*0) + dk0 := dv0 ^ readU64(k, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(k, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(k, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(k, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(k, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(k, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(k, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(k, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + l -= _stripe + if l > 0 { + p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) + } + } + + if l > 0 { + p = ptr(ui(p) - uintptr(_stripe-l)) + + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ key64_121 + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ key64_129 + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ key64_137 + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ key64_145 + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ key64_153 + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ key64_161 + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ key64_169 + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ key64_177 + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * 
(dk7 >> 32) + } + } +} + +func accumBlockScalar(accs *[8]u64, p, secret ptr) { + if secret != key { + accumBlockScalarSeed(accs, p, secret) + return + } + // accs + for i := 0; i < 16; i++ { + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ readU64(secret, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(secret, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(secret, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(secret, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(secret, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(secret, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(secret, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(secret, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) + } + + // scramble accs + accs[0] ^= accs[0] >> 47 + accs[0] ^= key64_128 + accs[0] *= prime32_1 + + accs[1] ^= accs[1] >> 47 + accs[1] ^= key64_136 + accs[1] *= prime32_1 + + accs[2] ^= accs[2] >> 47 + accs[2] ^= key64_144 + accs[2] *= prime32_1 + + accs[3] ^= accs[3] >> 47 + accs[3] ^= key64_152 + accs[3] *= prime32_1 + + accs[4] ^= accs[4] >> 47 + accs[4] ^= key64_160 + accs[4] *= prime32_1 + + accs[5] ^= accs[5] >> 47 + accs[5] ^= key64_168 + accs[5] *= prime32_1 + + accs[6] ^= accs[6] >> 47 + accs[6] ^= key64_176 + accs[6] *= prime32_1 + + accs[7] ^= accs[7] >> 47 + accs[7] ^= key64_184 + accs[7] *= prime32_1 +} + +// accumScalarSeed should be used with custom key. 
+func accumScalarSeed(accs *[8]u64, p, secret ptr, l u64) { + for l > _block { + k := secret + + // accs + for i := 0; i < 16; i++ { + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ readU64(k, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(k, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(k, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(k, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(k, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(k, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(k, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(k, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + l -= _stripe + if l > 0 { + p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) + } + } + + // scramble accs + accs[0] ^= accs[0] >> 47 + accs[0] ^= readU64(secret, 128) + accs[0] *= prime32_1 + + accs[1] ^= accs[1] >> 47 + accs[1] ^= readU64(secret, 136) + accs[1] *= prime32_1 + + accs[2] ^= accs[2] >> 47 + accs[2] ^= readU64(secret, 144) + accs[2] *= prime32_1 + + accs[3] ^= accs[3] >> 47 + accs[3] ^= readU64(secret, 152) + accs[3] *= prime32_1 + + accs[4] ^= accs[4] >> 47 + accs[4] ^= readU64(secret, 160) + accs[4] *= prime32_1 + + accs[5] ^= accs[5] >> 47 + accs[5] ^= readU64(secret, 168) + accs[5] *= prime32_1 + + accs[6] ^= accs[6] >> 47 + accs[6] ^= readU64(secret, 176) + accs[6] *= prime32_1 + + accs[7] ^= accs[7] >> 47 + accs[7] ^= readU64(secret, 184) + accs[7] *= prime32_1 + } + + if l > 0 { + t, k := (l-1)/_stripe, secret + + for i := u64(0); i < t; i++ { + dv0 := readU64(p, 8*0) + 
dk0 := dv0 ^ readU64(k, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(k, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(k, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(k, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(k, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(k, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(k, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(k, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + l -= _stripe + if l > 0 { + p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) + } + } + + if l > 0 { + p = ptr(ui(p) - uintptr(_stripe-l)) + + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ readU64(secret, 121) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(secret, 129) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(secret, 137) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(secret, 145) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(secret, 153) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(secret, 161) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(secret, 169) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ 
readU64(secret, 177) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + } + } +} + +// accumBlockScalarSeed should be used with custom key. +func accumBlockScalarSeed(accs *[8]u64, p, secret ptr) { + // accs + { + secret := secret + for i := 0; i < 16; i++ { + dv0 := readU64(p, 8*0) + dk0 := dv0 ^ readU64(secret, 8*0) + accs[1] += dv0 + accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) + + dv1 := readU64(p, 8*1) + dk1 := dv1 ^ readU64(secret, 8*1) + accs[0] += dv1 + accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) + + dv2 := readU64(p, 8*2) + dk2 := dv2 ^ readU64(secret, 8*2) + accs[3] += dv2 + accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) + + dv3 := readU64(p, 8*3) + dk3 := dv3 ^ readU64(secret, 8*3) + accs[2] += dv3 + accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) + + dv4 := readU64(p, 8*4) + dk4 := dv4 ^ readU64(secret, 8*4) + accs[5] += dv4 + accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) + + dv5 := readU64(p, 8*5) + dk5 := dv5 ^ readU64(secret, 8*5) + accs[4] += dv5 + accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) + + dv6 := readU64(p, 8*6) + dk6 := dv6 ^ readU64(secret, 8*6) + accs[7] += dv6 + accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) + + dv7 := readU64(p, 8*7) + dk7 := dv7 ^ readU64(secret, 8*7) + accs[6] += dv7 + accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) + + p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) + } + } + + // scramble accs + accs[0] ^= accs[0] >> 47 + accs[0] ^= readU64(secret, 128) + accs[0] *= prime32_1 + + accs[1] ^= accs[1] >> 47 + accs[1] ^= readU64(secret, 136) + accs[1] *= prime32_1 + + accs[2] ^= accs[2] >> 47 + accs[2] ^= readU64(secret, 144) + accs[2] *= prime32_1 + + accs[3] ^= accs[3] >> 47 + accs[3] ^= readU64(secret, 152) + accs[3] *= prime32_1 + + accs[4] ^= accs[4] >> 47 + accs[4] ^= readU64(secret, 160) + accs[4] *= prime32_1 + + accs[5] ^= accs[5] >> 47 + accs[5] ^= readU64(secret, 168) + accs[5] *= prime32_1 + + accs[6] ^= accs[6] >> 47 + accs[6] ^= readU64(secret, 176) + accs[6] *= prime32_1 + + accs[7] ^= accs[7] >> 47 + 
accs[7] ^= readU64(secret, 184) + accs[7] *= prime32_1 +} diff --git a/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go b/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go new file mode 100644 index 000000000..9baff6c41 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go @@ -0,0 +1,40 @@ +package xxh3 + +import ( + "unsafe" + + "github.com/klauspost/cpuid/v2" +) + +var ( + hasAVX2 = cpuid.CPU.Has(cpuid.AVX2) + hasSSE2 = cpuid.CPU.Has(cpuid.SSE2) // Always true on amd64 + hasAVX512 = cpuid.CPU.Has(cpuid.AVX512F) +) + +//go:noescape +func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) + +//go:noescape +func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) + +//go:noescape +func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) + +//go:noescape +func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) + +//go:noescape +func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) + +func withOverrides(avx512, avx2, sse2 bool, cb func()) { + avx512Orig, avx2Orig, sse2Orig := hasAVX512, hasAVX2, hasSSE2 + hasAVX512, hasAVX2, hasSSE2 = avx512, avx2, sse2 + defer func() { hasAVX512, hasAVX2, hasSSE2 = avx512Orig, avx2Orig, sse2Orig }() + cb() +} + +func withAVX512(cb func()) { withOverrides(hasAVX512, false, false, cb) } +func withAVX2(cb func()) { withOverrides(false, hasAVX2, false, cb) } +func withSSE2(cb func()) { withOverrides(false, false, hasSSE2, cb) } +func withGeneric(cb func()) { withOverrides(false, false, false, cb) } diff --git a/vendor/github.com/zeebo/xxh3/accum_stubs_other.go b/vendor/github.com/zeebo/xxh3/accum_stubs_other.go new file mode 100644 index 000000000..93bf6258a --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_stubs_other.go @@ -0,0 +1,25 @@ +//go:build !amd64 +// +build !amd64 + +package xxh3 + +import ( + "unsafe" +) + +const ( + hasAVX2 = false + hasSSE2 = false + hasAVX512 = false +) + +func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } +func 
accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } +func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } +func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } +func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } + +func withAVX512(cb func()) { cb() } +func withAVX2(cb func()) { cb() } +func withSSE2(cb func()) { cb() } +func withGeneric(cb func()) { cb() } diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s new file mode 100644 index 000000000..cfaf9f0a7 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s @@ -0,0 +1,379 @@ +// Code generated by command: go run gen.go -avx512 -out ../accum_vector_avx512_amd64.s -pkg xxh3. DO NOT EDIT. + +#include "textflag.h" + +DATA prime_avx512<>+0(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+8(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+16(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+24(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+32(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+40(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+48(SB)/8, $0x000000009e3779b1 +DATA prime_avx512<>+56(SB)/8, $0x000000009e3779b1 +GLOBL prime_avx512<>(SB), RODATA|NOPTR, $64 + +// func accumAVX512(acc *[8]uint64, data *byte, key *byte, len uint64) +// Requires: AVX, AVX512F, MMX+ +TEXT ·accumAVX512(SB), NOSPLIT, $0-32 + MOVQ acc+0(FP), AX + MOVQ data+8(FP), CX + MOVQ key+16(FP), DX + MOVQ len+24(FP), BX + VMOVDQU64 (AX), Z1 + VMOVDQU64 prime_avx512<>+0(SB), Z0 + VMOVDQU64 (DX), Z2 + VMOVDQU64 8(DX), Z3 + VMOVDQU64 16(DX), Z4 + VMOVDQU64 24(DX), Z5 + VMOVDQU64 32(DX), Z6 + VMOVDQU64 40(DX), Z7 + VMOVDQU64 48(DX), Z8 + VMOVDQU64 56(DX), Z9 + VMOVDQU64 64(DX), Z10 + VMOVDQU64 72(DX), Z11 + VMOVDQU64 80(DX), Z12 + VMOVDQU64 88(DX), Z13 + VMOVDQU64 96(DX), Z14 + VMOVDQU64 104(DX), Z15 + VMOVDQU64 112(DX), Z16 + 
VMOVDQU64 120(DX), Z17 + VMOVDQU64 128(DX), Z18 + VMOVDQU64 121(DX), Z19 + +accum_large: + CMPQ BX, $0x00000400 + JLE accum + VMOVDQU64 (CX), Z20 + PREFETCHT0 1024(CX) + VPXORD Z2, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 64(CX), Z20 + PREFETCHT0 1088(CX) + VPXORD Z3, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 128(CX), Z20 + PREFETCHT0 1152(CX) + VPXORD Z4, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 192(CX), Z20 + PREFETCHT0 1216(CX) + VPXORD Z5, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 256(CX), Z20 + PREFETCHT0 1280(CX) + VPXORD Z6, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 320(CX), Z20 + PREFETCHT0 1344(CX) + VPXORD Z7, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 384(CX), Z20 + PREFETCHT0 1408(CX) + VPXORD Z8, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 448(CX), Z20 + PREFETCHT0 1472(CX) + VPXORD Z9, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 512(CX), Z20 + PREFETCHT0 1536(CX) + VPXORD Z10, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 576(CX), Z20 + PREFETCHT0 1600(CX) + VPXORD Z11, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, 
Z1 + VMOVDQU64 640(CX), Z20 + PREFETCHT0 1664(CX) + VPXORD Z12, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 704(CX), Z20 + PREFETCHT0 1728(CX) + VPXORD Z13, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 768(CX), Z20 + PREFETCHT0 1792(CX) + VPXORD Z14, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 832(CX), Z20 + PREFETCHT0 1856(CX) + VPXORD Z15, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 896(CX), Z20 + PREFETCHT0 1920(CX) + VPXORD Z16, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + VMOVDQU64 960(CX), Z20 + PREFETCHT0 1984(CX) + VPXORD Z17, Z20, Z21 + VPSHUFD $0x31, Z21, Z22 + VPMULUDQ Z21, Z22, Z21 + VPSHUFD $0x4e, Z20, Z20 + VPADDQ Z1, Z20, Z1 + VPADDQ Z1, Z21, Z1 + ADDQ $0x00000400, CX + SUBQ $0x00000400, BX + VPSRLQ $0x2f, Z1, Z20 + VPTERNLOGD $0x96, Z1, Z18, Z20 + VPMULUDQ Z0, Z20, Z1 + VPSHUFD $0xf5, Z20, Z20 + VPMULUDQ Z0, Z20, Z20 + VPSLLQ $0x20, Z20, Z20 + VPADDQ Z1, Z20, Z1 + JMP accum_large + +accum: + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z2, Z0, Z2 + VPSHUFD $0x31, Z2, Z18 + VPMULUDQ Z2, Z18, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z3, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z4, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ 
Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z5, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z6, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z7, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z8, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z9, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z10, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z11, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z12, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z13, Z0, Z2 + VPSHUFD 
$0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z14, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z15, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z16, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + CMPQ BX, $0x40 + JLE finalize + VMOVDQU64 (CX), Z0 + VPXORD Z17, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + ADDQ $0x00000040, CX + SUBQ $0x00000040, BX + +finalize: + CMPQ BX, $0x00 + JE return + SUBQ $0x40, CX + ADDQ BX, CX + VMOVDQU64 (CX), Z0 + VPXORD Z19, Z0, Z2 + VPSHUFD $0x31, Z2, Z3 + VPMULUDQ Z2, Z3, Z2 + VPSHUFD $0x4e, Z0, Z0 + VPADDQ Z1, Z0, Z1 + VPADDQ Z1, Z2, Z1 + +return: + VMOVDQU64 Z1, (AX) + VZEROUPPER + RET diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s new file mode 100644 index 000000000..b53c1521f --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s @@ -0,0 +1,586 @@ +// Code generated by command: go run gen.go -avx -out ../accum_vector_avx_amd64.s -pkg xxh3. DO NOT EDIT. 
+ +#include "textflag.h" + +DATA prime_avx<>+0(SB)/8, $0x000000009e3779b1 +DATA prime_avx<>+8(SB)/8, $0x000000009e3779b1 +DATA prime_avx<>+16(SB)/8, $0x000000009e3779b1 +DATA prime_avx<>+24(SB)/8, $0x000000009e3779b1 +GLOBL prime_avx<>(SB), RODATA|NOPTR, $32 + +// func accumAVX2(acc *[8]uint64, data *byte, key *byte, len uint64) +// Requires: AVX, AVX2, MMX+ +TEXT ·accumAVX2(SB), NOSPLIT, $0-32 + MOVQ acc+0(FP), AX + MOVQ data+8(FP), CX + MOVQ key+16(FP), DX + MOVQ key+16(FP), BX + MOVQ len+24(FP), SI + VMOVDQU (AX), Y1 + VMOVDQU 32(AX), Y2 + VMOVDQU prime_avx<>+0(SB), Y0 + +accum_large: + CMPQ SI, $0x00000400 + JLE accum + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y6 + PREFETCHT0 512(CX) + VPXOR (DX), Y3, Y4 + VPXOR 32(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y6 + PREFETCHT0 576(CX) + VPXOR 8(DX), Y3, Y4 + VPXOR 40(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y6 + PREFETCHT0 640(CX) + VPXOR 16(DX), Y3, Y4 + VPXOR 48(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y6 + PREFETCHT0 704(CX) + VPXOR 24(DX), Y3, Y4 + VPXOR 56(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y6 + 
PREFETCHT0 768(CX) + VPXOR 32(DX), Y3, Y4 + VPXOR 64(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y6 + PREFETCHT0 832(CX) + VPXOR 40(DX), Y3, Y4 + VPXOR 72(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y6 + PREFETCHT0 896(CX) + VPXOR 48(DX), Y3, Y4 + VPXOR 80(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y6 + PREFETCHT0 960(CX) + VPXOR 56(DX), Y3, Y4 + VPXOR 88(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y6 + PREFETCHT0 1024(CX) + VPXOR 64(DX), Y3, Y4 + VPXOR 96(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y6 + PREFETCHT0 1088(CX) + VPXOR 72(DX), Y3, Y4 + VPXOR 104(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y6 + PREFETCHT0 
1152(CX) + VPXOR 80(DX), Y3, Y4 + VPXOR 112(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y6 + PREFETCHT0 1216(CX) + VPXOR 88(DX), Y3, Y4 + VPXOR 120(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y6 + PREFETCHT0 1280(CX) + VPXOR 96(DX), Y3, Y4 + VPXOR 128(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y6 + PREFETCHT0 1344(CX) + VPXOR 104(DX), Y3, Y4 + VPXOR 136(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 896(CX), Y3 + VMOVDQU 928(CX), Y6 + PREFETCHT0 1408(CX) + VPXOR 112(DX), Y3, Y4 + VPXOR 144(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 960(CX), Y3 + VMOVDQU 992(CX), Y6 + PREFETCHT0 1472(CX) + VPXOR 120(DX), Y3, Y4 + VPXOR 152(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + ADDQ $0x00000400, CX + SUBQ $0x00000400, SI + VPSRLQ 
$0x2f, Y1, Y3 + VPXOR Y1, Y3, Y3 + VPXOR 128(DX), Y3, Y3 + VPMULUDQ Y0, Y3, Y1 + VPSHUFD $0xf5, Y3, Y3 + VPMULUDQ Y0, Y3, Y3 + VPSLLQ $0x20, Y3, Y3 + VPADDQ Y1, Y3, Y1 + VPSRLQ $0x2f, Y2, Y3 + VPXOR Y2, Y3, Y3 + VPXOR 160(DX), Y3, Y3 + VPMULUDQ Y0, Y3, Y2 + VPSHUFD $0xf5, Y3, Y3 + VPMULUDQ Y0, Y3, Y3 + VPSLLQ $0x20, Y3, Y3 + VPADDQ Y2, Y3, Y2 + JMP accum_large + +accum: + CMPQ SI, $0x40 + JLE finalize + VMOVDQU (CX), Y0 + VMOVDQU 32(CX), Y5 + VPXOR (BX), Y0, Y3 + VPXOR 32(BX), Y5, Y6 + VPSHUFD $0x31, Y3, Y4 + VPSHUFD $0x31, Y6, Y7 + VPMULUDQ Y3, Y4, Y3 + VPMULUDQ Y6, Y7, Y6 + VPSHUFD $0x4e, Y0, Y0 + VPSHUFD $0x4e, Y5, Y5 + VPADDQ Y1, Y0, Y1 + VPADDQ Y1, Y3, Y1 + VPADDQ Y2, Y5, Y2 + VPADDQ Y2, Y6, Y2 + ADDQ $0x00000040, CX + SUBQ $0x00000040, SI + ADDQ $0x00000008, BX + JMP accum + +finalize: + CMPQ SI, $0x00 + JE return + SUBQ $0x40, CX + ADDQ SI, CX + VMOVDQU (CX), Y0 + VMOVDQU 32(CX), Y5 + VPXOR 121(DX), Y0, Y3 + VPXOR 153(DX), Y5, Y6 + VPSHUFD $0x31, Y3, Y4 + VPSHUFD $0x31, Y6, Y7 + VPMULUDQ Y3, Y4, Y3 + VPMULUDQ Y6, Y7, Y6 + VPSHUFD $0x4e, Y0, Y0 + VPSHUFD $0x4e, Y5, Y5 + VPADDQ Y1, Y0, Y1 + VPADDQ Y1, Y3, Y1 + VPADDQ Y2, Y5, Y2 + VPADDQ Y2, Y6, Y2 + +return: + VMOVDQU Y1, (AX) + VMOVDQU Y2, 32(AX) + VZEROUPPER + RET + +// func accumBlockAVX2(acc *[8]uint64, data *byte, key *byte) +// Requires: AVX, AVX2 +TEXT ·accumBlockAVX2(SB), NOSPLIT, $0-24 + MOVQ acc+0(FP), AX + MOVQ data+8(FP), CX + MOVQ key+16(FP), DX + VMOVDQU (AX), Y1 + VMOVDQU 32(AX), Y2 + VMOVDQU prime_avx<>+0(SB), Y0 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y6 + VPXOR (DX), Y3, Y4 + VPXOR 32(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y6 + VPXOR 8(DX), Y3, Y4 + VPXOR 40(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + 
VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y6 + VPXOR 16(DX), Y3, Y4 + VPXOR 48(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y6 + VPXOR 24(DX), Y3, Y4 + VPXOR 56(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y6 + VPXOR 32(DX), Y3, Y4 + VPXOR 64(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y6 + VPXOR 40(DX), Y3, Y4 + VPXOR 72(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y6 + VPXOR 48(DX), Y3, Y4 + VPXOR 80(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y6 + VPXOR 56(DX), Y3, Y4 + VPXOR 88(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 
512(CX), Y3 + VMOVDQU 544(CX), Y6 + VPXOR 64(DX), Y3, Y4 + VPXOR 96(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y6 + VPXOR 72(DX), Y3, Y4 + VPXOR 104(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y6 + VPXOR 80(DX), Y3, Y4 + VPXOR 112(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y6 + VPXOR 88(DX), Y3, Y4 + VPXOR 120(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y6 + VPXOR 96(DX), Y3, Y4 + VPXOR 128(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y6 + VPXOR 104(DX), Y3, Y4 + VPXOR 136(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 896(CX), Y3 + VMOVDQU 928(CX), Y6 + VPXOR 112(DX), Y3, Y4 + VPXOR 144(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + 
VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VMOVDQU 960(CX), Y3 + VMOVDQU 992(CX), Y6 + VPXOR 120(DX), Y3, Y4 + VPXOR 152(DX), Y6, Y7 + VPSHUFD $0x31, Y4, Y5 + VPSHUFD $0x31, Y7, Y8 + VPMULUDQ Y4, Y5, Y4 + VPMULUDQ Y7, Y8, Y7 + VPSHUFD $0x4e, Y3, Y3 + VPSHUFD $0x4e, Y6, Y6 + VPADDQ Y1, Y3, Y1 + VPADDQ Y1, Y4, Y1 + VPADDQ Y2, Y6, Y2 + VPADDQ Y2, Y7, Y2 + VPSRLQ $0x2f, Y1, Y3 + VPXOR Y1, Y3, Y3 + VPXOR 128(DX), Y3, Y3 + VPMULUDQ Y0, Y3, Y1 + VPSHUFD $0xf5, Y3, Y3 + VPMULUDQ Y0, Y3, Y3 + VPSLLQ $0x20, Y3, Y3 + VPADDQ Y1, Y3, Y1 + VPSRLQ $0x2f, Y2, Y3 + VPXOR Y2, Y3, Y3 + VPXOR 160(DX), Y3, Y3 + VPMULUDQ Y0, Y3, Y2 + VPSHUFD $0xf5, Y3, Y3 + VPMULUDQ Y0, Y3, Y3 + VPSLLQ $0x20, Y3, Y3 + VPADDQ Y2, Y3, Y2 + VMOVDQU Y1, (AX) + VMOVDQU Y2, 32(AX) + VZEROUPPER + RET diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s new file mode 100644 index 000000000..ba670e560 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s @@ -0,0 +1,1236 @@ +// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT. 
+ +#include "textflag.h" + +DATA prime_sse<>+0(SB)/4, $0x9e3779b1 +DATA prime_sse<>+4(SB)/4, $0x9e3779b1 +DATA prime_sse<>+8(SB)/4, $0x9e3779b1 +DATA prime_sse<>+12(SB)/4, $0x9e3779b1 +GLOBL prime_sse<>(SB), RODATA|NOPTR, $16 + +// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64) +// Requires: SSE2 +TEXT ·accumSSE(SB), NOSPLIT, $0-32 + MOVQ acc+0(FP), AX + MOVQ data+8(FP), CX + MOVQ key+16(FP), DX + MOVQ key+16(FP), BX + MOVQ len+24(FP), SI + MOVOU (AX), X1 + MOVOU 16(AX), X2 + MOVOU 32(AX), X3 + MOVOU 48(AX), X4 + MOVOU prime_sse<>+0(SB), X0 + +accum_large: + CMPQ SI, $0x00000400 + JLE accum + MOVOU (CX), X5 + MOVOU (DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 16(CX), X5 + MOVOU 16(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 32(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 48(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 64(CX), X5 + MOVOU 8(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 80(CX), X5 + MOVOU 24(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 96(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 112(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 128(CX), X5 + MOVOU 16(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 144(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + 
PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 160(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 176(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 192(CX), X5 + MOVOU 24(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 208(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 224(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 240(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 256(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 272(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 288(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 304(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 320(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 336(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 352(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 368(CX), X5 + MOVOU 
88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 384(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 400(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 416(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 432(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 448(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 464(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 480(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 496(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 512(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 528(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 544(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 560(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 576(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, 
X1 + PADDQ X7, X1 + MOVOU 592(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 608(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 624(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 640(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 656(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 672(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 688(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 704(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 720(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 736(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 752(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 768(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 784(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 800(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, 
X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 816(CX), X5 + MOVOU 144(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 832(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 848(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 864(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 880(CX), X5 + MOVOU 152(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 896(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 912(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 928(CX), X5 + MOVOU 144(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 944(CX), X5 + MOVOU 160(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 960(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 976(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 992(CX), X5 + MOVOU 152(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 1008(CX), X5 + MOVOU 168(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + ADDQ 
$0x00000400, CX + SUBQ $0x00000400, SI + MOVOU X1, X5 + PSRLQ $0x2f, X5 + PXOR X5, X1 + MOVOU 128(DX), X5 + PXOR X5, X1 + PSHUFD $0xf5, X1, X5 + PMULULQ X0, X1 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X1 + MOVOU X2, X5 + PSRLQ $0x2f, X5 + PXOR X5, X2 + MOVOU 144(DX), X5 + PXOR X5, X2 + PSHUFD $0xf5, X2, X5 + PMULULQ X0, X2 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X2 + MOVOU X3, X5 + PSRLQ $0x2f, X5 + PXOR X5, X3 + MOVOU 160(DX), X5 + PXOR X5, X3 + PSHUFD $0xf5, X3, X5 + PMULULQ X0, X3 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X3 + MOVOU X4, X5 + PSRLQ $0x2f, X5 + PXOR X5, X4 + MOVOU 176(DX), X5 + PXOR X5, X4 + PSHUFD $0xf5, X4, X5 + PMULULQ X0, X4 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X4 + JMP accum_large + +accum: + CMPQ SI, $0x40 + JLE finalize + MOVOU (CX), X0 + MOVOU (BX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X1 + PADDQ X6, X1 + MOVOU 16(CX), X0 + MOVOU 16(BX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X2 + PADDQ X6, X2 + MOVOU 32(CX), X0 + MOVOU 32(BX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X3 + PADDQ X6, X3 + MOVOU 48(CX), X0 + MOVOU 48(BX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X4 + PADDQ X6, X4 + ADDQ $0x00000040, CX + SUBQ $0x00000040, SI + ADDQ $0x00000008, BX + JMP accum + +finalize: + CMPQ SI, $0x00 + JE return + SUBQ $0x40, CX + ADDQ SI, CX + MOVOU (CX), X0 + MOVOU 121(DX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X1 + PADDQ X6, X1 + MOVOU 16(CX), X0 + MOVOU 137(DX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X2 + PADDQ X6, X2 + MOVOU 32(CX), X0 + MOVOU 153(DX), X5 + PXOR X0, X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X3 + PADDQ X6, X3 + MOVOU 48(CX), X0 + MOVOU 169(DX), X5 + PXOR X0, 
X5 + PSHUFD $0x31, X5, X6 + PMULULQ X5, X6 + PSHUFD $0x4e, X0, X0 + PADDQ X0, X4 + PADDQ X6, X4 + +return: + MOVOU X1, (AX) + MOVOU X2, 16(AX) + MOVOU X3, 32(AX) + MOVOU X4, 48(AX) + RET + +// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte) +// Requires: SSE2 +TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24 + MOVQ acc+0(FP), AX + MOVQ data+8(FP), CX + MOVQ key+16(FP), DX + MOVOU (AX), X1 + MOVOU 16(AX), X2 + MOVOU 32(AX), X3 + MOVOU 48(AX), X4 + MOVOU prime_sse<>+0(SB), X0 + MOVOU (CX), X5 + MOVOU (DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 16(CX), X5 + MOVOU 16(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 32(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 48(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 64(CX), X5 + MOVOU 8(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 80(CX), X5 + MOVOU 24(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 96(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 112(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 128(CX), X5 + MOVOU 16(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 144(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 160(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, 
X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 176(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 192(CX), X5 + MOVOU 24(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 208(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 224(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 240(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 256(CX), X5 + MOVOU 32(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 272(CX), X5 + MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 288(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 304(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 320(CX), X5 + MOVOU 40(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 336(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 352(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 368(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 384(CX), X5 + 
MOVOU 48(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 400(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 416(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 432(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 448(CX), X5 + MOVOU 56(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 464(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 480(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 496(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 512(CX), X5 + MOVOU 64(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 528(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 544(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 560(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 576(CX), X5 + MOVOU 72(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 592(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + 
PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 608(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 624(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 640(CX), X5 + MOVOU 80(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 656(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 672(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 688(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 704(CX), X5 + MOVOU 88(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 720(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 736(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 752(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 768(CX), X5 + MOVOU 96(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 784(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 800(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 816(CX), X5 + MOVOU 144(DX), X6 + PXOR X5, X6 + PSHUFD 
$0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 832(CX), X5 + MOVOU 104(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 848(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 864(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 880(CX), X5 + MOVOU 152(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 896(CX), X5 + MOVOU 112(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 912(CX), X5 + MOVOU 128(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 928(CX), X5 + MOVOU 144(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 944(CX), X5 + MOVOU 160(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU 960(CX), X5 + MOVOU 120(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X1 + PADDQ X7, X1 + MOVOU 976(CX), X5 + MOVOU 136(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X2 + PADDQ X7, X2 + MOVOU 992(CX), X5 + MOVOU 152(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X3 + PADDQ X7, X3 + MOVOU 1008(CX), X5 + MOVOU 168(DX), X6 + PXOR X5, X6 + PSHUFD $0x31, X6, X7 + PMULULQ X6, X7 + PSHUFD $0x4e, X5, X5 + PADDQ X5, X4 + PADDQ X7, X4 + MOVOU X1, X5 + PSRLQ $0x2f, X5 + PXOR X5, X1 + MOVOU 128(DX), X5 + PXOR X5, X1 + PSHUFD $0xf5, X1, X5 + PMULULQ X0, X1 + PMULULQ X0, X5 + PSLLQ 
$0x20, X5 + PADDQ X5, X1 + MOVOU X2, X5 + PSRLQ $0x2f, X5 + PXOR X5, X2 + MOVOU 144(DX), X5 + PXOR X5, X2 + PSHUFD $0xf5, X2, X5 + PMULULQ X0, X2 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X2 + MOVOU X3, X5 + PSRLQ $0x2f, X5 + PXOR X5, X3 + MOVOU 160(DX), X5 + PXOR X5, X3 + PSHUFD $0xf5, X3, X5 + PMULULQ X0, X3 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X3 + MOVOU X4, X5 + PSRLQ $0x2f, X5 + PXOR X5, X4 + MOVOU 176(DX), X5 + PXOR X5, X4 + PSHUFD $0xf5, X4, X5 + PMULULQ X0, X4 + PMULULQ X0, X5 + PSLLQ $0x20, X5 + PADDQ X5, X4 + MOVOU X1, (AX) + MOVOU X2, 16(AX) + MOVOU X3, 32(AX) + MOVOU X4, 48(AX) + RET diff --git a/vendor/github.com/zeebo/xxh3/consts.go b/vendor/github.com/zeebo/xxh3/consts.go new file mode 100644 index 000000000..39ef6e179 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/consts.go @@ -0,0 +1,97 @@ +package xxh3 + +const ( + _stripe = 64 + _block = 1024 + + prime32_1 = 2654435761 + prime32_2 = 2246822519 + prime32_3 = 3266489917 + + prime64_1 = 11400714785074694791 + prime64_2 = 14029467366897019727 + prime64_3 = 1609587929392839161 + prime64_4 = 9650029242287828579 + prime64_5 = 2870177450012600261 +) + +var key = ptr(&[...]u8{ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe /* 8 */, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, /* 16 */ + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb /* 24 */, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, /* 32 */ + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78 /* 40 */, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, /* 48 */ + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e /* 56 */, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, /* 64 */ + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb /* 72 */, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, /* 80 */ + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e /* 88 */, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, /* 96 */ + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f /* 104 */, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, /* 112 */ + 0x8a, 0x51, 
0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31 /* 120 */, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, /* 128 */ + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3 /* 136 */, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, /* 144 */ + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49 /* 152 */, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, /* 160 */ + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc /* 168 */, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, /* 176 */ + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28 /* 184 */, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, /* 192 */ +}) + +const ( + key64_000 u64 = 0xbe4ba423396cfeb8 + key64_008 u64 = 0x1cad21f72c81017c + key64_016 u64 = 0xdb979083e96dd4de + key64_024 u64 = 0x1f67b3b7a4a44072 + key64_032 u64 = 0x78e5c0cc4ee679cb + key64_040 u64 = 0x2172ffcc7dd05a82 + key64_048 u64 = 0x8e2443f7744608b8 + key64_056 u64 = 0x4c263a81e69035e0 + key64_064 u64 = 0xcb00c391bb52283c + key64_072 u64 = 0xa32e531b8b65d088 + key64_080 u64 = 0x4ef90da297486471 + key64_088 u64 = 0xd8acdea946ef1938 + key64_096 u64 = 0x3f349ce33f76faa8 + key64_104 u64 = 0x1d4f0bc7c7bbdcf9 + key64_112 u64 = 0x3159b4cd4be0518a + key64_120 u64 = 0x647378d9c97e9fc8 + key64_128 u64 = 0xc3ebd33483acc5ea + key64_136 u64 = 0xeb6313faffa081c5 + key64_144 u64 = 0x49daf0b751dd0d17 + key64_152 u64 = 0x9e68d429265516d3 + key64_160 u64 = 0xfca1477d58be162b + key64_168 u64 = 0xce31d07ad1b8f88f + key64_176 u64 = 0x280416958f3acb45 + key64_184 u64 = 0x7e404bbbcafbd7af + + key64_103 u64 = 0x4f0bc7c7bbdcf93f + key64_111 u64 = 0x59b4cd4be0518a1d + key64_119 u64 = 0x7378d9c97e9fc831 + key64_127 u64 = 0xebd33483acc5ea64 + + key64_121 u64 = 0xea647378d9c97e9f + key64_129 u64 = 0xc5c3ebd33483acc5 + key64_137 u64 = 0x17eb6313faffa081 + key64_145 u64 = 0xd349daf0b751dd0d + key64_153 u64 = 0x2b9e68d429265516 + key64_161 u64 = 0x8ffca1477d58be16 + key64_169 u64 = 0x45ce31d07ad1b8f8 + key64_177 u64 = 0xaf280416958f3acb + + key64_011 = 0x6dd4de1cad21f72c + key64_019 = 0xa44072db979083e9 + 
key64_027 = 0xe679cb1f67b3b7a4 + key64_035 = 0xd05a8278e5c0cc4e + key64_043 = 0x4608b82172ffcc7d + key64_051 = 0x9035e08e2443f774 + key64_059 = 0x52283c4c263a81e6 + key64_067 = 0x65d088cb00c391bb + + key64_117 = 0xd9c97e9fc83159b4 + key64_125 = 0x3483acc5ea647378 + key64_133 = 0xfaffa081c5c3ebd3 + key64_141 = 0xb751dd0d17eb6313 + key64_149 = 0x29265516d349daf0 + key64_157 = 0x7d58be162b9e68d4 + key64_165 = 0x7ad1b8f88ffca147 + key64_173 = 0x958f3acb45ce31d0 +) + +const ( + key32_000 u32 = 0xbe4ba423 + key32_004 u32 = 0x396cfeb8 + key32_008 u32 = 0x1cad21f7 + key32_012 u32 = 0x2c81017c +) diff --git a/vendor/github.com/zeebo/xxh3/hash128.go b/vendor/github.com/zeebo/xxh3/hash128.go new file mode 100644 index 000000000..0040a21bb --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/hash128.go @@ -0,0 +1,253 @@ +package xxh3 + +import ( + "math/bits" +) + +// Hash128 returns the 128-bit hash of the byte slice. +func Hash128(b []byte) Uint128 { + return hashAny128(*(*str)(ptr(&b))) +} + +// HashString128 returns the 128-bit hash of the string slice. 
+func HashString128(s string) Uint128 { + return hashAny128(*(*str)(ptr(&s))) +} + +func hashAny128(s str) (acc u128) { + p, l := s.p, s.l + + switch { + case l <= 16: + switch { + case l > 8: // 9-16 + const bitflipl = key64_032 ^ key64_040 + const bitfliph = key64_048 ^ key64_056 + + input_lo := readU64(p, 0) + input_hi := readU64(p, ui(l)-8) + + m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) + + m128_l += uint64(l-1) << 54 + input_hi ^= bitfliph + + m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) + + m128_l ^= bits.ReverseBytes64(m128_h) + + acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) + acc.Hi += m128_h * prime64_2 + + acc.Lo = xxh3Avalanche(acc.Lo) + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + + case l > 3: // 4-8 + const bitflip = key64_016 ^ key64_024 + + input_lo := readU32(p, 0) + input_hi := readU32(p, ui(l)-4) + input_64 := u64(input_lo) + u64(input_hi)<<32 + keyed := input_64 ^ bitflip + + acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) + + acc.Hi += acc.Lo << 1 + acc.Lo ^= acc.Hi >> 3 + + acc.Lo ^= acc.Lo >> 35 + acc.Lo *= 0x9fb21c651e98df25 + acc.Lo ^= acc.Lo >> 28 + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + + case l == 3: // 3 + c12 := u64(readU16(p, 0)) + c3 := u64(readU8(p, 2)) + acc.Lo = c12<<16 + c3 + 3<<8 + + case l > 1: // 2 + c12 := u64(readU16(p, 0)) + acc.Lo = c12*(1<<24+1)>>8 + 2<<8 + + case l == 1: // 1 + c1 := u64(readU8(p, 0)) + acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 + + default: // 0 + return u128{0x99aa06d3014798d8, 0x6001c324468d497f} + } + + acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) + acc.Lo ^= uint64(key32_000 ^ key32_004) + acc.Hi ^= uint64(key32_008 ^ key32_012) + + acc.Lo = xxh64AvalancheSmall(acc.Lo) + acc.Hi = xxh64AvalancheSmall(acc.Hi) + + return acc + + case l <= 128: + acc.Lo = u64(l) * prime64_1 + + if l > 32 { + if l > 64 { + if l > 96 { + in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) + i6, i7 := readU64(p, 6*8), readU64(p, 
7*8) + + acc.Hi += mulFold64(in8^key64_112, in7^key64_120) + acc.Hi ^= i6 + i7 + acc.Lo += mulFold64(i6^key64_096, i7^key64_104) + acc.Lo ^= in8 + in7 + + } // 96 + + in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) + i4, i5 := readU64(p, 4*8), readU64(p, 5*8) + + acc.Hi += mulFold64(in6^key64_080, in5^key64_088) + acc.Hi ^= i4 + i5 + acc.Lo += mulFold64(i4^key64_064, i5^key64_072) + acc.Lo ^= in6 + in5 + + } // 64 + + in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) + i2, i3 := readU64(p, 2*8), readU64(p, 3*8) + + acc.Hi += mulFold64(in4^key64_048, in3^key64_056) + acc.Hi ^= i2 + i3 + acc.Lo += mulFold64(i2^key64_032, i3^key64_040) + acc.Lo ^= in4 + in3 + + } // 32 + + in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) + i0, i1 := readU64(p, 0*8), readU64(p, 1*8) + + acc.Hi += mulFold64(in2^key64_016, in1^key64_024) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^key64_000, i1^key64_008) + acc.Lo ^= in2 + in1 + + acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo + + acc.Hi = -xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + return acc + + case l <= 240: + acc.Lo = u64(l) * prime64_1 + + { + i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) + + acc.Hi += mulFold64(i2^key64_016, i3^key64_024) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^key64_000, i1^key64_008) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) + + acc.Hi += mulFold64(i2^key64_048, i3^key64_056) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^key64_032, i1^key64_040) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) + + acc.Hi += mulFold64(i2^key64_080, i3^key64_088) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^key64_064, i1^key64_072) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) + + acc.Hi += 
mulFold64(i2^key64_112, i3^key64_120) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^key64_096, i1^key64_104) + acc.Lo ^= i2 + i3 + } + + // avalanche + acc.Hi = xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + // trailing groups after 128 + top := ui(l) &^ 31 + for i := ui(4 * 32); i < top; i += 32 { + i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) + k0, k1, k2, k3 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101) + + acc.Hi += mulFold64(i2^k2, i3^k3) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^k0, i1^k1) + acc.Lo ^= i2 + i3 + } + + // last 32 bytes + { + i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) + + acc.Hi += mulFold64(i0^key64_119, i1^key64_127) + acc.Hi ^= i2 + i3 + acc.Lo += mulFold64(i2^key64_103, i3^key64_111) + acc.Lo ^= i0 + i1 + } + + acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo + + acc.Hi = -xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + return acc + + default: + acc.Lo = u64(l) * prime64_1 + acc.Hi = ^(u64(l) * prime64_2) + + accs := [8]u64{ + prime32_3, prime64_1, prime64_2, prime64_3, + prime64_4, prime32_2, prime64_5, prime32_1, + } + + if hasAVX512 && l >= avx512Switch { + accumAVX512(&accs, p, key, u64(l)) + } else if hasAVX2 { + accumAVX2(&accs, p, key, u64(l)) + } else if hasSSE2 { + accumSSE(&accs, p, key, u64(l)) + } else { + accumScalar(&accs, p, key, u64(l)) + } + + // merge accs + acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019) + acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125) + + acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035) + acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141) + + acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051) + acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157) + + acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067) + acc.Hi += mulFold64(accs[6]^key64_165, 
accs[7]^key64_173) + + acc.Lo = xxh3Avalanche(acc.Lo) + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + } +} diff --git a/vendor/github.com/zeebo/xxh3/hash128_seed.go b/vendor/github.com/zeebo/xxh3/hash128_seed.go new file mode 100644 index 000000000..358009be3 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/hash128_seed.go @@ -0,0 +1,264 @@ +package xxh3 + +import ( + "math/bits" +) + +// Hash128Seed returns the 128-bit hash of the byte slice. +func Hash128Seed(b []byte, seed uint64) Uint128 { + return hashAny128Seed(*(*str)(ptr(&b)), seed) +} + +// HashString128Seed returns the 128-bit hash of the string slice. +func HashString128Seed(s string, seed uint64) Uint128 { + return hashAny128Seed(*(*str)(ptr(&s)), seed) +} + +func hashAny128Seed(s str, seed uint64) (acc u128) { + p, l := s.p, s.l + + switch { + case l <= 16: + switch { + case l > 8: // 9-16 + bitflipl := (key64_032 ^ key64_040) - seed + bitfliph := (key64_048 ^ key64_056) + seed + + input_lo := readU64(p, 0) + input_hi := readU64(p, ui(l)-8) + + m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) + + m128_l += uint64(l-1) << 54 + input_hi ^= bitfliph + + m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) + + m128_l ^= bits.ReverseBytes64(m128_h) + + acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) + acc.Hi += m128_h * prime64_2 + + acc.Lo = xxh3Avalanche(acc.Lo) + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + + case l > 3: // 4-8 + seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 + bitflip := (key64_016 ^ key64_024) + seed + input_lo := readU32(p, 0) + input_hi := readU32(p, ui(l)-4) + input_64 := u64(input_lo) + u64(input_hi)<<32 + keyed := input_64 ^ bitflip + + acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) + + acc.Hi += acc.Lo << 1 + acc.Lo ^= acc.Hi >> 3 + + acc.Lo ^= acc.Lo >> 35 + acc.Lo *= 0x9fb21c651e98df25 + acc.Lo ^= acc.Lo >> 28 + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + + case l == 3: // 3 + c12 := u64(readU16(p, 0)) + c3 := u64(readU8(p, 2)) 
+ acc.Lo = c12<<16 + c3 + 3<<8 + + case l > 1: // 2 + c12 := u64(readU16(p, 0)) + acc.Lo = c12*(1<<24+1)>>8 + 2<<8 + + case l == 1: // 1 + c1 := u64(readU8(p, 0)) + acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 + + default: // 0 + bitflipl := key64_064 ^ key64_072 ^ seed + bitfliph := key64_080 ^ key64_088 ^ seed + return u128{Lo: xxh64AvalancheFull(bitflipl), Hi: xxh64AvalancheFull(bitfliph)} + } + + acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) + acc.Lo ^= uint64(key32_000^key32_004) + seed + acc.Hi ^= uint64(key32_008^key32_012) - seed + + acc.Lo = xxh64AvalancheFull(acc.Lo) + acc.Hi = xxh64AvalancheFull(acc.Hi) + + return acc + + case l <= 128: + acc.Lo = u64(l) * prime64_1 + + if l > 32 { + if l > 64 { + if l > 96 { + in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) + i6, i7 := readU64(p, 6*8), readU64(p, 7*8) + + acc.Hi += mulFold64(in8^(key64_112+seed), in7^(key64_120-seed)) + acc.Hi ^= i6 + i7 + acc.Lo += mulFold64(i6^(key64_096+seed), i7^(key64_104-seed)) + acc.Lo ^= in8 + in7 + + } // 96 + + in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) + i4, i5 := readU64(p, 4*8), readU64(p, 5*8) + + acc.Hi += mulFold64(in6^(key64_080+seed), in5^(key64_088-seed)) + acc.Hi ^= i4 + i5 + acc.Lo += mulFold64(i4^(key64_064+seed), i5^(key64_072-seed)) + acc.Lo ^= in6 + in5 + + } // 64 + + in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) + i2, i3 := readU64(p, 2*8), readU64(p, 3*8) + + acc.Hi += mulFold64(in4^(key64_048+seed), in3^(key64_056-seed)) + acc.Hi ^= i2 + i3 + acc.Lo += mulFold64(i2^(key64_032+seed), i3^(key64_040-seed)) + acc.Lo ^= in4 + in3 + + } // 32 + + in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) + i0, i1 := readU64(p, 0*8), readU64(p, 1*8) + + acc.Hi += mulFold64(in2^(key64_016+seed), in1^(key64_024-seed)) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) + acc.Lo ^= in2 + in1 + + acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), 
acc.Hi+acc.Lo + + acc.Hi = -xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + return acc + + case l <= 240: + acc.Lo = u64(l) * prime64_1 + + { + i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) + + acc.Hi += mulFold64(i2^(key64_016+seed), i3^(key64_024-seed)) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) + + acc.Hi += mulFold64(i2^(key64_048+seed), i3^(key64_056-seed)) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^(key64_032+seed), i1^(key64_040-seed)) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) + + acc.Hi += mulFold64(i2^(key64_080+seed), i3^(key64_088-seed)) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^(key64_064+seed), i1^(key64_072-seed)) + acc.Lo ^= i2 + i3 + } + + { + i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) + + acc.Hi += mulFold64(i2^(key64_112+seed), i3^(key64_120-seed)) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^(key64_096+seed), i1^(key64_104-seed)) + acc.Lo ^= i2 + i3 + } + + // avalanche + acc.Hi = xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + // trailing groups after 128 + top := ui(l) &^ 31 + for i := ui(4 * 32); i < top; i += 32 { + i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) + k0, k1, k2, k3 := readU64(key, i-125)+seed, readU64(key, i-117)-seed, readU64(key, i-109)+seed, readU64(key, i-101)-seed + + acc.Hi += mulFold64(i2^k2, i3^k3) + acc.Hi ^= i0 + i1 + acc.Lo += mulFold64(i0^k0, i1^k1) + acc.Lo ^= i2 + i3 + } + + // last 32 bytes + { + i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) + + seed := 0 - seed + acc.Hi += mulFold64(i0^(key64_119+seed), i1^(key64_127-seed)) + acc.Hi ^= i2 + i3 + acc.Lo += 
mulFold64(i2^(key64_103+seed), i3^(key64_111-seed)) + acc.Lo ^= i0 + i1 + } + + acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo + + acc.Hi = -xxh3Avalanche(acc.Hi) + acc.Lo = xxh3Avalanche(acc.Lo) + + return acc + + default: + acc.Lo = u64(l) * prime64_1 + acc.Hi = ^(u64(l) * prime64_2) + + secret := key + if seed != 0 { + secret = ptr(&[secretSize]byte{}) + initSecret(secret, seed) + } + + accs := [8]u64{ + prime32_3, prime64_1, prime64_2, prime64_3, + prime64_4, prime32_2, prime64_5, prime32_1, + } + + if hasAVX512 && l >= avx512Switch { + accumAVX512(&accs, p, secret, u64(l)) + } else if hasAVX2 { + accumAVX2(&accs, p, secret, u64(l)) + } else if hasSSE2 { + accumSSE(&accs, p, secret, u64(l)) + } else { + accumScalar(&accs, p, secret, u64(l)) + } + + // merge accs + const hi_off = 117 - 11 + + acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) + acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off)) + + acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) + acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off)) + + acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) + acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off)) + + acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) + acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off)) + + acc.Lo = xxh3Avalanche(acc.Lo) + acc.Hi = xxh3Avalanche(acc.Hi) + + return acc + } +} diff --git a/vendor/github.com/zeebo/xxh3/hash64.go b/vendor/github.com/zeebo/xxh3/hash64.go new file mode 100644 index 000000000..13aab9585 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/hash64.go @@ -0,0 +1,126 @@ +package xxh3 + +import "math/bits" + +// Hash returns the hash of the byte slice. 
+func Hash(b []byte) uint64 { + return hashAny(*(*str)(ptr(&b))) +} + +// Hash returns the hash of the string slice. +func HashString(s string) uint64 { + return hashAny(*(*str)(ptr(&s))) +} + +func hashAny(s str) (acc u64) { + p, l := s.p, s.l + + switch { + case l <= 16: + switch { + case l > 8: // 9-16 + inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032) + inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048) + folded := mulFold64(inputlo, inputhi) + return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) + + case l > 3: // 4-8 + input1 := readU32(p, 0) + input2 := readU32(p, ui(l)-4) + input64 := u64(input2) + u64(input1)<<32 + keyed := input64 ^ (key64_008 ^ key64_016) + return rrmxmx(keyed, u64(l)) + + case l == 3: // 3 + c12 := u64(readU16(p, 0)) + c3 := u64(readU8(p, 2)) + acc = c12<<16 + c3 + 3<<8 + + case l > 1: // 2 + c12 := u64(readU16(p, 0)) + acc = c12*(1<<24+1)>>8 + 2<<8 + + case l == 1: // 1 + c1 := u64(readU8(p, 0)) + acc = c1*(1<<24+1<<16+1) + 1<<8 + + default: // 0 + return 0x2d06800538d394c2 // xxh_avalanche(key64_056 ^ key64_064) + } + + acc ^= u64(key32_000 ^ key32_004) + return xxhAvalancheSmall(acc) + + case l <= 128: + acc = u64(l) * prime64_1 + + if l > 32 { + if l > 64 { + if l > 96 { + acc += mulFold64(readU64(p, 6*8)^key64_096, readU64(p, 7*8)^key64_104) + acc += mulFold64(readU64(p, ui(l)-8*8)^key64_112, readU64(p, ui(l)-7*8)^key64_120) + } // 96 + acc += mulFold64(readU64(p, 4*8)^key64_064, readU64(p, 5*8)^key64_072) + acc += mulFold64(readU64(p, ui(l)-6*8)^key64_080, readU64(p, ui(l)-5*8)^key64_088) + } // 64 + acc += mulFold64(readU64(p, 2*8)^key64_032, readU64(p, 3*8)^key64_040) + acc += mulFold64(readU64(p, ui(l)-4*8)^key64_048, readU64(p, ui(l)-3*8)^key64_056) + } // 32 + acc += mulFold64(readU64(p, 0*8)^key64_000, readU64(p, 1*8)^key64_008) + acc += mulFold64(readU64(p, ui(l)-2*8)^key64_016, readU64(p, ui(l)-1*8)^key64_024) + + return xxh3Avalanche(acc) + + case l <= 240: + acc = u64(l) * prime64_1 + + 
acc += mulFold64(readU64(p, 0*16+0)^key64_000, readU64(p, 0*16+8)^key64_008) + acc += mulFold64(readU64(p, 1*16+0)^key64_016, readU64(p, 1*16+8)^key64_024) + acc += mulFold64(readU64(p, 2*16+0)^key64_032, readU64(p, 2*16+8)^key64_040) + acc += mulFold64(readU64(p, 3*16+0)^key64_048, readU64(p, 3*16+8)^key64_056) + acc += mulFold64(readU64(p, 4*16+0)^key64_064, readU64(p, 4*16+8)^key64_072) + acc += mulFold64(readU64(p, 5*16+0)^key64_080, readU64(p, 5*16+8)^key64_088) + acc += mulFold64(readU64(p, 6*16+0)^key64_096, readU64(p, 6*16+8)^key64_104) + acc += mulFold64(readU64(p, 7*16+0)^key64_112, readU64(p, 7*16+8)^key64_120) + + // avalanche + acc = xxh3Avalanche(acc) + + // trailing groups after 128 + top := ui(l) &^ 15 + for i := ui(8 * 16); i < top; i += 16 { + acc += mulFold64(readU64(p, i+0)^readU64(key, i-125), readU64(p, i+8)^readU64(key, i-117)) + } + + // last 16 bytes + acc += mulFold64(readU64(p, ui(l)-16)^key64_119, readU64(p, ui(l)-8)^key64_127) + + return xxh3Avalanche(acc) + + default: + acc = u64(l) * prime64_1 + + accs := [8]u64{ + prime32_3, prime64_1, prime64_2, prime64_3, + prime64_4, prime32_2, prime64_5, prime32_1, + } + + if hasAVX512 && l >= avx512Switch { + accumAVX512(&accs, p, key, u64(l)) + } else if hasAVX2 { + accumAVX2(&accs, p, key, u64(l)) + } else if hasSSE2 { + accumSSE(&accs, p, key, u64(l)) + } else { + accumScalar(&accs, p, key, u64(l)) + } + + // merge accs + acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019) + acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035) + acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051) + acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067) + + return xxh3Avalanche(acc) + } +} diff --git a/vendor/github.com/zeebo/xxh3/hash64_seed.go b/vendor/github.com/zeebo/xxh3/hash64_seed.go new file mode 100644 index 000000000..429994c36 --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/hash64_seed.go @@ -0,0 +1,134 @@ +package xxh3 + +import "math/bits" + +// HashSeed returns the hash of the byte slice 
with given seed. +func HashSeed(b []byte, seed uint64) uint64 { + return hashAnySeed(*(*str)(ptr(&b)), seed) + +} + +// HashStringSeed returns the hash of the string slice with given seed. +func HashStringSeed(s string, seed uint64) uint64 { + return hashAnySeed(*(*str)(ptr(&s)), seed) +} + +func hashAnySeed(s str, seed uint64) (acc u64) { + p, l := s.p, s.l + + switch { + case l <= 16: + switch { + case l > 8: + inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032 + seed) + inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048 - seed) + folded := mulFold64(inputlo, inputhi) + return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) + + case l > 3: + seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 + input1 := readU32(p, 0) + input2 := readU32(p, ui(l)-4) + input64 := u64(input2) + u64(input1)<<32 + keyed := input64 ^ (key64_008 ^ key64_016 - seed) + return rrmxmx(keyed, u64(l)) + + case l == 3: // 3 + c12 := u64(readU16(p, 0)) + c3 := u64(readU8(p, 2)) + acc = c12<<16 + c3 + 3<<8 + + case l > 1: // 2 + c12 := u64(readU16(p, 0)) + acc = c12*(1<<24+1)>>8 + 2<<8 + + case l == 1: // 1 + c1 := u64(readU8(p, 0)) + acc = c1*(1<<24+1<<16+1) + 1<<8 + + default: + return xxhAvalancheSmall(seed ^ key64_056 ^ key64_064) + } + + acc ^= u64(key32_000^key32_004) + seed + return xxhAvalancheSmall(acc) + + case l <= 128: + acc = u64(l) * prime64_1 + + if l > 32 { + if l > 64 { + if l > 96 { + acc += mulFold64(readU64(p, 6*8)^(key64_096+seed), readU64(p, 7*8)^(key64_104-seed)) + acc += mulFold64(readU64(p, ui(l)-8*8)^(key64_112+seed), readU64(p, ui(l)-7*8)^(key64_120-seed)) + } // 96 + acc += mulFold64(readU64(p, 4*8)^(key64_064+seed), readU64(p, 5*8)^(key64_072-seed)) + acc += mulFold64(readU64(p, ui(l)-6*8)^(key64_080+seed), readU64(p, ui(l)-5*8)^(key64_088-seed)) + } // 64 + acc += mulFold64(readU64(p, 2*8)^(key64_032+seed), readU64(p, 3*8)^(key64_040-seed)) + acc += mulFold64(readU64(p, ui(l)-4*8)^(key64_048+seed), readU64(p, ui(l)-3*8)^(key64_056-seed)) + 
} // 32 + acc += mulFold64(readU64(p, 0*8)^(key64_000+seed), readU64(p, 1*8)^(key64_008-seed)) + acc += mulFold64(readU64(p, ui(l)-2*8)^(key64_016+seed), readU64(p, ui(l)-1*8)^(key64_024-seed)) + + return xxh3Avalanche(acc) + + case l <= 240: + acc = u64(l) * prime64_1 + + acc += mulFold64(readU64(p, 0*16+0)^(key64_000+seed), readU64(p, 0*16+8)^(key64_008-seed)) + acc += mulFold64(readU64(p, 1*16+0)^(key64_016+seed), readU64(p, 1*16+8)^(key64_024-seed)) + acc += mulFold64(readU64(p, 2*16+0)^(key64_032+seed), readU64(p, 2*16+8)^(key64_040-seed)) + acc += mulFold64(readU64(p, 3*16+0)^(key64_048+seed), readU64(p, 3*16+8)^(key64_056-seed)) + acc += mulFold64(readU64(p, 4*16+0)^(key64_064+seed), readU64(p, 4*16+8)^(key64_072-seed)) + acc += mulFold64(readU64(p, 5*16+0)^(key64_080+seed), readU64(p, 5*16+8)^(key64_088-seed)) + acc += mulFold64(readU64(p, 6*16+0)^(key64_096+seed), readU64(p, 6*16+8)^(key64_104-seed)) + acc += mulFold64(readU64(p, 7*16+0)^(key64_112+seed), readU64(p, 7*16+8)^(key64_120-seed)) + + // avalanche + acc = xxh3Avalanche(acc) + + // trailing groups after 128 + top := ui(l) &^ 15 + for i := ui(8 * 16); i < top; i += 16 { + acc += mulFold64(readU64(p, i+0)^(readU64(key, i-125)+seed), readU64(p, i+8)^(readU64(key, i-117)-seed)) + } + + // last 16 bytes + acc += mulFold64(readU64(p, ui(l)-16)^(key64_119+seed), readU64(p, ui(l)-8)^(key64_127-seed)) + + return xxh3Avalanche(acc) + + default: + acc = u64(l) * prime64_1 + + secret := key + if seed != 0 { + secret = ptr(&[secretSize]byte{}) + initSecret(secret, seed) + } + + accs := [8]u64{ + prime32_3, prime64_1, prime64_2, prime64_3, + prime64_4, prime32_2, prime64_5, prime32_1, + } + + if hasAVX512 && l >= avx512Switch { + accumAVX512(&accs, p, secret, u64(l)) + } else if hasAVX2 { + accumAVX2(&accs, p, secret, u64(l)) + } else if hasSSE2 { + accumSSE(&accs, p, secret, u64(l)) + } else { + accumScalarSeed(&accs, p, secret, u64(l)) + } + + // merge accs + acc += mulFold64(accs[0]^readU64(secret, 11), 
accs[1]^readU64(secret, 19)) + acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) + acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) + acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) + + return xxh3Avalanche(acc) + } +} diff --git a/vendor/github.com/zeebo/xxh3/hasher.go b/vendor/github.com/zeebo/xxh3/hasher.go new file mode 100644 index 000000000..d9789980a --- /dev/null +++ b/vendor/github.com/zeebo/xxh3/hasher.go @@ -0,0 +1,239 @@ +package xxh3 + +import ( + "encoding/binary" + "hash" +) + +// Hasher implements the hash.Hash interface +type Hasher struct { + acc [8]u64 + blk u64 + len u64 + key ptr + buf [_block + _stripe]byte + seed u64 +} + +var ( + _ hash.Hash = (*Hasher)(nil) + _ hash.Hash64 = (*Hasher)(nil) +) + +// New returns a new Hasher that implements the hash.Hash interface. +func New() *Hasher { + return new(Hasher) +} + +// NewSeed returns a new Hasher that implements the hash.Hash interface. +func NewSeed(seed uint64) *Hasher { + var h Hasher + h.Reset() + h.seed = seed + h.key = key + + // Only initiate once, not on reset. + if seed != 0 { + h.key = ptr(&[secretSize]byte{}) + initSecret(h.key, seed) + } + return &h +} + +// Reset resets the Hash to its initial state. +func (h *Hasher) Reset() { + h.acc = [8]u64{ + prime32_3, prime64_1, prime64_2, prime64_3, + prime64_4, prime32_2, prime64_5, prime32_1, + } + h.blk = 0 + h.len = 0 +} + +// BlockSize returns the hash's underlying block size. +// The Write method will accept any amount of data, but +// it may operate more efficiently if all writes are a +// multiple of the block size. +func (h *Hasher) BlockSize() int { return _stripe } + +// Size returns the number of bytes Sum will return. +func (h *Hasher) Size() int { return 8 } + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. 
+func (h *Hasher) Sum(b []byte) []byte {
+	var tmp [8]byte
+	binary.BigEndian.PutUint64(tmp[:], h.Sum64()) // digest bytes are written big-endian
+	return append(b, tmp[:]...)
+}
+
+// Write adds more data to the running hash.
+// It never returns an error.
+func (h *Hasher) Write(buf []byte) (int, error) {
+	h.update(buf)
+	return len(buf), nil
+}
+
+// WriteString adds more data to the running hash.
+// It never returns an error.
+func (h *Hasher) WriteString(buf string) (int, error) {
+	h.updateString(buf)
+	return len(buf), nil
+}
+
+func (h *Hasher) update(buf []byte) { // update hands buf to updateString without copying it
+	// Reinterprets the slice header as a string header; relies on the data pointer being the first word in the string header
+	h.updateString(*(*string)(ptr(&buf)))
+}
+
+func (h *Hasher) updateString(buf string) { // updateString folds buf into the running state
+	if h.key == nil { // zero-value Hasher (e.g. from New): install the default key lazily
+		h.key = key
+		h.Reset()
+	}
+
+	// On first write (nothing buffered yet), while the input is larger than the staging buffer, process whole blocks directly from it without copy.
+	for h.len == 0 && len(buf) > len(h.buf) {
+		if hasAVX2 {
+			accumBlockAVX2(&h.acc, *(*ptr)(ptr(&buf)), h.key) // *(*ptr)(ptr(&buf)) reads the string's data pointer
+		} else if hasSSE2 {
+			accumBlockSSE(&h.acc, *(*ptr)(ptr(&buf)), h.key)
+		} else {
+			accumBlockScalar(&h.acc, *(*ptr)(ptr(&buf)), h.key)
+		}
+		buf = buf[_block:] // advance past the block just accumulated
+		h.blk++
+	}
+
+	for len(buf) > 0 {
+		if h.len < u64(len(h.buf)) { // room left in the staging buffer: copy in as much as fits
+			n := copy(h.buf[h.len:], buf)
+			h.len += u64(n)
+			buf = buf[n:]
+			continue
+		}
+
+		if hasAVX2 { // staging buffer is full and input remains: fold one block from it
+			accumBlockAVX2(&h.acc, ptr(&h.buf), h.key)
+		} else if hasSSE2 {
+			accumBlockSSE(&h.acc, ptr(&h.buf), h.key)
+		} else {
+			accumBlockScalar(&h.acc, ptr(&h.buf), h.key)
+		}
+
+		h.blk++
+		h.len = _stripe
+		copy(h.buf[:_stripe], h.buf[_block:]) // move the trailing _stripe bytes to the front of buf
+	}
+}
+
+// Sum64 returns the 64-bit hash of the written data.
+func (h *Hasher) Sum64() uint64 {
+	if h.key == nil { // zero-value Hasher that has not been written to yet
+		h.key = key
+		h.Reset()
+	}
+
+	if h.blk == 0 { // no block folded yet: all input is still in buf, so hash it in one shot
+		if h.seed == 0 {
+			return Hash(h.buf[:h.len])
+		}
+		return HashSeed(h.buf[:h.len], h.seed)
+	}
+
+	l := h.blk*_block + h.len // bytes in folded blocks plus bytes still buffered
+	acc := l * prime64_1
+	accs := h.acc // array copy, so h.acc itself is not modified below
+
+	if h.len > 0 {
+		// We are only ever doing 1 block here, so no avx512.
+		if hasAVX2 {
+			accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len)
+		} else if hasSSE2 {
+			accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len)
+		} else {
+			accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len)
+		}
+	}
+
+	if h.seed == 0 { // unseeded: merge the accumulators with the key64_* values
+		acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
+		acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
+		acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
+		acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
+	} else { // seeded: read the words from h.key; offsets 11..67 mirror the key64_011..key64_067 names
+		secret := h.key
+		acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
+		acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
+		acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
+		acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
+	}
+
+	acc = xxh3Avalanche(acc)
+
+	return acc
+}
+
+// Sum128 returns the 128-bit hash of the written data.
+func (h *Hasher) Sum128() Uint128 {
+	if h.key == nil { // zero-value Hasher that has not been written to yet
+		h.key = key
+		h.Reset()
+	}
+
+	if h.blk == 0 { // no block folded yet: all input is still in buf, so hash it in one shot
+		if h.seed == 0 {
+			return Hash128(h.buf[:h.len])
+		}
+		return Hash128Seed(h.buf[:h.len], h.seed)
+	}
+
+	l := h.blk*_block + h.len // bytes in folded blocks plus bytes still buffered
+	acc := Uint128{Lo: l * prime64_1, Hi: ^(l * prime64_2)}
+	accs := h.acc // array copy, so h.acc itself is not modified below
+
+	if h.len > 0 {
+		// We are only ever doing 1 block here, so no avx512.
+		if hasAVX2 {
+			accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len)
+		} else if hasSSE2 {
+			accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len)
+		} else {
+			accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len)
+		}
+	}
+
+	if h.seed == 0 { // unseeded: Lo and Hi each merge the accumulators with their own key64_* values
+		acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
+		acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125)
+
+		acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
+		acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141)
+
+		acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
+		acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157)
+
+		acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
+		acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173)
+	} else { // seeded: read the words from h.key instead
+		secret := h.key
+		const hi_off = 117 - 11 // distance from each Lo read offset to its Hi counterpart (11+hi_off == 117)
+
+		acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
+		acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off))
+
+		acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
+		acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off))
+
+		acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
+		acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off))
+
+		acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
+		acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off))
+	}
+
+	acc.Lo = xxh3Avalanche(acc.Lo)
+	acc.Hi = xxh3Avalanche(acc.Hi)
+
+	return acc
+}
diff --git a/vendor/github.com/zeebo/xxh3/utils.go b/vendor/github.com/zeebo/xxh3/utils.go
new file mode 100644
index 000000000..a837e68a6
--- /dev/null
+++ b/vendor/github.com/zeebo/xxh3/utils.go
@@ -0,0 +1,129 @@
+package xxh3
+
+import (
+	"math/bits"
+	"unsafe"
+)
+
+// Uint128 is a 128 bit value.
+// The actual value can be thought of as u.Hi<<64 | u.Lo.
+type Uint128 struct {
+	Hi, Lo uint64 // high and low 64-bit halves
+}
+
+// Bytes returns the uint128 as an array of bytes in canonical form (big-endian encoded).
+func (u Uint128) Bytes() [16]byte {
+	return [16]byte{
+		byte(u.Hi >> 0x38), byte(u.Hi >> 0x30), byte(u.Hi >> 0x28), byte(u.Hi >> 0x20),
+		byte(u.Hi >> 0x18), byte(u.Hi >> 0x10), byte(u.Hi >> 0x08), byte(u.Hi),
+		byte(u.Lo >> 0x38), byte(u.Lo >> 0x30), byte(u.Lo >> 0x28), byte(u.Lo >> 0x20),
+		byte(u.Lo >> 0x18), byte(u.Lo >> 0x10), byte(u.Lo >> 0x08), byte(u.Lo),
+	}
+}
+
+type ( // short aliases for the pointer and integer types used by the hashing code
+	ptr = unsafe.Pointer
+	ui  = uintptr
+
+	u8   = uint8
+	u32  = uint32
+	u64  = uint64
+	u128 = Uint128
+)
+
+type str struct { // NOTE(review): looks like a string-header mirror (data pointer, length); confirm against its users
+	p ptr
+	l uint
+}
+
+func readU8(p ptr, o ui) uint8 { // readU8 returns the byte at address p+o
+	return *(*uint8)(ptr(ui(p) + o))
+}
+
+func readU16(p ptr, o ui) uint16 { // readU16 assembles a little-endian uint16 from the 2 bytes at p+o
+	b := (*[2]byte)(ptr(ui(p) + o))
+	return uint16(b[0]) | uint16(b[1])<<8
+}
+
+func readU32(p ptr, o ui) uint32 { // readU32 assembles a little-endian uint32 from the 4 bytes at p+o
+	b := (*[4]byte)(ptr(ui(p) + o))
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func readU64(p ptr, o ui) uint64 { // readU64 assembles a little-endian uint64 from the 8 bytes at p+o
+	b := (*[8]byte)(ptr(ui(p) + o))
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func writeU64(p ptr, o ui, v u64) { // writeU64 stores v as 8 little-endian bytes at p+o
+	b := (*[8]byte)(ptr(ui(p) + o))
+	b[0] = byte(v)
+	b[1] = byte(v >> 8)
+	b[2] = byte(v >> 16)
+	b[3] = byte(v >> 24)
+	b[4] = byte(v >> 32)
+	b[5] = byte(v >> 40)
+	b[6] = byte(v >> 48)
+	b[7] = byte(v >> 56)
+}
+
+const secretSize = 192 // number of bytes initSecret writes (secretSize/16 pairs of 8-byte words)
+
+func initSecret(secret ptr, seed u64) { // initSecret fills secret with a seed-adjusted copy of key
+	for i := ui(0); i < secretSize/16; i++ { // one 16-byte pair of 64-bit words per iteration
+		lo := readU64(key, 16*i) + seed // first word of the pair: key word plus seed
+		hi := readU64(key, 16*i+8) - seed // second word of the pair: key word minus seed
+		writeU64(secret, 16*i, lo)
+		writeU64(secret, 16*i+8, hi)
+	}
+}
+
+func xxh64AvalancheSmall(x u64) u64 { // same mixing steps as xxh64AvalancheFull minus the first shift-xor (see notes below)
+	// x ^= x >> 33 // x must be < 32 bits
+	// x ^= u64(key32_000 ^ key32_004) // caller must do this
+	x *= prime64_2
+	x ^= x >> 29
+	x *= prime64_3
+	x ^= x >> 32
+	return x
+}
+
+func xxhAvalancheSmall(x u64) u64 { // body is identical to xxh64AvalancheFull
+	x ^= x >> 33
+	x *= prime64_2
+	x ^= x >> 29
+	x *= prime64_3
+	x ^= x >> 32
+	return x
+}
+
+func xxh64AvalancheFull(x u64) u64 { // shift-xor / multiply mixing of x using prime64_2 and prime64_3
+	x ^= x >> 33
+	x *= prime64_2
+	x ^= x >> 29
+	x *= prime64_3
+	x ^= x >> 32
+	return x
+}
+
+func xxh3Avalanche(x u64) u64 { // final mixing step applied to an accumulated hash value
+	x ^= x >> 37
+	x *= 0x165667919e3779f9
+	x ^= x >> 32
+	return x
+}
+
+func rrmxmx(h64 u64, len u64) u64 { // mixes h64 with rotations and multiplies, folding len into the result
+	h64 ^= bits.RotateLeft64(h64, 49) ^ bits.RotateLeft64(h64, 24)
+	h64 *= 0x9fb21c651e98df25
+	h64 ^= (h64 >> 35) + len
+	h64 *= 0x9fb21c651e98df25
+	h64 ^= (h64 >> 28)
+	return h64
+}
+
+func mulFold64(x, y u64) u64 { // 128-bit product of x and y, folded to 64 bits by XORing its halves
+	hi, lo := bits.Mul64(x, y)
+	return hi ^ lo
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index e721bfff3..4d3011414 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -40,9 +40,6 @@ codeberg.org/gruf/go-kv/format
 # codeberg.org/gruf/go-logger/v2 v2.2.1
 ## explicit; go 1.19
 codeberg.org/gruf/go-logger/v2/level
-# codeberg.org/gruf/go-mangler v1.2.3
-## explicit; go 1.19
-codeberg.org/gruf/go-mangler
 # codeberg.org/gruf/go-maps v1.0.3
 ## explicit; go 1.19
 codeberg.org/gruf/go-maps
@@ -59,7 +56,7 @@ codeberg.org/gruf/go-sched
 ## explicit; go 1.19
 codeberg.org/gruf/go-store/v2/storage
 codeberg.org/gruf/go-store/v2/util
-# codeberg.org/gruf/go-structr v0.1.1
+# codeberg.org/gruf/go-structr v0.2.0
 ## explicit; go 1.21
 codeberg.org/gruf/go-structr
 # codeberg.org/superseriousbusiness/exif-terminator v0.7.0
@@ -781,6 +778,9 @@ github.com/yuin/goldmark/renderer
 github.com/yuin/goldmark/renderer/html
 github.com/yuin/goldmark/text
 github.com/yuin/goldmark/util
+# github.com/zeebo/xxh3 v1.0.2
+## explicit; go 1.17
+github.com/zeebo/xxh3
 # go.opentelemetry.io/otel v1.20.0
 ## explicit; go 1.20
 go.opentelemetry.io/otel