[feature] add per-uri dereferencer locks (#2291)

kim 2023-10-31 11:12:22 +00:00 committed by GitHub
parent 51d0a0bba5
commit ce71a5a790
54 changed files with 2432 additions and 2719 deletions
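
In a minimal form, the pattern this commit introduces looks like the sketch below: every dereference of a URI first acquires a lock keyed on that URI, so concurrent fetches of the same remote account or status serialize instead of racing each other into the database. The uriLocks type here is hypothetical; the real implementation is the vendored codeberg.org/gruf/go-mutexes MutexMap further down this diff, which adds memory pooling and self-eviction on top of the same idea.

package main

import (
	"fmt"
	"sync"
)

// uriLocks is a simplified per-key mutex map: Lock(uri) blocks until
// the URI is free and returns an unlock function.
type uriLocks struct {
	mu    sync.Mutex
	inUse map[string]*sync.Mutex
	refs  map[string]int // holders + waiters per key, for eviction
}

func newURILocks() *uriLocks {
	return &uriLocks{
		inUse: make(map[string]*sync.Mutex),
		refs:  make(map[string]int),
	}
}

// Lock acquires the mutex for uri, creating it on first use, and
// returns a function that releases it and evicts idle entries.
func (l *uriLocks) Lock(uri string) (unlock func()) {
	l.mu.Lock()
	m, ok := l.inUse[uri]
	if !ok {
		m = new(sync.Mutex)
		l.inUse[uri] = m
	}
	l.refs[uri]++
	l.mu.Unlock()

	m.Lock() // blocks while another goroutine dereferences this URI

	return func() {
		m.Unlock()
		l.mu.Lock()
		if l.refs[uri]--; l.refs[uri] == 0 {
			delete(l.inUse, uri)
			delete(l.refs, uri)
		}
		l.mu.Unlock()
	}
}

func main() {
	locks := newURILocks()
	unlock := locks.Lock("https://example.org/users/alice")
	defer unlock()
	fmt.Println("holding per-URI deref lock")
}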

go.mod
View file

@ -4,6 +4,8 @@ go 1.21
toolchain go1.21.3
replace codeberg.org/gruf/go-mutexes => ../go-mutexes
require (
codeberg.org/gruf/go-bytesize v1.0.2
codeberg.org/gruf/go-byteutil v1.1.2
@ -11,13 +13,13 @@ require (
codeberg.org/gruf/go-debug v1.3.0
codeberg.org/gruf/go-errors/v2 v2.2.0
codeberg.org/gruf/go-fastcopy v1.1.2
codeberg.org/gruf/go-iotools v0.0.0-20230601182242-d933b07dcbef
codeberg.org/gruf/go-iotools v0.0.0-20230811115124-5d4223615a7f
codeberg.org/gruf/go-kv v1.6.4
codeberg.org/gruf/go-logger/v2 v2.2.1
codeberg.org/gruf/go-mutexes v1.1.5
codeberg.org/gruf/go-mutexes v1.2.0
codeberg.org/gruf/go-runners v1.6.1
codeberg.org/gruf/go-sched v1.2.3
codeberg.org/gruf/go-store/v2 v2.2.2
codeberg.org/gruf/go-store/v2 v2.2.4
github.com/DmitriyVTitov/size v1.5.0
github.com/KimMachineGun/automemlimit v0.3.0
github.com/abema/go-mp4 v1.1.1
@ -77,12 +79,9 @@ require (
codeberg.org/gruf/go-atomics v1.1.0 // indirect
codeberg.org/gruf/go-bitutil v1.1.0 // indirect
codeberg.org/gruf/go-bytes v1.0.2 // indirect
codeberg.org/gruf/go-fastpath v1.0.3 // indirect
codeberg.org/gruf/go-fastpath/v2 v2.0.0 // indirect
codeberg.org/gruf/go-hashenc v1.0.2 // indirect
codeberg.org/gruf/go-mangler v1.2.3 // indirect
codeberg.org/gruf/go-maps v1.0.3 // indirect
codeberg.org/gruf/go-pools v1.1.0 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
@ -128,7 +127,7 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/compress v1.17.2 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/magiconair/properties v1.8.7 // indirect

go.sum
View file

@ -40,12 +40,10 @@ codeberg.org/gruf/go-atomics v1.1.0/go.mod h1:a/4/y/LgvjxjQVnpoy1VVkOSzLS1W9i1g4
codeberg.org/gruf/go-bitutil v1.0.0/go.mod h1:sb8IjlDnjVTz8zPK/8lmHesKxY0Yb3iqHWjUM/SkphA=
codeberg.org/gruf/go-bitutil v1.1.0 h1:U1Q+A1mtnPk+npqYrlRBc9ar2C5hYiBd17l1Wrp2Bt8=
codeberg.org/gruf/go-bitutil v1.1.0/go.mod h1:rGibFevYTQfYKcPv0Df5KpG8n5xC3AfD4d/UgYeoNy0=
codeberg.org/gruf/go-bytes v1.0.0/go.mod h1:1v/ibfaosfXSZtRdW2rWaVrDXMc9E3bsi/M9Ekx39cg=
codeberg.org/gruf/go-bytes v1.0.2 h1:malqE42Ni+h1nnYWBUAJaDDtEzF4aeN4uPN8DfMNNvo=
codeberg.org/gruf/go-bytes v1.0.2/go.mod h1:1v/ibfaosfXSZtRdW2rWaVrDXMc9E3bsi/M9Ekx39cg=
codeberg.org/gruf/go-bytesize v1.0.2 h1:Mo+ITi+0uZ4YNSZf2ed6Qw8acOI39W4mmgE1a8lslXw=
codeberg.org/gruf/go-bytesize v1.0.2/go.mod h1:n/GU8HzL9f3UNp/mUKyr1qVmTlj7+xacpp0OHfkvLPs=
codeberg.org/gruf/go-byteutil v1.0.0/go.mod h1:cWM3tgMCroSzqoBXUXMhvxTxYJp+TbCr6ioISRY5vSU=
codeberg.org/gruf/go-byteutil v1.1.2 h1:TQLZtTxTNca9xEfDIndmo7nBYxeS94nrv/9DS3Nk5Tw=
codeberg.org/gruf/go-byteutil v1.1.2/go.mod h1:cWM3tgMCroSzqoBXUXMhvxTxYJp+TbCr6ioISRY5vSU=
codeberg.org/gruf/go-cache/v3 v3.5.6 h1:TJnNOuij5DF/ZK9pDB61SlYzxidRQeYjYYW3dfFSznc=
@ -57,15 +55,10 @@ codeberg.org/gruf/go-errors/v2 v2.2.0 h1:CxnTtR4+BqRGeBHuG/FdCKM4m3otMdfPVez6ReB
codeberg.org/gruf/go-errors/v2 v2.2.0/go.mod h1:LfzD9nkAAJpEDbkUqOZQ2jdaQ8VrK0pnR36zLOMFq6Y=
codeberg.org/gruf/go-fastcopy v1.1.2 h1:YwmYXPsyOcRBxKEE2+w1bGAZfclHVaPijFsOVOcnNcw=
codeberg.org/gruf/go-fastcopy v1.1.2/go.mod h1:GDDYR0Cnb3U/AIfGM3983V/L+GN+vuwVMvrmVABo21s=
codeberg.org/gruf/go-fastpath v1.0.1/go.mod h1:edveE/Kp3Eqi0JJm0lXYdkVrB28cNUkcb/bRGFTPqeI=
codeberg.org/gruf/go-fastpath v1.0.3 h1:3Iftz9Z2suCEgTLkQMucew+2+4Oe46JPbAM2JEhnjTU=
codeberg.org/gruf/go-fastpath v1.0.3/go.mod h1:edveE/Kp3Eqi0JJm0lXYdkVrB28cNUkcb/bRGFTPqeI=
codeberg.org/gruf/go-fastpath/v2 v2.0.0 h1:iAS9GZahFhyWEH0KLhFEJR+txx1ZhMXxYzu2q5Qo9c0=
codeberg.org/gruf/go-fastpath/v2 v2.0.0/go.mod h1:3pPqu5nZjpbRrOqvLyAK7puS1OfEtQvjd6342Cwz56Q=
codeberg.org/gruf/go-hashenc v1.0.2 h1:U3jH6zMXZiL96czD/qaJd8OR2h7LlBzGv/2WxnMHI/g=
codeberg.org/gruf/go-hashenc v1.0.2/go.mod h1:eK+A8clLcEN/m1nftNsRId0kfYDQnETnuIfBGZ8Gvsg=
codeberg.org/gruf/go-iotools v0.0.0-20230601182242-d933b07dcbef h1:3Ydviw47TFEk27FRCOXkRxU3MfgyNzoicLzq8J3NbtI=
codeberg.org/gruf/go-iotools v0.0.0-20230601182242-d933b07dcbef/go.mod h1:B8uq4yHtIcKXhBZT9C/SYisz25lldLHMVpwZPz4ADLQ=
codeberg.org/gruf/go-iotools v0.0.0-20230811115124-5d4223615a7f h1:Kazm/PInN2m1SannRMRe3DQGQc9V2EuetsQ9KAi+pBQ=
codeberg.org/gruf/go-iotools v0.0.0-20230811115124-5d4223615a7f/go.mod h1:B8uq4yHtIcKXhBZT9C/SYisz25lldLHMVpwZPz4ADLQ=
codeberg.org/gruf/go-kv v1.6.4 h1:3NZiW8HVdBM3kpOiLb7XfRiihnzZWMAixdCznguhILk=
codeberg.org/gruf/go-kv v1.6.4/go.mod h1:O/YkSvKiS9XsRolM3rqCd9YJmND7dAXu9z+PrlYO4bc=
codeberg.org/gruf/go-logger/v2 v2.2.1 h1:RP2u059EQKTBFV3cN8X6xDxNk2RkzqdgXGKflKqB7Oc=
@ -74,16 +67,12 @@ codeberg.org/gruf/go-mangler v1.2.3 h1:sj0dey2lF5GRQL7fXmCY0wPNaI5JrROiThb0VDbzF
codeberg.org/gruf/go-mangler v1.2.3/go.mod h1:X/7URkFhLBAVKkTxmqF11Oxw3A6pSSxgPeHssQaiq28=
codeberg.org/gruf/go-maps v1.0.3 h1:VDwhnnaVNUIy5O93CvkcE2IZXnMB1+IJjzfop9V12es=
codeberg.org/gruf/go-maps v1.0.3/go.mod h1:D5LNDxlC9rsDuVQVM6JObaVGAdHB6g2dTdOdkh1aXWA=
codeberg.org/gruf/go-mutexes v1.1.5 h1:8Y8DwCGf24MyzOSaPvLrtk/B4ecVx4z+fppL6dY+PG8=
codeberg.org/gruf/go-mutexes v1.1.5/go.mod h1:1j/6/MBeBQUedAtAtysLLnBKogfOZAxdym0E3wlaBD8=
codeberg.org/gruf/go-pools v1.1.0 h1:LbYP24eQLl/YI1fSU2pafiwhGol1Z1zPjRrMsXpF88s=
codeberg.org/gruf/go-pools v1.1.0/go.mod h1:ZMYpt/DjQWYC3zFD3T97QWSFKs62zAUGJ/tzvgB9D68=
codeberg.org/gruf/go-runners v1.6.1 h1:0KNiEfGnmNUs9intqxEAWqIKUyxVOmYTtn3kPVOHsjQ=
codeberg.org/gruf/go-runners v1.6.1/go.mod h1:QRcSExqXX8DM0rm8Xs6qX7baOzyvw0JIe4mu3TsQT+Y=
codeberg.org/gruf/go-sched v1.2.3 h1:H5ViDxxzOBR3uIyGBCf0eH8b1L8wMybOXcdtUUTXZHk=
codeberg.org/gruf/go-sched v1.2.3/go.mod h1:vT9uB6KWFIIwnG9vcPY2a0alYNoqdL1mSzRM8I+PK7A=
codeberg.org/gruf/go-store/v2 v2.2.2 h1:S+OpXKXtPpkxJ7GyFZgJISXjXtGtz70//ee/uOi/6ks=
codeberg.org/gruf/go-store/v2 v2.2.2/go.mod h1:QRM3LUAfYyoGMWLTqA1WzohxQgYqPFiVv9cqwL0+Uvs=
codeberg.org/gruf/go-store/v2 v2.2.4 h1:8HO1Jh2gg7boQKA3hsDAIXd9zwieu5uXwDXEcTOD9js=
codeberg.org/gruf/go-store/v2 v2.2.4/go.mod h1:zI4VWe5CpXAktYMtaBMrgA5QmO0sQH53LBRvfn1huys=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
@ -366,8 +355,8 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
@ -581,9 +570,6 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.6.0 h1:boZcn2GTjpsynOsC0iJHnBWa4Bi0qzfJjthwauItG68=
github.com/yuin/goldmark v1.6.0/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/blake3 v0.2.1/go.mod h1:TSQ0KjMH+pht+bRyvVooJ1rBpvvngSGaPISafq9MxJk=
github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
@ -762,7 +748,6 @@ golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201014080544-cc95f250f6bc/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -773,7 +758,6 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View file

@ -23,7 +23,6 @@ import (
"fmt"
"path"
"codeberg.org/gruf/go-store/v2/kv"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@ -32,14 +31,14 @@ import (
)
func init() {
deleteAttachment := func(ctx context.Context, l log.Entry, a *gtsmodel.MediaAttachment, s *kv.KVStore, tx bun.Tx) {
if err := s.Delete(ctx, a.File.Path); err != nil && err != storage.ErrNotFound {
deleteAttachment := func(ctx context.Context, l log.Entry, a *gtsmodel.MediaAttachment, s storage.Storage, tx bun.Tx) {
if err := s.Remove(ctx, a.File.Path); err != nil && err != storage.ErrNotFound {
l.Errorf("error removing file %s: %s", a.File.Path, err)
} else {
l.Debugf("deleted %s", a.File.Path)
}
if err := s.Delete(ctx, a.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
if err := s.Remove(ctx, a.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
l.Errorf("error removing file %s: %s", a.Thumbnail.Path, err)
} else {
l.Debugf("deleted %s", a.Thumbnail.Path)
@ -69,7 +68,7 @@ func init() {
}
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
s, err := kv.OpenDisk(storageBasePath, &storage.DiskConfig{
s, err := storage.OpenDisk(storageBasePath, &storage.DiskConfig{
LockFile: path.Join(storageBasePath, "store.lock"),
})
if err != nil {
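
The migration now talks to the storage.Storage interface directly instead of wrapping it in a kv.KVStore, with Remove() replacing the old Delete(). A condensed sketch of the new calls, assuming the go-store v2.2.4 API pinned above (the base path is an example):

package main

import (
	"context"
	"path"

	"codeberg.org/gruf/go-store/v2/storage"
)

func main() {
	base := "/tmp/gts-storage" // example path

	// storage.OpenDisk replaces the old kv.OpenDisk wrapper.
	disk, err := storage.OpenDisk(base, &storage.DiskConfig{
		LockFile: path.Join(base, "store.lock"),
	})
	if err != nil {
		panic(err)
	}

	// Remove replaces the old KVStore.Delete call.
	_ = disk.Remove(context.Background(), "some/file/key")
}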

View file

@ -122,7 +122,7 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
}
// Create and pass-through a new bare-bones model for dereferencing.
return d.enrichAccount(ctx, requestUser, uri, &gtsmodel.Account{
return d.enrichAccountSafely(ctx, requestUser, uri, &gtsmodel.Account{
ID: id.NewULID(),
Domain: uri.Host,
URI: uriStr,
@ -139,7 +139,7 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
}
// Try to update existing account model.
latest, apubAcc, err := d.enrichAccount(ctx,
latest, apubAcc, err := d.enrichAccountSafely(ctx,
requestUser,
uri,
account,
@ -148,10 +148,6 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
if err != nil {
log.Errorf(ctx, "error enriching remote account: %v", err)
// Update fetch-at to slow re-attempts.
account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
// Fallback to existing.
return account, nil, nil
}
@ -218,7 +214,7 @@ func (d *Dereferencer) getAccountByUsernameDomain(
}
// Create and pass-through a new bare-bones model for dereferencing.
account, apubAcc, err := d.enrichAccount(ctx, requestUser, nil, &gtsmodel.Account{
account, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(),
Username: username,
Domain: domain,
@ -244,7 +240,7 @@ func (d *Dereferencer) getAccountByUsernameDomain(
if apubAcc == nil {
// This is an existing, up-to-date account; ensure it is populated.
if err := d.state.DB.PopulateAccount(ctx, account); err != nil {
if err := d.state.DB.PopulateAccount(ctx, latest); err != nil {
log.Errorf(ctx, "error populating existing account: %v", err)
}
}
@ -267,8 +263,8 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
return nil, nil, gtserror.Newf("invalid account uri %q: %w", account.URI, err)
}
// Try to update + deref existing account model.
latest, apubAcc, err := d.enrichAccount(ctx,
// Try to update + deref passed account model.
latest, apubAcc, err := d.enrichAccountSafely(ctx,
requestUser,
uri,
account,
@ -276,20 +272,17 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
)
if err != nil {
log.Errorf(ctx, "error enriching remote account: %v", err)
// Update fetch-at to slow re-attempts.
account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
return nil, nil, err
return nil, nil, gtserror.Newf("error enriching remote account: %w", err)
}
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil {
if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
log.Errorf(ctx, "error fetching account featured collection: %v", err)
}
})
}
return latest, apubAcc, nil
}
@ -311,21 +304,94 @@ func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser stri
// Enqueue a worker function to enrich this account async.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
latest, _, err := d.enrichAccount(ctx, requestUser, uri, account, apubAcc)
latest, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, uri, account, apubAcc)
if err != nil {
log.Errorf(ctx, "error enriching remote account: %v", err)
return
}
// This account was updated, re-dereference account featured posts.
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
log.Errorf(ctx, "error fetching account featured collection: %v", err)
}
})
}
})
}
// enrichAccount will enrich the given account, whether a new barebones model, or existing model from the database. It handles necessary dereferencing, webfingering etc.
func (d *Dereferencer) enrichAccount(ctx context.Context, requestUser string, uri *url.URL, account *gtsmodel.Account, apubAcc ap.Accountable) (*gtsmodel.Account, ap.Accountable, error) {
// enrichAccountSafely wraps enrichAccount() to perform
// it within the State{}.FedLocks mutexmap, which protects
// dereferencing actions with per-URI mutex locks.
func (d *Dereferencer) enrichAccountSafely(
ctx context.Context,
requestUser string,
uri *url.URL,
account *gtsmodel.Account,
apubAcc ap.Accountable,
) (*gtsmodel.Account, ap.Accountable, error) {
// By default use account.URI
// as the per-URI deref lock.
uriStr := account.URI
if uriStr == "" {
// No URI is set yet, instead generate a faux-one from user+domain.
uriStr = "https://" + account.Domain + "/user/" + account.Username
}
// Acquire per-URI deref lock, wrapping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d.state.FedLocks.Lock(uriStr)
unlock = doOnce(unlock)
defer unlock()
// Perform status enrichment with passed vars.
latest, apubAcc, err := d.enrichAccount(ctx,
requestUser,
uri,
account,
apubAcc,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
}
// Unlock now
// we're done.
unlock()
if errors.Is(err, db.ErrAlreadyExists) {
// Ensure AP model isn't set,
// otherwise this indicates WE
// enriched the account.
apubAcc = nil
// DATA RACE! We likely lost out to another goroutine
// in a call to db.Put(Account). Look again in DB by URI.
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
if err != nil {
err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err)
}
}
return latest, apubAcc, err
}
// enrichAccount will enrich the given account, whether a
// new barebones model, or existing model from the database.
// It handles necessary dereferencing, webfingering etc.
func (d *Dereferencer) enrichAccount(
ctx context.Context,
requestUser string,
uri *url.URL,
account *gtsmodel.Account,
apubAcc ap.Accountable,
) (*gtsmodel.Account, ap.Accountable, error) {
// Pre-fetch a transport for requesting username, used by later deref procedures.
tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)
if err != nil {
@ -476,13 +542,6 @@ func (d *Dereferencer) enrichAccount(ctx context.Context, requestUser string, ur
// This is new, put it in the database.
err := d.state.DB.PutAccount(ctx, latestAcc)
if errors.Is(err, db.ErrAlreadyExists) {
// TODO: replace this quick fix with per-URI deref locks.
latestAcc, err = d.state.DB.GetAccountByURI(ctx, latestAcc.URI)
return latestAcc, nil, err
}
if err != nil {
return nil, nil, gtserror.Newf("error putting in database: %w", err)
}
@ -545,7 +604,8 @@ func (d *Dereferencer) fetchRemoteAccountAvatar(ctx context.Context, tsport tran
}
// Acquire lock for derefs map.
unlock := d.derefAvatarsMu.Lock()
unlock := d.state.FedLocks.Lock(latestAcc.AvatarRemoteURL)
unlock = doOnce(unlock)
defer unlock()
// Look for an existing dereference in progress.
@ -573,7 +633,7 @@ func (d *Dereferencer) fetchRemoteAccountAvatar(ctx context.Context, tsport tran
defer func() {
// On exit safely remove media from map.
unlock := d.derefAvatarsMu.Lock()
unlock := d.state.FedLocks.Lock(latestAcc.AvatarRemoteURL)
delete(d.derefAvatars, latestAcc.AvatarRemoteURL)
unlock()
}()
@ -635,7 +695,8 @@ func (d *Dereferencer) fetchRemoteAccountHeader(ctx context.Context, tsport tran
}
// Acquire lock for derefs map.
unlock := d.derefHeadersMu.Lock()
unlock := d.state.FedLocks.Lock(latestAcc.HeaderRemoteURL)
unlock = doOnce(unlock)
defer unlock()
// Look for an existing dereference in progress.
@ -663,7 +724,7 @@ func (d *Dereferencer) fetchRemoteAccountHeader(ctx context.Context, tsport tran
defer func() {
// On exit safely remove media from map.
unlock := d.derefHeadersMu.Lock()
unlock := d.state.FedLocks.Lock(latestAcc.HeaderRemoteURL)
delete(d.derefHeaders, latestAcc.HeaderRemoteURL)
unlock()
}()

View file

@ -21,7 +21,6 @@ import (
"net/url"
"sync"
"codeberg.org/gruf/go-mutexes"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/transport"
@ -35,14 +34,14 @@ type Dereferencer struct {
converter *typeutils.Converter
transportController transport.Controller
mediaManager *media.Manager
// all protected by State{}.FedLocks.
derefAvatars map[string]*media.ProcessingMedia
derefAvatarsMu mutexes.Mutex
derefHeaders map[string]*media.ProcessingMedia
derefHeadersMu mutexes.Mutex
derefEmojis map[string]*media.ProcessingEmoji
derefEmojisMu mutexes.Mutex
handshakes map[string][]*url.URL
handshakesMu sync.Mutex // mutex to lock/unlock when checking or updating the handshakes map
handshakesMu sync.Mutex
}
// NewDereferencer returns a Dereferencer initialized with the given parameters.
@ -61,11 +60,5 @@ func NewDereferencer(
derefHeaders: make(map[string]*media.ProcessingMedia),
derefEmojis: make(map[string]*media.ProcessingEmoji),
handshakes: make(map[string][]*url.URL),
// use wrapped mutexes to allow safely deferring unlock
// even when more granular locks are required (only unlocks once).
derefAvatarsMu: mutexes.WithSafety(mutexes.New()),
derefHeadersMu: mutexes.WithSafety(mutexes.New()),
derefEmojisMu: mutexes.WithSafety(mutexes.New()),
}
}

View file

@ -36,8 +36,15 @@ func (d *Dereferencer) GetRemoteEmoji(ctx context.Context, requestingUsername st
processingEmoji *media.ProcessingEmoji
)
// Ensure we have been passed a valid URL.
derefURI, err := url.Parse(remoteURL)
if err != nil {
return nil, fmt.Errorf("GetRemoteEmoji: error parsing url for emoji %s: %s", shortcodeDomain, err)
}
// Acquire lock for derefs map.
unlock := d.derefEmojisMu.Lock()
unlock := d.state.FedLocks.Lock(remoteURL)
unlock = doOnce(unlock)
defer unlock()
// first check if we're already processing this emoji
@ -51,11 +58,6 @@ func (d *Dereferencer) GetRemoteEmoji(ctx context.Context, requestingUsername st
return nil, fmt.Errorf("GetRemoteEmoji: error creating transport to fetch emoji %s: %s", shortcodeDomain, err)
}
derefURI, err := url.Parse(remoteURL)
if err != nil {
return nil, fmt.Errorf("GetRemoteEmoji: error parsing url for emoji %s: %s", shortcodeDomain, err)
}
dataFunc := func(innerCtx context.Context) (io.ReadCloser, int64, error) {
return t.DereferenceMedia(innerCtx, derefURI)
}
@ -75,7 +77,7 @@ func (d *Dereferencer) GetRemoteEmoji(ctx context.Context, requestingUsername st
defer func() {
// On exit safely remove emoji from map.
unlock := d.derefEmojisMu.Lock()
unlock := d.state.FedLocks.Lock(remoteURL)
delete(d.derefEmojis, shortcodeDomain)
unlock()
}()
@ -95,7 +97,6 @@ func (d *Dereferencer) populateEmojis(ctx context.Context, rawEmojis []*gtsmodel
// * the shortcode of the emoji
// * the remote URL of the image
// This should be enough to dereference the emoji
gotEmojis := make([]*gtsmodel.Emoji, 0, len(rawEmojis))
for _, e := range rawEmojis {

View file

@ -115,7 +115,7 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
}
// Create and pass-through a new bare-bones model for deref.
return d.enrichStatus(ctx, requestUser, uri, &gtsmodel.Status{
return d.enrichStatusSafely(ctx, requestUser, uri, &gtsmodel.Status{
Local: func() *bool { var false bool; return &false }(),
URI: uriStr,
}, nil)
@ -131,7 +131,7 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
}
// Try to update + deref existing status model.
latest, apubStatus, err := d.enrichStatus(ctx,
latest, apubStatus, err := d.enrichStatusSafely(ctx,
requestUser,
uri,
status,
@ -140,10 +140,6 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
if err != nil {
log.Errorf(ctx, "error enriching remote status: %v", err)
// Update fetch-at to slow re-attempts.
status.FetchedAt = time.Now()
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
// Fallback to existing.
return status, nil, nil
}
@ -166,8 +162,8 @@ func (d *Dereferencer) RefreshStatus(ctx context.Context, requestUser string, st
return nil, nil, gtserror.Newf("invalid status uri %q: %w", status.URI, err)
}
// Try to update + deref existing status model.
latest, apubStatus, err := d.enrichStatus(ctx,
// Try to update + deref the passed status model.
latest, apubStatus, err := d.enrichStatusSafely(ctx,
requestUser,
uri,
status,
@ -189,7 +185,7 @@ func (d *Dereferencer) RefreshStatus(ctx context.Context, requestUser string, st
// This is a more optimized form of manually enqueueing .UpdateStatus() to the federation worker, since it only enqueues update if necessary.
func (d *Dereferencer) RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) {
// Check whether needs update.
if statusUpToDate(status) {
if !force && statusUpToDate(status) {
return
}
@ -202,17 +198,81 @@ func (d *Dereferencer) RefreshStatusAsync(ctx context.Context, requestUser strin
// Enqueue a worker function to re-fetch this status async.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
latest, apubStatus, err := d.enrichStatus(ctx, requestUser, uri, status, apubStatus)
latest, apubStatus, err := d.enrichStatusSafely(ctx, requestUser, uri, status, apubStatus)
if err != nil {
log.Errorf(ctx, "error enriching remote status: %v", err)
return
}
if apubStatus != nil {
// This status was updated, re-dereference the whole thread.
d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus)
}
})
}
// enrichStatusSafely wraps enrichStatus() to perform
// it within the State{}.FedLocks mutexmap, which protects
// dereferencing actions with per-URI mutex locks.
func (d *Dereferencer) enrichStatusSafely(
ctx context.Context,
requestUser string,
uri *url.URL,
status *gtsmodel.Status,
apubStatus ap.Statusable,
) (*gtsmodel.Status, ap.Statusable, error) {
uriStr := status.URI
if status.ID != "" {
// This is an existing status, first try to populate it. This
// is required by the checks below for existing tags, media etc.
if err := d.state.DB.PopulateStatus(ctx, status); err != nil {
log.Errorf(ctx, "error populating existing status %s: %v", uriStr, err)
}
}
// Acquire per-URI deref lock, wrapping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d.state.FedLocks.Lock(uriStr)
unlock = doOnce(unlock)
defer unlock()
// Perform status enrichment with passed vars.
latest, apubStatus, err := d.enrichStatus(ctx,
requestUser,
uri,
status,
apubStatus,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
status.FetchedAt = time.Now()
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
}
// Unlock now
// we're done.
unlock()
if errors.Is(err, db.ErrAlreadyExists) {
// Ensure AP model isn't set,
// otherwise this indicates WE
// enriched the status.
apubStatus = nil
// DATA RACE! We likely lost out to another goroutine
// in a call to db.Put(Status). Look again in DB by URI.
latest, err = d.state.DB.GetStatusByURI(ctx, status.URI)
if err != nil {
err = gtserror.Newf("error getting status %s from database after race: %w", uriStr, err)
}
}
return latest, apubStatus, err
}
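
Even with per-URI locks, two goroutines can still enter enrichment under different lock keys that resolve to the same canonical record (for example the faux user+domain key used before a real URI is known), so a unique-constraint conflict on insert remains possible; the ErrAlreadyExists branch above recovers by re-reading the winning row. A self-contained toy sketch of that insert-or-reread pattern (all names hypothetical):

package main

import (
	"errors"
	"fmt"
)

// errAlreadyExists stands in for db.ErrAlreadyExists.
var errAlreadyExists = errors.New("already exists")

var store = map[string]string{} // uri -> row

func put(uri, v string) error {
	if _, ok := store[uri]; ok {
		return errAlreadyExists
	}
	store[uri] = v
	return nil
}

// putOrReread inserts, and on a uniqueness conflict fetches the
// winner's row and treats it as the latest version.
func putOrReread(uri, v string) (string, error) {
	if err := put(uri, v); err != nil {
		if errors.Is(err, errAlreadyExists) {
			return store[uri], nil // lost the race; winner's row is latest
		}
		return "", err
	}
	return v, nil
}

func main() {
	_, _ = putOrReread("https://example.org/users/alice", "first")
	latest, _ := putOrReread("https://example.org/users/alice", "second")
	fmt.Println(latest) // "first": the earlier insert won
}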
// enrichStatus will enrich the given status, whether a new
// barebones model, or existing model from the database.
// It handles necessary dereferencing, database updates, etc.
@ -258,16 +318,11 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.New("attributedTo was empty")
}
// Ensure we have the author account of the status dereferenced (+ up-to-date).
if author, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil {
if status.AccountID == "" {
// Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable.
// Ensure we have the author account of the status dereferenced (+ up-to-date). If this is a new status
// (i.e. status.AccountID == "") then any error here is irrecoverable. AccountID must ALWAYS be set.
if _, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil && status.AccountID == "" {
return nil, nil, gtserror.Newf("failed to dereference status author %s: %w", uri, err)
}
} else if status.AccountID != "" && status.AccountID != author.ID {
// There already existed an account for this status author, but account ID changed. This shouldn't happen!
log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID)
}
// ActivityPub model was recently dereferenced, so assume that passed status
// may contain out-of-date information, convert AP model to our GTS model.
@ -303,7 +358,7 @@ func (d *Dereferencer) enrichStatus(
}
// Ensure the status' tags are populated, (changes are expected / okay).
if err := d.fetchStatusTags(ctx, latestStatus); err != nil {
if err := d.fetchStatusTags(ctx, status, latestStatus); err != nil {
return nil, nil, gtserror.Newf("error populating tags for status %s: %w", uri, err)
}
@ -323,13 +378,6 @@ func (d *Dereferencer) enrichStatus(
//
// This is new, put the status in the database.
err := d.state.DB.PutStatus(ctx, latestStatus)
if errors.Is(err, db.ErrAlreadyExists) {
// TODO: replace this quick fix with per-URI deref locks.
latestStatus, err = d.state.DB.GetStatusByURI(ctx, latestStatus.URI)
return latestStatus, nil, err
}
if err != nil {
return nil, nil, gtserror.Newf("error putting in database: %w", err)
}
@ -545,36 +593,41 @@ func (d *Dereferencer) threadStatus(ctx context.Context, status *gtsmodel.Status
return nil
}
func (d *Dereferencer) fetchStatusTags(ctx context.Context, status *gtsmodel.Status) error {
func (d *Dereferencer) fetchStatusTags(ctx context.Context, existing, status *gtsmodel.Status) error {
// Allocate new slice to take the yet-to-be determined tag IDs.
status.TagIDs = make([]string, len(status.Tags))
for i := range status.Tags {
placeholder := status.Tags[i]
tag := status.Tags[i]
// Look for existing tag with this name first.
tag, err := d.state.DB.GetTagByName(ctx, placeholder.Name)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
log.Errorf(ctx, "db error getting tag %s: %v", tag.Name, err)
// Look for tag in existing status with name.
existing, ok := existing.GetTagByName(tag.Name)
if ok && existing.ID != "" {
status.Tags[i] = existing
status.TagIDs[i] = existing.ID
continue
}
if tag == nil {
// Create new ID for tag name.
tag = &gtsmodel.Tag{
ID: id.NewULID(),
Name: placeholder.Name,
// Look for existing tag with name in the database.
existing, err := d.state.DB.GetTagByName(ctx, tag.Name)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("db error getting tag %s: %w", tag.Name, err)
} else if existing != nil {
status.Tags[i] = existing
status.TagIDs[i] = existing.ID
continue
}
// Create new ID for tag.
tag.ID = id.NewULID()
// Insert this tag with new name into the database.
if err := d.state.DB.PutTag(ctx, tag); err != nil {
log.Errorf(ctx, "db error putting tag %s: %v", tag.Name, err)
continue
}
}
// Set the *new* tag and ID.
status.Tags[i] = tag
// Set new tag ID in slice.
status.TagIDs[i] = tag.ID
}
@ -600,10 +653,10 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
status.AttachmentIDs = make([]string, len(status.Attachments))
for i := range status.Attachments {
placeholder := status.Attachments[i]
attachment := status.Attachments[i]
// Look for existing media attachment with remote URL first.
existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL)
existing, ok := existing.GetAttachmentByRemoteURL(attachment.RemoteURL)
if ok && existing.ID != "" && *existing.Cached {
status.Attachments[i] = existing
status.AttachmentIDs[i] = existing.ID
@ -611,9 +664,9 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
}
// Ensure a valid media attachment remote URL.
remoteURL, err := url.Parse(placeholder.RemoteURL)
remoteURL, err := url.Parse(attachment.RemoteURL)
if err != nil {
log.Errorf(ctx, "invalid remote media url %q: %v", placeholder.RemoteURL, err)
log.Errorf(ctx, "invalid remote media url %q: %v", attachment.RemoteURL, err)
continue
}
@ -622,9 +675,9 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
return tsport.DereferenceMedia(ctx, remoteURL)
}, status.AccountID, &media.AdditionalMediaInfo{
StatusID: &status.ID,
RemoteURL: &placeholder.RemoteURL,
Description: &placeholder.Description,
Blurhash: &placeholder.Blurhash,
RemoteURL: &attachment.RemoteURL,
Description: &attachment.Description,
Blurhash: &attachment.Blurhash,
})
if err != nil {
log.Errorf(ctx, "error processing attachment: %v", err)
@ -632,15 +685,15 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
}
// Force attachment loading *right now*.
media, err := processing.LoadAttachment(ctx)
attachment, err = processing.LoadAttachment(ctx)
if err != nil {
log.Errorf(ctx, "error loading attachment: %v", err)
continue
}
// Set the *new* attachment and ID.
status.Attachments[i] = media
status.AttachmentIDs[i] = media.ID
status.Attachments[i] = attachment
status.AttachmentIDs[i] = attachment.ID
}
for i := 0; i < len(status.AttachmentIDs); {

View file

@ -16,3 +16,14 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package dereferencing
// doOnce wraps a function to only perform it once.
// (Note: the once flag is not accessed atomically; the wrapped
// unlock funcs in this package are only ever called from the
// goroutine that acquired the lock, via defer and/or an early call.)
func doOnce(fn func()) func() {
var once int32
return func() {
if once == 0 {
fn()
once = 1
}
}
}
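
For illustration, the call pattern used throughout this commit (hypothetical function, using doOnce above): the same unlock is deferred as a panic safety net AND called early for granular unlocking, with the second invocation a no-op.

// exampleUnlockPattern is illustrative only, not part of the diff.
func exampleUnlockPattern(lock func(string) func()) {
	unlock := lock("https://example.org/users/alice")
	unlock = doOnce(unlock)
	defer unlock() // safety net: runs even on panic

	// ... work that must hold the per-URI lock ...

	unlock() // early release; the deferred call now does nothing

	// ... work that no longer needs the lock ...
}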

View file

@ -20,7 +20,6 @@ package federatingdb
import (
"context"
"codeberg.org/gruf/go-mutexes"
"github.com/superseriousbusiness/activity/pub"
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/state"
@ -40,7 +39,6 @@ type DB interface {
// FederatingDB uses the underlying DB interface to implement the go-fed pub.Database interface.
// It doesn't care what the underlying implementation of the DB interface is, as long as it works.
type federatingDB struct {
locks mutexes.MutexMap
state *state.State
converter *typeutils.Converter
}
@ -48,7 +46,6 @@ type federatingDB struct {
// New returns a DB interface using the given database and config
func New(state *state.State, converter *typeutils.Converter) DB {
fdb := federatingDB{
locks: mutexes.NewMap(-1, -1), // use defaults
state: state,
converter: converter,
}

View file

@ -19,7 +19,6 @@ package federatingdb
import (
"context"
"errors"
"net/url"
)
@ -35,9 +34,5 @@ import (
//
// Used to ensure race conditions in multiple requests do not occur.
func (f *federatingDB) Lock(c context.Context, id *url.URL) (func(), error) {
if id == nil {
return nil, errors.New("Lock: id was nil")
}
unlock := f.locks.Lock(id.String())
return unlock, nil
return f.state.FedLocks.Lock("federatingDB " + id.String()), nil // id should NEVER be nil.
}
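
Note the "federatingDB " key prefix: go-fed's Lock calls now share State{}.FedLocks with the dereferencer, and the prefix puts them on separate keys, so a go-fed lock on a URI cannot contend or deadlock with a dereferencer already holding that same URI. A short sketch of the idea (example URI):

package main

import "codeberg.org/gruf/go-mutexes"

func main() {
	var fedLocks mutexes.MutexMap

	uri := "https://example.org/users/alice"
	unlockDeref := fedLocks.Lock(uri)                   // dereferencer's key
	unlockFedDB := fedLocks.Lock("federatingDB " + uri) // different key; no contention
	unlockFedDB()
	unlockDeref()
}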

View file

@ -18,11 +18,8 @@
package gtsmodel
import (
"time"
"slices"
"github.com/superseriousbusiness/gotosocial/internal/log"
"time"
)
// Status represents a user-created 'post' or 'status' in the database, either remote or local
@ -91,40 +88,14 @@ func (s *Status) GetBoostOfAccountID() string {
return s.BoostOfAccountID
}
func (s *Status) GetAttachmentByID(id string) (*MediaAttachment, bool) {
for _, media := range s.Attachments {
if media == nil {
log.Warnf(nil, "nil attachment in slice for status %s", s.URI)
continue
}
if media.ID == id {
return media, true
}
}
return nil, false
}
func (s *Status) GetAttachmentByRemoteURL(url string) (*MediaAttachment, bool) {
for _, media := range s.Attachments {
if media == nil {
log.Warnf(nil, "nil attachment in slice for status %s", s.URI)
continue
}
if media.RemoteURL == url {
return media, true
}
}
return nil, false
}
// AttachmentsPopulated returns whether media attachments are populated according to current AttachmentIDs.
func (s *Status) AttachmentsPopulated() bool {
if len(s.AttachmentIDs) != len(s.Attachments) {
// this is the quickest indicator.
return false
}
for _, id := range s.AttachmentIDs {
if _, ok := s.GetAttachmentByID(id); !ok {
for i, id := range s.AttachmentIDs {
if s.Attachments[i].ID != id {
return false
}
}
@ -137,55 +108,22 @@ func (s *Status) TagsPopulated() bool {
// this is the quickest indicator.
return false
}
// Tags must be in same order.
for i, id := range s.TagIDs {
if s.Tags[i] == nil {
log.Warnf(nil, "nil tag in slice for status %s", s.URI)
continue
}
if s.Tags[i].ID != id {
return false
}
}
return true
}
func (s *Status) GetMentionByID(id string) (*Mention, bool) {
for _, mention := range s.Mentions {
if mention == nil {
log.Warnf(nil, "nil mention in slice for status %s", s.URI)
continue
}
if mention.ID == id {
return mention, true
}
}
return nil, false
}
func (s *Status) GetMentionByTargetURI(uri string) (*Mention, bool) {
for _, mention := range s.Mentions {
if mention == nil {
log.Warnf(nil, "nil mention in slice for status %s", s.URI)
continue
}
if mention.TargetAccountURI == uri {
return mention, true
}
}
return nil, false
}
// MentionsPopulated returns whether mentions are populated according to current MentionIDs.
func (s *Status) MentionsPopulated() bool {
if len(s.MentionIDs) != len(s.Mentions) {
// this is the quickest indicator.
return false
}
for _, id := range s.MentionIDs {
if _, ok := s.GetMentionByID(id); !ok {
for i, id := range s.MentionIDs {
if s.Mentions[i].ID != id {
return false
}
}
@ -198,18 +136,11 @@ func (s *Status) EmojisPopulated() bool {
// this is the quickest indicator.
return false
}
// Emojis must be in same order.
for i, id := range s.EmojiIDs {
if s.Emojis[i] == nil {
log.Warnf(nil, "nil emoji in slice for status %s", s.URI)
continue
}
if s.Emojis[i].ID != id {
return false
}
}
return true
}
@ -221,28 +152,44 @@ func (s *Status) EmojisUpToDate(other *Status) bool {
// this is the quickest indicator.
return false
}
// Emojis must be in same order.
for i := range s.Emojis {
if s.Emojis[i] == nil {
log.Warnf(nil, "nil emoji in slice for status %s", s.URI)
return false
}
if other.Emojis[i] == nil {
log.Warnf(nil, "nil emoji in slice for status %s", other.URI)
return false
}
if s.Emojis[i].URI != other.Emojis[i].URI {
// Emoji URI has changed, not up-to-date!
return false
}
}
return true
}
// GetAttachmentByRemoteURL searches status for MediaAttachment{} with remote URL.
func (s *Status) GetAttachmentByRemoteURL(url string) (*MediaAttachment, bool) {
for _, media := range s.Attachments {
if media.RemoteURL == url {
return media, true
}
}
return nil, false
}
// GetMentionByTargetURI searches status for Mention{} with target URI.
func (s *Status) GetMentionByTargetURI(uri string) (*Mention, bool) {
for _, mention := range s.Mentions {
if mention.TargetAccountURI == uri {
return mention, true
}
}
return nil, false
}
// GetTagByName searches status for Tag{} with name.
func (s *Status) GetTagByName(name string) (*Tag, bool) {
for _, tag := range s.Tags {
if tag.Name == name {
return tag, true
}
}
return nil, false
}
// MentionsAccount returns whether status mentions the given account ID.
func (s *Status) MentionsAccount(accountID string) bool {
return slices.ContainsFunc(s.Mentions, func(m *Mention) bool {

View file

@ -18,6 +18,7 @@
package state
import (
"codeberg.org/gruf/go-mutexes"
"github.com/superseriousbusiness/gotosocial/internal/cache"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/storage"
@ -41,6 +42,11 @@ type State struct {
// DB provides access to the database.
DB db.DB
// FedLocks provides access to this state's mutex map
// of per URI federation locks. Used during dereferencing
// and by the go-fed/activity library.
FedLocks mutexes.MutexMap
// Storage provides access to the storage driver.
Storage *storage.Driver

View file

@ -1,9 +0,0 @@
MIT License
Copyright (c) 2021 gruf
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1 +0,0 @@
Alternative path library with a `strings.Builder` like path builder.

Binary file not shown (removed; 106 KiB).

View file

@ -1,319 +0,0 @@
package fastpath
import (
"unsafe"
)
// allocate these just once
var (
dot = []byte(dotStr)
dotStr = "."
)
type Builder struct {
B []byte // B is the underlying byte buffer
dd int // pos of last '..' appended to builder
abs bool // abs stores whether path passed to first .Append() is absolute
set bool // set stores whether b.abs has been set i.e. not first call to .Append()
}
// NewBuilder returns a new Builder object using the
// supplied byte slice as the underlying buffer
func NewBuilder(b []byte) Builder {
if b != nil {
b = b[:0]
}
return Builder{
B: b,
dd: 0,
abs: false,
set: false,
}
}
// Reset resets the Builder object
func (b *Builder) Reset() {
b.B = b.B[:0]
b.dd = 0
b.abs = false
b.set = false
}
// Len returns the number of accumulated bytes in the Builder
func (b *Builder) Len() int {
return len(b.B)
}
// Cap returns the capacity of the underlying Builder buffer
func (b *Builder) Cap() int {
return cap(b.B)
}
// Bytes returns the accumulated path bytes.
func (b *Builder) Bytes() []byte {
if len(b.B) < 1 {
return dot
}
return b.B
}
// String returns the accumulated path string.
func (b *Builder) String() string {
if len(b.B) < 1 {
return dotStr
}
return string(b.B)
}
// StringPtr returns a ptr to the accumulated path string.
//
// Please note the underlying byte slice for this string is
// tied to the builder, so any changes will result in the
// returned string changing. Consider using .String() if
// this is undesired behaviour.
func (b *Builder) StringPtr() string {
if len(b.B) < 1 {
return dotStr
}
return *(*string)(unsafe.Pointer(&b.B))
}
// Absolute returns whether current path is absolute (not relative)
func (b *Builder) Absolute() bool {
return b.abs
}
// SetAbsolute converts the current path to / from absolute
func (b *Builder) SetAbsolute(val bool) {
if !b.set {
if val {
// .Append() has not been
// called, add a '/' and set abs
b.Guarantee(1)
b.appendByte('/')
b.abs = true
}
// Set as having been set
b.set = true
return
}
if !val && b.abs {
// Already set and absolute. Update
b.abs = false
// If not empty (i.e. not just '/'),
// then shift bytes 1 left
if len(b.B) > 1 {
copy(b.B, b.B[1:])
}
// Truncate 1 byte. In the case of empty,
// i.e. just '/' then it will drop this
b.truncate(1)
} else if val && !b.abs {
// Already set but NOT abs. Update
b.abs = true
// Guarantee 1 byte available
b.Guarantee(1)
// If empty, just append '/'
if len(b.B) < 1 {
b.appendByte('/')
return
}
// Increase length
l := len(b.B)
b.B = b.B[:l+1]
// Shift bytes 1 right
copy(b.B[1:], b.B[:l])
// Set first byte '/'
b.B[0] = '/'
}
}
// Append adds and cleans the supplied path bytes to the
// builder's internal buffer, growing the buffer if necessary
// to accomodate the extra path length
func (b *Builder) Append(p []byte) {
b.AppendString(*(*string)(unsafe.Pointer(&p)))
}
// AppendString adds and cleans the supplied path string to the
// builder's internal buffer, growing the buffer if necessary
// to accomodate the extra path length
func (b *Builder) AppendString(path string) {
defer func() {
// If buffer is empty, and an absolute
// path, ensure it starts with a '/'
if len(b.B) < 1 && b.abs {
b.appendByte('/')
}
}()
// Empty path, nothing to do
if len(path) == 0 {
return
}
// Guarantee at least the total length
// of supplied path available in the buffer
b.Guarantee(len(path))
// Try store if absolute
if !b.set {
b.abs = len(path) > 0 && path[0] == '/'
b.set = true
}
i := 0
for i < len(path) {
switch {
// Empty path segment
case path[i] == '/':
i++
// Singular '.' path segment, treat as empty
case path[i] == '.' && (i+1 == len(path) || path[i+1] == '/'):
i++
// Backtrack segment
case path[i] == '.' && path[i+1] == '.' && (i+2 == len(path) || path[i+2] == '/'):
i += 2
switch {
// Check if it's possible to backtrack with
// our current state of the buffer. i.e. is
// our buffer length longer than the last
// '..' we placed?
case len(b.B) > b.dd:
b.backtrack()
// b.cp = b.lp
// b.lp = 0
// If we reached here, need to check if
// we can append '..' to the path buffer,
// which is ONLY when path is NOT absolute
case !b.abs:
if len(b.B) > 0 {
b.appendByte('/')
}
b.appendByte('.')
b.appendByte('.')
b.dd = len(b.B)
// b.lp = lp - 2
// b.cp = b.dd
}
default:
if (b.abs && len(b.B) != 1) || (!b.abs && len(b.B) > 0) {
b.appendByte('/')
}
// b.lp = b.cp
// b.cp = len(b.B)
i += b.appendSlice(path[i:])
}
}
}
// Clean creates the shortest possible functional equivalent
// to the supplied path, resetting the builder before performing
// this operation. The builder object is NOT reset after return
func (b *Builder) Clean(path string) string {
b.Reset()
b.AppendString(path)
return b.String()
}
// Join connects and cleans multiple paths, resetting the builder before
// performing this operation and returning the shortest possible combination
// of all the supplied paths. The builder object is NOT reset after return
func (b *Builder) Join(base string, paths ...string) string {
b.Reset()
b.AppendString(base)
size := len(base)
for i := 0; i < len(paths); i++ {
b.AppendString(paths[i])
size += len(paths[i])
}
if size < 1 {
return ""
} else if len(b.B) < 1 {
return dotStr
}
return string(b.B)
}
// Guarantee ensures there is at least the requested size
// free bytes available in the buffer, reallocating if necessary
func (b *Builder) Guarantee(size int) {
if size > cap(b.B)-len(b.B) {
nb := make([]byte, 2*cap(b.B)+size)
copy(nb, b.B)
b.B = nb[:len(b.B)]
}
}
// Truncate reduces the length of the buffer by the requested
// number of bytes. If the builder is set to absolute, the first
// byte (i.e. '/') will never be truncated
func (b *Builder) Truncate(size int) {
// If absolute and just '/', do nothing
if b.abs && len(b.B) == 1 {
return
}
// Truncate requested bytes
b.truncate(size)
}
// truncate reduces the length of the buffer by the requested
// size, no sanity checks are performed
func (b *Builder) truncate(size int) {
b.B = b.B[:len(b.B)-size]
}
// appendByte appends the supplied byte to the end of
// the buffer. appending is achieved by continually reslicing the
// buffer and setting the next byte-at-index, this is safe as guarantee()
// will have been called beforehand
func (b *Builder) appendByte(c byte) {
b.B = b.B[:len(b.B)+1]
b.B[len(b.B)-1] = c
}
// appendSlice appends the supplied string slice to
// the end of the buffer and returns the number of indices
// we were able to iterate before hitting a path separator '/'.
// appending is achieved by continually reslicing the buffer
// and setting the next byte-at-index, this is safe as guarantee()
// will have been called beforehand
func (b *Builder) appendSlice(slice string) int {
i := 0
for i < len(slice) && slice[i] != '/' {
b.B = b.B[:len(b.B)+1]
b.B[len(b.B)-1] = slice[i]
i++
}
return i
}
// backtrack reduces the end of the buffer back to the last
// separating '/', or end of buffer
func (b *Builder) backtrack() {
b.B = b.B[:len(b.B)-1]
for len(b.B)-1 > b.dd && b.B[len(b.B)-1] != '/' {
b.B = b.B[:len(b.B)-1]
}
if len(b.B) > 0 {
b.B = b.B[:len(b.B)-1]
}
}

View file

@ -1,9 +0,0 @@
MIT License
Copyright (c) 2021 gruf
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -1 +0,0 @@
HashEncoder provides a means of quickly hash-summing and encoding data

View file

@ -1,42 +0,0 @@
package hashenc
import (
"encoding/base32"
"encoding/base64"
"encoding/hex"
)
// Encoder defines an interface for encoding binary data.
type Encoder interface {
// Encode encodes the data at src into dst
Encode(dst []byte, src []byte)
// EncodedLen returns the encoded length for input data of supplied length
EncodedLen(int) int
}
// Base32 returns a new base32 Encoder (StdEncoding, no padding).
func Base32() Encoder {
return base32.StdEncoding.WithPadding(base64.NoPadding)
}
// Base64 returns a new base64 Encoder (URLEncoding, no padding).
func Base64() Encoder {
return base64.URLEncoding.WithPadding(base64.NoPadding)
}
// Hex returns a new hex Encoder.
func Hex() Encoder {
return &hexEncoder{}
}
// hexEncoder simply provides an empty receiver to satisfy Encoder.
type hexEncoder struct{}
func (*hexEncoder) Encode(dst []byte, src []byte) {
hex.Encode(dst, src)
}
func (*hexEncoder) EncodedLen(len int) int {
return hex.EncodedLen(len)
}

View file

@ -1,58 +0,0 @@
package hashenc
import (
"hash"
"codeberg.org/gruf/go-bytes"
)
// HashEncoder defines an interface for calculating encoded hash sums of binary data
type HashEncoder interface {
// EncodeSum calculates the hash sum of src and encodes (at most) Size() into dst
EncodeSum(dst []byte, src []byte)
// EncodedSum calculates the encoded hash sum of src and returns data in a newly allocated bytes.Bytes
EncodedSum(src []byte) bytes.Bytes
// Size returns the expected length of encoded hashes
Size() int
}
// New returns a new HashEncoder instance based on supplied hash.Hash and Encoder supplying functions.
func New(hash hash.Hash, enc Encoder) HashEncoder {
hashSize := hash.Size()
return &henc{
hash: hash,
hbuf: make([]byte, hashSize),
enc: enc,
size: enc.EncodedLen(hashSize),
}
}
// henc is the HashEncoder implementation.
type henc struct {
hash hash.Hash
hbuf []byte
enc Encoder
size int
}
func (henc *henc) EncodeSum(dst []byte, src []byte) {
// Hash supplied bytes
henc.hash.Reset()
henc.hash.Write(src)
henc.hbuf = henc.hash.Sum(henc.hbuf[:0])
// Encode the hashsum and return a copy
henc.enc.Encode(dst, henc.hbuf)
}
func (henc *henc) EncodedSum(src []byte) bytes.Bytes {
dst := make([]byte, henc.size)
henc.EncodeSum(dst, src)
return bytes.ToBytes(dst)
}
func (henc *henc) Size() int {
return henc.size
}

View file

@ -12,6 +12,14 @@ func (r ReaderFunc) Read(b []byte) (int, error) {
return r(b)
}
// ReaderFromFunc is a function signature which allows
// a function to implement the io.ReaderFrom type.
type ReaderFromFunc func(io.Reader) (int64, error)
func (rf ReaderFromFunc) ReadFrom(r io.Reader) (int64, error) {
return rf(r)
}
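
ReaderFromFunc (and its WriterToFunc counterpart below) follows the same shape as ReaderFunc above: a bare function satisfies the interface. A minimal sketch:

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"codeberg.org/gruf/go-iotools"
)

func main() {
	var buf bytes.Buffer

	// A plain function becomes an io.ReaderFrom.
	var rf io.ReaderFrom = iotools.ReaderFromFunc(func(r io.Reader) (int64, error) {
		return io.Copy(&buf, r)
	})

	_, _ = rf.ReadFrom(strings.NewReader("hello"))
	fmt.Println(buf.String()) // "hello"
}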
// ReadCloser wraps an io.Reader and io.Closer in order to implement io.ReadCloser.
func ReadCloser(r io.Reader, c io.Closer) io.ReadCloser {
return &struct {

View file

@ -10,6 +10,14 @@ func (w WriterFunc) Write(b []byte) (int, error) {
return w(b)
}
// WriterToFunc is a function signature which allows
// a function to implement the io.WriterTo type.
type WriterToFunc func(io.Writer) (int64, error)
func (wt WriterToFunc) WriteTo(r io.Writer) (int64, error) {
return wt(r)
}
// WriteCloser wraps an io.Writer and io.Closer in order to implement io.WriteCloser.
func WriteCloser(w io.Writer, c io.Closer) io.WriteCloser {
return &struct {

View file

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2021 gruf
Copyright (c) gruf
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

View file

@ -1,9 +1,8 @@
package mutexes
import (
"runtime"
"sync"
"sync/atomic"
"unsafe"
)
const (
@ -12,452 +11,253 @@ const (
lockTypeWrite = uint8(1) << 1
lockTypeMap = uint8(1) << 2
// possible mutexmap states.
stateUnlockd = uint8(0)
stateRLocked = uint8(1)
stateLocked = uint8(2)
stateInUse = uint8(3)
// default values.
defaultWake = 1024
// frequency of GC cycles
// per no. unlocks. i.e.
// every 'gcfreq' unlocks.
gcfreq = 1024
)
// acquireState attempts to acquire required map state for lockType.
func acquireState(state uint8, lt uint8) (uint8, bool) {
switch state {
// Unlocked state
// (all allowed)
case stateUnlockd:
// Keys locked, no state lock.
// (don't allow map locks)
case stateInUse:
if lt&lockTypeMap != 0 {
return 0, false
}
// Read locked
// (only allow read locks)
case stateRLocked:
if lt&lockTypeRead == 0 {
return 0, false
}
// Write locked
// (none allowed)
case stateLocked:
return 0, false
// shouldn't reach here
default:
panic("unexpected state")
}
switch {
// If unlocked and not a map
// lock request, set in use
case lt&lockTypeMap == 0:
if state == stateUnlockd {
state = stateInUse
}
// Set read lock state
case lt&lockTypeRead != 0:
state = stateRLocked
// Set write lock state
case lt&lockTypeWrite != 0:
state = stateLocked
default:
panic("unexpected lock type")
}
return state, true
}
// MutexMap is a structure that allows read / write locking key, performing
// as you'd expect a map[string]*sync.RWMutex to perform. The differences
// being that the entire map can itself be read / write locked, it uses memory
// pooling for the mutex (not quite) structures, and it is self-evicting. The
// core configurations of maximum no. open locks and wake modulus* are user
// definable.
// MutexMap is a structure that allows read / write locking
// per key, performing as you'd expect a map[string]*RWMutex
// to perform, without you needing to worry about deadlocks
// between competing read / write locks and the map's own mutex.
// It uses memory pooling for the internal "mutex" (ish) types
// and performs self-eviction of keys.
//
// * The wake modulus is the number that the current number of open locks is
// modulused against to determine how often to notify sleeping goroutines.
// These are goroutines that are attempting to lock a key / whole map and are
// awaiting a permissible state (.e.g no key write locks allowed when the
// map is read locked).
// Under the hood this is achieved using a single mutex for the
// map, state tracking for individual keys, and some simple waitgroup
// type structures to park / block goroutines waiting for keys.
type MutexMap struct {
queue *sync.WaitGroup
qucnt int32
mumap map[string]*rwmutex
mpool pool
evict []*rwmutex
count int32
maxmu int32
wake int32
mapmu sync.Mutex
state uint8
mumap map[string]*rwmutexish
mupool rwmutexPool
count uint32
}
// NewMap returns a new MutexMap instance with provided max no. open mutexes.
func NewMap(max, wake int32) MutexMap {
// Determine wake mod.
if wake < 1 {
wake = defaultWake
}
// Determine max no. mutexes
if max < 1 {
procs := runtime.GOMAXPROCS(0)
max = wake * int32(procs)
}
return MutexMap{
queue: &sync.WaitGroup{},
mumap: make(map[string]*rwmutex, max),
maxmu: max,
wake: wake,
// checkInit ensures MutexMap is initialized (UNSAFE).
func (mm *MutexMap) checkInit() {
if mm.mumap == nil {
mm.mumap = make(map[string]*rwmutexish)
}
}
// SET sets the MutexMap max open locks and wake modulus, returns current values.
// For values less than zero defaults are set, and zero is non-op.
func (mm *MutexMap) SET(max, wake int32) (int32, int32) {
mm.mapmu.Lock()
switch {
// Set default wake
case wake < 0:
mm.wake = defaultWake
// Set supplied wake
case wake > 0:
mm.wake = wake
}
switch {
// Set default max
case max < 0:
procs := runtime.GOMAXPROCS(0)
mm.maxmu = wake * int32(procs)
// Set supplied max
case max > 0:
mm.maxmu = max
}
// Fetch values
max = mm.maxmu
wake = mm.wake
mm.mapmu.Unlock()
return max, wake
}
// spinLock will wait (using a mutex to sleep thread) until conditional returns true.
func (mm *MutexMap) spinLock(cond func() bool) {
for {
// Acquire map lock
mm.mapmu.Lock()
if cond() {
return
}
// Current queue ptr
queue := mm.queue
// Queue ourselves
queue.Add(1)
mm.qucnt++
// Unlock map
mm.mapmu.Unlock()
// Wait on notify
mm.queue.Wait()
}
}
// lock will acquire a lock of given type on the 'mutex' at key.
func (mm *MutexMap) lock(key string, lt uint8) func() {
var ok bool
var mu *rwmutex
// Spin lock until returns true
mm.spinLock(func() bool {
// Check not overloaded
if !(mm.count < mm.maxmu) {
return false
}
// Attempt to acquire usable map state
state, ok := acquireState(mm.state, lt)
if !ok {
return false
}
// Update state
mm.state = state
// Ensure mutex at key
// is in lockable state
mu, ok = mm.mumap[key]
return !ok || mu.CanLock(lt)
})
// Incr count
mm.count++
if !ok {
// No mutex found for key
// Alloc mu from pool
mu = mm.mpool.Acquire()
mm.mumap[key] = mu
// Set our key
mu.key = key
// Queue for eviction
mm.evict = append(mm.evict, mu)
}
// Lock mutex
mu.Lock(lt)
// Unlock map
mm.mapmu.Unlock()
return func() {
mm.mapmu.Lock()
mu.Unlock()
mm.cleanup()
}
}
// lockMap will lock the whole map under given lock type.
func (mm *MutexMap) lockMap(lt uint8) {
// Spin lock until returns true
mm.spinLock(func() bool {
// Attempt to acquire usable map state
state, ok := acquireState(mm.state, lt)
if !ok {
return false
}
// Update state
mm.state = state
return true
})
// Incr count
mm.count++
// State acquired, unlock
mm.mapmu.Unlock()
}
// cleanup is performed as the final stage of unlocking a locked key / map state; it finally unlocks the map.
func (mm *MutexMap) cleanup() {
// Decr count
mm.count--
// Calculate current wake modulus
wakemod := mm.count % mm.wake
if mm.count != 0 && wakemod != 0 {
// Fast path => no cleanup.
// Unlock, return early
mm.mapmu.Unlock()
return
}
go func() {
if wakemod == 0 {
// Release queued goroutines
mm.queue.Add(-int(mm.qucnt))
// Allocate new queue and reset
mm.queue = &sync.WaitGroup{}
mm.qucnt = 0
}
if mm.count == 0 {
// Perform evictions
for _, mu := range mm.evict {
key := mu.key
mu.key = ""
delete(mm.mumap, key)
mm.mpool.Release(mu)
}
// Reset map state
mm.evict = mm.evict[:0]
mm.state = stateUnlockd
mm.mpool.GC()
}
// Unlock map
mm.mapmu.Unlock()
}()
}
// RLockMap acquires a read lock over the entire map, returning a lock state for acquiring key read locks.
// Please note that the 'unlock()' function will block until all keys locked from this state are unlocked.
func (mm *MutexMap) RLockMap() *LockState {
mm.lockMap(lockTypeRead | lockTypeMap)
return &LockState{
mmap: mm,
ltyp: lockTypeRead,
}
}
// LockMap acquires a write lock over the entire map, returning a lock state for acquiring key read/write locks.
// Please note that the 'unlock()' function will block until all keys locked from this state are unlocked.
func (mm *MutexMap) LockMap() *LockState {
mm.lockMap(lockTypeWrite | lockTypeMap)
return &LockState{
mmap: mm,
ltyp: lockTypeWrite,
}
}
// RLock acquires a mutex read lock for supplied key, returning an RUnlock function.
func (mm *MutexMap) RLock(key string) (runlock func()) {
	return mm.lock(key, lockTypeRead)
}

// Lock acquires a mutex write lock for supplied key, returning an Unlock function.
func (mm *MutexMap) Lock(key string) (unlock func()) {
	return mm.lock(key, lockTypeWrite)
}

// LockState represents a window to a locked MutexMap.
type LockState struct {
	wait sync.WaitGroup
	mmap *MutexMap
	done uint32
	ltyp uint8
}

// Lock: see MutexMap.Lock() definition. Will panic if map only read locked.
func (st *LockState) Lock(key string) (unlock func()) {
	return st.lock(key, lockTypeWrite)
}

// RLock: see MutexMap.RLock() definition.
func (st *LockState) RLock(key string) (runlock func()) {
	return st.lock(key, lockTypeRead)
}

// lock: see MutexMap.lock() definition.
func (st *LockState) lock(key string, lt uint8) func() {
	st.wait.Add(1) // track lock

	if atomic.LoadUint32(&st.done) == 1 {
		panic("called (r)lock on unlocked state")
	} else if lt&lockTypeWrite != 0 &&
		st.ltyp&lockTypeWrite == 0 {
		panic("called lock on rlocked map")
	}

	var ok bool
	var mu *rwmutex

	// Spin lock until returns true
	st.mmap.spinLock(func() bool {
		// Check not overloaded
		if !(st.mmap.count < st.mmap.maxmu) {
			return false
		}

		// Ensure mutex at key
		// is in lockable state
		mu, ok = st.mmap.mumap[key]
		return !ok || mu.CanLock(lt)
	})

	// Incr count
	st.mmap.count++

	if !ok {
		// No mutex found for key
		// Alloc mu from pool
		mu = st.mmap.mpool.Acquire()
		st.mmap.mumap[key] = mu

		// Set our key
		mu.key = key

		// Queue for eviction
		st.mmap.evict = append(st.mmap.evict, mu)
	}

	// Lock mutex
	mu.Lock(lt)

	// Unlock map
	st.mmap.mapmu.Unlock()

	return func() {
		st.mmap.mapmu.Lock()
		mu.Unlock()
		st.mmap.cleanup()
		st.wait.Add(-1)
	}
}

// UnlockMap will close this state and release the currently locked map.
func (st *LockState) UnlockMap() {
	if !atomic.CompareAndSwapUint32(&st.done, 0, 1) {
		panic("called unlockmap on expired state")
	}
	st.wait.Wait()
	st.mmap.mapmu.Lock()
	st.mmap.cleanup()
}

// Lock acquires a write lock on key in map, returning unlock function.
func (mm *MutexMap) Lock(key string) func() {
	return mm.lock(key, lockTypeWrite)
}

// RLock acquires a read lock on key in map, returning runlock function.
func (mm *MutexMap) RLock(key string) func() {
	return mm.lock(key, lockTypeRead)
}

func (mm *MutexMap) lock(key string, lt uint8) func() {
	// Perform first map lock
	// and check initialization
	// OUTSIDE the main loop.
	mm.mapmu.Lock()
	mm.checkInit()

	for {
		// Check map for mu.
		mu := mm.mumap[key]

		if mu == nil {
			// Allocate new mutex.
			mu = mm.mupool.Acquire()
			mm.mumap[key] = mu
		}

		if !mu.Lock(lt) {
			// Wait on mutex unlock, after
			// immediately relocking map mu.
			mu.WaitRelock(&mm.mapmu)
			continue
		}

		// Done with map.
		mm.mapmu.Unlock()

		// Return mutex unlock function.
		return func() { mm.unlock(key, mu) }
	}
}

func (mm *MutexMap) unlock(key string, mu *rwmutexish) {
	// Get map lock.
	mm.mapmu.Lock()

	// Unlock mutex.
	if mu.Unlock() {
		// Mutex fully unlocked
		// with zero waiters. Self
		// evict and release it.
		delete(mm.mumap, key)
		mm.mupool.Release(mu)
	}

	if mm.count++; mm.count%gcfreq == 0 {
		// Every 'gcfreq' unlocks perform
		// a garbage collection to keep
		// us squeaky clean :]
		mm.mupool.GC()
	}

	// Done with map.
	mm.mapmu.Unlock()
}
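The loop above gives simple blocking semantics: a second locker of the same key parks in WaitRelock() until the holder's unlock triggers a retry. A sketch (illustrative only, not part of the diff):

	var mm MutexMap
	unlock := mm.Lock("k")

	done := make(chan struct{})
	go func() {
		defer close(done)
		u := mm.Lock("k") // parks until unlock() below
		u()
	}()

	unlock() // wakes the waiter via mu.tr.Trigger()
	<-done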
// rwmutexPool is a very simple memory pool of rwmutexish types.
type rwmutexPool struct {
current []*rwmutexish
victim []*rwmutexish
}
// Acquire will return a rwmutexish from the rwmutexPool (or alloc new).
func (p *rwmutexPool) Acquire() *rwmutexish {
// First try the current queue
if l := len(p.current) - 1; l >= 0 {
mu := p.current[l]
p.current = p.current[:l]
return mu
}
// Next try the victim queue.
if l := len(p.victim) - 1; l >= 0 {
mu := p.victim[l]
p.victim = p.victim[:l]
return mu
}
// Lastly, alloc new.
mu := new(rwmutexish)
return mu
}
// Release places a rwmutexish back in the rwmutexPool.
func (p *rwmutexPool) Release(mu *rwmutexish) {
p.current = append(p.current, mu)
}
// GC will clear out unused entries from the rwmutexPool.
func (p *rwmutexPool) GC() {
current := p.current
p.current = nil
p.victim = current
}
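The current/victim split mirrors sync.Pool's two-generation aging: a released mutex survives one GC() call and is only dropped by the second if still unused. Sketch:

	var p rwmutexPool
	mu := p.Acquire() // pool empty: allocates
	p.Release(mu)     // -> current
	p.GC()            // current -> victim, old victim dropped
	mu2 := p.Acquire() // still reuses mu (from victim)
	_ = mu2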
// rwmutex is a very simple *representation* of a read-write
// mutex, though not one in implementation. it works by
// tracking the lock state for a given map key, which is
// protected by the map's mutex.
type rwmutex struct {
	rcnt int32  // read lock count
	lock uint8  // lock type
	key  string // map key
}

func (mu *rwmutex) CanLock(lt uint8) bool {
	return mu.lock == 0 ||
		(mu.lock&lockTypeRead != 0 && lt&lockTypeRead != 0)
}

func (mu *rwmutex) Lock(lt uint8) {
	// Set lock type
	mu.lock = lt

	if lt&lockTypeRead != 0 {
		// RLock, increment
		mu.rcnt++
	}
}

func (mu *rwmutex) Unlock() {
	if mu.rcnt > 0 {
		// RUnlock
		mu.rcnt--
	}

	if mu.rcnt == 0 {
		// Total unlock
		mu.lock = 0
	}
}

// rwmutexish is a RW mutex (ish), i.e. the representation
// of one, only to be accessed within the protection of the
// outer map mutex.
type rwmutexish struct {
	tr trigger
	ln int32 // no. locks
	wn int32 // no. waiters
	lt uint8 // lock type
}

// Lock will lock the mutex for given lock type, in the
// sense that it will update the internal state tracker
// accordingly. Return value is true on successful lock.
func (mu *rwmutexish) Lock(lt uint8) bool {
	switch mu.lt {
	case lockTypeRead:
		// already read locked,
		// only permit more reads.
		if lt != lockTypeRead {
			return false
		}

	case lockTypeWrite:
		// already write locked,
		// no other locks allowed.
		return false

	default:
		// Fully unlocked.
		mu.lt = lt
	}

	// Update
	// count.
	mu.ln++

	return true
}

// Unlock will unlock the mutex, in the sense that
// it will update the internal state tracker accordingly.
// On any unlock it will awaken sleeping waiting threads.
// Returned boolean is if unlocked=true AND waiters=0.
func (mu *rwmutexish) Unlock() bool {
	var ok bool

	switch mu.ln--; {
	case mu.ln > 0 && mu.lt == lockTypeWrite:
		panic("BUG: multiple writer locks")
	case mu.ln < 0:
		panic("BUG: negative lock count")
	case mu.ln == 0:
		// Fully unlocked.
		mu.lt = 0

		// Only return true
		// with no waiters.
		ok = (mu.wn == 0)
	}

	// Awake all waiting
	// goroutines for mu.
	mu.tr.Trigger()

	return ok
}

// WaitRelock expects a mutex to be passed in already in
// the lock state. It increments the rwmutexish waiter count
// before unlocking the outer mutex and blocking on the internal
// trigger. On awake it will relock the outer mutex and decrement
// the wait count.
func (mu *rwmutexish) WaitRelock(outer *sync.Mutex) {
	mu.wn++
	outer.Unlock()
	mu.tr.Wait()
	outer.Lock()
	mu.wn--
}

// trigger uses the internals of sync.Cond to provide
// a waitgroup type structure (including goroutine parks)
// without such a heavy reliance on a delta value.
type trigger struct{ notifyList }

func (t *trigger) Trigger() {
	runtime_notifyListNotifyAll(&t.notifyList)
}

func (t *trigger) Wait() {
	v := runtime_notifyListAdd(&t.notifyList)
	runtime_notifyListWait(&t.notifyList, v)
}
// Approximation of notifyList in runtime/sema.go.
type notifyList struct {
wait uint32
notify uint32
lock uintptr // key field of the mutex
head unsafe.Pointer
tail unsafe.Pointer
}
// See runtime/sema.go for documentation.
//
//go:linkname runtime_notifyListAdd sync.runtime_notifyListAdd
func runtime_notifyListAdd(l *notifyList) uint32
// See runtime/sema.go for documentation.
//
//go:linkname runtime_notifyListWait sync.runtime_notifyListWait
func runtime_notifyListWait(l *notifyList, t uint32)
// See runtime/sema.go for documentation.
//
//go:linkname runtime_notifyListNotifyAll sync.runtime_notifyListNotifyAll
func runtime_notifyListNotifyAll(l *notifyList)
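For readers who would rather avoid go:linkname, a portable trigger with the same all-waiters wakeup semantics can be sketched on sync.Cond, at the cost of the extra mutex and bookkeeping the comment above alludes to (this equivalent is not part of the diff):

	type condTrigger struct {
		mu  sync.Mutex
		cnd *sync.Cond
		gen uint64 // generation counter
	}

	func newCondTrigger() *condTrigger {
		t := &condTrigger{}
		t.cnd = sync.NewCond(&t.mu)
		return t
	}

	// Wait blocks until the next Trigger call.
	func (t *condTrigger) Wait() {
		t.mu.Lock()
		gen := t.gen
		for gen == t.gen {
			t.cnd.Wait()
		}
		t.mu.Unlock()
	}

	// Trigger wakes all goroutines currently in Wait.
	func (t *condTrigger) Trigger() {
		t.mu.Lock()
		t.gen++
		t.mu.Unlock()
		t.cnd.Broadcast()
	}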
@ -1,39 +0,0 @@
package mutexes
// pool is a very simple memory pool.
type pool struct {
current []*rwmutex
victim []*rwmutex
}
// Acquire will return a rwmutex from the pool (or alloc new).
func (p *pool) Acquire() *rwmutex {
// First try the current queue
if l := len(p.current) - 1; l >= 0 {
mu := p.current[l]
p.current = p.current[:l]
return mu
}
// Next try the victim queue.
if l := len(p.victim) - 1; l >= 0 {
mu := p.victim[l]
p.victim = p.victim[:l]
return mu
}
// Lastly, alloc new.
return &rwmutex{}
}
// Release places a rwmutex back in the pool.
func (p *pool) Release(mu *rwmutex) {
p.current = append(p.current, mu)
}
// GC will clear out unused entries from the pool.
func (p *pool) GC() {
current := p.current
p.current = nil
p.victim = current
}
@ -1,9 +0,0 @@
MIT License
Copyright (c) 2021 gruf
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@ -1,2 +0,0 @@
A selection of type-defined `sync.Pool` implementations with redefined "getter" and "putter"
methods to handle their appropriate types.
@ -1,89 +0,0 @@
package pools
import (
"bufio"
"io"
"sync"
)
// BufioReaderPool is a pooled allocator for bufio.Reader objects.
type BufioReaderPool interface {
// Get fetches a bufio.Reader from pool and resets to supplied reader
Get(io.Reader) *bufio.Reader
// Put places supplied bufio.Reader back in pool
Put(*bufio.Reader)
}
// NewBufioReaderPool returns a newly instantiated bufio.Reader pool.
func NewBufioReaderPool(size int) BufioReaderPool {
return &bufioReaderPool{
pool: sync.Pool{
New: func() interface{} {
return bufio.NewReaderSize(nil, size)
},
},
size: size,
}
}
// bufioReaderPool is our implementation of BufioReaderPool.
type bufioReaderPool struct {
pool sync.Pool
size int
}
func (p *bufioReaderPool) Get(r io.Reader) *bufio.Reader {
br := p.pool.Get().(*bufio.Reader)
br.Reset(r)
return br
}
func (p *bufioReaderPool) Put(br *bufio.Reader) {
if br.Size() < p.size {
return
}
br.Reset(nil)
p.pool.Put(br)
}
// BufioWriterPool is a pooled allocator for bufio.Writer objects.
type BufioWriterPool interface {
// Get fetches a bufio.Writer from pool and resets to supplied writer
Get(io.Writer) *bufio.Writer
// Put places supplied bufio.Writer back in pool
Put(*bufio.Writer)
}
// NewBufioWriterPool returns a newly instantiated bufio.Writer pool.
func NewBufioWriterPool(size int) BufioWriterPool {
return &bufioWriterPool{
pool: sync.Pool{
New: func() interface{} {
return bufio.NewWriterSize(nil, size)
},
},
size: size,
}
}
// bufioWriterPool is our implementation of BufioWriterPool.
type bufioWriterPool struct {
pool sync.Pool
size int
}
func (p *bufioWriterPool) Get(w io.Writer) *bufio.Writer {
bw := p.pool.Get().(*bufio.Writer)
bw.Reset(w)
return bw
}
func (p *bufioWriterPool) Put(bw *bufio.Writer) {
if bw.Size() < p.size {
return
}
bw.Reset(nil)
p.pool.Put(bw)
}
@ -1,46 +0,0 @@
package pools
import (
"sync"
"codeberg.org/gruf/go-byteutil"
)
// BufferPool is a pooled allocator for bytes.Buffer objects
type BufferPool interface {
// Get fetches a bytes.Buffer from pool
Get() *byteutil.Buffer
// Put places supplied bytes.Buffer in pool
Put(*byteutil.Buffer)
}
// NewBufferPool returns a newly instantiated bytes.Buffer pool
func NewBufferPool(size int) BufferPool {
return &bufferPool{
pool: sync.Pool{
New: func() interface{} {
return &byteutil.Buffer{B: make([]byte, 0, size)}
},
},
size: size,
}
}
// bufferPool is our implementation of BufferPool
type bufferPool struct {
pool sync.Pool
size int
}
func (p *bufferPool) Get() *byteutil.Buffer {
return p.pool.Get().(*byteutil.Buffer)
}
func (p *bufferPool) Put(buf *byteutil.Buffer) {
if buf.Cap() < p.size {
return
}
buf.Reset()
p.pool.Put(buf)
}
@ -1,46 +0,0 @@
package pools
import (
"sync"
"codeberg.org/gruf/go-fastpath"
)
// PathBuilderPool is a pooled allocator for fastpath.Builder objects
type PathBuilderPool interface {
// Get fetches a fastpath.Builder from pool
Get() *fastpath.Builder
// Put places supplied fastpath.Builder back in pool
Put(*fastpath.Builder)
}
// NewPathBuilderPool returns a newly instantiated fastpath.Builder pool
func NewPathBuilderPool(size int) PathBuilderPool {
return &pathBuilderPool{
pool: sync.Pool{
New: func() interface{} {
return &fastpath.Builder{B: make([]byte, 0, size)}
},
},
size: size,
}
}
// pathBuilderPool is our implementation of PathBuilderPool
type pathBuilderPool struct {
pool sync.Pool
size int
}
func (p *pathBuilderPool) Get() *fastpath.Builder {
return p.pool.Get().(*fastpath.Builder)
}
func (p *pathBuilderPool) Put(pb *fastpath.Builder) {
if pb.Cap() < p.size {
return
}
pb.Reset()
p.pool.Put(pb)
}
@ -1,46 +0,0 @@
package pools
import (
"hash"
"sync"
"codeberg.org/gruf/go-hashenc"
)
// HashEncoderPool is a pooled allocator for hashenc.HashEncoder objects.
type HashEncoderPool interface {
// Get fetches a hashenc.HashEncoder from pool
Get() hashenc.HashEncoder
// Put places supplied hashenc.HashEncoder back in pool
Put(hashenc.HashEncoder)
}
// NewHashEncoderPool returns a newly instantiated hashenc.HashEncoder pool.
func NewHashEncoderPool(hash func() hash.Hash, enc func() hashenc.Encoder) HashEncoderPool {
return &hencPool{
pool: sync.Pool{
New: func() interface{} {
return hashenc.New(hash(), enc())
},
},
size: hashenc.New(hash(), enc()).Size(),
}
}
// hencPool is our implementation of HashEncoderPool.
type hencPool struct {
pool sync.Pool
size int
}
func (p *hencPool) Get() hashenc.HashEncoder {
return p.pool.Get().(hashenc.HashEncoder)
}
func (p *hencPool) Put(henc hashenc.HashEncoder) {
if henc.Size() < p.size {
return
}
p.pool.Put(henc)
}
@ -1,387 +0,0 @@
package pools
import (
"runtime"
"sync"
"sync/atomic"
"unsafe"
)
type Pool struct {
// New is used to instantiate new items
New func() interface{}
// Evict is called on evicted items during pool .Clean()
Evict func(interface{})
local unsafe.Pointer // ptr to []_ppool
localSz int64 // count of all elems in local
victim unsafe.Pointer // ptr to []_ppool
victimSz int64 // count of all elems in victim
mutex sync.Mutex // mutex protects new cleanups, and new allocations of local
}
// Get attempts to fetch an item from the pool, failing that allocates with supplied .New() function
func (p *Pool) Get() interface{} {
// Get local pool for proc
// (also pins proc)
pool, pid := p.pin()
if v := pool.getPrivate(); v != nil {
// local _ppool private elem acquired
runtime_procUnpin()
atomic.AddInt64(&p.localSz, -1)
return v
}
if v := pool.get(); v != nil {
// local _ppool queue elem acquired
runtime_procUnpin()
atomic.AddInt64(&p.localSz, -1)
return v
}
// Unpin before attempting slow
runtime_procUnpin()
if v := p.getSlow(pid); v != nil {
// note size decrementing
// is handled within p.getSlow()
// as we don't know if it came
// from the local or victim pools
return v
}
// Alloc new
return p.New()
}
// Put places supplied item in the proc local pool
func (p *Pool) Put(v interface{}) {
// Don't store nil
if v == nil {
return
}
// Get proc local pool
// (also pins proc)
pool, _ := p.pin()
// first try private, then queue
if !pool.setPrivate(v) {
pool.put(v)
}
runtime_procUnpin()
// Increment local pool size
atomic.AddInt64(&p.localSz, 1)
}
// Clean will drop the current victim pools, move the current local pools to their
// place, and reset the local pools ptr in order to be regenerated
func (p *Pool) Clean() {
p.mutex.Lock()
// victim becomes local, local becomes nil
localPtr := atomic.SwapPointer(&p.local, nil)
victimPtr := atomic.SwapPointer(&p.victim, localPtr)
localSz := atomic.SwapInt64(&p.localSz, 0)
atomic.StoreInt64(&p.victimSz, localSz)
var victim []ppool
if victimPtr != nil {
victim = *(*[]ppool)(victimPtr)
}
// drain each of the vict _ppool items
for i := 0; i < len(victim); i++ {
ppool := &victim[i]
ppool.evict(p.Evict)
}
p.mutex.Unlock()
}
// LocalSize returns the total number of elements in all the proc-local pools
func (p *Pool) LocalSize() int64 {
return atomic.LoadInt64(&p.localSz)
}
// VictimSize returns the total number of elements in all the victim (old proc-local) pools
func (p *Pool) VictimSize() int64 {
return atomic.LoadInt64(&p.victimSz)
}
// getSlow is the slow path for fetching an element, attempting to steal from other proc's
// local pools, and failing that, from the aging-out victim pools. pid is still passed so
// not all procs start iterating from the same index
func (p *Pool) getSlow(pid int) interface{} {
// get local pools
local := p.localPools()
// Try to steal from other proc locals
for i := 0; i < len(local); i++ {
pool := &local[(pid+i+1)%len(local)]
if v := pool.get(); v != nil {
atomic.AddInt64(&p.localSz, -1)
return v
}
}
// get victim pools
victim := p.victimPools()
// Attempt to steal from victim pools
for i := 0; i < len(victim); i++ {
pool := &victim[(pid+i+1)%len(victim)]
if v := pool.get(); v != nil {
atomic.AddInt64(&p.victimSz, -1)
return v
}
}
// Set victim pools to nil (none found)
atomic.StorePointer(&p.victim, nil)
return nil
}
// localPools safely loads slice of local _ppools
func (p *Pool) localPools() []ppool {
local := atomic.LoadPointer(&p.local)
if local == nil {
return nil
}
return *(*[]ppool)(local)
}
// victimPools safely loads slice of victim _ppools
func (p *Pool) victimPools() []ppool {
victim := atomic.LoadPointer(&p.victim)
if victim == nil {
return nil
}
return *(*[]ppool)(victim)
}
// pin will pin the calling goroutine to its proc, returning the proc-local _ppool and the current PID we're pinned to
func (p *Pool) pin() (*ppool, int) {
for {
// get local pools
local := p.localPools()
if len(local) > 0 {
// local already initialized
// pin to current proc
pid := runtime_procPin()
// check for pid local pool
if pid < len(local) {
return &local[pid], pid
}
// unpin from proc
runtime_procUnpin()
} else {
// local not yet initialized
// Check functions are set
if p.New == nil {
panic("new func must not be nil")
}
if p.Evict == nil {
panic("evict func must not be nil")
}
}
// allocate local
p.allocLocal()
}
}
// allocLocal allocates a new local pool slice, using the old length to check
// if the pool was previously nil, or whether a change in GOMAXPROCS occurred
func (p *Pool) allocLocal() {
// get pool lock
p.mutex.Lock()
// Calculate new size to use
size := runtime.GOMAXPROCS(0)
local := p.localPools()
if len(local) != size {
// GOMAXPROCS changed, reallocate
pools := make([]ppool, size)
atomic.StorePointer(&p.local, unsafe.Pointer(&pools))
// Evict old local elements
for i := 0; i < len(local); i++ {
pool := &local[i]
pool.evict(p.Evict)
}
}
// Unlock pool
p.mutex.Unlock()
}
// _ppool is a proc local pool
type _ppool struct {
// root is the root element of the _ppool queue,
// and protects concurrent access to the queue
root unsafe.Pointer
// private is a proc private member accessible
// only to the pid this _ppool is assigned to,
// except during evict (hence the unsafe pointer)
private unsafe.Pointer
}
// ppool wraps _ppool with pad.
type ppool struct {
_ppool
// Prevents false sharing on widespread platforms with
// 128 mod (cache line size) = 0 .
pad [128 - unsafe.Sizeof(_ppool{})%128]byte
}
// getPrivate gets the proc private member
func (pp *_ppool) getPrivate() interface{} {
ptr := atomic.SwapPointer(&pp.private, nil)
if ptr == nil {
return nil
}
return *(*interface{})(ptr)
}
// setPrivate sets the proc private member (only if unset)
func (pp *_ppool) setPrivate(v interface{}) bool {
return atomic.CompareAndSwapPointer(&pp.private, nil, unsafe.Pointer(&v))
}
// get fetches an element from the queue
func (pp *_ppool) get() interface{} {
for {
// Attempt to load root elem
root := atomic.LoadPointer(&pp.root)
if root == nil {
return nil
}
// Attempt to consume root elem
if root == inUsePtr ||
!atomic.CompareAndSwapPointer(&pp.root, root, inUsePtr) {
continue
}
// Root becomes next in chain
e := (*elem)(root)
v := e.value
// Place new root back in the chain
atomic.StorePointer(&pp.root, unsafe.Pointer(e.next))
putElem(e)
return v
}
}
// put places an element in the queue
func (pp *_ppool) put(v interface{}) {
// Prepare next elem
e := getElem()
e.value = v
for {
// Attempt to load root elem
root := atomic.LoadPointer(&pp.root)
if root == inUsePtr {
continue
}
// Set the next elem value (might be nil)
e.next = (*elem)(root)
// Attempt to store this new value at root
if atomic.CompareAndSwapPointer(&pp.root, root, unsafe.Pointer(e)) {
break
}
}
}
// evict evicts all entries from the pool, calling hook on each
func (pp *_ppool) evict(hook func(interface{})) {
if v := pp.getPrivate(); v != nil {
hook(v)
}
for {
v := pp.get()
if v == nil {
break
}
hook(v)
}
}
// inUsePtr is a ptr used to indicate _ppool is in use
var inUsePtr = unsafe.Pointer(&elem{
next: nil,
value: "in_use",
})
// elem defines an element in the _ppool queue
type elem struct {
next *elem
value interface{}
}
// elemPool is a simple pool of unused elements
var elemPool = struct {
root unsafe.Pointer
}{}
// getElem fetches a new elem from pool, or creates new
func getElem() *elem {
// Attempt to load root elem
root := atomic.LoadPointer(&elemPool.root)
if root == nil {
return &elem{}
}
// Attempt to consume root elem
if root == inUsePtr ||
!atomic.CompareAndSwapPointer(&elemPool.root, root, inUsePtr) {
return &elem{}
}
// Root becomes next in chain
e := (*elem)(root)
atomic.StorePointer(&elemPool.root, unsafe.Pointer(e.next))
e.next = nil
return e
}
// putElem will place element in the pool
func putElem(e *elem) {
e.value = nil
// Attempt to load root elem
root := atomic.LoadPointer(&elemPool.root)
if root == inUsePtr {
return // drop
}
// Set the next elem value (might be nil)
e.next = (*elem)(root)
// Attempt to store this new value at root
atomic.CompareAndSwapPointer(&elemPool.root, root, unsafe.Pointer(e))
}
//go:linkname runtime_procPin sync.runtime_procPin
func runtime_procPin() int
//go:linkname runtime_procUnpin sync.runtime_procUnpin
func runtime_procUnpin()
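Usage of this Pool follows sync.Pool, except both callbacks are mandatory (pin() panics otherwise) and Clean() explicitly ages local pools out through the victim generation. A sketch with hypothetical callbacks:

	p := Pool{
		New:   func() interface{} { return new(bytes.Buffer) },
		Evict: func(v interface{}) { /* drop or recycle resources */ },
	}

	buf := p.Get().(*bytes.Buffer)
	buf.Reset()
	p.Put(buf)

	p.Clean() // local -> victim; previous victim evicted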
@ -1,63 +0,0 @@
package kv
import (
"context"
"errors"
"codeberg.org/gruf/go-mutexes"
"codeberg.org/gruf/go-store/v2/storage"
)
var ErrIteratorClosed = errors.New("store/kv: iterator closed")
// Iterator provides a read-only iterator to all the key-value
// pairs in a KVStore. While the iterator is open the store is read
// locked; you MUST release the iterator when you are finished with
// it.
//
// Please note:
// individual iterators are NOT concurrency safe, though it is safe to
// have multiple iterators running concurrently.
type Iterator struct {
store *KVStore // store is the linked KVStore
state *mutexes.LockState
entries []storage.Entry
index int
key string
}
// Next attempts to fetch the next key-value pair, the
// return value indicates whether another pair remains.
func (i *Iterator) Next() bool {
next := i.index + 1
if next >= len(i.entries) {
i.key = ""
return false
}
i.key = i.entries[next].Key
i.index = next
return true
}
// Key returns the current iterator key.
func (i *Iterator) Key() string {
return i.key
}
// Value returns the current iterator value at key.
func (i *Iterator) Value(ctx context.Context) ([]byte, error) {
if i.store == nil {
return nil, ErrIteratorClosed
}
return i.store.get(i.state.RLock, ctx, i.key)
}
// Release will release the store read-lock, and close this iterator.
func (i *Iterator) Release() {
i.state.UnlockMap()
i.state = nil
i.store = nil
i.key = ""
i.entries = nil
i.index = 0
}
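A typical iteration loop, assuming an opened *KVStore named store (sketch; a nil match function matches all keys, per Iterator() further below):

	iter, err := store.Iterator(ctx, nil)
	if err != nil {
		return err
	}
	defer iter.Release() // releases the store read lock

	for iter.Next() {
		val, err := iter.Value(ctx)
		if err != nil {
			return err
		}
		fmt.Println(iter.Key(), len(val))
	}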
@ -1,116 +0,0 @@
package kv
import (
"context"
"errors"
"io"
"codeberg.org/gruf/go-mutexes"
)
// ErrStateClosed is returned on further calls to states after calling Release().
var ErrStateClosed = errors.New("store/kv: state closed")
// StateRO provides a read-only window to the store. While this
// state is active during the Read() function window, the entire
// store will be read-locked. The state is thread-safe for concurrent
// use UNTIL the moment that your supplied function to Read() returns.
type StateRO struct {
store *KVStore
state *mutexes.LockState
}
// Get: see KVStore.Get(). Returns error if state already closed.
func (st *StateRO) Get(ctx context.Context, key string) ([]byte, error) {
if st.store == nil {
return nil, ErrStateClosed
}
return st.store.get(st.state.RLock, ctx, key)
}
// GetStream: see KVStore.GetStream(). Returns error if state already closed.
func (st *StateRO) GetStream(ctx context.Context, key string) (io.ReadCloser, error) {
if st.store == nil {
return nil, ErrStateClosed
}
return st.store.getStream(st.state.RLock, ctx, key)
}
// Has: see KVStore.Has(). Returns error if state already closed.
func (st *StateRO) Has(ctx context.Context, key string) (bool, error) {
if st.store == nil {
return false, ErrStateClosed
}
return st.store.has(st.state.RLock, ctx, key)
}
// Release will release the store read-lock, and close this state.
func (st *StateRO) Release() {
st.state.UnlockMap()
st.state = nil
st.store = nil
}
// StateRW provides a read-write window to the store. While this
// state is active during the Update() function window, the entire
// store will be locked. The state is thread-safe for concurrent
// use UNTIL the moment that your supplied function to Update() returns.
type StateRW struct {
store *KVStore
state *mutexes.LockState
}
// Get: see KVStore.Get(). Returns error if state already closed.
func (st *StateRW) Get(ctx context.Context, key string) ([]byte, error) {
if st.store == nil {
return nil, ErrStateClosed
}
return st.store.get(st.state.RLock, ctx, key)
}
// GetStream: see KVStore.GetStream(). Returns error if state already closed.
func (st *StateRW) GetStream(ctx context.Context, key string) (io.ReadCloser, error) {
if st.store == nil {
return nil, ErrStateClosed
}
return st.store.getStream(st.state.RLock, ctx, key)
}
// Put: see KVStore.Put(). Returns error if state already closed.
func (st *StateRW) Put(ctx context.Context, key string, value []byte) (int, error) {
if st.store == nil {
return 0, ErrStateClosed
}
return st.store.put(st.state.Lock, ctx, key, value)
}
// PutStream: see KVStore.PutStream(). Returns error if state already closed.
func (st *StateRW) PutStream(ctx context.Context, key string, r io.Reader) (int64, error) {
if st.store == nil {
return 0, ErrStateClosed
}
return st.store.putStream(st.state.Lock, ctx, key, r)
}
// Has: see KVStore.Has(). Returns error if state already closed.
func (st *StateRW) Has(ctx context.Context, key string) (bool, error) {
if st.store == nil {
return false, ErrStateClosed
}
return st.store.has(st.state.RLock, ctx, key)
}
// Delete: see KVStore.Delete(). Returns error if state already closed.
func (st *StateRW) Delete(ctx context.Context, key string) error {
if st.store == nil {
return ErrStateClosed
}
return st.store.delete(st.state.Lock, ctx, key)
}
// Release will release the store lock, and close this state.
func (st *StateRW) Release() {
st.state.UnlockMap()
st.state = nil
st.store = nil
}
@ -1,267 +0,0 @@
package kv
import (
"context"
"io"
"codeberg.org/gruf/go-iotools"
"codeberg.org/gruf/go-mutexes"
"codeberg.org/gruf/go-store/v2/storage"
)
// KVStore is a very simple, yet performant key-value store
type KVStore struct {
mu mutexes.MutexMap // map of keys to mutexes to protect key access
st storage.Storage // underlying storage implementation
}
func OpenDisk(path string, cfg *storage.DiskConfig) (*KVStore, error) {
// Attempt to open disk storage
storage, err := storage.OpenDisk(path, cfg)
if err != nil {
return nil, err
}
// Return new KVStore
return OpenStorage(storage)
}
func OpenBlock(path string, cfg *storage.BlockConfig) (*KVStore, error) {
// Attempt to open block storage
storage, err := storage.OpenBlock(path, cfg)
if err != nil {
return nil, err
}
// Return new KVStore
return OpenStorage(storage)
}
func OpenMemory(overwrites bool) *KVStore {
return New(storage.OpenMemory(100, overwrites))
}
func OpenS3(endpoint string, bucket string, cfg *storage.S3Config) (*KVStore, error) {
// Attempt to open S3 storage
storage, err := storage.OpenS3(endpoint, bucket, cfg)
if err != nil {
return nil, err
}
// Return new KVStore
return OpenStorage(storage)
}
// OpenStorage will return a new KVStore instance based on Storage, performing an initial storage.Clean().
func OpenStorage(storage storage.Storage) (*KVStore, error) {
// Perform initial storage clean
err := storage.Clean(context.Background())
if err != nil {
return nil, err
}
// Return new KVStore
return New(storage), nil
}
// New will simply return a new KVStore instance based on Storage.
func New(storage storage.Storage) *KVStore {
if storage == nil {
panic("nil storage")
}
return &KVStore{
mu: mutexes.NewMap(-1, -1),
st: storage,
}
}
// RLock acquires a read-lock on supplied key, returning unlock function.
func (st *KVStore) RLock(key string) (runlock func()) {
return st.mu.RLock(key)
}
// Lock acquires a write-lock on supplied key, returning unlock function.
func (st *KVStore) Lock(key string) (unlock func()) {
return st.mu.Lock(key)
}
// Get fetches the bytes for supplied key in the store.
func (st *KVStore) Get(ctx context.Context, key string) ([]byte, error) {
return st.get(st.RLock, ctx, key)
}
// get performs the underlying logic for KVStore.Get(), using supplied read lock func to allow use with states.
func (st *KVStore) get(rlock func(string) func(), ctx context.Context, key string) ([]byte, error) {
// Acquire read lock for key
runlock := rlock(key)
defer runlock()
// Read file bytes from storage
return st.st.ReadBytes(ctx, key)
}
// GetStream fetches a ReadCloser for the bytes at the supplied key in the store.
func (st *KVStore) GetStream(ctx context.Context, key string) (io.ReadCloser, error) {
return st.getStream(st.RLock, ctx, key)
}
// getStream performs the underlying logic for KVStore.GetStream(), using supplied read lock func to allow use with states.
func (st *KVStore) getStream(rlock func(string) func(), ctx context.Context, key string) (io.ReadCloser, error) {
// Acquire read lock for key
runlock := rlock(key)
// Attempt to open stream for read
rd, err := st.st.ReadStream(ctx, key)
if err != nil {
runlock()
return nil, err
}
var unlocked bool
// Wrap readcloser to call our own callback
return iotools.ReadCloser(rd, iotools.CloserFunc(func() error {
if !unlocked {
unlocked = true
defer runlock()
}
return rd.Close()
})), nil
}
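The per-key read lock taken here travels with the returned stream, so callers only need the usual Close discipline (sketch, assuming an opened *KVStore named store and an io.Writer named dst):

	rc, err := store.GetStream(ctx, "some/key")
	if err != nil {
		return err
	}
	defer rc.Close() // also releases the key's read lock

	_, err = io.Copy(dst, rc)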
// Put places the bytes at the supplied key in the store.
func (st *KVStore) Put(ctx context.Context, key string, value []byte) (int, error) {
return st.put(st.Lock, ctx, key, value)
}
// put performs the underlying logic for KVStore.Put(), using supplied lock func to allow use with states.
func (st *KVStore) put(lock func(string) func(), ctx context.Context, key string, value []byte) (int, error) {
// Acquire write lock for key
unlock := lock(key)
defer unlock()
// Write file bytes to storage
return st.st.WriteBytes(ctx, key, value)
}
// PutStream writes the bytes from the supplied Reader at the supplied key in the store.
func (st *KVStore) PutStream(ctx context.Context, key string, r io.Reader) (int64, error) {
return st.putStream(st.Lock, ctx, key, r)
}
// putStream performs the underlying logic for KVStore.PutStream(), using supplied lock func to allow use with states.
func (st *KVStore) putStream(lock func(string) func(), ctx context.Context, key string, r io.Reader) (int64, error) {
// Acquire write lock for key
unlock := lock(key)
defer unlock()
// Write file stream to storage
return st.st.WriteStream(ctx, key, r)
}
// Has checks whether the supplied key exists in the store.
func (st *KVStore) Has(ctx context.Context, key string) (bool, error) {
return st.has(st.RLock, ctx, key)
}
// has performs the underlying logic for KVStore.Has(), using supplied read lock func to allow use with states.
func (st *KVStore) has(rlock func(string) func(), ctx context.Context, key string) (bool, error) {
// Acquire read lock for key
runlock := rlock(key)
defer runlock()
// Stat file in storage
return st.st.Stat(ctx, key)
}
// Delete removes value at supplied key from the store.
func (st *KVStore) Delete(ctx context.Context, key string) error {
return st.delete(st.Lock, ctx, key)
}
// delete performs the underlying logic for KVStore.Delete(), using supplied lock func to allow use with states.
func (st *KVStore) delete(lock func(string) func(), ctx context.Context, key string) error {
// Acquire write lock for key
unlock := lock(key)
defer unlock()
// Remove file from storage
return st.st.Remove(ctx, key)
}
// Iterator returns an Iterator for key-value pairs in the store, using supplied match function
func (st *KVStore) Iterator(ctx context.Context, matchFn func(string) bool) (*Iterator, error) {
if matchFn == nil {
// By default simply match all keys
matchFn = func(string) bool { return true }
}
// Get store read lock state
state := st.mu.RLockMap()
var entries []storage.Entry
walkFn := func(ctx context.Context, entry storage.Entry) error {
// Ignore unmatched entries
if !matchFn(entry.Key) {
return nil
}
// Add to entries
entries = append(entries, entry)
return nil
}
// Collate keys in storage with our walk function
err := st.st.WalkKeys(ctx, storage.WalkKeysOptions{WalkFn: walkFn})
if err != nil {
state.UnlockMap()
return nil, err
}
// Return new iterator
return &Iterator{
store: st,
state: state,
entries: entries,
index: -1,
key: "",
}, nil
}
// Read provides a read-only window to the store, holding it in a read-locked state until release.
func (st *KVStore) Read() *StateRO {
state := st.mu.RLockMap()
return &StateRO{store: st, state: state}
}
// ReadFn provides a read-only window to the store, holding it in a read-locked state until fn returns.
func (st *KVStore) ReadFn(fn func(*StateRO)) {
// Acquire read-only state
state := st.Read()
defer state.Release()
// Pass to fn
fn(state)
}
// Update provides a read-write window to the store, holding it in a write-locked state until release.
func (st *KVStore) Update() *StateRW {
state := st.mu.LockMap()
return &StateRW{store: st, state: state}
}
// UpdateFn provides a read-write window to the store, holding it in a write-locked state until fn returns.
func (st *KVStore) UpdateFn(fn func(*StateRW)) {
// Acquire read-write state
state := st.Update()
defer state.Release()
// Pass to fn
fn(state)
}
// Close will close the underlying storage; the mutex map locking (e.g. RLock(), Lock()) will continue to function.
func (st *KVStore) Close() error {
return st.st.Close()
}
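Putting the pieces together, batched access holds one store-wide lock instead of taking a per-key lock on every call (sketch, assuming an opened *KVStore named store):

	store.UpdateFn(func(tx *StateRW) {
		_, _ = tx.Put(ctx, "a", []byte("1"))
		_, _ = tx.Put(ctx, "b", []byte("2"))
	})

	store.ReadFn(func(tx *StateRO) {
		v, _ := tx.Get(ctx, "a")
		_ = v
	})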
@ -0,0 +1,38 @@
package storage_test
import (
"os"
"testing"
"codeberg.org/gruf/go-store/v2/storage"
)
func TestBlockStorage(t *testing.T) {
// Set test path, defer deleting it
testPath := "blockstorage.test"
t.Cleanup(func() {
os.RemoveAll(testPath)
})
// Open new blockstorage instance
st, err := storage.OpenBlock(testPath, nil)
if err != nil {
t.Fatalf("Failed opening storage: %v", err)
}
// Attempt multi open of same instance
_, err = storage.OpenBlock(testPath, nil)
if err == nil {
t.Fatal("Successfully opened a locked storage instance")
}
// Run the storage tests
testStorage(t, st)
// Test reopen storage path
st, err = storage.OpenBlock(testPath, nil)
if err != nil {
t.Fatalf("Failed opening storage: %v", err)
}
st.Close()
}
@ -7,6 +7,7 @@ package flate
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize:
d.w.logNewTablePenalty = 7
d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize}
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
return zw, err
}
// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
const MinCustomWindowSize = 32
// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
const MaxCustomWindowSize = windowSize
// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
if windowSize < MinCustomWindowSize {
return nil, errors.New("flate: requested window size less than MinWindowSize")
}
if windowSize > MaxCustomWindowSize {
return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize")
}
var dw Writer
if err := dw.d.init(w, -windowSize); err != nil {
return nil, err
}
return &dw, nil
}
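A small custom window trades compression ratio for much lower decoder memory; e.g. (sketch, assuming dst is an io.Writer and data a []byte):

	w, err := flate.NewWriterWindow(dst, 1024) // 1 KiB window
	if err != nil {
		return err
	}
	if _, err := w.Write(data); err != nil {
		return err
	}
	return w.Close()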
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
@ -8,7 +8,6 @@ package flate
import (
"encoding/binary"
"fmt"
"math/bits"
)
type fastEnc interface {
@ -192,25 +191,3 @@ func (e *fastGen) Reset() {
}
e.hist = e.hist[:0]
}
// matchLen returns the maximum length.
// 'a' must be the shortest of the two.
func matchLen(a, b []byte) int {
var checked int
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
a = a[8:]
b = b[8:]
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
return i + checked
}
}
return len(a) + checked
}
@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
const sanity = false
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
h.chunks = new([huffmanNumChunks]uint16)
}
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
}
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
h.links[off] = h.links[off][:numLinks]
}
}
} else {
@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return true
}
// The actual read interface needed by NewReader.
// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
@ -285,6 +286,18 @@ type Reader interface {
io.ByteReader
}
type step uint8
const (
copyData step = iota + 1
nextBlock
huffmanBytesBuffer
huffmanBytesReader
huffmanBufioReader
huffmanStringsReader
huffmanGenericReader
)
// Decompress state.
type decompressor struct {
// Input source.
@ -303,7 +316,7 @@ type decompressor struct {
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
step step
stepState int
err error
toRead []byte
@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() {
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() {
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) {
if f.err != nil {
return 0, f.err
}
f.step(f)
f.doStep()
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// Support the io.WriteTo interface for io.Copy and friends.
// WriteTo implements the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
return total, f.err
}
if f.err == nil {
f.step(f)
f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
@ -631,7 +646,7 @@ func (f *decompressor) copyData() {
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
f.step = copyData
return
}
f.finishBlock()
@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() {
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
f.step = nextBlock
}
func (f *decompressor) doStep() {
switch f.step {
case copyData:
f.copyData()
case nextBlock:
f.nextBlock()
case huffmanBytesBuffer:
f.huffmanBytesBuffer()
case huffmanBytesReader:
f.huffmanBytesReader()
case huffmanBufioReader:
f.huffmanBufioReader()
case huffmanStringsReader:
f.huffmanStringsReader()
case huffmanGenericReader:
f.huffmanGenericReader()
default:
panic("BUG: unexpected step state")
}
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: (*decompressor).nextBlock,
step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
@ -85,7 +85,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.step = huffmanBytesBuffer
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -251,7 +251,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.step = huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -336,7 +336,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.step = huffmanBytesReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -502,7 +502,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.step = huffmanBytesReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -587,7 +587,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.step = huffmanBufioReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -753,7 +753,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.step = huffmanBufioReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -838,7 +838,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanStringsReader
f.step = huffmanStringsReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -1004,7 +1004,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanStringsReader // We need to continue this work
f.step = huffmanStringsReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -1089,7 +1089,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanGenericReader
f.step = huffmanGenericReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -1255,7 +1255,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanGenericReader // We need to continue this work
f.step = huffmanGenericReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -1265,19 +1265,19 @@ copyHistory:
// Not reached
}
func (f *decompressor) huffmanBlockDecoder() func() {
func (f *decompressor) huffmanBlockDecoder() {
switch f.r.(type) {
case *bytes.Buffer:
return f.huffmanBytesBuffer
f.huffmanBytesBuffer()
case *bytes.Reader:
return f.huffmanBytesReader
f.huffmanBytesReader()
case *bufio.Reader:
return f.huffmanBufioReader
f.huffmanBufioReader()
case *strings.Reader:
return f.huffmanStringsReader
f.huffmanStringsReader()
case Reader:
return f.huffmanGenericReader
f.huffmanGenericReader()
default:
return f.huffmanGenericReader
f.huffmanGenericReader()
}
}
@ -308,3 +308,401 @@ emitRemainder:
emitLiteral(dst, src[nextEmit:])
}
}
// fastEncL5Window is a level 5 encoder,
// but with a custom window size.
type fastEncL5Window struct {
hist []byte
cur int32
maxOffset int32
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
maxMatchOffset := e.maxOffset
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do a long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// Reset the encoding table.
func (e *fastEncL5Window) Reset() {
// We keep the same allocs, since we are compressing the same block sizes.
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= int32(bufferReset) {
e.cur += e.maxOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
func (e *fastEncL5Window) addBlock(src []byte) int32 {
// check if we have space already
maxMatchOffset := e.maxOffset
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < int(maxMatchOffset*2) {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// matchlen will return the match length between offsets s and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets s and t in src.
// It is assumed that s > t, that t >= 0, and that s < len(src).
func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
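
The two helpers differ only in the cap: matchlen never compares more than maxMatchLength - 4 bytes past s, while matchlenLong runs to the end of src. A hedged standalone illustration (hypothetical prefixLen and cappedMatch helpers):

package main

import "fmt"

// prefixLen is a simple stand-in for the package's matchLen helper.
func prefixLen(a, b []byte) int {
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

// cappedMatch mirrors matchlen: compare at most limit bytes from s.
func cappedMatch(src []byte, s, t, limit int) int {
	s1 := s + limit
	if s1 > len(src) {
		s1 = len(src)
	}
	return prefixLen(src[s:s1], src[t:])
}

func main() {
	src := []byte("abcabcabcabc")
	fmt.Println(cappedMatch(src, 3, 0, 4))   // 4: capped
	fmt.Println(prefixLen(src[3:], src[0:])) // 9: uncapped, like matchlenLong
}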

View file

@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

View file

@ -0,0 +1,68 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

View file

@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shorter of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
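
A quick worked check of the XOR/trailing-zeros step above (standalone; byte values are illustrative):

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func main() {
	a := []byte("gopher!!")
	b := []byte("gophers!")
	// Bytes 0-5 are equal, so the lowest set bit of the XOR lies in byte 6.
	diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
	fmt.Println(bits.TrailingZeros64(diff) >> 3) // 6 matching bytes
}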

View file

@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
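// Keep the already-allocated bufio reader so Reset does not reallocate it.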
br: z.br,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr

View file

@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
return z, nil
}
// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
const MinCustomWindowSize = flate.MinCustomWindowSize
// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
const MaxCustomWindowSize = flate.MaxCustomWindowSize
// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
if windowSize < MinCustomWindowSize {
return nil, errors.New("gzip: requested window size smaller than MinCustomWindowSize")
}
if windowSize > MaxCustomWindowSize {
return nil, errors.New("gzip: requested window size larger than MaxCustomWindowSize")
}
z := new(Writer)
z.init(w, -windowSize)
return z, nil
}
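
A usage sketch for the new constructor (buffer, payload, and the 32 KiB window are illustrative; the size must lie between MinCustomWindowSize and MaxCustomWindowSize):

package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/gzip"
)

func main() {
	var buf bytes.Buffer
	// A 32 KiB window trades some compression ratio for lower memory use.
	w, err := gzip.NewWriterWindow(&buf, 32<<10)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write([]byte("hello, custom window")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed to %d bytes", buf.Len())
}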
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {

View file

@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
return &d
}
// MakeDictManual will create a dictionary.
// 'data' must be at least MinDictSize bytes and at most MaxDictSize bytes.
// A manual first repeat index into data must be provided.
// It must be less than len(data)-8.
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
return nil
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
d.repeat = int(firstIdx)
return &d
}
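
A hedged usage sketch (sample bytes and firstIdx are illustrative; the constructor returns nil when the size or index constraints above are violated):

package main

import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	// Sample must be at least MinDictSize bytes; firstIdx must point at a
	// repeated position and be < len(data)-8.
	sample := []byte("common-prefix common-prefix common-prefix!")
	dict := s2.MakeDictManual(sample, 0)
	if dict == nil {
		fmt.Println("invalid dictionary parameters")
		return
	}
	fmt.Println("dictionary of", len(sample), "bytes ready")
}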
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.

View file

@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte {
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {

View file

@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
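// Dictionary matches may only start within the first MaxDictSrcOffset
// bytes of src; past that point, return an empty (zero-length) match.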
if s >= MaxDictSrcOffset {
return match{offset: candidate, s: s}
}
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {

View file

@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int {
return len(a) + checked
}
// The input length must be > inputMargin.
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
@ -501,6 +502,7 @@ emitRemainder:
return d
}
// The input length must be > inputMargin.
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (

File diff suppressed because it is too large

View file

@ -511,13 +511,14 @@ func IndexStream(r io.Reader) ([]byte, error) {
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
type offset struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}
x := struct {
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
} `json:"offsets"`
Offsets []offset `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
@ -525,10 +526,7 @@ func (i *Index) JSON() []byte {
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
x.Offsets = append(x.Offsets, struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b
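
A hedged end-to-end sketch of producing the JSON form (assumes the package's NewWriter and Index.Load; the payload is illustrative):

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/compress/s2"
)

func main() {
	// Write a small s2 stream, index it, then dump the index as JSON.
	var stream bytes.Buffer
	w := s2.NewWriter(&stream)
	if _, err := w.Write(bytes.Repeat([]byte("data"), 1024)); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	idxBytes, err := s2.IndexStream(bytes.NewReader(stream.Bytes()))
	if err != nil {
		log.Fatal(err)
	}
	var idx s2.Index
	if _, err := idx.Load(idxBytes); err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(idx.JSON()))
}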

19
vendor/modules.txt vendored
View file

@ -28,16 +28,10 @@ codeberg.org/gruf/go-errors/v2
# codeberg.org/gruf/go-fastcopy v1.1.2
## explicit; go 1.17
codeberg.org/gruf/go-fastcopy
# codeberg.org/gruf/go-fastpath v1.0.3
## explicit; go 1.14
codeberg.org/gruf/go-fastpath
# codeberg.org/gruf/go-fastpath/v2 v2.0.0
## explicit; go 1.14
codeberg.org/gruf/go-fastpath/v2
# codeberg.org/gruf/go-hashenc v1.0.2
## explicit; go 1.16
codeberg.org/gruf/go-hashenc
# codeberg.org/gruf/go-iotools v0.0.0-20230601182242-d933b07dcbef
# codeberg.org/gruf/go-iotools v0.0.0-20230811115124-5d4223615a7f
## explicit; go 1.19
codeberg.org/gruf/go-iotools
# codeberg.org/gruf/go-kv v1.6.4
@ -53,21 +47,17 @@ codeberg.org/gruf/go-mangler
# codeberg.org/gruf/go-maps v1.0.3
## explicit; go 1.19
codeberg.org/gruf/go-maps
# codeberg.org/gruf/go-mutexes v1.1.5
# codeberg.org/gruf/go-mutexes v1.2.0 => ../go-mutexes
## explicit; go 1.14
codeberg.org/gruf/go-mutexes
# codeberg.org/gruf/go-pools v1.1.0
## explicit; go 1.16
codeberg.org/gruf/go-pools
# codeberg.org/gruf/go-runners v1.6.1
## explicit; go 1.19
codeberg.org/gruf/go-runners
# codeberg.org/gruf/go-sched v1.2.3
## explicit; go 1.19
codeberg.org/gruf/go-sched
# codeberg.org/gruf/go-store/v2 v2.2.2
# codeberg.org/gruf/go-store/v2 v2.2.4
## explicit; go 1.19
codeberg.org/gruf/go-store/v2/kv
codeberg.org/gruf/go-store/v2/storage
codeberg.org/gruf/go-store/v2/util
# github.com/DmitriyVTitov/size v1.5.0
@ -348,7 +338,7 @@ github.com/json-iterator/go
# github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
## explicit
github.com/kballard/go-shellquote
# github.com/klauspost/compress v1.16.7
# github.com/klauspost/compress v1.17.2
## explicit; go 1.18
github.com/klauspost/compress/flate
github.com/klauspost/compress/gzip
@ -1082,3 +1072,4 @@ modernc.org/token
# mvdan.cc/xurls/v2 v2.5.0
## explicit; go 1.19
mvdan.cc/xurls/v2
# codeberg.org/gruf/go-mutexes => ../go-mutexes