[performance] cache v2 filter keyword regular expressions (#2903)

* add caching of filterkeyword regular expressions

* formatting

* fix WholeWord nil check
This commit is contained in:
kim 2024-05-21 13:20:19 +00:00 committed by GitHub
parent 6c0d93c6cb
commit b092da6d28
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 85 additions and 36 deletions

View file

@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() {
// See internal/db/bundb/filter.go. // See internal/db/bundb/filter.go.
filterKeyword2.Filter = nil filterKeyword2.Filter = nil
// We specifically DO NOT unset
// the regexp field here, as any
// regexp.Regexp instance is safe
// for concurrent access.
return filterKeyword2 return filterKeyword2
} }

View file

@ -25,6 +25,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util" "github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/uptrace/bun" "github.com/uptrace/bun"
) )
@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
"ID", "ID",
func() (*gtsmodel.FilterKeyword, error) { func() (*gtsmodel.FilterKeyword, error) {
var filterKeyword gtsmodel.FilterKeyword var filterKeyword gtsmodel.FilterKeyword
err := f.db.
// Scan from DB.
if err := f.db.
NewSelect(). NewSelect().
Model(&filterKeyword). Model(&filterKeyword).
Where("? = ?", bun.Ident("id"), id). Where("? = ?", bun.Ident("id"), id).
Scan(ctx) Scan(ctx); err != nil {
return &filterKeyword, err return nil, err
}
// Pre-compile filter keyword regular expression.
if err := filterKeyword.Compile(); err != nil {
return nil, gtserror.Newf("error compiling filter keyword regex: %w", err)
}
return &filterKeyword, nil
}, },
id, id,
) )
@ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
return filterKeyword, nil return filterKeyword, nil
} }
// populateFilterKeyword sets filterKeyword.Filter when it is nil, by looking
// the filter up through f.state.DB.GetFilterByID using filterKeyword.FilterID.
// The lookup is made with a barebones context so the returned filter is not
// itself populated with its keywords and statuses. A keyword whose Filter is
// already set is left untouched. Any lookup error is returned to the caller.
func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) {
if filterKeyword.Filter == nil { if filterKeyword.Filter == nil {
// Filter is not set, fetch from the cache or database. // Filter is not set, fetch from the cache or database.
filter, err := f.state.DB.GetFilterByID( filterKeyword.Filter, err = f.state.DB.GetFilterByID(
// Don't populate the filter with all of its keywords and statuses or we'll just end up back here.
// Don't populate the filter with all of its keywords
// and statuses or we'll just end up back here.
gtscontext.SetBarebones(ctx), gtscontext.SetBarebones(ctx),
filterKeyword.FilterID, filterKeyword.FilterID,
) )
if err != nil { if err != nil {
return err return err
} }
filterKeyword.Filter = filter
} }
return nil return nil
} }
@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID
func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) { func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {
var filterKeywordIDs []string var filterKeywordIDs []string
if err := f.db. if err := f.db.
NewSelect(). NewSelect().
Model((*gtsmodel.FilterKeyword)(nil)). Model((*gtsmodel.FilterKeyword)(nil)).
@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
Scan(ctx, &filterKeywordIDs); err != nil { Scan(ctx, &filterKeywordIDs); err != nil {
return nil, err return nil, err
} }
if len(filterKeywordIDs) == 0 { if len(filterKeywordIDs) == 0 {
return nil, nil return nil, nil
} }
@ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
filterKeywordIDs, filterKeywordIDs,
func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {
uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs))
// Scan from DB.
if err := f.db. if err := f.db.
NewSelect(). NewSelect().
Model(&uncachedFilterKeywords). Model(&uncachedFilterKeywords).
@ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
Scan(ctx); err != nil { Scan(ctx); err != nil {
return nil, err return nil, err
} }
// Compile all the keyword regular expressions.
uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
if err := filterKeyword.Compile(); err != nil {
log.Errorf(ctx, "error compiling filter keyword regex: %v", err)
return true
}
return false
})
return uncachedFilterKeywords, nil return uncachedFilterKeywords, nil
}, },
) )
@ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
} }
// Populate the filter keywords. Remove any that we can't populate from the return slice. // Populate the filter keywords. Remove any that we can't populate from the return slice.
errs := gtserror.NewMultiError(len(filterKeywords))
filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil {
errs.Appendf( log.Errorf(ctx, "error populating filter keyword: %v", err)
"error populating filter keyword %s: %w",
filterKeyword.ID,
err,
)
return true return true
} }
return false return false
}) })
return filterKeywords, errs.Combine() return filterKeywords, nil
} }
func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
if filterKeyword.Regexp == nil {
// Ensure regexp is compiled
// before attempted caching.
err := filterKeyword.Compile()
if err != nil {
return gtserror.Newf("error compiling filter keyword regex: %w", err)
}
}
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db. _, err := f.db.
NewInsert(). NewInsert().
@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo
if len(columns) > 0 { if len(columns) > 0 {
columns = append(columns, "updated_at") columns = append(columns, "updated_at")
} }
if filterKeyword.Regexp == nil {
// Ensure regexp is compiled
// before attempted caching.
err := filterKeyword.Compile()
if err != nil {
return gtserror.Newf("error compiling filter keyword regex: %w", err)
}
}
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db. _, err := f.db.
NewUpdate(). NewUpdate().

View file

@ -17,7 +17,10 @@
package gtsmodel package gtsmodel
import "time" import (
"regexp"
"time"
)
// Filter stores a filter created by a local account. // Filter stores a filter created by a local account.
type Filter struct { type Filter struct {
@ -39,14 +42,28 @@ type Filter struct {
// FilterKeyword stores a single keyword to filter statuses against. // FilterKeyword stores a single keyword to filter statuses against.
type FilterKeyword struct { type FilterKeyword struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword. AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
Filter *Filter `bun:"-"` // Filter corresponding to FilterID Filter *Filter `bun:"-"` // Filter corresponding to FilterID
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against. Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries? WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
// Regexp holds the expression that Compile builds from Keyword and WholeWord.
// NOTE(review): tagged `bun:"-"` like Filter, presumably so it is never
// read from or written to the database — confirm against the bun docs.
Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression
}
// Compile builds a case-insensitive regular expression from k.Keyword and
// stores it in k.Regexp. The keyword is escaped with regexp.QuoteMeta, so it
// is matched as literal text rather than interpreted as a pattern. When
// WholeWord is non-nil and true, the expression is wrapped in `\b`
// word-boundary assertions on both sides.
//
// The error from regexp.Compile is returned as-is (unwrapped); k.Regexp is
// assigned whatever regexp.Compile returned in either case.
func (k *FilterKeyword) Compile() (err error) {
var wordBreak string
// A nil WholeWord is treated the same as false: no boundary assertions.
if k.WholeWord != nil && *k.WholeWord {
wordBreak = `\b`
}
// Escape the keyword so any regex metacharacters in it match literally,
// then compile with the (?i) case-insensitive flag.
quoted := regexp.QuoteMeta(k.Keyword)
k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak)
return // caller is expected to wrap this error
} }
// FilterStatus stores a single status to filter. // FilterStatus stores a single status to filter.

View file

@ -22,7 +22,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"math" "math"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults(
keywordMatches := make([]string, 0, len(filter.Keywords)) keywordMatches := make([]string, 0, len(filter.Keywords))
fields := filterableTextFields(s) fields := filterableTextFields(s)
for _, filterKeyword := range filter.Keywords { for _, filterKeyword := range filter.Keywords {
wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false)
wordBreak := ``
if wholeWord {
wordBreak = `\b`
}
re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak)
if err != nil {
return nil, err
}
var isMatch bool var isMatch bool
for _, field := range fields { for _, field := range fields {
if re.MatchString(field) { if filterKeyword.Regexp.MatchString(field) {
isMatch = true isMatch = true
break break
} }

View file

@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() {
requestingAccount := suite.testAccounts["local_account_1"] requestingAccount := suite.testAccounts["local_account_1"]
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() {
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilter.Action = gtsmodel.FilterActionHide expectedMatchingFilter.Action = gtsmodel.FilterActionHide
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}