[FEAT] Support Include/Exclude Filters for Grep (#3058)

fixes `TestSearchRepo` failing occasionally

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3058
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
Shiny Nematoda 2024-04-06 13:25:39 +00:00 committed by Earl Warren
parent 0a51ae26bc
commit baac15f316
3 changed files with 67 additions and 16 deletions

View file

@ -13,6 +13,8 @@ import (
"os" "os"
"strconv" "strconv"
"strings" "strings"
"code.gitea.io/gitea/modules/setting"
) )
type GrepResult struct { type GrepResult struct {
@ -58,7 +60,15 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
} else { } else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
} }
cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")) // pathspec
files := make([]string, 0, len(setting.Indexer.IncludePatterns)+len(setting.Indexer.ExcludePatterns))
for _, expr := range setting.Indexer.IncludePatterns {
files = append(files, expr.Pattern())
}
for _, expr := range setting.Indexer.ExcludePatterns {
files = append(files, ":^"+expr.Pattern())
}
cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")).AddDashesAndList(files...)
opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50) opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50)
stderr := bytes.Buffer{} stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{ err = cmd.Run(&RunOpts{

View file

@ -30,8 +30,8 @@ var Indexer = struct {
RepoConnStr string RepoConnStr string
RepoIndexerName string RepoIndexerName string
MaxIndexerFileSize int64 MaxIndexerFileSize int64
IncludePatterns []glob.Glob IncludePatterns []Glob
ExcludePatterns []glob.Glob ExcludePatterns []Glob
ExcludeVendored bool ExcludeVendored bool
}{ }{
IssueType: "bleve", IssueType: "bleve",
@ -50,6 +50,19 @@ var Indexer = struct {
ExcludeVendored: true, ExcludeVendored: true,
} }
// Glob bundles a compiled glob.Glob matcher together with a pattern string,
// so the pattern text remains available via Pattern after compilation.
type Glob struct {
glob glob.Glob
pattern string
}
// Match returns the result of the wrapped glob's Match for s.
func (g *Glob) Match(s string) bool {
return g.glob.Match(s)
}
// Pattern returns the pattern string stored in this Glob.
func (g *Glob) Pattern() string {
return g.pattern
}
func loadIndexerFrom(rootCfg ConfigProvider) { func loadIndexerFrom(rootCfg ConfigProvider) {
sec := rootCfg.Section("indexer") sec := rootCfg.Section("indexer")
Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve") Indexer.IssueType = sec.Key("ISSUE_INDEXER_TYPE").MustString("bleve")
@ -90,15 +103,15 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
} }
// IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing // IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
func IndexerGlobFromString(globstr string) []glob.Glob { func IndexerGlobFromString(globstr string) []Glob {
extarr := make([]glob.Glob, 0, 10) extarr := make([]Glob, 0, 10)
for _, expr := range strings.Split(strings.ToLower(globstr), ",") { for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
expr = strings.TrimSpace(expr) expr = strings.TrimSpace(expr)
if expr != "" { if expr != "" {
if g, err := glob.Compile(expr, '.', '/'); err != nil { if g, err := glob.Compile(expr, '.', '/'); err != nil {
log.Info("Invalid glob expression '%s' (skipped): %v", expr, err) log.Info("Invalid glob expression '%s' (skipped): %v", expr, err)
} else { } else {
extarr = append(extarr, g) extarr = append(extarr, Glob{glob: g, pattern: expr})
} }
} }
} }

View file

@ -11,6 +11,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
code_indexer "code.gitea.io/gitea/modules/indexer/code" code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/tests" "code.gitea.io/gitea/tests"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
@ -26,30 +27,57 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
return result return result
} }
func TestSearchRepo(t *testing.T) { func TestSearchRepoIndexer(t *testing.T) {
testSearchRepo(t, true)
}
func TestSearchRepoNoIndexer(t *testing.T) {
testSearchRepo(t, false)
}
func testSearchRepo(t *testing.T, indexer bool) {
defer tests.PrepareTestEnv(t)() defer tests.PrepareTestEnv(t)()
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, indexer)()
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1") repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
assert.NoError(t, err) assert.NoError(t, err)
code_indexer.UpdateRepoIndexer(repo) if indexer {
code_indexer.UpdateRepoIndexer(repo)
}
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"}) testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt") defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("**.txt"))()
setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**") defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("**/y/**"))()
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
assert.NoError(t, err) assert.NoError(t, err)
code_indexer.UpdateRepoIndexer(repo) if indexer {
code_indexer.UpdateRepoIndexer(repo)
}
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}) testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"}) testSearch(t, "/user2/glob/search?q=loren&page=1&fuzzy=false", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"}) if indexer {
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"}) // fuzzy search: matches both file3 (x/b.txt) and file1 (a.txt)
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"}) // when indexer is enabled
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{"x/b.txt", "a.txt"})
} else {
// fuzzy search: OR of all the keywords
// when indexer is disabled
testSearch(t, "/user2/glob/search?q=file3+file1&page=1", []string{"a.txt", "x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
}
testSearch(t, "/user2/glob/search?q=file3&page=1&fuzzy=false", []string{"x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&fuzzy=false", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1&fuzzy=false", []string{})
} }
func testSearch(t *testing.T, url string, expected []string) { func testSearch(t *testing.T, url string, expected []string) {