git-grep: skip binary files

Scanning them for matches is a waste of resources
because their matches are never returned - they appear as empty
lines in the current format.

Notably, even if they were returned, it is unlikely that matching
in binary files makes sense when the goal is "code search".
This commit is contained in:
Radosław Piliszek 2024-08-11 14:24:40 +02:00
parent 87d50eca87
commit f4a7bf6d2a
2 changed files with 27 additions and 1 deletions

View file

@@ -63,8 +63,9 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
2^@10^@repo: go-gitea/gitea
*/
var results []*GrepResult
// -I skips binary files
cmd := NewCommand(ctx, "grep",
"--null", "--break", "--heading", "--column",
"-I", "--null", "--break", "--heading", "--column",
"--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.MatchesPerFile > 0 {

View file

@@ -98,6 +98,31 @@ func TestGrepSearch(t *testing.T) {
assert.Empty(t, res)
}
// TestGrepNoBinary verifies that GrepSearch does not report matches from
// binary files. It commits two files that both contain the search term
// "BINARY" (one with an embedded NUL byte, one plain text) and expects only
// the plain-text file to appear in the results.
func TestGrepNoBinary(t *testing.T) {
	tmpDir := t.TempDir()

	err := InitRepository(DefaultContext, tmpDir, false, Sha1ObjectFormat.Name())
	require.NoError(t, err)

	gitRepo, err := openRepositoryWithDefaultContext(tmpDir)
	require.NoError(t, err)
	defer gitRepo.Close()

	// The embedded NUL byte is what is expected to make git treat this file as binary.
	require.NoError(t, os.WriteFile(path.Join(tmpDir, "BINARY"), []byte("I AM BINARY\n\x00\nYOU WON'T SEE ME"), 0o666))
	require.NoError(t, os.WriteFile(path.Join(tmpDir, "TEXT"), []byte("I AM NOT BINARY\nYOU WILL SEE ME"), 0o666))

	err = AddChanges(tmpDir, true)
	require.NoError(t, err)

	err = CommitChanges(tmpDir, CommitChangesOptions{Message: "Binary and text files"})
	require.NoError(t, err)

	res, err := GrepSearch(context.Background(), gitRepo, "BINARY", GrepOptions{})
	require.NoError(t, err)
	// Use require rather than assert: with a wrong result count the test must
	// stop here instead of panicking on the res[0] access below.
	require.Len(t, res, 1)
	assert.Equal(t, "TEXT", res[0].Filename)
}
func TestGrepLongFiles(t *testing.T) {
tmpDir := t.TempDir()