[BUG] Handle bigger files in git grep

- The parser of `git grep`'s output uses `bufio.Scanner`, which is a good
choice overall; however, it has a limit that usually goes unnoticed:
it will not read a single line longer than `64 * 1024` bytes, which can
be hit in practical scenarios.
- Use `bufio.Reader` instead, which doesn't have this limitation but is
a bit harder to work with, as it's a lower-level primitive.
- Adds unit test.
- Resolves https://codeberg.org/forgejo/forgejo/issues/3149
This commit is contained in:
Gusted 2024-04-11 13:34:53 +02:00
parent 656554e247
commit 668709a33f
No known key found for this signature in database
GPG key ID: FD821B732837125F
2 changed files with 43 additions and 4 deletions

View file

@ -10,6 +10,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"io"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@ -80,10 +81,21 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
defer stdoutReader.Close() defer stdoutReader.Close()
isInBlock := false isInBlock := false
scanner := bufio.NewScanner(stdoutReader) scanner := bufio.NewReader(stdoutReader)
var res *GrepResult var res *GrepResult
for scanner.Scan() { for {
line := scanner.Text() line, err := scanner.ReadString('\n')
if err != nil {
if err == io.EOF {
return nil
}
return err
}
// Remove delimiter.
if len(line) > 0 {
line = line[:len(line)-1]
}
if !isInBlock { if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true isInBlock = true
@ -109,7 +121,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
res.LineCodes = append(res.LineCodes, lineCode) res.LineCodes = append(res.LineCodes, lineCode)
} }
} }
return scanner.Err() return nil
}, },
}) })
// git grep exits by cancel (killed), usually it is caused by the limit of results // git grep exits by cancel (killed), usually it is caused by the limit of results

View file

@ -4,7 +4,10 @@
package git package git
import ( import (
"bytes"
"context" "context"
"os"
"path"
"path/filepath" "path/filepath"
"testing" "testing"
@ -49,3 +52,27 @@ func TestGrepSearch(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
assert.Len(t, res, 0) assert.Len(t, res, 0)
} }
// TestGrepLongFiles checks that GrepSearch returns a very long matched line in
// full. It commits a README.md consisting of 65*1024 'a' bytes with no line
// breaks, searches for "a", and expects exactly one result whose first line
// code is 65*1024 bytes long.
func TestGrepLongFiles(t *testing.T) {
	tmpDir := t.TempDir()

	err := InitRepository(DefaultContext, tmpDir, false, Sha1ObjectFormat.Name())
	if !assert.NoError(t, err) {
		return
	}

	gitRepo, err := openRepositoryWithDefaultContext(tmpDir)
	// assert does not abort the test, so bail out explicitly: otherwise the
	// deferred Close below would run on a repository that failed to open.
	if !assert.NoError(t, err) {
		return
	}
	defer gitRepo.Close()

	assert.NoError(t, os.WriteFile(path.Join(tmpDir, "README.md"), bytes.Repeat([]byte{'a'}, 65*1024), 0o666))

	err = AddChanges(tmpDir, true)
	assert.NoError(t, err)

	err = CommitChanges(tmpDir, CommitChangesOptions{Message: "Long file"})
	assert.NoError(t, err)

	res, err := GrepSearch(context.Background(), gitRepo, "a", GrepOptions{})
	assert.NoError(t, err)
	// Guard the indexing below so an unexpected result shape is reported as a
	// test failure instead of an index-out-of-range panic.
	if !assert.Len(t, res, 1) || !assert.NotEmpty(t, res[0].LineCodes) {
		return
	}
	assert.Len(t, res[0].LineCodes[0], 65*1024)
}