forgejo/modules/git/blob.go

222 lines
4.3 KiB
Go
Raw Normal View History

2016-11-03 22:16:01 +00:00
// Copyright 2015 The Gogs Authors. All rights reserved.
Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
2019-04-19 12:17:27 +00:00
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
2016-11-03 22:16:01 +00:00
package git
import (
"bufio"
"bytes"
"encoding/base64"
2016-11-03 22:16:01 +00:00
"io"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
2016-11-03 22:16:01 +00:00
)
// Blob represents a Git object.
type Blob struct {
ID ObjectID
gotSize bool
size int64
name string
repo *Repository
}
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
wr, rd, cancel := b.repo.CatFileBatch(b.repo.Ctx)
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
cancel()
return nil, err
}
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
return nil, err
}
b.gotSize = true
b.size = size
if size < 4096 {
bs, err := io.ReadAll(io.LimitReader(rd, size))
defer cancel()
if err != nil {
return nil, err
}
_, err = rd.Discard(1)
return io.NopCloser(bytes.NewReader(bs)), err
}
return &blobReader{
rd: rd,
n: size,
cancel: cancel,
}, nil
}
// Size returns the uncompressed size of the blob
func (b *Blob) Size() int64 {
if b.gotSize {
return b.size
}
wr, rd, cancel := b.repo.CatFileBatchCheck(b.repo.Ctx)
defer cancel()
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
_, _, b.size, err = ReadBatchLine(rd)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
b.gotSize = true
return b.size
}
type blobReader struct {
rd *bufio.Reader
n int64
cancel func()
}
func (b *blobReader) Read(p []byte) (n int, err error) {
if b.n <= 0 {
return 0, io.EOF
}
if int64(len(p)) > b.n {
p = p[0:b.n]
}
n, err = b.rd.Read(p)
b.n -= int64(n)
return n, err
}
// Close implements io.Closer
func (b *blobReader) Close() error {
if b.rd == nil {
return nil
}
defer b.cancel()
if err := DiscardFull(b.rd, b.n+1); err != nil {
return err
}
b.rd = nil
return nil
}
Improve listing performance by using go-git (#6478) * Use go-git for tree reading and commit info lookup. Signed-off-by: Filip Navara <navara@emclient.com> * Use TreeEntry.IsRegular() instead of ObjectType that was removed. Signed-off-by: Filip Navara <navara@emclient.com> * Use the treePath to optimize commit info search. Signed-off-by: Filip Navara <navara@emclient.com> * Extract the latest commit at treePath along with the other commits. Signed-off-by: Filip Navara <navara@emclient.com> * Fix listing commit info for a directory that was created in one commit and never modified after. Signed-off-by: Filip Navara <navara@emclient.com> * Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit). Signed-off-by: Filip Navara <navara@emclient.com> * Use go-git for reading blobs. Signed-off-by: Filip Navara <navara@emclient.com> * Make SHA1 type alias for plumbing.Hash in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Make Signature type alias for object.Signature in go-git. Signed-off-by: Filip Navara <navara@emclient.com> * Fix GetCommitsInfo for repository with only one commit. Signed-off-by: Filip Navara <navara@emclient.com> * Fix PGP signature verification. Signed-off-by: Filip Navara <navara@emclient.com> * Fix issues with walking commit graph across merges. Signed-off-by: Filip Navara <navara@emclient.com> * Fix typo in condition. Signed-off-by: Filip Navara <navara@emclient.com> * Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes). Signed-off-by: Filip Navara <navara@emclient.com> * Fix lising submodules. Signed-off-by: Filip Navara <navara@emclient.com> * Fix build Signed-off-by: Filip Navara <navara@emclient.com> * Add back commit cache because of name-rev Signed-off-by: Filip Navara <navara@emclient.com> * Fix tests Signed-off-by: Filip Navara <navara@emclient.com> * Fix code style * Fix spelling * Address PR feedback Signed-off-by: Filip Navara <navara@emclient.com> * Update vendor module list Signed-off-by: Filip Navara <navara@emclient.com> * Fix getting trees by commit id Signed-off-by: Filip Navara <navara@emclient.com> * Fix remaining unit test failures * Fix GetTreeBySHA * Avoid running `git name-rev` if not necessary Signed-off-by: Filip Navara <navara@emclient.com> * Move Branch code to git module * Clean up GPG signature verification and fix it for tagged commits * Address PR feedback (import formatting, copyright headers) * Make blob lookup by SHA working * Update tests to use public API * Allow getting content from any type of object through the blob interface * Change test to actually expect the object content that is in the GIT repository * Change one more test to actually expect the object content that is in the GIT repository * Add comments
2019-04-19 12:17:27 +00:00
// Name returns name of the tree entry this blob object was created from (or empty string)
func (b *Blob) Name() string {
return b.name
}
// GetBlobContent Gets the limited content of the blob as raw text
func (b *Blob) GetBlobContent(limit int64) (string, error) {
if limit <= 0 {
return "", nil
}
dataRc, err := b.DataAsync()
if err != nil {
return "", err
}
defer dataRc.Close()
buf, err := util.ReadWithLimit(dataRc, int(limit))
return string(buf), err
}
// GetBlobLineCount gets line count of the blob
func (b *Blob) GetBlobLineCount() (int, error) {
reader, err := b.DataAsync()
if err != nil {
return 0, err
}
defer reader.Close()
buf := make([]byte, 32*1024)
count := 1
lineSep := []byte{'\n'}
c, err := reader.Read(buf)
if c == 0 && err == io.EOF {
return 0, nil
}
for {
count += bytes.Count(buf[:c], lineSep)
switch {
case err == io.EOF:
return count, nil
case err != nil:
return count, err
}
c, err = reader.Read(buf)
}
}
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string
func (b *Blob) GetBlobContentBase64() (string, error) {
dataRc, err := b.DataAsync()
if err != nil {
return "", err
}
defer dataRc.Close()
pr, pw := io.Pipe()
encoder := base64.NewEncoder(base64.StdEncoding, pw)
go func() {
_, err := io.Copy(encoder, dataRc)
2019-06-12 19:41:28 +00:00
_ = encoder.Close()
if err != nil {
2019-06-12 19:41:28 +00:00
_ = pw.CloseWithError(err)
} else {
2019-06-12 19:41:28 +00:00
_ = pw.Close()
}
}()
out, err := io.ReadAll(pr)
if err != nil {
return "", err
}
return string(out), nil
}
// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
r, err := b.DataAsync()
if err != nil {
return typesniffer.SniffedType{}, err
}
defer r.Close()
return typesniffer.DetectContentTypeFromReader(r)
}
// GetBlob finds the blob object in the repository.
func (repo *Repository) GetBlob(idStr string) (*Blob, error) {
id, err := NewIDFromString(idStr)
if err != nil {
return nil, err
}
if id.IsZero() {
return nil, ErrNotExist{id.String(), ""}
}
return &Blob{
ID: id,
repo: repo,
}, nil
}