forgejo/services/pull/lfs.go
KN4CK3R c03e488e14
Add LFS Migration and Mirror (#14726)
* Implemented LFS client.

* Implemented scanning for pointer files.

* Implemented downloading of lfs files.

* Moved model-dependent code into services.

* Removed models dependency. Added TryReadPointerFromBuffer.

* Migrated code from service to module.

* Centralised storage creation.

* Removed dependency from models.

* Moved ContentStore into modules.

* Share structs between server and client.

* Moved method to services.

* Implemented lfs download on clone.

* Implemented LFS sync on clone and mirror update.

* Added form fields.

* Updated templates.

* Fixed condition.

* Use alternate endpoint.

* Added missing methods.

* Fixed typo and make linter happy.

* Detached pointer parser from gogit dependency.

* Fixed TestGetLFSRange test.

* Added context to support cancellation.

* Use ReadFull to probably read more data.

* Removed duplicated code from models.

* Moved scan implementation into pointer_scanner_nogogit.

* Changed method name.

* Added comments.

* Added more/specific log/error messages.

* Embedded lfs.Pointer into models.LFSMetaObject.

* Moved code from models to module.

* Moved code from models to module.

* Moved code from models to module.

* Reduced pointer usage.

* Embedded type.

* Use promoted fields.

* Fixed unexpected eof.

* Added unit tests.

* Implemented migration of local file paths.

* Show an error on invalid LFS endpoints.

* Hide settings if not used.

* Added LFS info to mirror struct.

* Fixed comment.

* Check LFS endpoint.

* Manage LFS settings from mirror page.

* Fixed selector.

* Adjusted selector.

* Added more tests.

* Added local filesystem migration test.

* Fixed typo.

* Reset settings.

* Added special windows path handling.

* Added unit test for HTTPClient.

* Added unit test for BasicTransferAdapter.

* Moved into util package.

* Test if LFS endpoint is allowed.

* Added support for git://

* Just use a static placeholder as the displayed url may be invalid.

* Reverted to original code.

* Added "Advanced Settings".

* Updated wording.

* Added discovery info link.

* Implemented suggestion.

* Fixed missing format parameter.

* Added Pointer.IsValid().

* Always remove model on error.

* Added suggestions.

* Use channel instead of array.

* Update routers/repo/migrate.go

* fmt

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: zeripath <art27@cantab.net>
2021-04-08 18:25:57 -04:00

136 lines
4.4 KiB
Go

// Copyright 2019 The Gitea Authors.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package pull
import (
"bufio"
"io"
"strconv"
"sync"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git/pipeline"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
)
// LFSPush walks the objects listed by rev-list between mergeHeadSHA and
// mergeBaseSHA in the repository at tmpBasePath, picks out the small blobs and
// feeds their contents to createLFSMetaObjectsFromCatFileBatch, which
// associates any LFS pointers it finds with the base repository of pr.
//
// The work is done by six goroutines chained together with in-memory pipes.
// LFSPush blocks until all of them have finished. It returns the error found
// on the error channel handed to pipeline.RevListObjects, or nil when that
// channel is empty.
func LFSPush(tmpBasePath, mergeHeadSHA, mergeBaseSHA string, pr *models.PullRequest) error {
	// This is a hand-rolled "git lfs push", equivalent to:
	//
	//   git rev-list --objects --filter=blob:limit=1k HEAD --not base
	//     -> git cat-file --batch-check   (possibly unnecessary)
	//     -> keep only blobs of at most 1k
	//     -> git cat-file --batch
	//     -> test every blob for being an LFS pointer
	//     -> add the pointers that are in LFS to the base repo

	// One pipe per hand-over between two neighbouring stages.
	objectsR, objectsW := io.Pipe()
	blobSHAsR, blobSHAsW := io.Pipe()
	checkedR, checkedW := io.Pipe()
	smallSHAsR, smallSHAsW := io.Pipe()
	contentsR, contentsW := io.Pipe()

	failures := make(chan error, 1)

	var stages sync.WaitGroup
	stages.Add(6)

	// The stages are started last-to-first.

	// Stage 6: read the cat-file --batch output and register every blob that
	// is a pointer into the head repo's LFS store with the base repo.
	go createLFSMetaObjectsFromCatFileBatch(contentsR, &stages, pr)

	// Stage 5: batch read the blobs whose shas were selected.
	go pipeline.CatFileBatch(smallSHAsR, contentsW, &stages, tmpBasePath)

	// Stage 4: keep only the blobs that are <=1k.
	go pipeline.BlobsLessThan1024FromCatFileBatchCheck(checkedR, smallSHAsW, &stages)

	// Stage 3: run batch-check over the objects coming from rev-list.
	go pipeline.CatFileBatchCheck(blobSHAsR, checkedW, &stages, tmpBasePath)

	// Stage 2: drop objects without a name; those are commits or trees.
	go pipeline.BlobsFromRevListObjects(objectsR, blobSHAsW, &stages)

	// Stage 1: rev-list the objects from mergeHead to mergeBase.
	go pipeline.RevListObjects(objectsW, &stages, tmpBasePath, mergeHeadSHA, mergeBaseSHA, failures)

	stages.Wait()

	// Non-blocking receive: every stage has finished, so an empty channel
	// means nothing was reported.
	select {
	case err, ok := <-failures:
		if ok {
			return err
		}
		return nil
	default:
		return nil
	}
}
// createLFSMetaObjectsFromCatFileBatch reads `git cat-file --batch` output from
// catFileBatchReader and, for every blob that parses as a valid LFS pointer,
// whose content exists in the LFS content store and whose oid is registered
// for pr.HeadRepo, creates an LFS meta object for pr.BaseRepoID.
//
// It calls wg.Done and closes catFileBatchReader when it returns. Any read,
// parse or database failure ends the loop and is passed to
// catFileBatchReader.CloseWithError; this includes the io.EOF seen at the end
// of the stream. Blobs that are not pointers, are not in the content store, or
// are not associated with the head repo are skipped.
func createLFSMetaObjectsFromCatFileBatch(catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pr *models.PullRequest) {
	defer wg.Done()
	defer catFileBatchReader.Close()

	// abort closes the read side with err so the stage writing into the pipe
	// sees the failure. CloseWithError always returns nil, so its result is
	// deliberately ignored.
	abort := func(err error) {
		_ = catFileBatchReader.CloseWithError(err)
	}

	contentStore := lfs.NewContentStore()
	bufferedReader := bufio.NewReader(catFileBatchReader)
	// Room for the largest accepted blob plus the one extra byte that is read
	// after every blob body.
	buf := make([]byte, 1025)

	for {
		// Each entry starts with a header line: "<sha> <type> <size>\n".
		// The sha is not needed here.
		if _, err := bufferedReader.ReadString(' '); err != nil {
			abort(err)
			return
		}

		// The object type is not needed either.
		if _, err := bufferedReader.ReadString(' '); err != nil {
			abort(err)
			return
		}

		sizeStr, err := bufferedReader.ReadString('\n')
		if err != nil {
			abort(err)
			return
		}
		// ReadString succeeded, so sizeStr ends in the delimiter; strip it.
		sizeStr = sizeStr[:len(sizeStr)-1]

		size, err := strconv.Atoi(sizeStr)
		if err != nil {
			abort(err)
			return
		}
		// Slicing buf with a negative or too large size would panic and take
		// the whole process down, so report it as a stream error instead.
		if size < 0 || size >= len(buf) {
			abort(&strconv.NumError{Func: "Atoi", Num: sizeStr, Err: strconv.ErrRange})
			return
		}

		// Read the body plus the byte that follows it (cat-file --batch
		// terminates every object with a line feed), then drop that byte.
		pointerBuf := buf[:size+1]
		if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
			abort(err)
			return
		}
		pointerBuf = pointerBuf[:size]

		// Most small blobs are not LFS pointers, so a parse error is expected
		// and only the validity of the result matters.
		pointer, _ := lfs.ReadPointerFromBuffer(pointerBuf)
		if !pointer.IsValid() {
			continue
		}

		exist, err := contentStore.Exists(pointer)
		if err != nil {
			// Still treated as "not present", but no longer silently.
			log.Error("Unable to check whether LFS Oid: %s exists in the LFS store: %v", pointer.Oid, err)
		}
		if !exist {
			continue
		}

		// The content is in the store; make sure the head repo owns it.
		if _, err := pr.HeadRepo.GetLFSMetaObjectByOid(pointer.Oid); err != nil {
			if err == models.ErrLFSObjectNotExist {
				log.Warn("During merge of: %d in %-v, there is a pointer to LFS Oid: %s which although present in the LFS store is not associated with the head repo %-v", pr.Index, pr.BaseRepo, pointer.Oid, pr.HeadRepo)
				continue
			}
			abort(err)
			return
		}

		// The pointer is associated with the head repo and its content is in
		// the LFS store, so associate it with the base repo as well.
		meta := &models.LFSMetaObject{Pointer: pointer}
		meta.RepositoryID = pr.BaseRepoID
		if _, err := models.NewLFSMetaObject(meta); err != nil {
			abort(err)
			return
		}
	}
}