Refactor "Content" for file uploading (#25851)

Before: the concept "Content string" is used everywhere. It has some
problems:

1. Sometimes it means "base64 encoded content", sometimes it means "raw
binary content"
2. It doesn't work with large files, eg: uploading a 1G LFS file would
make Gitea process OOM

This PR does the refactoring: use "ContentReader" / "ContentBase64"
instead of "Content"

This PR is not breaking because the key in API JSON is still "content":
`` ContentBase64 string `json:"content"` ``
This commit is contained in:
wxiaoguang 2023-07-19 02:14:47 +08:00 committed by GitHub
parent 265a28802a
commit 236c645bf1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 103 additions and 80 deletions

View file

@ -26,7 +26,7 @@ type CreateFileOptions struct {
FileOptions FileOptions
// content must be base64 encoded // content must be base64 encoded
// required: true // required: true
Content string `json:"content"` ContentBase64 string `json:"content"`
} }
// Branch returns branch name // Branch returns branch name
@ -54,7 +54,7 @@ type UpdateFileOptions struct {
DeleteFileOptions DeleteFileOptions
// content must be base64 encoded // content must be base64 encoded
// required: true // required: true
Content string `json:"content"` ContentBase64 string `json:"content"`
// from_path (optional) is the path of the original file which will be moved/renamed to the path in the URL // from_path (optional) is the path of the original file which will be moved/renamed to the path in the URL
FromPath string `json:"from_path" binding:"MaxSize(500)"` FromPath string `json:"from_path" binding:"MaxSize(500)"`
} }
@ -74,7 +74,7 @@ type ChangeFileOperation struct {
// required: true // required: true
Path string `json:"path" binding:"Required;MaxSize(500)"` Path string `json:"path" binding:"Required;MaxSize(500)"`
// new or updated file content, must be base64 encoded // new or updated file content, must be base64 encoded
Content string `json:"content"` ContentBase64 string `json:"content"`
// sha is the SHA for the file that already exists, required for update or delete // sha is the SHA for the file that already exists, required for update or delete
SHA string `json:"sha"` SHA string `json:"sha"`
// old path of the file to move // old path of the file to move

View file

@ -408,6 +408,14 @@ func canReadFiles(r *context.Repository) bool {
return r.Permission.CanRead(unit.TypeCode) return r.Permission.CanRead(unit.TypeCode)
} }
func base64Reader(s string) (io.Reader, error) {
b, err := base64.StdEncoding.DecodeString(s)
if err != nil {
return nil, err
}
return bytes.NewReader(b), nil
}
// ChangeFiles handles API call for modifying multiple files // ChangeFiles handles API call for modifying multiple files
func ChangeFiles(ctx *context.APIContext) { func ChangeFiles(ctx *context.APIContext) {
// swagger:operation POST /repos/{owner}/{repo}/contents repository repoChangeFiles // swagger:operation POST /repos/{owner}/{repo}/contents repository repoChangeFiles
@ -449,14 +457,19 @@ func ChangeFiles(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
} }
files := []*files_service.ChangeRepoFile{} var files []*files_service.ChangeRepoFile
for _, file := range apiOpts.Files { for _, file := range apiOpts.Files {
contentReader, err := base64Reader(file.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
changeRepoFile := &files_service.ChangeRepoFile{ changeRepoFile := &files_service.ChangeRepoFile{
Operation: file.Operation, Operation: file.Operation,
TreePath: file.Path, TreePath: file.Path,
FromTreePath: file.FromPath, FromTreePath: file.FromPath,
Content: file.Content, ContentReader: contentReader,
SHA: file.SHA, SHA: file.SHA,
} }
files = append(files, changeRepoFile) files = append(files, changeRepoFile)
} }
@ -544,12 +557,18 @@ func CreateFile(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
} }
contentReader, err := base64Reader(apiOpts.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
opts := &files_service.ChangeRepoFilesOptions{ opts := &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: ctx.Params("*"), TreePath: ctx.Params("*"),
Content: apiOpts.Content, ContentReader: contentReader,
}, },
}, },
Message: apiOpts.Message, Message: apiOpts.Message,
@ -636,14 +655,20 @@ func UpdateFile(ctx *context.APIContext) {
apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch apiOpts.BranchName = ctx.Repo.Repository.DefaultBranch
} }
contentReader, err := base64Reader(apiOpts.ContentBase64)
if err != nil {
ctx.Error(http.StatusUnprocessableEntity, "Invalid base64 content", err)
return
}
opts := &files_service.ChangeRepoFilesOptions{ opts := &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "update", Operation: "update",
Content: apiOpts.Content, ContentReader: contentReader,
SHA: apiOpts.SHA, SHA: apiOpts.SHA,
FromTreePath: apiOpts.FromPath, FromTreePath: apiOpts.FromPath,
TreePath: ctx.Params("*"), TreePath: ctx.Params("*"),
}, },
}, },
Message: apiOpts.Message, Message: apiOpts.Message,
@ -709,14 +734,6 @@ func createOrUpdateFiles(ctx *context.APIContext, opts *files_service.ChangeRepo
} }
} }
for _, file := range opts.Files {
content, err := base64.StdEncoding.DecodeString(file.Content)
if err != nil {
return nil, err
}
file.Content = string(content)
}
return files_service.ChangeRepoFiles(ctx, ctx.Repo.Repository, ctx.Doer, opts) return files_service.ChangeRepoFiles(ctx, ctx.Repo.Repository, ctx.Doer, opts)
} }

View file

@ -284,10 +284,10 @@ func editFilePost(ctx *context.Context, form forms.EditRepoFileForm, isNewFile b
Message: message, Message: message,
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: operation, Operation: operation,
FromTreePath: ctx.Repo.TreePath, FromTreePath: ctx.Repo.TreePath,
TreePath: form.TreePath, TreePath: form.TreePath,
Content: strings.ReplaceAll(form.Content, "\r", ""), ContentReader: strings.NewReader(strings.ReplaceAll(form.Content, "\r", "")),
}, },
}, },
Signoff: form.Signoff, Signoff: form.Signoff,

View file

@ -6,6 +6,7 @@ package files
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"path" "path"
"strings" "strings"
"time" "time"
@ -35,12 +36,12 @@ type CommitDateOptions struct {
} }
type ChangeRepoFile struct { type ChangeRepoFile struct {
Operation string Operation string
TreePath string TreePath string
FromTreePath string FromTreePath string
Content string ContentReader io.Reader
SHA string SHA string
Options *RepoFileOptions Options *RepoFileOptions
} }
// ChangeRepoFilesOptions holds the repository files update options // ChangeRepoFilesOptions holds the repository files update options
@ -387,7 +388,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
} }
} }
treeObjectContent := file.Content treeObjectContentReader := file.ContentReader
var lfsMetaObject *git_model.LFSMetaObject var lfsMetaObject *git_model.LFSMetaObject
if setting.LFS.StartServer && hasOldBranch { if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos // Check there is no way this can return multiple infos
@ -402,17 +403,17 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" { if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" {
// OK so we are supposed to LFS this data! // OK so we are supposed to LFS this data!
pointer, err := lfs.GeneratePointer(strings.NewReader(file.Content)) pointer, err := lfs.GeneratePointer(treeObjectContentReader)
if err != nil { if err != nil {
return err return err
} }
lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID} lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
treeObjectContent = pointer.StringContent() treeObjectContentReader = strings.NewReader(pointer.StringContent())
} }
} }
// Add the object to the database // Add the object to the database
objectHash, err := t.HashObject(strings.NewReader(treeObjectContent)) objectHash, err := t.HashObject(treeObjectContentReader)
if err != nil { if err != nil {
return err return err
} }
@ -439,7 +440,7 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
return err return err
} }
if !exist { if !exist {
if err := contentStore.Put(lfsMetaObject.Pointer, strings.NewReader(file.Content)); err != nil { if err := contentStore.Put(lfsMetaObject.Pointer, file.ContentReader); err != nil {
if _, err2 := git_model.RemoveLFSMetaObjectByOid(ctx, repoID, lfsMetaObject.Oid); err2 != nil { if _, err2 := git_model.RemoveLFSMetaObjectByOid(ctx, repoID, lfsMetaObject.Oid); err2 != nil {
return fmt.Errorf("unable to remove failed inserted LFS object %s: %v (Prev Error: %w)", lfsMetaObject.Oid, err2, err) return fmt.Errorf("unable to remove failed inserted LFS object %s: %v (Prev Error: %w)", lfsMetaObject.Oid, err2, err)
} }

View file

@ -16125,7 +16125,7 @@
"content": { "content": {
"description": "new or updated file content, must be base64 encoded", "description": "new or updated file content, must be base64 encoded",
"type": "string", "type": "string",
"x-go-name": "Content" "x-go-name": "ContentBase64"
}, },
"from_path": { "from_path": {
"description": "old path of the file to move", "description": "old path of the file to move",
@ -16810,7 +16810,7 @@
"content": { "content": {
"description": "content must be base64 encoded", "description": "content must be base64 encoded",
"type": "string", "type": "string",
"x-go-name": "Content" "x-go-name": "ContentBase64"
}, },
"dates": { "dates": {
"$ref": "#/definitions/CommitDateOptions" "$ref": "#/definitions/CommitDateOptions"
@ -21687,7 +21687,7 @@
"content": { "content": {
"description": "content must be base64 encoded", "description": "content must be base64 encoded",
"type": "string", "type": "string",
"x-go-name": "Content" "x-go-name": "ContentBase64"
}, },
"dates": { "dates": {
"$ref": "#/definitions/CommitDateOptions" "$ref": "#/definitions/CommitDateOptions"

View file

@ -5,6 +5,7 @@ package integration
import ( import (
"net/url" "net/url"
"strings"
"testing" "testing"
"time" "time"
@ -64,9 +65,9 @@ func TestPullRequestTargetEvent(t *testing.T) {
addWorkflowToBaseResp, err := files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user2, &files_service.ChangeRepoFilesOptions{ addWorkflowToBaseResp, err := files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user2, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: ".gitea/workflows/pr.yml", TreePath: ".gitea/workflows/pr.yml",
Content: "name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n", ContentReader: strings.NewReader("name: test\non: pull_request_target\njobs:\n test:\n runs-on: ubuntu-latest\n steps:\n - run: echo helloworld\n"),
}, },
}, },
Message: "add workflow", Message: "add workflow",
@ -92,9 +93,9 @@ func TestPullRequestTargetEvent(t *testing.T) {
addFileToForkedResp, err := files_service.ChangeRepoFiles(git.DefaultContext, forkedRepo, user3, &files_service.ChangeRepoFilesOptions{ addFileToForkedResp, err := files_service.ChangeRepoFiles(git.DefaultContext, forkedRepo, user3, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "file_1.txt", TreePath: "file_1.txt",
Content: "file1", ContentReader: strings.NewReader("file1"),
}, },
}, },
Message: "add file1", Message: "add file1",

View file

@ -46,7 +46,7 @@ func getCreateFileOptions() api.CreateFileOptions {
Committer: time.Unix(978307190, 0), Committer: time.Unix(978307190, 0),
}, },
}, },
Content: contentEncoded, ContentBase64: contentEncoded,
} }
} }

View file

@ -4,6 +4,8 @@
package integration package integration
import ( import (
"strings"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
@ -15,9 +17,9 @@ func createFileInBranch(user *user_model.User, repo *repo_model.Repository, tree
opts := &files_service.ChangeRepoFilesOptions{ opts := &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: treePath, TreePath: treePath,
Content: content, ContentReader: strings.NewReader(content),
}, },
}, },
OldBranch: branchName, OldBranch: branchName,

View file

@ -44,7 +44,7 @@ func getUpdateFileOptions() *api.UpdateFileOptions {
}, },
SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885", SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885",
}, },
Content: contentEncoded, ContentBase64: contentEncoded,
} }
} }

View file

@ -44,13 +44,13 @@ func getChangeFilesOptions() *api.ChangeFilesOptions {
}, },
Files: []*api.ChangeFileOperation{ Files: []*api.ChangeFileOperation{
{ {
Operation: "create", Operation: "create",
Content: newContentEncoded, ContentBase64: newContentEncoded,
}, },
{ {
Operation: "update", Operation: "update",
Content: updateContentEncoded, ContentBase64: updateContentEncoded,
SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885", SHA: "103ff9234cefeee5ec5361d22b49fbb04d385885",
}, },
{ {
Operation: "delete", Operation: "delete",

View file

@ -125,7 +125,7 @@ func TestEmptyRepoAddFileByAPI(t *testing.T) {
NewBranchName: "new_branch", NewBranchName: "new_branch",
Message: "init", Message: "init",
}, },
Content: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")), ContentBase64: base64.StdEncoding.EncodeToString([]byte("newly-added-api-file")),
}) })
resp := MakeRequest(t, req, http.StatusCreated) resp := MakeRequest(t, req, http.StatusCreated)

View file

@ -337,7 +337,7 @@ func crudActionCreateFile(t *testing.T, ctx APITestContext, user *user_model.Use
Email: user.Email, Email: user.Email,
}, },
}, },
Content: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))), ContentBase64: base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("This is new text for %s", path))),
}, callback...) }, callback...)
} }

View file

@ -370,9 +370,9 @@ func TestConflictChecking(t *testing.T) {
_, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{ _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "important_file", TreePath: "important_file",
Content: "Just a non-important file", ContentReader: strings.NewReader("Just a non-important file"),
}, },
}, },
Message: "Add a important file", Message: "Add a important file",
@ -385,9 +385,9 @@ func TestConflictChecking(t *testing.T) {
_, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{ _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, user, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "important_file", TreePath: "important_file",
Content: "Not the same content :P", ContentReader: strings.NewReader("Not the same content :P"),
}, },
}, },
Message: "Add a important file", Message: "Add a important file",

View file

@ -6,6 +6,7 @@ package integration
import ( import (
"net/http" "net/http"
"net/url" "net/url"
"strings"
"testing" "testing"
"time" "time"
@ -104,9 +105,9 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod
_, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, actor, &files_service.ChangeRepoFilesOptions{ _, err = files_service.ChangeRepoFiles(git.DefaultContext, baseRepo, actor, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "File_A", TreePath: "File_A",
Content: "File A", ContentReader: strings.NewReader("File A"),
}, },
}, },
Message: "Add File A", Message: "Add File A",
@ -131,9 +132,9 @@ func createOutdatedPR(t *testing.T, actor, forkOrg *user_model.User) *issues_mod
_, err = files_service.ChangeRepoFiles(git.DefaultContext, headRepo, actor, &files_service.ChangeRepoFilesOptions{ _, err = files_service.ChangeRepoFiles(git.DefaultContext, headRepo, actor, &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "File_B", TreePath: "File_B",
Content: "File B", ContentReader: strings.NewReader("File B"),
}, },
}, },
Message: "Add File on PR branch", Message: "Add File on PR branch",

View file

@ -6,6 +6,7 @@ package integration
import ( import (
"net/url" "net/url"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
"time" "time"
@ -24,9 +25,9 @@ func getCreateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang
return &files_service.ChangeRepoFilesOptions{ return &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "create", Operation: "create",
TreePath: "new/file.txt", TreePath: "new/file.txt",
Content: "This is a NEW file", ContentReader: strings.NewReader("This is a NEW file"),
}, },
}, },
OldBranch: repo.DefaultBranch, OldBranch: repo.DefaultBranch,
@ -41,10 +42,10 @@ func getUpdateRepoFilesOptions(repo *repo_model.Repository) *files_service.Chang
return &files_service.ChangeRepoFilesOptions{ return &files_service.ChangeRepoFilesOptions{
Files: []*files_service.ChangeRepoFile{ Files: []*files_service.ChangeRepoFile{
{ {
Operation: "update", Operation: "update",
TreePath: "README.md", TreePath: "README.md",
SHA: "4b4851ad51df6a7d9f25c979345979eaeb5b349f", SHA: "4b4851ad51df6a7d9f25c979345979eaeb5b349f",
Content: "This is UPDATED content for the README file", ContentReader: strings.NewReader("This is UPDATED content for the README file"),
}, },
}, },
OldBranch: repo.DefaultBranch, OldBranch: repo.DefaultBranch,