diff --git a/models/git/TestIterateRepositoryIDsWithLFSMetaObjects/lfs_meta_object.yaml b/models/git/TestIterateRepositoryIDsWithLFSMetaObjects/lfs_meta_object.yaml new file mode 100644 index 0000000000..fdfa66a216 --- /dev/null +++ b/models/git/TestIterateRepositoryIDsWithLFSMetaObjects/lfs_meta_object.yaml @@ -0,0 +1,7 @@ +- + + id: 1000 + oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82 + size: 25 + repository_id: 1 + created_unix: 1712309123 diff --git a/models/git/lfs.go b/models/git/lfs.go index 837dc9fd31..44b741c4c8 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -337,58 +337,47 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) { func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error { batchSize := setting.Database.IterateBufferSize sess := db.GetEngine(ctx) - id := int64(0) + var start int type RepositoryCount struct { RepositoryID int64 Count int64 } for { counts := make([]*RepositoryCount, 0, batchSize) - sess.Select("repository_id, COUNT(id) AS count"). + if err := sess.Select("repository_id, COUNT(id) AS count"). Table("lfs_meta_object"). - Where("repository_id > ?", id). GroupBy("repository_id"). - OrderBy("repository_id ASC") - - if err := sess.Limit(batchSize, 0).Find(&counts); err != nil { + OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil { return err } if len(counts) == 0 { return nil } + start += len(counts) for _, count := range counts { if err := f(ctx, count.RepositoryID, count.Count); err != nil { return err } } - id = counts[len(counts)-1].RepositoryID } } // IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo type IterateLFSMetaObjectsForRepoOptions struct { - OlderThan timeutil.TimeStamp - UpdatedLessRecentlyThan timeutil.TimeStamp - OrderByUpdated bool - LoopFunctionAlwaysUpdates bool + OlderThan timeutil.TimeStamp + UpdatedLessRecentlyThan timeutil.TimeStamp } // IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo -func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error { - var start int +func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error { batchSize := setting.Database.IterateBufferSize engine := db.GetEngine(ctx) - type CountLFSMetaObject struct { - Count int64 - LFSMetaObject `xorm:"extends"` - } - id := int64(0) for { - beans := make([]*CountLFSMetaObject, 0, batchSize) - sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). + beans := make([]*LFSMetaObject, 0, batchSize) + sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*"). Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid"). Where("`lfs_meta_object`.repository_id = ?", repoID) if !opts.OlderThan.IsZero() { @@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont if !opts.UpdatedLessRecentlyThan.IsZero() { sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan) } - sess.GroupBy("`lfs_meta_object`.id") - if opts.OrderByUpdated { - sess.OrderBy("`lfs_meta_object`.updated_unix ASC") - } else { - sess.And("`lfs_meta_object`.id > ?", id) - sess.OrderBy("`lfs_meta_object`.id ASC") - } - if err := sess.Limit(batchSize, start).Find(&beans); err != nil { + sess.GroupBy("`lfs_meta_object`.id"). + And("`lfs_meta_object`.id > ?", id). + OrderBy("`lfs_meta_object`.id ASC") + + if err := sess.Limit(batchSize, 0).Find(&beans); err != nil { return err } if len(beans) == 0 { return nil } - if !opts.LoopFunctionAlwaysUpdates { - start += len(beans) - } for _, bean := range beans { - if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil { + if err := f(ctx, bean); err != nil { return err } } diff --git a/models/git/lfs_test.go b/models/git/lfs_test.go new file mode 100644 index 0000000000..565b2e9303 --- /dev/null +++ b/models/git/lfs_test.go @@ -0,0 +1,101 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "path/filepath" + "testing" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + + "github.com/stretchr/testify/assert" +) + +func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) { + defer unittest.OverrideFixtures( + unittest.FixturesOptions{ + Dir: filepath.Join(setting.AppWorkPath, "models/fixtures/"), + Base: setting.AppWorkPath, + Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"}, + }, + )() + assert.NoError(t, unittest.PrepareTestDatabase()) + + type repocount struct { + repoid int64 + count int64 + } + expected := []repocount{{1, 1}, {54, 4}} + + t.Run("Normal batch size", func(t *testing.T) { + defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)() + cases := []repocount{} + + err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error { + cases = append(cases, repocount{repoID, count}) + return nil + }) + assert.NoError(t, err) + assert.EqualValues(t, expected, cases) + }) + + t.Run("Low batch size", func(t *testing.T) { + defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)() + cases := []repocount{} + + err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error { + cases = append(cases, repocount{repoID, count}) + return nil + }) + assert.NoError(t, err) + assert.EqualValues(t, expected, cases) + }) +} + +func TestIterateLFSMetaObjectsForRepo(t *testing.T) { + assert.NoError(t, unittest.PrepareTestDatabase()) + + expectedIDs := []int64{1, 2, 3, 4} + + t.Run("Normal batch size", func(t *testing.T) { + defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)() + actualIDs := []int64{} + + err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error { + actualIDs = append(actualIDs, lo.ID) + return nil + }, &IterateLFSMetaObjectsForRepoOptions{}) + assert.NoError(t, err) + assert.EqualValues(t, expectedIDs, actualIDs) + }) + + t.Run("Low batch size", func(t *testing.T) { + defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)() + actualIDs := []int64{} + + err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error { + actualIDs = append(actualIDs, lo.ID) + return nil + }, &IterateLFSMetaObjectsForRepoOptions{}) + assert.NoError(t, err) + assert.EqualValues(t, expectedIDs, actualIDs) + + t.Run("Batch handles updates", func(t *testing.T) { + actualIDs := []int64{} + + err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error { + actualIDs = append(actualIDs, lo.ID) + _, err := db.DeleteByID[LFSMetaObject](ctx, lo.ID) + assert.NoError(t, err) + return nil + }, &IterateLFSMetaObjectsForRepoOptions{}) + assert.NoError(t, err) + assert.EqualValues(t, expectedIDs, actualIDs) + }) + }) +} diff --git a/services/doctor/lfs.go b/services/doctor/lfs.go index 5f110b8f97..8531b7bbe8 100644 --- a/services/doctor/lfs.go +++ b/services/doctor/lfs.go @@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool OlderThan: time.Now().Add(-24 * time.Hour * 7), // We don't set the UpdatedLessRecentlyThan because we want to do a full GC }); err != nil { + logger.Error("Couldn't garabage collect LFS objects: %v", err) return err } diff --git a/services/repository/lfs.go b/services/repository/lfs.go index 4d48881b87..b49a4f71ea 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -5,7 +5,6 @@ package repository import ( "context" - "errors" "fmt" "time" @@ -21,12 +20,10 @@ import ( // GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function type GarbageCollectLFSMetaObjectsOptions struct { - LogDetail func(format string, v ...any) - AutoFix bool - OlderThan time.Time - UpdatedLessRecentlyThan time.Time - NumberToCheckPerRepo int64 - ProportionToCheckPerRepo float64 + LogDetail func(format string, v ...any) + AutoFix bool + OlderThan time.Time + UpdatedLessRecentlyThan time.Time } // GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories @@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet return err } - if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 { - opts.NumberToCheckPerRepo = newMinimum - } return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts) }) } @@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R defer gitRepo.Close() store := lfs.NewContentStore() - errStop := errors.New("STOPERR") objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) - err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { - if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo { - return errStop - } + err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error { total++ pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent())) @@ -123,16 +113,10 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R // // It is likely that a week is potentially excessive but it should definitely be enough that any // unassociated LFS object is genuinely unassociated. - OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()), - UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()), - OrderByUpdated: true, - LoopFunctionAlwaysUpdates: true, + OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()), + UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()), }) - - if err == errStop { - opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo) - return nil - } else if err != nil { + if err != nil { return err } return nil