[feature] Cleanup unattached local media (#680)

* add localUnattached db function

* add parseOlderThan util function

* add pruneunusedlocalattachments to media manager

* add unusedlocal pruning to schedule + admin call

* set number of days to keep as a const

* fix test
This commit is contained in:
tobi 2022-06-30 12:22:10 +02:00 committed by GitHub
parent 07620acc0e
commit 9e7d022a06
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 253 additions and 10 deletions

View file

@ -98,3 +98,29 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit
return attachments, nil return attachments, nil
} }
// GetLocalUnattachedOlderThan selects media attachments that are cached, are
// neither avatar nor header, have no remote URL, have no status ID, and were
// created before olderThan.
//
// If maxID is not empty, only attachments with an ID lower than maxID are
// selected. If limit is not 0, at most limit attachments are returned.
// Results are ordered by ID descending, so that the ID of the last returned
// attachment can be passed as maxID to fetch the next page without skipping rows.
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	attachments := []*gtsmodel.MediaAttachment{}

	q := m.newMediaQ(&attachments).
		Where("media_attachment.cached = true").
		Where("media_attachment.avatar = false").
		Where("media_attachment.header = false").
		Where("media_attachment.created_at < ?", olderThan).
		Where("media_attachment.remote_url IS NULL").
		Where("media_attachment.status_id IS NULL").
		// without a defined order, paging with 'id < maxID' is not reliable:
		// the last row of a page would not necessarily carry the lowest ID
		Order("media_attachment.id DESC")

	if maxID != "" {
		q = q.Where("media_attachment.id < ?", maxID)
	}

	if limit != 0 {
		q = q.Limit(limit)
	}

	if err := q.Scan(ctx); err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return attachments, nil
}

View file

@ -24,6 +24,7 @@ import (
"time" "time"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/testrig"
) )
type MediaTestSuite struct { type MediaTestSuite struct {
@ -51,6 +52,14 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
suite.Len(attachments, 2) suite.Len(attachments, 2)
} }
// TestGetLocalUnattachedOlderThan checks that exactly one attachment is
// returned when querying with a cutoff time in the year 2090.
func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
	var (
		ctx    = context.Background()
		cutoff = testrig.TimeMustParse("2090-06-04T13:12:00Z")
	)

	found, err := suite.db.GetLocalUnattachedOlderThan(ctx, cutoff, "", 10)
	suite.NoError(err)
	suite.Len(found, 1)
}
func TestMediaTestSuite(t *testing.T) { func TestMediaTestSuite(t *testing.T) {
suite.Run(t, new(MediaTestSuite)) suite.Run(t, new(MediaTestSuite))
} }

View file

@ -38,4 +38,9 @@ type Media interface {
// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers // GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers
// and avis may be in use or not; the caller should check this if it's important. // and avis may be in use or not; the caller should check this if it's important.
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
// GetLocalUnattachedOlderThan fetches limit n local media attachments, older than the given time, which
// aren't headers or avatars, and aren't attached to a status. In other words, attachments which were uploaded
// but never used for whatever reason, or attachments that were attached to a status which was subsequently
// deleted.
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
} }

View file

@ -34,6 +34,10 @@ import (
// selectPruneLimit is the amount of media entries to select at a time from the db when pruning // selectPruneLimit is the amount of media entries to select at a time from the db when pruning
const selectPruneLimit = 20 const selectPruneLimit = 20
// UnusedLocalAttachmentCacheDays is the number of days to keep local media in storage
// if it is not attached to a status, or was never attached to a status. It is the age
// threshold used by PruneUnusedLocalAttachments.
const UnusedLocalAttachmentCacheDays = 3
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs. // Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface { type Manager interface {
// ProcessMedia begins the process of decoding and storing the given data as an attachment. // ProcessMedia begins the process of decoding and storing the given data as an attachment.
@ -75,11 +79,16 @@ type Manager interface {
// //
// The returned int is the amount of media that was pruned by this function. // The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
// PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended // PruneAllMeta prunes unused/out of date headers and avatars cached on this instance.
// to include things like attachments that were uploaded on this server but left unused, etc.
// //
// The returned int is the amount of media that was pruned by this function. // The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error) PruneAllMeta(ctx context.Context) (int, error)
// PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
PruneUnusedLocalAttachments(ctx context.Context) (int, error)
// Stop stops the underlying worker pool of the manager. It should be called // Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs. // when closing GoToSocial in order to cleanly finish any in-progress jobs.
@ -210,6 +219,19 @@ func scheduleCleanupJobs(m *manager) error {
return fmt.Errorf("error starting media manager meta cleanup job: %s", err) return fmt.Errorf("error starting media manager meta cleanup job: %s", err)
} }
if _, err := c.AddFunc("@midnight", func() {
begin := time.Now()
pruned, err := m.PruneUnusedLocalAttachments(pruneCtx)
if err != nil {
logrus.Errorf("media manager: error pruning unused local attachments: %s", err)
return
}
logrus.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin))
}); err != nil {
pruneCancel()
return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err)
}
// start remote cache cleanup cronjob if configured // start remote cache cleanup cronjob if configured
if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 { if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 {
if _, err := c.AddFunc("@midnight", func() { if _, err := c.AddFunc("@midnight", func() {

View file

@ -21,7 +21,6 @@ package media
import ( import (
"context" "context"
"fmt" "fmt"
"time"
"codeberg.org/gruf/go-store/storage" "codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@ -32,15 +31,10 @@ import (
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) { func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int var totalPruned int
// convert days into a duration string olderThan, err := parseOlderThan(olderThanDays)
olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24)
// parse the duration string into a duration
olderThanHours, err := time.ParseDuration(olderThanHoursString)
if err != nil { if err != nil {
return totalPruned, fmt.Errorf("PruneAllRemote: %d", err) return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err)
} }
// 'subtract' that from the time now to give our threshold
olderThan := time.Now().Add(-olderThanHours)
logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan) logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan)
// select 20 attachments at a time and prune them // select 20 attachments at a time and prune them

View file

@ -0,0 +1,86 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"fmt"
"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
// PruneUnusedLocalAttachments removes local attachments that are older than
// UnusedLocalAttachmentCacheDays days and are returned by the database's
// GetLocalUnattachedOlderThan query. Attachments are fetched in pages of
// selectPruneLimit entries, and each one is removed via pruneOneLocal.
//
// The returned int is the number of attachments pruned; it is also returned
// (as the count reached so far) alongside any error that aborts the run.
func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) {
	var totalPruned int

	olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays)
	if err != nil {
		return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err)
	}
	logrus.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan)

	// maxID is the exclusive upper ID bound for the next page; empty means no bound
	var maxID string

	for {
		attachments, err := m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit)
		if err != nil {
			if err == db.ErrNoEntries {
				// nothing (more) to prune, this is not a real error
				break
			}
			return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error getting unused local attachments: %w", err)
		}

		l := len(attachments)
		if l == 0 {
			break
		}

		// log the bound this page was actually selected with, before advancing it
		logrus.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", l, olderThan, maxID)

		for _, attachment := range attachments {
			if err := m.pruneOneLocal(ctx, attachment); err != nil {
				return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error pruning attachment %s: %w", attachment.ID, err)
			}
			totalPruned++
		}

		// use the id of the last attachment in the slice as the next 'maxID' value
		maxID = attachments[l-1].ID
	}

	logrus.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", totalPruned)
	return totalPruned, nil
}
// pruneOneLocal removes a single attachment: first its full size file and its
// thumbnail are deleted from storage (empty paths are skipped, and a
// storage.ErrNotFound from the delete is tolerated), then the attachment
// itself is deleted from the database by ID.
func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	// full size file first, thumbnail second
	storedPaths := []string{attachment.File.Path, attachment.Thumbnail.Path}

	for _, storedPath := range storedPaths {
		if storedPath == "" {
			continue
		}

		logrus.Tracef("pruneOneLocal: deleting %s", storedPath)
		err := m.storage.Delete(storedPath)
		if err != nil && err != storage.ErrNotFound {
			return err
		}
	}

	// with the files gone, remove the database entry as well
	return m.db.DeleteByID(ctx, attachment.ID, attachment)
}

View file

@ -0,0 +1,75 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
)
// PruneUnusedLocalTestSuite groups the tests for the media manager's
// PruneUnusedLocalAttachments function. It embeds MediaStandardTestSuite,
// whose db, storage, manager and testAttachments fields the tests use.
type PruneUnusedLocalTestSuite struct {
	MediaStandardTestSuite
}
// TestPruneUnusedLocal prunes once and checks that one attachment was pruned,
// and that the unattached test attachment can no longer be found in the database.
func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() {
	unattached := suite.testAttachments["local_account_1_unattached_1"]
	suite.True(unattached.Cached)

	prunedCount, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
	suite.NoError(err)
	suite.Equal(1, prunedCount)

	// the db entry should be gone after pruning
	_, err = suite.db.GetAttachmentByID(context.Background(), unattached.ID)
	suite.ErrorIs(err, db.ErrNoEntries)
}
// TestPruneRemoteTwice runs the unused local attachment prune two times in a
// row: the first run is expected to prune one entry, the second run none.
func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() {
	firstRun, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
	suite.NoError(err)
	suite.Equal(1, firstRun)

	// nothing should be left over for the second run to prune
	secondRun, err := suite.manager.PruneUnusedLocalAttachments(context.Background())
	suite.NoError(err)
	suite.Equal(0, secondRun)
}
// TestPruneOneNonExistent deletes the stored file of the unattached test
// attachment up front, then checks that pruning still succeeds and counts
// one pruned entry even though the file is already missing from storage.
func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() {
	ctx := context.Background()
	unattached := suite.testAttachments["local_account_1_unattached_1"]

	// remove the attachment's file from storage, leaving the db entry in place
	dbAttachment, err := suite.db.GetAttachmentByID(ctx, unattached.ID)
	suite.NoError(err)
	suite.True(dbAttachment.Cached)
	err = suite.storage.Delete(dbAttachment.File.Path)
	suite.NoError(err)

	// prune with a db entry present but no file behind it
	prunedCount, err := suite.manager.PruneUnusedLocalAttachments(ctx)
	suite.NoError(err)
	suite.Equal(1, prunedCount)
}
func TestPruneUnusedLocalTestSuite(t *testing.T) {
suite.Run(t, &PruneUnusedLocalTestSuite{})
}

View file

@ -21,6 +21,7 @@ package media
import ( import (
"errors" "errors"
"fmt" "fmt"
"time"
"github.com/h2non/filetype" "github.com/h2non/filetype"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@ -128,3 +129,19 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) { func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
logrus.Error("media manager cron logger: ", err, msg, keysAndValues) logrus.Error("media manager cron logger: ", err, msg, keysAndValues)
} }
// parseOlderThan returns the point in time that lies olderThanDays days
// before now, counting each day as exactly 24 hours.
//
// An error and the zero time.Time are returned if olderThanDays is negative
// (which would put the threshold in the future), or if it is too large to be
// expressed as a time.Duration.
func parseOlderThan(olderThanDays int) (time.Time, error) {
	const day = 24 * time.Hour

	// the largest number of whole days that fits into a
	// time.Duration, which is a count of int64 nanoseconds
	const maxDays = int64((1<<63 - 1) / day)

	if olderThanDays < 0 {
		return time.Time{}, fmt.Errorf("olderThanDays must not be negative, got %d", olderThanDays)
	}

	if int64(olderThanDays) > maxDays {
		return time.Time{}, fmt.Errorf("olderThanDays %d is too large, maximum is %d", olderThanDays, maxDays)
	}

	// 'subtract' the duration from the time now to give our threshold
	return time.Now().Add(-time.Duration(olderThanDays) * day), nil
}

View file

@ -41,6 +41,15 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
} }
}() }()
go func() {
pruned, err := p.mediaManager.PruneUnusedLocalAttachments(ctx)
if err != nil {
logrus.Errorf("MediaPrune: error pruning unused local cache: %s", err)
} else {
logrus.Infof("MediaPrune: pruned %d unused local cache entries", pruned)
}
}()
go func() { go func() {
pruned, err := p.mediaManager.PruneAllMeta(ctx) pruned, err := p.mediaManager.PruneAllMeta(ctx)
if err != nil { if err != nil {