diff --git a/cmd/gotosocial/action/admin/media/list.go b/cmd/gotosocial/action/admin/media/list.go
new file mode 100644
index 000000000..e66019ecc
--- /dev/null
+++ b/cmd/gotosocial/action/admin/media/list.go
@@ -0,0 +1,194 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package media
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"os"
+	"path"
+
+	"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
+	"github.com/superseriousbusiness/gotosocial/internal/config"
+	"github.com/superseriousbusiness/gotosocial/internal/db"
+	"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+	"github.com/superseriousbusiness/gotosocial/internal/log"
+	"github.com/superseriousbusiness/gotosocial/internal/state"
+)
+
+// list bundles what the media listing commands need: a handle on the
+// database, the state owning the caches and workers, the paging cursor
+// and page size used to walk the attachments, and the buffered writer
+// the resulting entries are printed to.
+type list struct {
+	dbService db.DB
+	state     *state.State
+	maxID     string
+	limit     int
+	out       *bufio.Writer
+}
+
+// GetAllMediaPaths pages through the media attachments in the database,
+// starting at l.maxID and fetching l.limit entries per query, and passes
+// each of them to filter. Every non-empty string filter returns is added
+// to the result, so filter both selects attachments (return "" to skip
+// one) and decides what gets listed for them.
+//
+// An error is returned if a page can't be retrieved from the database.
+func (l *list) GetAllMediaPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {
+	res := make([]string, 0, 100)
+
+	// Keep the paging cursor local instead of writing it back to l.maxID,
+	// so calling this method again starts from the same point rather than
+	// from wherever the previous call ended.
+	maxID := l.maxID
+
+	for {
+		attachments, err := l.dbService.GetAttachments(ctx, maxID, l.limit)
+		if err != nil {
+			return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)
+		}
+
+		for _, a := range attachments {
+			if v := filter(a); v != "" {
+				res = append(res, v)
+			}
+		}
+
+		// If we got fewer results than our limit, we've reached the
+		// last page to retrieve and we can break the loop. If the
+		// last batch happens to contain exactly the same amount of
+		// items as the limit we'll end up doing one extra query.
+		// An empty batch always ends the loop, whatever the limit,
+		// as it has no last item to take the next cursor from.
+		if len(attachments) == 0 || len(attachments) < l.limit {
+			break
+		}
+
+		// Grab the last ID from the batch and use it as the maxID
+		// in the next iteration so we don't get items we've
+		// already seen.
+		maxID = attachments[len(attachments)-1].ID
+	}
+
+	return res, nil
+}
+
+// setupList initializes and starts the caches and workers, opens the
+// database connection, and returns a list that fetches attachments 200
+// at a time and prints to a buffered stdout.
+func setupList(ctx context.Context) (*list, error) {
+	var st state.State
+
+	st.Caches.Init()
+	st.Caches.Start()
+
+	st.Workers.Start()
+
+	dbService, err := bundb.NewBunDBService(ctx, &st)
+	if err != nil {
+		return nil, fmt.Errorf("error creating dbservice: %w", err)
+	}
+	st.DB = dbService
+
+	return &list{
+		dbService: dbService,
+		state:     &st,
+		limit:     200,
+		maxID:     "",
+		out:       bufio.NewWriter(os.Stdout),
+	}, nil
+}
+
+// shutdown stops the database connection, the workers and the caches.
+// The error it returns is the one from stopping the database, if any.
+func (l *list) shutdown(ctx context.Context) error {
+	// Best effort only: on the success path writeLines has already
+	// flushed the output and returned any write error to the caller.
+	_ = l.out.Flush()
+
+	err := l.dbService.Stop(ctx)
+	l.state.Workers.Stop()
+	l.state.Caches.Stop()
+
+	return err
+}
+
+// writeLines prints each entry on a line of its own and flushes the
+// output, returning the first error encountered so that a failed or
+// truncated listing isn't mistaken for a complete one.
+func (l *list) writeLines(entries []string) error {
+	for _, entry := range entries {
+		if _, err := l.out.WriteString(entry + "\n"); err != nil {
+			return fmt.Errorf("error writing output: %w", err)
+		}
+	}
+
+	if err := l.out.Flush(); err != nil {
+		return fmt.Errorf("error flushing output: %w", err)
+	}
+
+	return nil
+}
+
+// runList sets up a list, collects an entry for every attachment that
+// filter returns a non-empty string for, prints those entries and shuts
+// the list down again.
+func runList(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) error {
+	l, err := setupList(ctx)
+	if err != nil {
+		return err
+	}
+
+	defer func() {
+		// Ensure lister gets shutdown on exit.
+		if err := l.shutdown(ctx); err != nil {
+			log.Error(ctx, err)
+		}
+	}()
+
+	media, err := l.GetAllMediaPaths(ctx, filter)
+	if err != nil {
+		return err
+	}
+
+	return l.writeLines(media)
+}
+
+// ListLocal prints, one per line, the configured local storage base path
+// joined with the file path of every media attachment that has no remote URL.
+var ListLocal action.GTSAction = func(ctx context.Context) error {
+	mediaPath := config.GetStorageLocalBasePath()
+
+	return runList(ctx, func(m *gtsmodel.MediaAttachment) string {
+		if m.RemoteURL != "" {
+			return ""
+		}
+		return path.Join(mediaPath, m.File.Path)
+	})
+}
+
+// ListRemote prints, one per line, the remote URL of every media
+// attachment that has one.
+var ListRemote action.GTSAction = func(ctx context.Context) error {
+	return runList(ctx, func(m *gtsmodel.MediaAttachment) string {
+		return m.RemoteURL
+	})
+}
diff --git a/cmd/gotosocial/admin.go b/cmd/gotosocial/admin.go
index 3dad3e3d6..a8aab810e 100644
--- a/cmd/gotosocial/admin.go
+++ b/cmd/gotosocial/admin.go
@@ -20,6 +20,7 @@ package main
 import (
 	"github.com/spf13/cobra"
 	"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/account"
+	"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media"
 	"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media/prune"
 	"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/trans"
 	"github.com/superseriousbusiness/gotosocial/internal/config"
@@ -173,6 +174,34 @@ func adminCommands() *cobra.Command {
 		Short: "admin commands related to stored media / emojis",
 	}
 
+	/*
+		ADMIN MEDIA LIST COMMANDS
+	*/
+
+	adminMediaListLocalCmd := &cobra.Command{
+		Use:   "list-local",
+		Short: "admin command to list media on local storage",
+		PreRunE: func(cmd *cobra.Command, args []string) error {
+			return preRun(preRunArgs{cmd: cmd})
+		},
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return run(cmd.Context(), media.ListLocal)
+		},
+	}
+
+	adminMediaListRemoteCmd := &cobra.Command{
+		Use:   "list-remote",
+		Short: "admin command to list remote media cached on this instance",
+		PreRunE: func(cmd *cobra.Command, args []string) error {
+			return preRun(preRunArgs{cmd: cmd})
+		},
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return run(cmd.Context(), media.ListRemote)
+		},
+	}
+
+	adminMediaCmd.AddCommand(adminMediaListLocalCmd, adminMediaListRemoteCmd)
+
 	/*
 		ADMIN MEDIA PRUNE COMMANDS
 	*/
diff --git a/docs/admin/cli.md b/docs/admin/cli.md
index c7fcbbe63..d9178b24d 100644
--- a/docs/admin/cli.md
+++ b/docs/admin/cli.md
@@ -255,6 +255,18 @@ Example:
 gotosocial admin import --path example.json --config-path config.yaml
 ```
 
+### gotosocial admin media list-local
+
+This command can be used to list local media. Local media is media that belongs to posts by users with an account on the instance.
+
+The output will be a list of files. The list can be used to drive your backups.
+
+### gotosocial admin media list-remote
+
+This is the counterpart to list-local: it lists media from remote instances instead. Remote media belongs to other instances, but was attached to a post we received over federation and have potentially cached locally.
+
+The output will be a list of URLs to retrieve the original content from. GoToSocial automatically retrieves remote media when it needs it, so you should never need to do so yourself.
+
 ### gotosocial admin media prune orphaned
 
 This command can be used to prune orphaned media from your GoToSocial.