From 81f33c3b9f8ff8b7d5ff034527e5fbf2d1c74dc4 Mon Sep 17 00:00:00 2001 From: Daenney Date: Fri, 7 Jul 2023 11:35:05 +0200 Subject: [PATCH] [feature] Add media list command (#1943) * [feature] Add media list command This is an attempt to help alleviate #1776. Using admin media list --local the full path to each local media file will be printed, with a newline. The output of this should be feedable into backup tools in order to allow backing up local media too. Together with the database this should allow to fully recover from the loss of an instance. The list command also gets a --remote flag for symmetry. In the case of --remote we print the RemoteURL instead, the location the asset can be retrieved from. To get all media, you can run with --local and --remote. * [bugfix] Fix the test failures * [feature] Reimplement list media as top commands This changes the implementation of admin media list -- to two separate top-level commands, list-local and list-remote. The implementation now iterates over the database in batches of 200 in order to avoid loading all media metadata into memory. * [feature] Implement ListMedia with filter callback This does away with the somewhat odd iterator-like structure we had before and does away with most of the loop duplication in list-local and list-remote. Instead they call GetAllMediaPaths with a filter func to select the media they want. That's accumulated into a slice and eventually returned. * [bugfix] Simplify remote filter Since we don't append the empty string anywhere, the remote filter can be limited to returning RemoteURL, as that'll be an empty string for local media. 
* [docs] Add media list commands to CLI reference --------- Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com> --- cmd/gotosocial/action/admin/media/list.go | 165 ++++++++++++++++++++++ cmd/gotosocial/admin.go | 29 ++++ docs/admin/cli.md | 12 ++ 3 files changed, 206 insertions(+) create mode 100644 cmd/gotosocial/action/admin/media/list.go diff --git a/cmd/gotosocial/action/admin/media/list.go b/cmd/gotosocial/action/admin/media/list.go new file mode 100644 index 000000000..e66019ecc --- /dev/null +++ b/cmd/gotosocial/action/admin/media/list.go @@ -0,0 +1,165 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +package media + +import ( + "bufio" + "context" + "fmt" + "os" + "path" + + "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/db/bundb" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/state" +) + +type list struct { + dbService db.DB + state *state.State + maxID string + limit int + out *bufio.Writer +} + +func (l *list) GetAllMediaPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) { + res := make([]string, 0, 100) + for { + attachments, err := l.dbService.GetAttachments(ctx, l.maxID, l.limit) + if err != nil { + return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err) + } + + for _, a := range attachments { + v := filter(a) + if v != "" { + res = append(res, v) + } + } + + // If we got less results than our limit, we've reached the + // last page to retrieve and we can break the loop. If the + // last batch happens to contain exactly the same amount of + // items as the limit we'll end up doing one extra query. + if len(attachments) < l.limit { + break + } + + // Grab the last ID from the batch and set it as the maxID + // that'll be used in the next iteration so we don't get items + // we've already seen. 
+ l.maxID = attachments[len(attachments)-1].ID + } + return res, nil +} + +func setupList(ctx context.Context) (*list, error) { + var state state.State + + state.Caches.Init() + state.Caches.Start() + + state.Workers.Start() + + dbService, err := bundb.NewBunDBService(ctx, &state) + if err != nil { + return nil, fmt.Errorf("error creating dbservice: %w", err) + } + state.DB = dbService + + return &list{ + dbService: dbService, + state: &state, + limit: 200, + maxID: "", + out: bufio.NewWriter(os.Stdout), + }, nil +} + +func (l *list) shutdown(ctx context.Context) error { + l.out.Flush() + err := l.dbService.Stop(ctx) + l.state.Workers.Stop() + l.state.Caches.Stop() + + return err +} + +var ListLocal action.GTSAction = func(ctx context.Context) error { + list, err := setupList(ctx) + if err != nil { + return err + } + + defer func() { + // Ensure lister gets shutdown on exit. + if err := list.shutdown(ctx); err != nil { + log.Error(ctx, err) + } + }() + + mediaPath := config.GetStorageLocalBasePath() + media, err := list.GetAllMediaPaths( + ctx, + func(m *gtsmodel.MediaAttachment) string { + if m.RemoteURL == "" { + return path.Join(mediaPath, m.File.Path) + } + return "" + }) + if err != nil { + return err + } + + for _, m := range media { + _, _ = list.out.WriteString(m + "\n") + } + return nil +} + +var ListRemote action.GTSAction = func(ctx context.Context) error { + list, err := setupList(ctx) + if err != nil { + return err + } + + defer func() { + // Ensure lister gets shutdown on exit. 
+ if err := list.shutdown(ctx); err != nil { + log.Error(ctx, err) + } + }() + + media, err := list.GetAllMediaPaths( + ctx, + func(m *gtsmodel.MediaAttachment) string { + return m.RemoteURL + }) + if err != nil { + return err + } + + for _, m := range media { + _, _ = list.out.WriteString(m + "\n") + } + return nil +} diff --git a/cmd/gotosocial/admin.go b/cmd/gotosocial/admin.go index 3dad3e3d6..a8aab810e 100644 --- a/cmd/gotosocial/admin.go +++ b/cmd/gotosocial/admin.go @@ -20,6 +20,7 @@ package main import ( "github.com/spf13/cobra" "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/account" + "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media" "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/media/prune" "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action/admin/trans" "github.com/superseriousbusiness/gotosocial/internal/config" @@ -173,6 +174,34 @@ func adminCommands() *cobra.Command { Short: "admin commands related to stored media / emojis", } + /* + ADMIN MEDIA LIST COMMANDS + */ + + adminMediaListLocalCmd := &cobra.Command{ + Use: "list-local", + Short: "admin command to list media on local storage", + PreRunE: func(cmd *cobra.Command, args []string) error { + return preRun(preRunArgs{cmd: cmd}) + }, + RunE: func(cmd *cobra.Command, args []string) error { + return run(cmd.Context(), media.ListLocal) + }, + } + + adminMediaListRemoteCmd := &cobra.Command{ + Use: "list-remote", + Short: "admin command to list remote media cached on this instance", + PreRunE: func(cmd *cobra.Command, args []string) error { + return preRun(preRunArgs{cmd: cmd}) + }, + RunE: func(cmd *cobra.Command, args []string) error { + return run(cmd.Context(), media.ListRemote) + }, + } + + adminMediaCmd.AddCommand(adminMediaListLocalCmd, adminMediaListRemoteCmd) + /* ADMIN MEDIA PRUNE COMMANDS */ diff --git a/docs/admin/cli.md b/docs/admin/cli.md index c7fcbbe63..d9178b24d 100644 --- a/docs/admin/cli.md 
+++ b/docs/admin/cli.md @@ -255,6 +255,18 @@ Example: gotosocial admin import --path example.json --config-path config.yaml ``` +### gotosocial admin media list-local + +This command can be used to list local media. Local media is media that belongs to posts by users with an account on the instance. + +The output will be a list of file paths, one per line. The list can be used to drive your backups. + +### gotosocial admin media list-remote + +This is the counterpart to list-local: it lists media from remote instances instead. Remote media belongs to other instances, but was attached to a post we received over federation and have potentially cached locally. + +The output will be a list of URLs to retrieve the original content from. GoToSocial automatically retrieves remote media when it needs it, so you should never need to do so yourself. + ### gotosocial admin media prune orphaned This command can be used to prune orphaned media from your GoToSocial.