From c157b1b20b38cc331cfd1673433d077719feef3f Mon Sep 17 00:00:00 2001 From: tsmethurst Date: Sun, 23 Jan 2022 14:41:58 +0100 Subject: [PATCH] rework data function to provide filesize --- internal/federation/dereferencing/account.go | 4 +-- internal/federation/dereferencing/media.go | 2 +- internal/media/image.go | 20 ------------ internal/media/manager_test.go | 12 +++---- internal/media/processingemoji.go | 4 +-- internal/media/processingmedia.go | 33 ++++++++++++++------ internal/media/types.go | 2 +- internal/processing/account/update.go | 10 +++--- internal/processing/admin/emoji.go | 5 +-- internal/processing/media/create.go | 5 +-- internal/transport/derefmedia.go | 12 +++---- internal/transport/transport.go | 4 +-- 12 files changed, 56 insertions(+), 57 deletions(-) diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index 6ea8256d5..581c95de2 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -252,7 +252,7 @@ func (d *deref) fetchHeaderAndAviForAccount(ctx context.Context, targetAccount * return err } - data := func(innerCtx context.Context) (io.Reader, error) { + data := func(innerCtx context.Context) (io.Reader, int, error) { return t.DereferenceMedia(innerCtx, avatarIRI) } @@ -274,7 +274,7 @@ func (d *deref) fetchHeaderAndAviForAccount(ctx context.Context, targetAccount * return err } - data := func(innerCtx context.Context) (io.Reader, error) { + data := func(innerCtx context.Context) (io.Reader, int, error) { return t.DereferenceMedia(innerCtx, headerIRI) } diff --git a/internal/federation/dereferencing/media.go b/internal/federation/dereferencing/media.go index c427f2507..0b19570f2 100644 --- a/internal/federation/dereferencing/media.go +++ b/internal/federation/dereferencing/media.go @@ -42,7 +42,7 @@ func (d *deref) GetRemoteMedia(ctx context.Context, requestingUsername string, a return nil, fmt.Errorf("GetRemoteMedia: error parsing url: %s", err) } - dataFunc := func(innerCtx context.Context) (io.Reader, error) { + dataFunc := func(innerCtx context.Context) (io.Reader, int, error) { return t.DereferenceMedia(innerCtx, derefURI) } diff --git a/internal/media/image.go b/internal/media/image.go index b8f00024f..e5390cee5 100644 --- a/internal/media/image.go +++ b/internal/media/image.go @@ -30,7 +30,6 @@ import ( "github.com/buckket/go-blurhash" "github.com/nfnt/resize" - "github.com/superseriousbusiness/exifremove/pkg/exifremove" ) const ( @@ -197,22 +196,3 @@ func deriveStaticEmoji(r io.Reader, contentType string) (*imageMeta, error) { small: out.Bytes(), }, nil } - -// purgeExif is a little wrapper for the action of removing exif data from an image. -// Only pass pngs or jpegs to this function. -func purgeExif(data []byte) ([]byte, error) { - if len(data) == 0 { - return nil, errors.New("passed image was not valid") - } - - clean, err := exifremove.Remove(data) - if err != nil { - return nil, fmt.Errorf("could not purge exif from image: %s", err) - } - - if len(clean) == 0 { - return nil, errors.New("purged image was not valid") - } - - return clean, nil -} diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go index 5380b83b1..960f34843 100644 --- a/internal/media/manager_test.go +++ b/internal/media/manager_test.go @@ -39,13 +39,13 @@ type ManagerTestSuite struct { func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { ctx := context.Background() - data := func(_ context.Context) (io.Reader, error) { + data := func(_ context.Context) (io.Reader, int, error) { // load bytes from a test image b, err := os.ReadFile("./test/test-jpeg.jpg") if err != nil { panic(err) } - return bytes.NewBuffer(b), nil + return bytes.NewBuffer(b), len(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" @@ -109,13 +109,13 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { ctx := context.Background() - data := func(_ context.Context) (io.Reader, error) { + data := func(_ context.Context) (io.Reader, int, error) { // load bytes from a test image b, err := os.ReadFile("./test/test-jpeg.jpg") if err != nil { panic(err) } - return bytes.NewBuffer(b), nil + return bytes.NewBuffer(b), len(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" @@ -192,9 +192,9 @@ func (suite *ManagerTestSuite) TestSimpleJpegQueueSpamming() { panic(err) } - data := func(_ context.Context) (io.Reader, error) { + data := func(_ context.Context) (io.Reader, int, error) { // load bytes from a test image - return bytes.NewReader(b), nil + return bytes.NewReader(b), len(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index 147b6b5b3..292712427 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -163,7 +163,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { } // execute the data function to get the reader out of it - reader, err := p.data(ctx) + reader, fileSize, err := p.data(ctx) if err != nil { return fmt.Errorf("store: error executing data function: %s", err) } @@ -194,6 +194,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { p.emoji.ImageURL = uris.GenerateURIForAttachment(p.instanceAccountID, string(TypeEmoji), string(SizeOriginal), p.emoji.ID, extension) p.emoji.ImagePath = fmt.Sprintf("%s/%s/%s/%s.%s", p.instanceAccountID, TypeEmoji, SizeOriginal, p.emoji.ID, extension) p.emoji.ImageContentType = contentType + p.emoji.ImageFileSize = fileSize // concatenate the first bytes with the existing bytes still in the reader (thanks Mara) multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), reader) @@ -202,7 +203,6 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { if err := p.storage.PutStream(p.emoji.ImagePath, multiReader); err != nil { return fmt.Errorf("store: error storing stream: %s", err) } - p.emoji.ImageFileSize = 36702 // TODO: set this based on the result of PutStream // if the original reader is a readcloser, close it since we're done with it now if rc, ok := reader.(io.ReadCloser); ok { diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 82db863e0..0bbe35aee 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -28,6 +28,7 @@ import ( "time" "codeberg.org/gruf/go-store/kv" + terminator "github.com/superseriousbusiness/exif-terminator" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" @@ -239,7 +240,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error { } // execute the data function to get the reader out of it - reader, err := p.data(ctx) + reader, fileSize, err := p.data(ctx) if err != nil { return fmt.Errorf("store: error executing data function: %s", err) } @@ -268,22 +269,36 @@ func (p *ProcessingMedia) store(ctx context.Context) error { } extension := split[1] // something like 'jpeg' - // set some additional fields on the attachment now that - // we know more about what the underlying media actually is - if extension == mimeGif { + // concatenate the cleaned up first bytes with the existing bytes still in the reader (thanks Mara) + multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), reader) + + // we'll need to clean exif data from the first bytes; while we're + // here, we can also use the extension to derive the attachment type + var clean io.Reader + switch extension { + case mimeGif: p.attachment.Type = gtsmodel.FileTypeGif - } else { + clean = multiReader // nothing to clean from a gif + case mimeJpeg, mimePng: p.attachment.Type = gtsmodel.FileTypeImage + purged, err := terminator.Terminate(multiReader, fileSize, extension) + if err != nil { + return fmt.Errorf("store: exif error: %s", err) + } + clean = purged + default: + return fmt.Errorf("store: couldn't process %s", extension) } + + // now set some additional fields on the attachment since + // we know more about what the underlying media actually is p.attachment.URL = uris.GenerateURIForAttachment(p.attachment.AccountID, string(TypeAttachment), string(SizeOriginal), p.attachment.ID, extension) p.attachment.File.Path = fmt.Sprintf("%s/%s/%s/%s.%s", p.attachment.AccountID, TypeAttachment, SizeOriginal, p.attachment.ID, extension) p.attachment.File.ContentType = contentType - - // concatenate the first bytes with the existing bytes still in the reader (thanks Mara) - multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), reader) + p.attachment.File.FileSize = fileSize // store this for now -- other processes can pull it out of storage as they please - if err := p.storage.PutStream(p.attachment.File.Path, multiReader); err != nil { + if err := p.storage.PutStream(p.attachment.File.Path, clean); err != nil { return fmt.Errorf("store: error storing stream: %s", err) } diff --git a/internal/media/types.go b/internal/media/types.go index 0a7f60d66..b9c79d464 100644 --- a/internal/media/types.go +++ b/internal/media/types.go @@ -118,4 +118,4 @@ type AdditionalEmojiInfo struct { } // DataFunc represents a function used to retrieve the raw bytes of a piece of media. -type DataFunc func(ctx context.Context) (io.Reader, error) +type DataFunc func(ctx context.Context) (reader io.Reader, fileSize int, err error) diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go index 5a0a3e5a1..758cc6600 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -140,8 +140,9 @@ func (p *processor) UpdateAvatar(ctx context.Context, avatar *multipart.FileHead return nil, fmt.Errorf("UpdateAvatar: avatar with size %d exceeded max image size of %d bytes", avatar.Size, maxImageSize) } - dataFunc := func(ctx context.Context) (io.Reader, error) { - return avatar.Open() + dataFunc := func(ctx context.Context) (io.Reader, int, error) { + f, err := avatar.Open() + return f, int(avatar.Size), err } isAvatar := true @@ -166,8 +167,9 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead return nil, fmt.Errorf("UpdateHeader: header with size %d exceeded max image size of %d bytes", header.Size, maxImageSize) } - dataFunc := func(ctx context.Context) (io.Reader, error) { - return header.Open() + dataFunc := func(ctx context.Context) (io.Reader, int, error) { + f, err := header.Open() + return f, int(header.Size), err } isHeader := true diff --git a/internal/processing/admin/emoji.go b/internal/processing/admin/emoji.go index e0068858b..bb9f4ecb5 100644 --- a/internal/processing/admin/emoji.go +++ b/internal/processing/admin/emoji.go @@ -36,8 +36,9 @@ func (p *processor) EmojiCreate(ctx context.Context, account *gtsmodel.Account, return nil, gtserror.NewErrorNotAuthorized(fmt.Errorf("user %s not an admin", user.ID), "user is not an admin") } - data := func(innerCtx context.Context) (io.Reader, error) { - return form.Image.Open() + data := func(innerCtx context.Context) (io.Reader, int, error) { + f, err := form.Image.Open() + return f, int(form.Image.Size), err } emojiID, err := id.NewRandomULID() diff --git a/internal/processing/media/create.go b/internal/processing/media/create.go index 0fda4c27b..4047278eb 100644 --- a/internal/processing/media/create.go +++ b/internal/processing/media/create.go @@ -29,8 +29,9 @@ import ( ) func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, form *apimodel.AttachmentRequest) (*apimodel.Attachment, error) { - data := func(innerCtx context.Context) (io.Reader, error) { - return form.File.Open() + data := func(innerCtx context.Context) (io.Reader, int, error) { + f, err := form.File.Open() + return f, int(form.File.Size), err } focusX, focusY, err := parseFocus(form.Focus) diff --git a/internal/transport/derefmedia.go b/internal/transport/derefmedia.go index ed32f20c6..e3c86ce1e 100644 --- a/internal/transport/derefmedia.go +++ b/internal/transport/derefmedia.go @@ -28,12 +28,12 @@ import ( "github.com/sirupsen/logrus" ) -func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, error) { +func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, int, error) { l := logrus.WithField("func", "DereferenceMedia") l.Debugf("performing GET to %s", iri.String()) req, err := http.NewRequestWithContext(ctx, "GET", iri.String(), nil) if err != nil { - return nil, err + return nil, 0, err } req.Header.Add("Accept", "*/*") // we don't know what kind of media we're going to get here @@ -44,14 +44,14 @@ func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) (io.Read err = t.getSigner.SignRequest(t.privkey, t.pubKeyID, req, nil) t.getSignerMu.Unlock() if err != nil { - return nil, err + return nil, 0, err } resp, err := t.client.Do(req) if err != nil { - return nil, err + return nil, 0, err } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("GET request to %s failed (%d): %s", iri.String(), resp.StatusCode, resp.Status) + return nil, 0, fmt.Errorf("GET request to %s failed (%d): %s", iri.String(), resp.StatusCode, resp.Status) } - return resp.Body, nil + return resp.Body, int(resp.ContentLength), nil } diff --git a/internal/transport/transport.go b/internal/transport/transport.go index d9650d952..9e8cd8213 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -34,8 +34,8 @@ import ( // functionality for fetching remote media. type Transport interface { pub.Transport - // DereferenceMedia fetches the given media attachment IRI. - DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, error) + // DereferenceMedia fetches the given media attachment IRI, returning the reader and filesize. + DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, int, error) // DereferenceInstance dereferences remote instance information, first by checking /api/v1/instance, and then by checking /.well-known/nodeinfo. DereferenceInstance(ctx context.Context, iri *url.URL) (*gtsmodel.Instance, error) // Finger performs a webfinger request with the given username and domain, and returns the bytes from the response body.