From ca5492b65f45c7db8a9cfb767b0b48aa6cf6fe24 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:05:59 +0200 Subject: [PATCH] [bugfix] Tidy up rss feed serving; don't error on empty feed (#1970) * [bugfix] Tidy up rss feed serving; don't error on empty feed * fall back to account creation time as rss feed update time * return feed early when account has no eligible statuses --- internal/processing/account/rss.go | 167 ++++++++++++++------ internal/processing/account/rss_test.go | 28 ++++ internal/web/rss.go | 198 +++++++++++++++--------- 3 files changed, 267 insertions(+), 126 deletions(-) diff --git a/internal/processing/account/rss.go b/internal/processing/account/rss.go index 9b577380..ddc07b9a 100644 --- a/internal/processing/account/rss.go +++ b/internal/processing/account/rss.go @@ -27,82 +27,151 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) -const rssFeedLength = 20 +const ( + rssFeedLength = 20 +) + +type GetRSSFeed func() (string, gtserror.WithCode) + +// GetRSSFeedForUsername returns a function to return the RSS feed of a local account +// with the given username, and the last-modified time (time that the account last +// posted a status eligible to be included in the rss feed). +// +// To save db calls, callers to this function should only call the returned GetRSSFeed +// func if the last-modified time is newer than the last-modified time they have cached. +// +// If the account has not yet posted an RSS-eligible status, the returned last-modified +// time will be zero, and the GetRSSFeed func will return a valid RSS xml with no items. +func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (GetRSSFeed, time.Time, gtserror.WithCode) { + var ( + never = time.Time{} + ) -// GetRSSFeedForUsername returns RSS feed for the given local username. -func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (func() (string, gtserror.WithCode), time.Time, gtserror.WithCode) { account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, "") if err != nil { - if err == db.ErrNoEntries { - return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account not found")) + if errors.Is(err, db.ErrNoEntries) { + // Simply no account with this username. + err = gtserror.New("account not found") + return nil, never, gtserror.NewErrorNotFound(err) } - return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err)) + + // Real db error. + err = gtserror.Newf("db error getting account %s: %w", username, err) + return nil, never, gtserror.NewErrorInternalError(err) } + // Ensure account has rss feed enabled. if !*account.EnableRSS { - return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account RSS feed not enabled")) + err = gtserror.New("account RSS feed not enabled") + return nil, never, gtserror.NewErrorNotFound(err) } - lastModified, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true) - if err != nil { - return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err)) + // LastModified time is needed by callers to check freshness for cacheing. + // This might be a zero time.Time if account has never posted a status that's + // eligible to appear in the RSS feed; that's fine. + lastPostAt, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("db error getting account %s last posted: %w", username, err) + return nil, never, gtserror.NewErrorInternalError(err) } return func() (string, gtserror.WithCode) { - statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "") - if err != nil && err != db.ErrNoEntries { - return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err)) - } - + // Assemble author namestring once only. author := "@" + account.Username + "@" + config.GetAccountDomain() - title := "Posts from " + author - description := "Posts from " + author - link := &feeds.Link{Href: account.URL} - var image *feeds.Image - if account.AvatarMediaAttachmentID != "" { - if account.AvatarMediaAttachment == nil { - avatar, err := p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID) - if err != nil { - return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error fetching avatar attachment: %s", err)) - } - account.AvatarMediaAttachment = avatar - } - image = &feeds.Image{ - Url: account.AvatarMediaAttachment.Thumbnail.URL, - Title: "Avatar for " + author, - Link: account.URL, - } + // Derive image/thumbnail for this account (may be nil). + image, errWithCode := p.rssImageForAccount(ctx, account, author) + if errWithCode != nil { + return "", errWithCode } feed := &feeds.Feed{ - Title: title, - Description: description, - Link: link, + Title: "Posts from " + author, + Description: "Posts from " + author, + Link: &feeds.Link{Href: account.URL}, Image: image, } - for i, s := range statuses { - // take the date of the first (ie., latest) status as feed updated value - if i == 0 { - feed.Updated = s.UpdatedAt - } + // If the account has never posted anything, just use + // account creation time as Updated value for the feed; + // we could use time.Now() here but this would likely + // mess up cacheing; we want something determinate. + // + // We can also return early rather than wasting a db call, + // since we already know there's no eligible statuses. + if lastPostAt.IsZero() { + feed.Updated = account.CreatedAt + return stringifyFeed(feed) + } - item, err := p.tc.StatusToRSSItem(ctx, s) + // Account has posted at least one status that's + // eligible to appear in the RSS feed. + // + // Reuse the lastPostAt value for feed.Updated. + feed.Updated = lastPostAt + + // Retrieve latest statuses as they'd be shown on the web view of the account profile. + statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "") + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err = fmt.Errorf("db error getting account web statuses: %w", err) + return "", gtserror.NewErrorInternalError(err) + } + + // Add each status to the rss feed. + for _, status := range statuses { + item, err := p.tc.StatusToRSSItem(ctx, status) if err != nil { - return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting status to feed item: %s", err)) + err = gtserror.Newf("error converting status to feed item: %w", err) + return "", gtserror.NewErrorInternalError(err) } feed.Add(item) } - rss, err := feed.ToRss() - if err != nil { - return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting feed to rss string: %s", err)) - } - - return rss, nil - }, lastModified, nil + return stringifyFeed(feed) + }, lastPostAt, nil +} + +func (p *Processor) rssImageForAccount(ctx context.Context, account *gtsmodel.Account, author string) (*feeds.Image, gtserror.WithCode) { + if account.AvatarMediaAttachmentID == "" { + // No image, no problem! + return nil, nil + } + + // Ensure account avatar attachment populated. + if account.AvatarMediaAttachment == nil { + var err error + account.AvatarMediaAttachment, err = p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID) + if err != nil { + if errors.Is(err, db.ErrNoEntries) { + // No attachment found with this ID (race condition?). + return nil, nil + } + + // Real db error. + err = gtserror.Newf("db error fetching avatar media attachment: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + } + + return &feeds.Image{ + Url: account.AvatarMediaAttachment.Thumbnail.URL, + Title: "Avatar for " + author, + Link: account.URL, + }, nil +} + +func stringifyFeed(feed *feeds.Feed) (string, gtserror.WithCode) { + // Stringify the feed. Even with no statuses, + // this will still produce valid rss xml. + rss, err := feed.ToRss() + if err != nil { + err := gtserror.Newf("error converting feed to rss string: %w", err) + return "", gtserror.NewErrorInternalError(err) + } + + return rss, nil } diff --git a/internal/processing/account/rss_test.go b/internal/processing/account/rss_test.go index 1a0bb978..80f86211 100644 --- a/internal/processing/account/rss_test.go +++ b/internal/processing/account/rss_test.go @@ -55,6 +55,34 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSZork() { suite.Equal("\n \n Posts from @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n Posts from @the_mighty_zork@localhost:8080\n Wed, 20 Oct 2021 10:40:37 +0000\n Wed, 20 Oct 2021 10:40:37 +0000\n \n http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg\n Avatar for @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n \n \n introduction post\n http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY\n @the_mighty_zork@localhost:8080 made a new post: "hello everyone!"\n \n @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY\n Wed, 20 Oct 2021 10:40:37 +0000\n http://localhost:8080/@the_mighty_zork/feed.rss\n \n \n", feed) } +func (suite *GetRSSTestSuite) TestGetAccountRSSZorkNoPosts() { + ctx := context.Background() + + // Get all of zork's posts. + statuses, err := suite.db.GetAccountStatuses(ctx, suite.testAccounts["local_account_1"].ID, 0, false, false, "", "", false, false) + if err != nil { + suite.FailNow(err.Error()) + } + + // Now delete them! Hahaha! + for _, status := range statuses { + if err := suite.db.DeleteStatusByID(ctx, status.ID); err != nil { + suite.FailNow(err.Error()) + } + } + + getFeed, lastModified, err := suite.accountProcessor.GetRSSFeedForUsername(ctx, "the_mighty_zork") + suite.NoError(err) + suite.Empty(lastModified) + + feed, err := getFeed() + suite.NoError(err) + + fmt.Println(feed) + + suite.Equal("\n \n Posts from @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n Posts from @the_mighty_zork@localhost:8080\n Fri, 20 May 2022 11:09:18 +0000\n Fri, 20 May 2022 11:09:18 +0000\n \n http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg\n Avatar for @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n \n \n", feed) +} + func TestGetRSSTestSuite(t *testing.T) { suite.Run(t, new(GetRSSTestSuite)) } diff --git a/internal/web/rss.go b/internal/web/rss.go index 8c8d5112..2d98efcb 100644 --- a/internal/web/rss.go +++ b/internal/web/rss.go @@ -19,8 +19,6 @@ package web import ( "bytes" - "errors" - "fmt" "net/http" "strings" "time" @@ -31,86 +29,50 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/log" ) -const appRSSUTF8 = string(apiutil.AppRSSXML + "; charset=utf-8") - -func (m *Module) GetRSSETag(urlPath string, lastModified time.Time, getRSSFeed func() (string, gtserror.WithCode)) (string, error) { - if cachedETag, ok := m.eTagCache.Get(urlPath); ok && !lastModified.After(cachedETag.lastModified) { - // only return our cached etag if the file wasn't - // modified since last time, otherwise generate a - // new one; eat fresh! - return cachedETag.eTag, nil - } - - rssFeed, errWithCode := getRSSFeed() - if errWithCode != nil { - return "", fmt.Errorf("error getting rss feed: %s", errWithCode) - } - - eTag, err := generateEtag(bytes.NewReader([]byte(rssFeed))) - if err != nil { - return "", fmt.Errorf("error generating etag: %s", err) - } - - // put new entry in cache before we return - m.eTagCache.Set(urlPath, eTagCacheEntry{ - eTag: eTag, - lastModified: lastModified, - }) - - return eTag, nil -} - -func extractIfModifiedSince(r *http.Request) time.Time { - hdr := r.Header.Get(ifModifiedSinceHeader) - - if hdr == "" { - return time.Time{} - } - - t, err := http.ParseTime(hdr) - if err != nil { - log.Errorf(r.Context(), "couldn't parse if-modified-since %s: %s", hdr, err) - return time.Time{} - } - - return t -} +const appRSSUTF8 = string(apiutil.AppRSSXML) + "; charset=utf-8" func (m *Module) rssFeedGETHandler(c *gin.Context) { - // set this Cache-Control header to instruct clients to validate the response with us - // before each reuse (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control) - c.Header(cacheControlHeader, cacheControlNoCache) - ctx := c.Request.Context() - if _, err := apiutil.NegotiateAccept(c, apiutil.AppRSSXML); err != nil { apiutil.WebErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1) return } - // usernames on our instance will always be lowercase - username := strings.ToLower(c.Param(usernameKey)) - if username == "" { - err := errors.New("no account username specified") - apiutil.WebErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGetV1) - return - } - - ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader) - ifModifiedSince := extractIfModifiedSince(c.Request) - - getRssFeed, accountLastPostedPublic, errWithCode := m.processor.Account().GetRSSFeedForUsername(ctx, username) + // Fetch + normalize username from URL. + username, errWithCode := apiutil.ParseWebUsername(c.Param(apiutil.WebUsernameKey)) if errWithCode != nil { apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1) return } - var rssFeed string - cacheKey := c.Request.URL.Path - cacheEntry, ok := m.eTagCache.Get(cacheKey) + // Usernames on our instance will always be lowercase. + // + // todo: https://github.com/superseriousbusiness/gotosocial/issues/1813 + username = strings.ToLower(username) - if !ok || cacheEntry.lastModified.Before(accountLastPostedPublic) { - // we either have no cache entry for this, or we have an expired cache entry; generate a new one - rssFeed, errWithCode = getRssFeed() + // Retrieve the getRSSFeed function from the processor. + // We'll only call the function if we need to, to save db calls. + // lastPostAt may be a zero time if account has never posted. + getRSSFeed, lastPostAt, errWithCode := m.processor.Account().GetRSSFeedForUsername(c.Request.Context(), username) + if errWithCode != nil { + apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1) + return + } + + var ( + rssFeed string // Stringified rss feed. + + cacheKey = c.Request.URL.Path + cacheEntry, wasCached = m.eTagCache.Get(cacheKey) + ) + + if !wasCached || unixAfter(lastPostAt, cacheEntry.lastModified) { + // We either have no ETag cache entry for this account's feed, + // or we have an expired cache entry (account has posted since + // the cache entry was last generated). + // + // As such, we need to generate a new ETag, and for that we need + // the string representation of the RSS feed. + rssFeed, errWithCode = getRSSFeed() if errWithCode != nil { apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1) return @@ -122,29 +84,73 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) { return } - cacheEntry.lastModified = accountLastPostedPublic - cacheEntry.eTag = eTag + // We never want lastModified to be zero, so if account + // has never actually posted anything, just use Now as + // the lastModified time instead for cache control. + var lastModified time.Time + if lastPostAt.IsZero() { + lastModified = time.Now() + } else { + lastModified = lastPostAt + } + + // Store the new cache entry. + cacheEntry = eTagCacheEntry{ + eTag: eTag, + lastModified: lastModified, + } m.eTagCache.Set(cacheKey, cacheEntry) } + // Set 'ETag' and 'Last-Modified' headers no matter what; + // even if we return 304 in the next checks, caller may + // want to cache these header values. c.Header(eTagHeader, cacheEntry.eTag) - c.Header(lastModifiedHeader, accountLastPostedPublic.Format(http.TimeFormat)) + c.Header(lastModifiedHeader, cacheEntry.lastModified.Format(http.TimeFormat)) + // Instruct caller to validate the response with us before + // each reuse, so that the 'ETag' and 'Last-Modified' headers + // actually take effect. + // + // "The no-cache response directive indicates that the response + // can be stored in caches, but the response must be validated + // with the origin server before each reuse, even when the cache + // is disconnected from the origin server." + // + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control + c.Header(cacheControlHeader, cacheControlNoCache) + + // Check if caller submitted an ETag via 'If-None-Match'. + // If they did + it matches what we have, that means they've + // already seen the latest version of this feed, so just bail. + ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader) if ifNoneMatch == cacheEntry.eTag { c.AbortWithStatus(http.StatusNotModified) return } - lmUnix := cacheEntry.lastModified.Unix() - imsUnix := ifModifiedSince.Unix() - if lmUnix <= imsUnix { + // Check if the caller submitted a time via 'If-Modified-Since'. + // If they did, and our cached ETag entry is not newer than the + // given time, this means the caller has already seen the latest + // version of this feed, so just bail. + ifModifiedSince := extractIfModifiedSince(c.Request) + if !ifModifiedSince.IsZero() && + !unixAfter(cacheEntry.lastModified, ifModifiedSince) { c.AbortWithStatus(http.StatusNotModified) return } + // At this point we know that the client wants the newest + // representation of the RSS feed, either because they didn't + // submit any 'If-None-Match' / 'If-Modified-Since' cache headers, + // or because they did but the account has posted more recently + // than the values of the submitted headers would suggest. + // + // If we had a cache hit earlier, we may not have called the + // getRSSFeed function yet; if that's the case then do call it + // now because we definitely need it. if rssFeed == "" { - // we had a cache entry already so we didn't call to get the rss feed yet - rssFeed, errWithCode = getRssFeed() + rssFeed, errWithCode = getRSSFeed() if errWithCode != nil { apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1) return @@ -153,3 +159,41 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) { c.Data(http.StatusOK, appRSSUTF8, []byte(rssFeed)) } + +// unixAfter returns true if the unix value of t1 +// is greater than (ie., after) the unix value of t2. +func unixAfter(t1 time.Time, t2 time.Time) bool { + if t1.IsZero() { + // if t1 is zero then it cannot + // possibly be greater than t2. + return false + } + + if t2.IsZero() { + // t1 is not zero but t2 is, + // so t1 is necessarily greater. + return true + } + + return t1.Unix() > t2.Unix() +} + +// extractIfModifiedSince parses a time.Time from the +// 'If-Modified-Since' header of the given request. +// +// If no time was provided, or the provided time was +// not parseable, it will return a zero time. +func extractIfModifiedSince(r *http.Request) time.Time { + imsStr := r.Header.Get(ifModifiedSinceHeader) + if imsStr == "" { + return time.Time{} // Nothing set. + } + + ifModifiedSince, err := http.ParseTime(imsStr) + if err != nil { + log.Errorf(r.Context(), "couldn't parse %s value '%s' as time: %q", ifModifiedSinceHeader, imsStr, err) + return time.Time{} + } + + return ifModifiedSince +}