[bugfix] misc dereferencer fixes (#2475)

* only perform status-up-to-date checks if no statusable has been provided

* copy over the same style of freshness checking from status deref -> accounts

* change some var names

* check for empty account domain
This commit is contained in:
kim 2024-01-09 09:42:39 +00:00 committed by GitHub
parent 7cce1a7cc6
commit 10660e566d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 92 additions and 46 deletions

View file

@ -41,7 +41,7 @@ import (
// accountUpToDate returns whether the given account model is both updateable (i.e. // accountUpToDate returns whether the given account model is both updateable (i.e.
// non-instance remote account) and whether it needs an update based on `fetched_at`. // non-instance remote account) and whether it needs an update based on `fetched_at`.
func accountUpToDate(account *gtsmodel.Account) bool { func accountUpToDate(account *gtsmodel.Account, force bool) bool {
if !account.SuspendedAt.IsZero() { if !account.SuspendedAt.IsZero() {
// Can't update suspended accounts. // Can't update suspended accounts.
return true return true
@ -57,8 +57,19 @@ func accountUpToDate(account *gtsmodel.Account) bool {
return true return true
} }
// If this account was updated recently (last interval), we return as-is. // Default limit we allow
if next := account.FetchedAt.Add(6 * time.Hour); time.Now().Before(next) { // statuses to be refreshed.
limit := 6 * time.Hour
if force {
// We specifically allow the force flag
// to force an early refresh (on a much
// smaller cooldown period).
limit = 5 * time.Minute
}
// If account was updated recently (within limit), we return as-is.
if next := account.FetchedAt.Add(limit); time.Now().Before(next) {
return true return true
} }
@ -70,7 +81,7 @@ func accountUpToDate(account *gtsmodel.Account) bool {
// may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced. // may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Account, ap.Accountable, error) { func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Account, ap.Accountable, error) {
// Fetch and dereference account if necessary. // Fetch and dereference account if necessary.
account, apubAcc, err := d.getAccountByURI(ctx, account, accountable, err := d.getAccountByURI(ctx,
requestUser, requestUser,
uri, uri,
) )
@ -78,7 +89,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string,
return nil, nil, err return nil, nil, err
} }
if apubAcc != nil { if accountable != nil {
// This account was updated, enqueue re-dereference featured posts. // This account was updated, enqueue re-dereference featured posts.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil { if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil {
@ -87,7 +98,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string,
}) })
} }
return account, apubAcc, nil return account, accountable, nil
} }
// getAccountByURI is a package internal form of .GetAccountByURI() that doesn't bother dereferencing featured posts on update. // getAccountByURI is a package internal form of .GetAccountByURI() that doesn't bother dereferencing featured posts on update.
@ -134,17 +145,18 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
}, nil) }, nil)
} }
// Check whether needs update. if accountUpToDate(account, false) {
if accountUpToDate(account) { // This is an existing account that is up-to-date,
// This is existing up-to-date account, ensure it is populated. // before returning ensure it is fully populated.
if err := d.state.DB.PopulateAccount(ctx, account); err != nil { if err := d.state.DB.PopulateAccount(ctx, account); err != nil {
log.Errorf(ctx, "error populating existing account: %v", err) log.Errorf(ctx, "error populating existing account: %v", err)
} }
return account, nil, nil return account, nil, nil
} }
// Try to update existing account model. // Try to update existing account model.
latest, apubAcc, err := d.enrichAccountSafely(ctx, latest, accountable, err := d.enrichAccountSafely(ctx,
requestUser, requestUser,
uri, uri,
account, account,
@ -157,14 +169,14 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
return account, nil, nil return account, nil, nil
} }
return latest, apubAcc, nil return latest, accountable, nil
} }
// GetAccountByUsernameDomain will attempt to fetch an accounts by its username@domain, first checking the database. In the case of a newly-met remote model, // GetAccountByUsernameDomain will attempt to fetch an accounts by its username@domain, first checking the database. In the case of a newly-met remote model,
// or a remote model whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority // or a remote model whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority
// account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced. // account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUser string, username string, domain string) (*gtsmodel.Account, ap.Accountable, error) { func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUser string, username string, domain string) (*gtsmodel.Account, ap.Accountable, error) {
account, apubAcc, err := d.getAccountByUsernameDomain( account, accountable, err := d.getAccountByUsernameDomain(
ctx, ctx,
requestUser, requestUser,
username, username,
@ -174,7 +186,7 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs
return nil, nil, err return nil, nil, err
} }
if apubAcc != nil { if accountable != nil {
// This account was updated, enqueue re-dereference featured posts. // This account was updated, enqueue re-dereference featured posts.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil { if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil {
@ -183,12 +195,11 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs
}) })
} }
return account, apubAcc, nil return account, accountable, nil
} }
// getAccountByUsernameDomain is a package internal form // getAccountByUsernameDomain is a package internal form of
// of .GetAccountByUsernameDomain() that doesn't bother // GetAccountByUsernameDomain() that doesn't bother deref of featured posts.
// dereferencing featured posts.
func (d *Dereferencer) getAccountByUsernameDomain( func (d *Dereferencer) getAccountByUsernameDomain(
ctx context.Context, ctx context.Context,
requestUser string, requestUser string,
@ -219,7 +230,7 @@ func (d *Dereferencer) getAccountByUsernameDomain(
} }
// Create and pass-through a new bare-bones model for dereferencing. // Create and pass-through a new bare-bones model for dereferencing.
account, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{ account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(), ID: id.NewULID(),
Username: username, Username: username,
Domain: domain, Domain: domain,
@ -228,11 +239,11 @@ func (d *Dereferencer) getAccountByUsernameDomain(
return nil, nil, err return nil, nil, err
} }
return account, apubAcc, nil return account, accountable, nil
} }
// Try to update existing account model. // Try to update existing account model.
latest, apubAcc, err := d.RefreshAccount(ctx, latest, accountable, err := d.RefreshAccount(ctx,
requestUser, requestUser,
account, account,
nil, nil,
@ -243,22 +254,32 @@ func (d *Dereferencer) getAccountByUsernameDomain(
return account, nil, nil //nolint return account, nil, nil //nolint
} }
if apubAcc == nil { if accountable == nil {
// This is existing up-to-date account, ensure it is populated. // This is existing up-to-date account, ensure it is populated.
if err := d.state.DB.PopulateAccount(ctx, latest); err != nil { if err := d.state.DB.PopulateAccount(ctx, latest); err != nil {
log.Errorf(ctx, "error populating existing account: %v", err) log.Errorf(ctx, "error populating existing account: %v", err)
} }
} }
return latest, apubAcc, nil return latest, accountable, nil
} }
// RefreshAccount updates the given account if remote and last_fetched is beyond fetch interval, or if force is set. An updated account model is returned, // RefreshAccount updates the given account if remote and last_fetched is
// but in the case of dereferencing, some low-priority account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). // beyond fetch interval, or if force is set. An updated account model is
// returned, but in the case of dereferencing, some low-priority account info
// may be enqueued for asynchronous fetching, e.g. featured account statuses (pins).
// An ActivityPub object indicates the account was dereferenced (i.e. updated). // An ActivityPub object indicates the account was dereferenced (i.e. updated).
func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) (*gtsmodel.Account, ap.Accountable, error) { func (d *Dereferencer) RefreshAccount(
// Check whether needs update (and not forced). ctx context.Context,
if accountUpToDate(account) && !force { requestUser string,
account *gtsmodel.Account,
accountable ap.Accountable,
force bool,
) (*gtsmodel.Account, ap.Accountable, error) {
// If no incoming data is provided,
// check whether account needs update.
if accountable == nil &&
accountUpToDate(account, force) {
return account, nil, nil return account, nil, nil
} }
@ -269,18 +290,18 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
} }
// Try to update + deref passed account model. // Try to update + deref passed account model.
latest, apubAcc, err := d.enrichAccountSafely(ctx, latest, accountable, err := d.enrichAccountSafely(ctx,
requestUser, requestUser,
uri, uri,
account, account,
apubAcc, accountable,
) )
if err != nil { if err != nil {
log.Errorf(ctx, "error enriching remote account: %v", err) log.Errorf(ctx, "error enriching remote account: %v", err)
return nil, nil, gtserror.Newf("error enriching remote account: %w", err) return nil, nil, gtserror.Newf("error enriching remote account: %w", err)
} }
if apubAcc != nil { if accountable != nil {
// This account was updated, enqueue re-dereference featured posts. // This account was updated, enqueue re-dereference featured posts.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil { if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
@ -289,14 +310,24 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
}) })
} }
return latest, apubAcc, nil return latest, accountable, nil
} }
// RefreshAccountAsync enqueues the given account for an asychronous update fetching, if last_fetched is beyond fetch interval, or if forcc is set. // RefreshAccountAsync enqueues the given account for an asychronous
// This is a more optimized form of manually enqueueing .UpdateAccount() to the federation worker, since it only enqueues update if necessary. // update fetching, if last_fetched is beyond fetch interval, or if force
func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) { // is set. This is a more optimized form of manually enqueueing .UpdateAccount()
// Check whether needs update (and not forced). // to the federation worker, since it only enqueues update if necessary.
if accountUpToDate(account) && !force { func (d *Dereferencer) RefreshAccountAsync(
ctx context.Context,
requestUser string,
account *gtsmodel.Account,
accountable ap.Accountable,
force bool,
) {
// If no incoming data is provided,
// check whether account needs update.
if accountable == nil &&
accountUpToDate(account, force) {
return return
} }
@ -309,13 +340,13 @@ func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser stri
// Enqueue a worker function to enrich this account async. // Enqueue a worker function to enrich this account async.
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
latest, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, uri, account, apubAcc) latest, accountable, err := d.enrichAccountSafely(ctx, requestUser, uri, account, accountable)
if err != nil { if err != nil {
log.Errorf(ctx, "error enriching remote account: %v", err) log.Errorf(ctx, "error enriching remote account: %v", err)
return return
} }
if apubAcc != nil { if accountable != nil {
// This account was updated, enqueue re-dereference featured posts. // This account was updated, enqueue re-dereference featured posts.
if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil { if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
log.Errorf(ctx, "error fetching account featured collection: %v", err) log.Errorf(ctx, "error fetching account featured collection: %v", err)
@ -332,7 +363,7 @@ func (d *Dereferencer) enrichAccountSafely(
requestUser string, requestUser string,
uri *url.URL, uri *url.URL,
account *gtsmodel.Account, account *gtsmodel.Account,
apubAcc ap.Accountable, accountable ap.Accountable,
) (*gtsmodel.Account, ap.Accountable, error) { ) (*gtsmodel.Account, ap.Accountable, error) {
// Noop if account has been suspended. // Noop if account has been suspended.
if !account.SuspendedAt.IsZero() { if !account.SuspendedAt.IsZero() {
@ -360,7 +391,7 @@ func (d *Dereferencer) enrichAccountSafely(
requestUser, requestUser,
uri, uri,
account, account,
apubAcc, accountable,
) )
if gtserror.StatusCode(err) >= 400 { if gtserror.StatusCode(err) >= 400 {
@ -521,6 +552,16 @@ func (d *Dereferencer) enrichAccount(
} }
} }
if latestAcc.Domain == "" {
// ensure we have a domain set by this point,
// otherwise it gets stored as a local user!
//
// TODO: there is probably a more granular way
// way of checking this in each of the above parts,
// and honestly it could do with a smol refactor.
return nil, nil, gtserror.Newf("empty domain for %s", uri)
}
// Ensure ID is set and update fetch time. // Ensure ID is set and update fetch time.
latestAcc.ID = account.ID latestAcc.ID = account.ID
latestAcc.FetchedAt = time.Now() latestAcc.FetchedAt = time.Now()

View file

@ -135,12 +135,13 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
}, nil) }, nil)
} }
// Check whether needs update.
if statusUpToDate(status, false) { if statusUpToDate(status, false) {
// This is existing up-to-date status, ensure it is populated. // This is an existing status that is up-to-date,
// before returning ensure it is fully populated.
if err := d.state.DB.PopulateStatus(ctx, status); err != nil { if err := d.state.DB.PopulateStatus(ctx, status); err != nil {
log.Errorf(ctx, "error populating existing status: %v", err) log.Errorf(ctx, "error populating existing status: %v", err)
} }
return status, nil, false, nil return status, nil, false, nil
} }
@ -170,8 +171,10 @@ func (d *Dereferencer) RefreshStatus(
statusable ap.Statusable, statusable ap.Statusable,
force bool, force bool,
) (*gtsmodel.Status, ap.Statusable, error) { ) (*gtsmodel.Status, ap.Statusable, error) {
// Check whether status needs update. // If no incoming data is provided,
if statusUpToDate(status, force) { // check whether status needs update.
if statusable == nil &&
statusUpToDate(status, force) {
return status, nil, nil return status, nil, nil
} }
@ -215,8 +218,10 @@ func (d *Dereferencer) RefreshStatusAsync(
statusable ap.Statusable, statusable ap.Statusable,
force bool, force bool,
) { ) {
// Check whether status needs update. // If no incoming data is provided,
if statusUpToDate(status, force) { // check whether status needs update.
if statusable == nil &&
statusUpToDate(status, force) {
return return
} }