[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)

* tidy up account, status, webfingering logic a wee bit

* go fmt

* invert published check

* alter resp initialization

* get Published from account in typeutils

* don't instantiate error for no darn good reason

* shadow err

* don't repeat error codes in wrapped errors

* don't wrap error unnecessarily
This commit is contained in:
tobi 2024-01-26 14:17:10 +01:00 committed by GitHub
parent 07207e71e9
commit e3052e8c82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 461 additions and 211 deletions

View file

@ -163,6 +163,7 @@ type Accountable interface {
WithManuallyApprovesFollowers WithManuallyApprovesFollowers
WithEndpoints WithEndpoints
WithTag WithTag
WithPublished
} }
// Statusable represents the minimum activitypub interface for representing a 'status'. // Statusable represents the minimum activitypub interface for representing a 'status'.

View file

@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool {
return true return true
} }
if !account.CreatedAt.IsZero() && account.IsInstance() { if account.IsInstance() && !account.IsNew() {
// Existing instance account. No need for update. // Existing instance account. No need for update.
return true return true
} }
@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain(
// Create and pass-through a new bare-bones model for dereferencing. // Create and pass-through a new bare-bones model for dereferencing.
account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{ account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(), ID: id.NewULID(),
Username: username,
Domain: domain, Domain: domain,
Username: username,
}, nil) }, nil)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(
// By default use account.URI // By default use account.URI
// as the per-URI deref lock. // as the per-URI deref lock.
uriStr := account.URI var lockKey string
if account.URI != "" {
if uriStr == "" { lockKey = account.URI
} else {
// No URI is set yet, instead generate a faux-one from user+domain. // No URI is set yet, instead generate a faux-one from user+domain.
uriStr = "https://" + account.Domain + "/user/" + account.Username lockKey = "https://" + account.Domain + "/users/" + account.Username
} }
// Acquire per-URI deref lock, wraping unlock // Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still // to safely defer in case of panic, while still
// performing more granular unlocks when needed. // performing more granular unlocks when needed.
unlock := d.state.FedLocks.Lock(uriStr) unlock := d.state.FedLocks.Lock(lockKey)
unlock = doOnce(unlock) unlock = doOnce(unlock)
defer unlock() defer unlock()
@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely(
accountable, accountable,
) )
if gtserror.StatusCode(err) >= 400 { if code := gtserror.StatusCode(err); code >= 400 {
// Update fetch-at to slow re-attempts. // No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if account.IsNew() {
// This was a new account enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, err
}
// We had this account stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
account.FetchedAt = time.Now() account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at") if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating account fetched_at: %v", err)
}
} }
// Unlock now // Unlock now
@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely(
// in a call to db.Put(Account). Look again in DB by URI. // in a call to db.Put(Account). Look again in DB by URI.
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI) latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
if err != nil { if err != nil {
err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err) err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)
} }
} }
@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount(
if account.Username != "" { if account.Username != "" {
// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info. // A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.
accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain) accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain)
if err != nil { switch {
if account.URI == "" {
// this is a new account (to us) with username@domain case err != nil && account.URI == "":
// This is a new account (to us) with username@domain
// but failed webfinger, nothing more we can do. // but failed webfinger, nothing more we can do.
err := gtserror.Newf("error webfingering account: %w", err) err := gtserror.Newf("error webfingering account: %w", err)
return nil, nil, gtserror.SetUnretrievable(err) return nil, nil, gtserror.SetUnretrievable(err)
}
// Simply log this error and move on, we already have an account URI. case err != nil:
log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err) // Simply log this error and move on,
} // we already have an account URI.
log.Errorf(ctx,
"error webfingering[1] remote account %s@%s: %v",
account.Username, account.Domain, err,
)
if err == nil { case err == nil && account.Domain != accDomain:
if account.Domain != accDomain {
// After webfinger, we now have correct account domain from which we can do a final DB check. // After webfinger, we now have correct account domain from which we can do a final DB check.
alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
if err != nil && !errors.Is(err, db.ErrNoEntries) { if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
} }
if alreadyAccount != nil { if alreadyAcct != nil {
// Enrich existing account. // We had this account stored under
account = alreadyAccount // the discovered accountDomain.
} // Proceed with this account.
account = alreadyAcct
} }
// Whether we had the account or not, we
// now have webfinger info relevant to the
// account, so fallthrough to set webfinger
// info on either the account we just found,
// or the stub account we were passed.
fallthrough
case err == nil:
// Update account with latest info. // Update account with latest info.
account.URI = accURI.String() account.URI = accURI.String()
account.Domain = accDomain account.Domain = accDomain
@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount(
} }
if uri == nil { if uri == nil {
// No URI provided / found, must parse from account. // No URI provided / found,
// must parse from account.
uri, err = url.Parse(account.URI) uri, err = url.Parse(account.URI)
if err != nil { if err != nil {
return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err) return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
}
if uri.Scheme != "http" && uri.Scheme != "https" {
err = errors.New("account URI scheme must be http or https")
return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
} }
} }
/*
BY THIS POINT we must have an account URI set,
either provided, pinned to the account, or
obtained via webfinger call.
*/
// Check whether this account URI is a blocked domain / subdomain. // Check whether this account URI is a blocked domain / subdomain.
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount(
defer d.stopHandshake(requestUser, uri) defer d.stopHandshake(requestUser, uri)
if apubAcc == nil { if apubAcc == nil {
// We were not given any (partial) ActivityPub
// version of this account as a parameter.
// Dereference latest version of the account. // Dereference latest version of the account.
b, err := tsport.Dereference(ctx, uri) b, err := tsport.Dereference(ctx, uri)
if err != nil { if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err) err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err) return nil, nil, gtserror.SetUnretrievable(err)
} }
// Attempt to resolve ActivityPub acc from data. // Attempt to resolve ActivityPub acc from data.
apubAcc, err = ap.ResolveAccountable(ctx, b) apubAcc, err = ap.ResolveAccountable(ctx, b)
if err != nil { if err != nil {
return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) // ResolveAccountable will set Unretrievable/WrongType
// on the returned error, so we don't need to do it here.
err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
return nil, nil, err
} }
} }
/*
BY THIS POINT we must have the ActivityPub
representation of the account, either provided,
or obtained via a dereference call.
*/
// Convert the dereferenced AP account object to our GTS model. // Convert the dereferenced AP account object to our GTS model.
//
// We put this in the variable latestAcc because we might want
// to compare the provided account model with this fresh version,
// in order to check if anything changed since we last saw it.
latestAcc, err := d.converter.ASRepresentationToAccount(ctx, latestAcc, err := d.converter.ASRepresentationToAccount(ctx,
apubAcc, apubAcc,
account.Domain, account.Domain,
) )
if err != nil { if err != nil {
return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) // ASRepresentationToAccount will set Malformed on the
// returned error, so we don't need to do it here.
err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
return nil, nil, err
} }
if account.Username == "" { if account.Username == "" {
@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount(
// from example.org to somewhere.else: we want to take somewhere.else // from example.org to somewhere.else: we want to take somewhere.else
// as the accountDomain then, not the example.org we were redirected from. // as the accountDomain then, not the example.org we were redirected from.
// Assume the host from the returned ActivityPub representation. // Assume the host from the returned
idProp := apubAcc.GetJSONLDId() // ActivityPub representation.
if idProp == nil || !idProp.IsIRI() { id := ap.GetJSONLDId(apubAcc)
if id == nil {
return nil, nil, gtserror.New("no id property found on person, or id was not an iri") return nil, nil, gtserror.New("no id property found on person, or id was not an iri")
} }
// Get IRI host value. // Get IRI host value.
accHost := idProp.GetIRI().Host accHost := id.Host
latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx, latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx,
tsport, tsport,
@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount(
} }
if latestAcc.Domain == "" { if latestAcc.Domain == "" {
// ensure we have a domain set by this point, // Ensure we have a domain set by this point,
// otherwise it gets stored as a local user! // otherwise it gets stored as a local user!
// //
// TODO: there is probably a more granular way // TODO: there is probably a more granular way
@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("empty domain for %s", uri) return nil, nil, gtserror.Newf("empty domain for %s", uri)
} }
// Ensure ID is set and update fetch time. /*
BY THIS POINT we have more or less a fullly-formed
representation of the target account, derived from
a combination of webfinger lookups and dereferencing.
Further fetching beyond this point is for peripheral
things like account avatar, header, emojis.
*/
// Ensure internal db ID is
// set and update fetch time.
latestAcc.ID = account.ID latestAcc.ID = account.ID
latestAcc.FetchedAt = time.Now() latestAcc.FetchedAt = time.Now()
@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount(
log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err) log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err)
} }
if account.CreatedAt.IsZero() { if account.IsNew() {
// CreatedAt will be zero if no local copy was // Prefer published/created time from
// found in one of the GetAccountBy___() functions. // apubAcc, fall back to FetchedAt value.
// if latestAcc.CreatedAt.IsZero() {
// Set time of creation from the last-fetched date.
latestAcc.CreatedAt = latestAcc.FetchedAt latestAcc.CreatedAt = latestAcc.FetchedAt
}
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt latestAcc.UpdatedAt = latestAcc.FetchedAt
// This is new, put it in the database. // This is new, put it in the database.
@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("error putting in database: %w", err) return nil, nil, gtserror.Newf("error putting in database: %w", err)
} }
} else { } else {
// Prefer published time from apubAcc,
// fall back to previous stored value.
if latestAcc.CreatedAt.IsZero() {
latestAcc.CreatedAt = account.CreatedAt
}
// Set time of update from the last-fetched date. // Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt latestAcc.UpdatedAt = latestAcc.FetchedAt
// Use existing account values. // Carry over existing account language.
latestAcc.CreatedAt = account.CreatedAt
latestAcc.Language = account.Language latestAcc.Language = account.Language
// This is an existing account, update the model in the database. // This is an existing account, update the model in the database.

View file

@ -20,54 +20,109 @@ package dereferencing
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt"
"net/url" "net/url"
"strings" "strings"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/transport" "github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util" "github.com/superseriousbusiness/gotosocial/internal/util"
) )
func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) { // fingerRemoteAccount performs a webfinger call for the
b, err := transport.Finger(ctx, targetUsername, targetHost) // given username and host, using the provided transport.
//
// The webfinger response will be parsed, and the subject
// domain and AP URI will be extracted and returned.
//
// In case the response cannot be parsed, or the response
// does not contain a valid subject string or AP URI, an
// error will be returned instead.
func (d *Dereferencer) fingerRemoteAccount(
ctx context.Context,
transport transport.Transport,
username string,
host string,
) (
string, // discovered account domain
*url.URL, // discovered account URI
error,
) {
// Assemble target namestring for logging.
var target = "@" + username + "@" + host
b, err := transport.Finger(ctx, username, host)
if err != nil { if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err) err = gtserror.Newf("error webfingering %s: %w", target, err)
return return "", nil, err
} }
resp := &apimodel.WellKnownResponse{} var resp apimodel.WellKnownResponse
if err = json.Unmarshal(b, resp); err != nil { if err := json.Unmarshal(b, &resp); err != nil {
err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err) err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err)
return return "", nil, err
} }
if len(resp.Links) == 0 { if len(resp.Links) == 0 {
err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b)) err = gtserror.Newf("no links found in response for %s", target)
return return "", nil, err
} }
if resp.Subject == "" { if resp.Subject == "" {
err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b)) err = gtserror.Newf("no subject found in response for %s", target)
return return "", nil, err
} }
_, accountDomain, err = util.ExtractWebfingerParts(resp.Subject) _, accountDomain, err := util.ExtractWebfingerParts(resp.Subject)
if err != nil { if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err) err = gtserror.Newf("error extracting subject parts for %s: %w", target, err)
return "", nil, err
} }
// look through the links for the first one that matches what we need // Look through links for the first
for _, l := range resp.Links { // one that matches what we need:
if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) { //
if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") { // - Must be self link.
// found it! // - Must be AP type.
accountURI = uri // - Valid https/http URI.
return for _, link := range resp.Links {
} if link.Rel != "self" {
} // Not self link, ignore.
continue
} }
return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response") if !strings.EqualFold(link.Type, "application/activity+json") &&
!strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") {
// Not an AP type, ignore.
continue
}
uri, err := url.Parse(link.Href)
if err != nil {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: %v",
link.Href, target, err,
)
// Funky URL, ignore.
continue
}
if uri.Scheme != "http" && uri.Scheme != "https" {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: schema must be http or https",
link.Href, target,
)
// Can't handle this
// schema, ignore.
continue
}
// All looks good, return happily!
return accountDomain, uri, nil
}
return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target)
} }

View file

@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely(
apubStatus, apubStatus,
) )
if gtserror.StatusCode(err) >= 400 { if code := gtserror.StatusCode(err); code >= 400 {
// Update fetch-at to slow re-attempts. // No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if isNew {
// This was a new status enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, isNew, err
}
// We had this status stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
status.FetchedAt = time.Now() status.FetchedAt = time.Now()
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating status fetched_at: %v", err)
}
} }
// Unlock now // Unlock now
@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus(
// Dereference latest version of the status. // Dereference latest version of the status.
b, err := tsport.Dereference(ctx, uri) b, err := tsport.Dereference(ctx, uri)
if err != nil { if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err) err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err) return nil, nil, gtserror.SetUnretrievable(err)
} }
@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
} }
// Use existing status ID. // Check if we've previously
latestStatus.ID = status.ID // stored this status in the DB.
if latestStatus.ID == "" { // If we have, it'll be ID'd.
var isNew = (status.ID == "")
// Generate new status ID from the provided creation date. if isNew {
// No ID, we haven't stored this status before.
// Generate new status ID from the status publication time.
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt) latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
if err != nil { if err != nil {
log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err) log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)
latestStatus.ID = id.NewULID() // just use "now" latestStatus.ID = id.NewULID() // just use "now"
} }
} else {
// Reuse existing status ID.
latestStatus.ID = status.ID
} }
// Carry-over values and set fetch time. // Carry-over values and set fetch time.
@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err) return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)
} }
if status.CreatedAt.IsZero() { if isNew {
// CreatedAt will be zero if no local copy was
// found in one of the GetStatusBy___() functions.
//
// This is new, put the status in the database. // This is new, put the status in the database.
err := d.state.DB.PutStatus(ctx, latestStatus) err := d.state.DB.PutStatus(ctx, latestStatus)
if err != nil { if err != nil {

View file

@ -96,6 +96,12 @@ func (a *Account) IsRemote() bool {
return !a.IsLocal() return !a.IsLocal()
} }
// IsNew returns whether an account is "new" in the sense
// that it has not been previously stored in the database.
func (a *Account) IsNew() bool {
return a.CreatedAt.IsZero()
}
// IsInstance returns whether account is an instance internal actor account. // IsInstance returns whether account is an instance internal actor account.
func (a *Account) IsInstance() bool { func (a *Account) IsInstance() bool {
if a.IsLocal() { if a.IsLocal() {

View file

@ -410,7 +410,6 @@ func (suite *DomainBlockTestSuite) TestBlockAndAllowDomain() {
}) })
} }
func (suite *DomainBlockTestSuite) TestAllowAndBlockDomain() { func (suite *DomainBlockTestSuite) TestAllowAndBlockDomain() {
const domain = "fossbros-anonymous.io" const domain = "fossbros-anonymous.io"

View file

@ -59,6 +59,8 @@ const (
userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)` userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)`
userPath = userPathPrefix + `$` userPath = userPathPrefix + `$`
userWebPathPrefix = `^/?` + `@(` + usernameRelaxed + `)`
userWebPath = userWebPathPrefix + `$`
publicKeyPath = userPathPrefix + `/` + publicKey + `$` publicKeyPath = userPathPrefix + `/` + publicKey + `$`
inboxPath = userPathPrefix + `/` + inbox + `$` inboxPath = userPathPrefix + `/` + inbox + `$`
outboxPath = userPathPrefix + `/` + outbox + `$` outboxPath = userPathPrefix + `/` + outbox + `$`
@ -110,6 +112,9 @@ var (
// UserPath validates and captures the username part from eg /users/example_username. // UserPath validates and captures the username part from eg /users/example_username.
UserPath = regexp.MustCompile(userPath) UserPath = regexp.MustCompile(userPath)
// UserWebPath validates and captures the username part from eg /@example_username.
UserWebPath = regexp.MustCompile(userWebPath)
// PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key // PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key
PublicKeyPath = regexp.MustCompile(publicKeyPath) PublicKeyPath = regexp.MustCompile(publicKeyPath)

View file

@ -77,6 +77,18 @@ func (c *Converter) ASRepresentationToAccount(ctx context.Context, accountable a
return nil, gtserror.SetMalformed(err) return nil, gtserror.SetMalformed(err)
} }
// Extract published time if possible.
//
// This denotes original creation time
// of the account on the remote instance.
//
// Not every implementation uses this property;
// so don't bother warning if we can't find it.
if pub := ap.GetPublished(accountable); !pub.IsZero() {
acct.CreatedAt = pub
acct.UpdatedAt = pub
}
// Extract a preferred name (display name), fallback to username. // Extract a preferred name (display name), fallback to username.
if displayName := ap.ExtractName(accountable); displayName != "" { if displayName := ap.ExtractName(accountable); displayName != "" {
acct.DisplayName = displayName acct.DisplayName = displayName
@ -300,7 +312,7 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
// status.Published // status.Published
// //
// Extract published time for the boost, // Extract published time for the status,
// zero-time will fall back to db defaults. // zero-time will fall back to db defaults.
if pub := ap.GetPublished(statusable); !pub.IsZero() { if pub := ap.GetPublished(statusable); !pub.IsZero() {
status.CreatedAt = pub status.CreatedAt = pub

View file

@ -146,6 +146,7 @@ func (suite *ASToInternalTestSuite) TestParseGargron() {
acct, err := suite.typeconverter.ASRepresentationToAccount(context.Background(), rep, "") acct, err := suite.typeconverter.ASRepresentationToAccount(context.Background(), rep, "")
suite.NoError(err) suite.NoError(err)
suite.Equal("https://mastodon.social/inbox", *acct.SharedInboxURI) suite.Equal("https://mastodon.social/inbox", *acct.SharedInboxURI)
suite.Equal(int64(1458086400), acct.CreatedAt.Unix())
} }
func (suite *ASToInternalTestSuite) TestParseReplyWithMention() { func (suite *ASToInternalTestSuite) TestParseReplyWithMention() {

View file

@ -279,6 +279,8 @@ const (
"url": "https://mastodon.social/@Gargron", "url": "https://mastodon.social/@Gargron",
"manuallyApprovesFollowers": false, "manuallyApprovesFollowers": false,
"discoverable": true, "discoverable": true,
"indexable": true,
"published": "2016-03-16T00:00:00Z",
"devices": "https://mastodon.social/users/Gargron/collections/devices", "devices": "https://mastodon.social/users/Gargron/collections/devices",
"alsoKnownAs": [ "alsoKnownAs": [
"https://tooting.ai/users/Gargron" "https://tooting.ai/users/Gargron"

View file

@ -248,6 +248,11 @@ func IsUserPath(id *url.URL) bool {
return regexes.UserPath.MatchString(id.Path) return regexes.UserPath.MatchString(id.Path)
} }
// IsUserWebPath returns true if the given URL path corresponds to eg /@example_username
func IsUserWebPath(id *url.URL) bool {
return regexes.UserWebPath.MatchString(id.Path)
}
// IsInboxPath returns true if the given URL path corresponds to eg /users/example_username/inbox // IsInboxPath returns true if the given URL path corresponds to eg /users/example_username/inbox
func IsInboxPath(id *url.URL) bool { func IsInboxPath(id *url.URL) bool {
return regexes.InboxPath.MatchString(id.Path) return regexes.InboxPath.MatchString(id.Path)
@ -326,6 +331,17 @@ func ParseUserPath(id *url.URL) (username string, err error) {
return return
} }
// ParseUserPath returns the username from a path such as /@example_username
func ParseUserWebPath(id *url.URL) (username string, err error) {
matches := regexes.UserWebPath.FindStringSubmatch(id.Path)
if len(matches) != 2 {
err = fmt.Errorf("expected 2 matches but matches length was %d", len(matches))
return
}
username = matches[1]
return
}
// ParseInboxPath returns the username from a path such as /users/example_username/inbox // ParseInboxPath returns the username from a path such as /users/example_username/inbox
func ParseInboxPath(id *url.URL) (username string, err error) { func ParseInboxPath(id *url.URL) (username string, err error) {
matches := regexes.InboxPath.FindStringSubmatch(id.Path) matches := regexes.InboxPath.FindStringSubmatch(id.Path)

View file

@ -23,101 +23,168 @@ import (
"strings" "strings"
"github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
) )
// ExtractNamestringParts extracts the username test_user and // ExtractNamestringParts extracts the username test_user and
// the domain example.org from a string like @test_user@example.org. // the domain example.org from a string like @test_user@example.org.
// //
// If nothing is matched, it will return an error. // If nothing is matched, it will return an error.
func ExtractNamestringParts(mention string) (username, host string, err error) { func ExtractNamestringParts(namestring string) (username, host string, err error) {
matches := regexes.MentionName.FindStringSubmatch(mention) matches := regexes.MentionName.FindStringSubmatch(namestring)
switch len(matches) { switch len(matches) {
case 2: case 2:
return matches[1], "", nil return matches[1], "", nil
case 3: case 3:
return matches[1], matches[2], nil return matches[1], matches[2], nil
default: default:
return "", "", fmt.Errorf("couldn't match mention %s", mention) return "", "", fmt.Errorf("couldn't match namestring %s", namestring)
} }
} }
// ExtractWebfingerParts returns the username and domain from either an // ExtractWebfingerParts returns the username and domain from the "subject"
// account query or an actor URI. // part of a webfinger response: either an account namestring or an actor URI.
// //
// All implementations in the wild generate webfinger account resource // All AP implementations in the wild perform webfinger account resource
// queries with the "acct" scheme and without a leading "@"" on the username. // queries with the "acct" scheme and without a leading "@"" on the username.
// This is also the format the "subject" in a webfinger response adheres to. // This is also the format the "subject" in a webfinger response adheres to.
// //
// Despite this fact, we're being permissive about a single leading @. This // Despite this fact, we're permissive about a single leading @. This makes
// makes a query for acct:user@domain.tld and acct:@user@domain.tld // a query for "acct:user@domain.tld" and "acct:@user@domain.tld" equivalent.
// equivalent. But a query for acct:@@user@domain.tld will have its username
// returned with the @ prefix.
// //
// We also permit a resource of user@domain.tld or @user@domain.tld, without // We also permit a resource of "user@domain.tld" or "@user@domain.tld", without
// a scheme. In that case it gets interpreted as if it was using the "acct" // a scheme. In that case it gets interpreted as if it was using "acct:".
// scheme.
// //
// When parsing fails, an error is returned. // Will error if parsing fails, or if the extracted username or domain are empty.
func ExtractWebfingerParts(webfinger string) (username, host string, err error) { func ExtractWebfingerParts(subject string) (
orig := webfinger string, // username
string, // domain
u, oerr := url.ParseRequestURI(webfinger) error,
if oerr != nil { ) {
// Most likely reason for failing to parse is if the "acct" scheme was u, err := url.ParseRequestURI(subject)
// missing but a :port was included. So try an extra time with the scheme.
u, err = url.ParseRequestURI("acct:" + webfinger)
if err != nil { if err != nil {
return "", "", fmt.Errorf("failed to parse %s with acct sheme: %w", orig, oerr) // Most likely reason for failing to parse is if
// the "acct" scheme was missing but a :port was
// included. So try an extra time with the scheme.
u, err = url.ParseRequestURI("acct:" + subject)
} }
if err != nil {
return "", "", fmt.Errorf("failed to parse %s: %w", subject, err)
} }
if u.Scheme == "http" || u.Scheme == "https" { switch u.Scheme {
return ExtractWebfingerPartsFromURI(u)
// Subject looks like
// "https://example.org/users/whatever"
// or "https://example.org/@whatever".
case "http", "https":
return partsFromURI(u)
// Subject looks like
// "acct:whatever@example.org"
// or "acct:@whatever@example.org".
case "acct":
// Pass string without "acct:" prefix.
return partsFromNamestring(u.Opaque)
// Subject was probably a relative URL.
// Fail since we need the domain.
case "":
return "", "", fmt.Errorf("no scheme for resource %s", subject)
} }
if u.Scheme != "acct" { return "", "", fmt.Errorf("unsupported scheme %s for resource %s", u.Scheme, subject)
return "", "", fmt.Errorf("unsupported scheme: %s for resource: %s", u.Scheme, orig)
}
stripped := strings.TrimPrefix(u.Opaque, "@")
userDomain := strings.Split(stripped, "@")
if len(userDomain) != 2 {
return "", "", fmt.Errorf("failed to extract user and domain from: %s", orig)
}
return userDomain[0], userDomain[1], nil
} }
// ExtractWebfingerPartsFromURI returns the user and domain extracted from // partsFromNamestring returns the username
// the passed in URI. The URI should be an actor URI. // and host parts extracted from a passed-in actor
// namestring of the format "whatever@example.org".
// //
// The domain returned is the hostname, and the user will be extracted // The function returns an error if username or
// from either /@test_user or /users/test_user. These two paths match the // host cannot be extracted.
// "aliasses" we include in our webfinger response and are also present in func partsFromNamestring(namestring string) (
// our "links". string, // username
// string, // host
// Like with ExtractWebfingerParts, we're being permissive about a single error,
// leading @. ) {
// // Trim all leading "@" symbols,
// Errors are returned in case we end up with an empty domain or username. // and then inject just one "@".
func ExtractWebfingerPartsFromURI(uri *url.URL) (username, host string, err error) { namestring = strings.TrimLeft(namestring, "@")
host = uri.Host namestring = "@" + namestring
if host == "" {
return "", "", fmt.Errorf("failed to extract domain from: %s", uri) username, host, err := ExtractNamestringParts(namestring)
if err != nil {
return "", "", err
} }
// strip any leading slashes if username == "" {
path := strings.TrimLeft(uri.Path, "/") err := fmt.Errorf("failed to extract username from: %s", namestring)
segs := strings.Split(path, "/") return "", "", err
}
if host == "" {
err := fmt.Errorf("failed to extract domain from: %s", namestring)
return "", "", err
}
return username, host, nil
}
// partsFromURI returns the username and host
// extracted from the passed in actor URI.
//
// The username will be extracted from one of
// the patterns "/@whatever" or "/users/whatever".
// These paths match the "aliases" and "links"
// we include in our own webfinger responses.
//
// This function tries to be permissive with
// regard to leading "@" symbols. Nevertheless,
// an error will be returned if username or host
// cannot be extracted.
func partsFromURI(uri *url.URL) (
string, // username
string, // host
error,
) {
host := uri.Host
if host == "" {
err := fmt.Errorf("failed to extract domain from: %s", uri)
return "", "", err
}
// Copy the URL, taking
// only the parts we need.
short := &url.URL{
Path: uri.Path,
}
// Try "/users/whatever".
username, err := uris.ParseUserPath(short)
if err == nil && username != "" {
return username, host, nil
}
// Try "/@whatever"
username, err = uris.ParseUserWebPath(short)
if err == nil && username != "" {
return username, host, nil
}
// Try some exotic fallbacks like
// "/users/@whatever", "/@@whatever", etc.
short.Path = strings.TrimLeft(short.Path, "/")
segs := strings.Split(short.Path, "/")
if segs[0] == "users" { if segs[0] == "users" {
username = segs[1] username = segs[1]
} else { } else {
username = segs[0] username = segs[0]
} }
username = strings.TrimPrefix(username, "@") username = strings.TrimLeft(username, "@")
if username == "" { if username != "" {
return "", "", fmt.Errorf("failed to extract username from: %s", uri) return username, host, nil
} }
return return "", "", fmt.Errorf("failed to extract username from: %s", uri)
} }

View file

@ -18,7 +18,6 @@
package util_test package util_test
import ( import (
"net/url"
"testing" "testing"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
@ -39,11 +38,26 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() {
{in: "stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, {in: "stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "stonerkitty.monster@stonerkitty.monster:8080", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"}, {in: "stonerkitty.monster@stonerkitty.monster:8080", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, {in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "acct:@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: acct:@@stonerkitty.monster@stonerkitty.monster"}, {in: "acct:@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: acct:@stonerkitty.monster@@stonerkitty.monster"}, {in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"},
{in: "@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: @@stonerkitty.monster@stonerkitty.monster"}, {in: "@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: @stonerkitty.monster@@stonerkitty.monster"}, {in: "@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"},
{in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme: s3 for resource: s3:stonerkitty.monster@stonerkitty.monster"}, {in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme s3 for resource s3:stonerkitty.monster@stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "/@stonerkitty.monster", err: "no scheme for resource /@stonerkitty.monster"},
{in: "/users/stonerkitty.monster", err: "no scheme for resource /users/stonerkitty.monster"},
{in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"},
{in: "users/stonerkitty.monster", err: "couldn't match namestring @users/stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"},
{in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"},
{in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"},
{in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"},
} }
for _, tt := range tests { for _, tt := range tests {
@ -56,45 +70,9 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() {
suite.Equal(tt.username, username) suite.Equal(tt.username, username)
suite.Equal(tt.domain, domain) suite.Equal(tt.domain, domain)
} else { } else {
suite.EqualError(err, tt.err) if !suite.EqualError(err, tt.err) {
suite.T().Logf("expected error %s", tt.err)
} }
})
}
}
func (suite *NamestringSuite) TestExtractWebfingerPartsFromURI() {
tests := []struct {
in, username, domain, err string
}{
{in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "@stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "/@stonerkitty.monster", err: "failed to extract domain from: /@stonerkitty.monster"},
{in: "/users/stonerkitty.monster", err: "failed to extract domain from: /users/stonerkitty.monster"},
{in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"},
{in: "users/stonerkitty.monster", err: "failed to extract domain from: users/stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"},
{in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"},
{in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"},
{in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"},
}
for _, tt := range tests {
tt := tt
suite.Run(tt.in, func() {
suite.T().Parallel()
uri, _ := url.Parse(tt.in)
username, domain, err := util.ExtractWebfingerPartsFromURI(uri)
if tt.err == "" {
suite.NoError(err)
suite.Equal(tt.username, username)
suite.Equal(tt.domain, domain)
} else {
suite.EqualError(err, tt.err)
} }
}) })
} }
@ -107,7 +85,7 @@ func (suite *NamestringSuite) TestExtractNamestring() {
{in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", host: "stonerkitty.monster"}, {in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", host: "stonerkitty.monster"},
{in: "@stonerkitty.monster", username: "stonerkitty.monster"}, {in: "@stonerkitty.monster", username: "stonerkitty.monster"},
{in: "@someone@somewhere", username: "someone", host: "somewhere"}, {in: "@someone@somewhere", username: "someone", host: "somewhere"},
{in: "", err: "couldn't match mention "}, {in: "", err: "couldn't match namestring "},
} }
for _, tt := range tests { for _, tt := range tests {