gotosocial/internal/federation/dereferencing/status.go

430 lines
15 KiB
Go
Raw Normal View History

/*
GoToSocial
2021-12-20 17:42:19 +00:00
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"strings"
"github.com/sirupsen/logrus"
2021-11-13 16:29:43 +00:00
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/media"
)
// EnrichRemoteStatus fleshes out a status that is already stored in the database in a
// minimal form: it populates the status's additional fields (see populateStatusFields)
// and then writes the updated status back to the database by primary key.
//
// It is mostly useful after a status has been initially created by the federatingDB's
// Create function, when further dereferencing is still needed on it.
//
// username is passed on as the requesting username for any dereferencing that takes place.
// includeParent controls whether the replied-to status is populated as well.
//
// On success the same status pointer that was passed in is returned.
func (d *deref) EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status, includeParent bool) (*gtsmodel.Status, error) {
	populateErr := d.populateStatusFields(ctx, status, username, includeParent)
	if populateErr != nil {
		// returned as-is: populateStatusFields already prefixes its own errors
		return nil, populateErr
	}

	updateErr := d.db.UpdateByPrimaryKey(ctx, status)
	if updateErr != nil {
		return nil, fmt.Errorf("EnrichRemoteStatus: error updating status: %s", updateErr)
	}

	return status, nil
}
// GetRemoteStatus dereferences a remote status, converts it to a GtS model status,
// puts it in the database, and returns it to the caller.
//
// If refetch is true, then regardless of whether we have the original status in the database or not,
// the ap.Statusable representation of the status will be dereferenced and returned.
//
// If refetch is false, the ap.Statusable will only be returned if this is a new status, so callers
// should check whether or not this is nil.
//
// includeParent controls whether the replied-to status is populated for a newly created status.
//
// On error, both the returned status and the returned ap.Statusable are nil.
//
// SIDE EFFECTS: a new remote status is stored in the database via PutStatus, and its author is
// fetched beforehand via GetRemoteAccount.
func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStatusID *url.URL, refetch, includeParent bool) (*gtsmodel.Status, ap.Statusable, error) {
	// NOTE(review): any error from this lookup (not only a not-found error) falls through
	// to a remote fetch below — confirm this is intended.
	maybeStatus, err := d.db.GetStatusByURI(ctx, remoteStatusID.String())
	if err == nil && !refetch {
		// we already had the status and we aren't being asked to refetch the AP representation
		return maybeStatus, nil, nil
	}

	statusable, err := d.dereferenceStatusable(ctx, username, remoteStatusID)
	if err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: error dereferencing statusable: %w", err)
	}

	if maybeStatus != nil && refetch {
		// we already had the status and we've successfully fetched the AP representation as requested
		return maybeStatus, statusable, nil
	}

	// from here on out we can consider this to be a 'new' status because we didn't have the status in the db already
	accountURI, err := ap.ExtractAttributedTo(statusable)
	if err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: error extracting attributedTo: %w", err)
	}

	// make sure the author of the status is available before converting the status itself
	_, err = d.GetRemoteAccount(ctx, username, accountURI, true, false)
	if err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: couldn't get status author: %w", err)
	}

	gtsStatus, err := d.typeConverter.ASStatusToStatus(ctx, statusable)
	if err != nil {
		// return nil for both values, consistent with every other error path in this function
		return nil, nil, fmt.Errorf("GetRemoteStatus: error converting statusable to status: %w", err)
	}

	// derive the ID from the creation time of the status
	ulid, err := id.NewULIDFromTime(gtsStatus.CreatedAt)
	if err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: error generating new id for status: %w", err)
	}
	gtsStatus.ID = ulid

	if err := d.populateStatusFields(ctx, gtsStatus, username, includeParent); err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: error populating status fields: %w", err)
	}

	if err := d.db.PutStatus(ctx, gtsStatus); err != nil {
		return nil, nil, fmt.Errorf("GetRemoteStatus: error putting new status: %w", err)
	}

	return gtsStatus, statusable, nil
}
func (d *deref) dereferenceStatusable(ctx context.Context, username string, remoteStatusID *url.URL) (ap.Statusable, error) {
if blocked, err := d.db.IsDomainBlocked(ctx, remoteStatusID.Host); blocked || err != nil {
return nil, fmt.Errorf("DereferenceStatusable: domain %s is blocked", remoteStatusID.Host)
}
transport, err := d.transportController.NewTransportForUsername(ctx, username)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: transport err: %s", err)
}
b, err := transport.Dereference(ctx, remoteStatusID)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error deferencing %s: %s", remoteStatusID.String(), err)
}
m := make(map[string]interface{})
if err := json.Unmarshal(b, &m); err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error unmarshalling bytes into json: %s", err)
}
t, err := streams.ToType(ctx, m)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error resolving json into ap vocab type: %s", err)
}
// Article, Document, Image, Video, Note, Page, Event, Place, Mention, Profile
switch t.GetTypeName() {
2021-08-31 13:59:12 +00:00
case ap.ObjectArticle:
p, ok := t.(vocab.ActivityStreamsArticle)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsArticle")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectDocument:
p, ok := t.(vocab.ActivityStreamsDocument)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsDocument")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectImage:
p, ok := t.(vocab.ActivityStreamsImage)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsImage")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectVideo:
p, ok := t.(vocab.ActivityStreamsVideo)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsVideo")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectNote:
p, ok := t.(vocab.ActivityStreamsNote)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsNote")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectPage:
p, ok := t.(vocab.ActivityStreamsPage)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsPage")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectEvent:
p, ok := t.(vocab.ActivityStreamsEvent)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsEvent")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectPlace:
p, ok := t.(vocab.ActivityStreamsPlace)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsPlace")
}
return p, nil
2021-08-31 13:59:12 +00:00
case ap.ObjectProfile:
p, ok := t.(vocab.ActivityStreamsProfile)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsProfile")
}
return p, nil
}
return nil, fmt.Errorf("DereferenceStatusable: type name %s not supported", t.GetTypeName())
}
// populateStatusFields fetches all the information we temporarily pinned to an incoming
// federated status, back in the federating db's Create function.
//
// When a status comes in from the federation API, there are certain fields that
// haven't been dereferenced yet, because we needed to provide a snappy synchronous
// response to the caller. By the time it reaches this function though, it's being
// processed asynchronously, so we have all the time in the world to fetch the various
// bits and bobs that are attached to the status, and properly flesh it out, before we
// send the status to any timelines and notify people.
//
// Things to dereference and fetch here:
//
// 1. Media attachments.
// 2. Hashtags.
// 3. Emojis.
// 4. Mentions.
// 5. Replied-to-status.
//
// SIDE EFFECTS:
// This function will deference all of the above, insert them in the database as necessary,
// and attach them to the status. The status itself will not be added to the database yet,
// that's up the caller to do.
func (d *deref) populateStatusFields(ctx context.Context, status *gtsmodel.Status, requestingUsername string, includeParent bool) error {
l := logrus.WithFields(logrus.Fields{
"func": "dereferenceStatusFields",
"status": fmt.Sprintf("%+v", status),
})
l.Debug("entering function")
statusIRI, err := url.Parse(status.URI)
if err != nil {
return fmt.Errorf("populateStatusFields: couldn't parse status URI %s: %s", status.URI, err)
}
blocked, err := d.db.IsURIBlocked(ctx, statusIRI)
if err != nil {
return fmt.Errorf("populateStatusFields: error checking blocked status of %s: %s", statusIRI, err)
}
if blocked {
return fmt.Errorf("populateStatusFields: domain %s is blocked", statusIRI)
}
// in case the status doesn't have an id yet (ie., it hasn't entered the database yet), then create one
if status.ID == "" {
newID, err := id.NewULIDFromTime(status.CreatedAt)
if err != nil {
return fmt.Errorf("populateStatusFields: error creating ulid for status: %s", err)
}
status.ID = newID
}
// 1. Media attachments.
if err := d.populateStatusAttachments(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status attachments: %s", err)
}
// 2. Hashtags
// TODO
// 3. Emojis
// TODO
// 4. Mentions
Improve GetRemoteStatus and db.GetStatus() logic (#174) * only fetch status parents / children if explicity requested when dereferencing Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Remove recursive DB GetStatus logic, don't fetch parent unless requested Signed-off-by: kim (grufwub) <grufwub@gmail.com> * StatusCache copies status so there are no thread-safety issues with modified status objects Signed-off-by: kim (grufwub) <grufwub@gmail.com> * remove sqlite test files Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix bugs introduced by previous commit Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix not continue on error in loop Signed-off-by: kim (grufwub) <grufwub@gmail.com> * use our own RunInTx implementation (possible fix for nested tx error) Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix cast statement to work with SQLite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * be less strict about valid status in cache Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add cache=shared ALWAYS for SQLite db instances Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix EnrichRemoteAccount when updating account fails Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add nolint tag Signed-off-by: kim (grufwub) <grufwub@gmail.com> * ensure file: prefixes the filename in sqlite addr Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add an account cache, add status author account from db Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix incompatible SQLite query Signed-off-by: kim (grufwub) <grufwub@gmail.com> * *actually* use the new getAccount() function in accountsDB Signed-off-by: kim (grufwub) <grufwub@gmail.com> * update cache tests to use test suite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add RelationshipTestSuite, add tests for methods with changed SQL Signed-off-by: kim (grufwub) <grufwub@gmail.com>
2021-09-01 09:08:21 +00:00
// TODO: do we need to handle removing empty mention objects and just using mention IDs slice?
if err := d.populateStatusMentions(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status mentions: %s", err)
}
Improve GetRemoteStatus and db.GetStatus() logic (#174) * only fetch status parents / children if explicity requested when dereferencing Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Remove recursive DB GetStatus logic, don't fetch parent unless requested Signed-off-by: kim (grufwub) <grufwub@gmail.com> * StatusCache copies status so there are no thread-safety issues with modified status objects Signed-off-by: kim (grufwub) <grufwub@gmail.com> * remove sqlite test files Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix bugs introduced by previous commit Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix not continue on error in loop Signed-off-by: kim (grufwub) <grufwub@gmail.com> * use our own RunInTx implementation (possible fix for nested tx error) Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix cast statement to work with SQLite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * be less strict about valid status in cache Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add cache=shared ALWAYS for SQLite db instances Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix EnrichRemoteAccount when updating account fails Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add nolint tag Signed-off-by: kim (grufwub) <grufwub@gmail.com> * ensure file: prefixes the filename in sqlite addr Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add an account cache, add status author account from db Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix incompatible SQLite query Signed-off-by: kim (grufwub) <grufwub@gmail.com> * *actually* use the new getAccount() function in accountsDB Signed-off-by: kim (grufwub) <grufwub@gmail.com> * update cache tests to use test suite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add RelationshipTestSuite, add tests for methods with changed SQL Signed-off-by: kim (grufwub) <grufwub@gmail.com>
2021-09-01 09:08:21 +00:00
// 5. Replied-to-status (only if requested)
if includeParent {
if err := d.populateStatusRepliedTo(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status repliedTo: %s", err)
}
}
return nil
}
func (d *deref) populateStatusMentions(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error {
// At this point, mentions should have the namestring and mentionedAccountURI set on them.
// We can use these to find the accounts.
mentionIDs := []string{}
newMentions := []*gtsmodel.Mention{}
for _, m := range status.Mentions {
if m.ID != "" {
// we've already populated this mention, since it has an ID
logrus.Debug("populateStatusMentions: mention already populated")
mentionIDs = append(mentionIDs, m.ID)
newMentions = append(newMentions, m)
continue
}
if m.TargetAccountURI == "" {
logrus.Debug("populateStatusMentions: target URI not set on mention")
continue
}
targetAccountURI, err := url.Parse(m.TargetAccountURI)
if err != nil {
logrus.Debugf("populateStatusMentions: error parsing mentioned account uri %s: %s", m.TargetAccountURI, err)
continue
}
var targetAccount *gtsmodel.Account
errs := []string{}
// check if account is in the db already
if a, err := d.db.GetAccountByURI(ctx, targetAccountURI.String()); err != nil {
errs = append(errs, err.Error())
} else {
logrus.Debugf("populateStatusMentions: got target account %s with id %s through GetAccountByURI", targetAccountURI, a.ID)
targetAccount = a
}
if targetAccount == nil {
// we didn't find the account in our database already
// check if we can get the account remotely (dereference it)
2022-01-24 12:12:17 +00:00
if a, err := d.GetRemoteAccount(ctx, requestingUsername, targetAccountURI, false, false); err != nil {
errs = append(errs, err.Error())
} else {
logrus.Debugf("populateStatusMentions: got target account %s with id %s through GetRemoteAccount", targetAccountURI, a.ID)
targetAccount = a
}
}
if targetAccount == nil {
logrus.Debugf("populateStatusMentions: couldn't get target account %s: %s", m.TargetAccountURI, strings.Join(errs, " : "))
continue
}
mID, err := id.NewRandomULID()
if err != nil {
return fmt.Errorf("populateStatusMentions: error generating ulid: %s", err)
}
newMention := &gtsmodel.Mention{
ID: mID,
StatusID: status.ID,
Status: m.Status,
CreatedAt: status.CreatedAt,
UpdatedAt: status.UpdatedAt,
OriginAccountID: status.AccountID,
OriginAccountURI: status.AccountURI,
OriginAccount: status.Account,
TargetAccountID: targetAccount.ID,
TargetAccount: targetAccount,
NameString: m.NameString,
TargetAccountURI: targetAccount.URI,
TargetAccountURL: targetAccount.URL,
}
if err := d.db.Put(ctx, newMention); err != nil {
return fmt.Errorf("populateStatusMentions: error creating mention: %s", err)
}
mentionIDs = append(mentionIDs, newMention.ID)
newMentions = append(newMentions, newMention)
}
status.MentionIDs = mentionIDs
status.Mentions = newMentions
return nil
}
func (d *deref) populateStatusAttachments(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error {
// At this point we should know:
// * the media type of the file we're looking for (a.File.ContentType)
// * the file type (a.Type)
// * the remote URL (a.RemoteURL)
// This should be enough to dereference the piece of media.
attachmentIDs := []string{}
attachments := []*gtsmodel.MediaAttachment{}
for _, a := range status.Attachments {
a.AccountID = status.AccountID
a.StatusID = status.ID
2022-01-24 12:12:17 +00:00
processingMedia, err := d.GetRemoteMedia(ctx, requestingUsername, a.AccountID, a.RemoteURL, &media.AdditionalMediaInfo{
CreatedAt: &a.CreatedAt,
StatusID: &a.StatusID,
RemoteURL: &a.RemoteURL,
Description: &a.Description,
Blurhash: &a.Blurhash,
})
if err != nil {
2022-01-08 16:17:01 +00:00
logrus.Errorf("populateStatusAttachments: couldn't get remote media %s: %s", a.RemoteURL, err)
continue
}
2022-01-24 12:12:17 +00:00
attachment, err := processingMedia.LoadAttachment(ctx)
2022-01-08 16:17:01 +00:00
if err != nil {
logrus.Errorf("populateStatusAttachments: couldn't load remote attachment %s: %s", a.RemoteURL, err)
Improve GetRemoteStatus and db.GetStatus() logic (#174) * only fetch status parents / children if explicity requested when dereferencing Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Remove recursive DB GetStatus logic, don't fetch parent unless requested Signed-off-by: kim (grufwub) <grufwub@gmail.com> * StatusCache copies status so there are no thread-safety issues with modified status objects Signed-off-by: kim (grufwub) <grufwub@gmail.com> * remove sqlite test files Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix bugs introduced by previous commit Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix not continue on error in loop Signed-off-by: kim (grufwub) <grufwub@gmail.com> * use our own RunInTx implementation (possible fix for nested tx error) Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix cast statement to work with SQLite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * be less strict about valid status in cache Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add cache=shared ALWAYS for SQLite db instances Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix EnrichRemoteAccount when updating account fails Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add nolint tag Signed-off-by: kim (grufwub) <grufwub@gmail.com> * ensure file: prefixes the filename in sqlite addr Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add an account cache, add status author account from db Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix incompatible SQLite query Signed-off-by: kim (grufwub) <grufwub@gmail.com> * *actually* use the new getAccount() function in accountsDB Signed-off-by: kim (grufwub) <grufwub@gmail.com> * update cache tests to use test suite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add RelationshipTestSuite, add tests for methods with changed SQL Signed-off-by: kim (grufwub) <grufwub@gmail.com>
2021-09-01 09:08:21 +00:00
continue
}
attachmentIDs = append(attachmentIDs, attachment.ID)
attachments = append(attachments, attachment)
}
status.AttachmentIDs = attachmentIDs
status.Attachments = attachments
return nil
}
func (d *deref) populateStatusRepliedTo(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error {
if status.InReplyToURI != "" && status.InReplyToID == "" {
statusURI, err := url.Parse(status.InReplyToURI)
if err != nil {
return err
}
replyToStatus, _, err := d.GetRemoteStatus(ctx, requestingUsername, statusURI, false, false)
Improve GetRemoteStatus and db.GetStatus() logic (#174) * only fetch status parents / children if explicity requested when dereferencing Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Remove recursive DB GetStatus logic, don't fetch parent unless requested Signed-off-by: kim (grufwub) <grufwub@gmail.com> * StatusCache copies status so there are no thread-safety issues with modified status objects Signed-off-by: kim (grufwub) <grufwub@gmail.com> * remove sqlite test files Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix bugs introduced by previous commit Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix not continue on error in loop Signed-off-by: kim (grufwub) <grufwub@gmail.com> * use our own RunInTx implementation (possible fix for nested tx error) Signed-off-by: kim (grufwub) <grufwub@gmail.com> * fix cast statement to work with SQLite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * be less strict about valid status in cache Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add cache=shared ALWAYS for SQLite db instances Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix EnrichRemoteAccount when updating account fails Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add nolint tag Signed-off-by: kim (grufwub) <grufwub@gmail.com> * ensure file: prefixes the filename in sqlite addr Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add an account cache, add status author account from db Signed-off-by: kim (grufwub) <grufwub@gmail.com> * Fix incompatible SQLite query Signed-off-by: kim (grufwub) <grufwub@gmail.com> * *actually* use the new getAccount() function in accountsDB Signed-off-by: kim (grufwub) <grufwub@gmail.com> * update cache tests to use test suite Signed-off-by: kim (grufwub) <grufwub@gmail.com> * add RelationshipTestSuite, add tests for methods with changed SQL Signed-off-by: kim (grufwub) <grufwub@gmail.com>
2021-09-01 09:08:21 +00:00
if err != nil {
return fmt.Errorf("populateStatusRepliedTo: couldn't get reply to status with uri %s: %s", status.InReplyToURI, err)
}
// we have the status
status.InReplyToID = replyToStatus.ID
status.InReplyTo = replyToStatus
status.InReplyToAccountID = replyToStatus.AccountID
status.InReplyToAccount = replyToStatus.Account
}
return nil
}