gotosocial/internal/federation/dereferencing/thread.go
kim 6c9d8e78eb
[feature] status refetch support (#1690)
* revamp http client to not limit requests, instead use sender worker

Signed-off-by: kim <grufwub@gmail.com>

* remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping

Signed-off-by: kim <grufwub@gmail.com>

* improve batch senders to keep popping recipients until remote URL found

Signed-off-by: kim <grufwub@gmail.com>

* fix recipient looping issue

Signed-off-by: kim <grufwub@gmail.com>

* move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP

Signed-off-by: kim <grufwub@gmail.com>

* first draft of status refetching logic

Signed-off-by: kim <grufwub@gmail.com>

* fix testrig to use new federation alloc func signature

Signed-off-by: kim <grufwub@gmail.com>

* fix log format directive

Signed-off-by: kim <grufwub@gmail.com>

* add status fetched_at migration

Signed-off-by: kim <grufwub@gmail.com>

* remove unused / unchecked for error types

Signed-off-by: kim <grufwub@gmail.com>

* add back the used type...

Signed-off-by: kim <grufwub@gmail.com>

* add separate internal getStatus() function for derefThread() that doesn't recurse

Signed-off-by: kim <grufwub@gmail.com>

* improved mention and media attachment error handling

Signed-off-by: kim <grufwub@gmail.com>

* fix log and error format directives

Signed-off-by: kim <grufwub@gmail.com>

* update account deref to match status deref changes

Signed-off-by: kim <grufwub@gmail.com>

* very small code formatting change to make things clearer

Signed-off-by: kim <grufwub@gmail.com>

* add more code comments

Signed-off-by: kim <grufwub@gmail.com>

* improved code commenting

Signed-off-by: kim <grufwub@gmail.com>

* only check for required further derefs if needed

Signed-off-by: kim <grufwub@gmail.com>

* improved cache invalidation

Signed-off-by: kim <grufwub@gmail.com>

* tweak cache restarting to use a (very small) backoff

Signed-off-by: kim <grufwub@gmail.com>

* small readability changes and fixes

Signed-off-by: kim <grufwub@gmail.com>

* fix account sync issues

Signed-off-by: kim <grufwub@gmail.com>

* fix merge conflicts + update account enrichment to accept already-passed accountable

Signed-off-by: kim <grufwub@gmail.com>

* remove secondary function declaration

Signed-off-by: kim <grufwub@gmail.com>

* normalise dereferencer get status / account behaviour, fix remaining tests

Signed-off-by: kim <grufwub@gmail.com>

* fix remaining rebase conflicts, finish commenting code

Signed-off-by: kim <grufwub@gmail.com>

* appease the linter

Signed-off-by: kim <grufwub@gmail.com>

* add source file header

Signed-off-by: kim <grufwub@gmail.com>

* update to use TIMESTAMPTZ column type instead of just TIMESTAMP

Signed-off-by: kim <grufwub@gmail.com>

* don't pass in 'updated_at' to UpdateEmoji()

Signed-off-by: kim <grufwub@gmail.com>

* use new ap.Resolve{Account,Status}able() functions

Signed-off-by: kim <grufwub@gmail.com>

* remove the somewhat confusing rescoping of the same variable names

Signed-off-by: kim <grufwub@gmail.com>

* update migration file name, improved database delete error returns

Signed-off-by: kim <grufwub@gmail.com>

* formatting

Signed-off-by: kim <grufwub@gmail.com>

* improved multi-delete database functions to minimise DB calls

Signed-off-by: kim <grufwub@gmail.com>

* remove unused type

Signed-off-by: kim <grufwub@gmail.com>

* fix delete statements

Signed-off-by: kim <grufwub@gmail.com>

---------

Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 11:15:54 +02:00

316 lines
8.8 KiB
Go

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package dereferencing
import (
"context"
"fmt"
"net/url"
"codeberg.org/gruf/go-kv"
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// maxIter defines how many iterations of descendants or
// ancestors we are willing to follow before returning error.
// It is the upper bound of the main loop in both
// dereferenceStatusAncestors and dereferenceStatusDescendants.
const maxIter = 1000
// dereferenceThread fills in the thread surrounding the given status: first the
// chain of statuses it replies to, then the tree of replies beneath it. It has
// no return value; a failure in either direction is only logged, and a failure
// while walking ancestors does not prevent the descendants from being walked.
// It is intended to be called asynchronously.
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
	// Walk upwards through everything this status replies to.
	ancestorsErr := d.dereferenceStatusAncestors(ctx, username, status)
	if ancestorsErr != nil {
		log.Errorf(ctx, "error dereferencing status ancestors: %v", ancestorsErr)
	}

	// Walk downwards through every reply to this status.
	descendantsErr := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable)
	if descendantsErr != nil {
		log.Errorf(ctx, "error dereferencing status descendants: %v", descendantsErr)
	}
}
// dereferenceStatusAncestors climbs the reply chain of the given status one
// parent at a time until it reaches a status that is not a reply to anything.
//
// A parent whose host matches config.GetHost() is looked up in the database by
// the ID parsed from its path; any other parent is fetched via getStatusByURI
// on behalf of username. The first parse, lookup or fetch failure stops the
// climb and is returned wrapped. An error is also returned if maxIter parents
// have been followed without reaching the top of the chain.
func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
	// Remember the starting point; 'status' is reassigned as we climb.
	startURI := status.URI

	// Prepare a log entry carrying the fields common to every message.
	fields := kv.Fields{
		{"username", username},
		{"statusIRI", startURI},
	}
	entry := log.WithContext(ctx).WithFields(fields...)
	entry.Trace("beginning")

	for steps := 0; steps < maxIter; steps++ {
		parentURI := status.InReplyToURI
		if parentURI == "" {
			// Top of the chain: nothing left to follow.
			return nil
		}

		parentIRI, err := url.Parse(parentURI)
		if err != nil {
			return fmt.Errorf("invalid status InReplyToURI %q: %w", parentURI, err)
		}

		if parentIRI.Host != config.GetHost() {
			entry.Tracef("following remote status ancestors: %s", parentURI)

			// Parent lives elsewhere, fetch it by its IRI.
			parent, _, err := d.getStatusByURI(ctx, username, parentIRI)
			if err != nil {
				return fmt.Errorf("error fetching remote status %q: %w", parentURI, err)
			}

			// Continue the climb from the parent.
			status = parent
			continue
		}

		entry.Tracef("following local status ancestors: %s", parentURI)

		// Parent is hosted here, its ID is encoded in the IRI path.
		_, id, err := uris.ParseStatusesPath(parentIRI)
		if err != nil {
			return fmt.Errorf("invalid local status IRI %q: %w", parentURI, err)
		}

		parent, err := d.state.DB.GetStatusByID(ctx, id)
		if err != nil {
			return fmt.Errorf("error fetching local status %q: %w", id, err)
		}

		// Continue the climb from the parent.
		status = parent
	}

	return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, startURI)
}
// dereferenceStatusDescendants iteratively walks the tree of replies beneath
// the given parent statusable (identified by statusIRI), dereferencing each
// remote reply on behalf of username and then descending into that reply's own
// replies in turn.
//
// The walk is depth-first and uses an explicit stack of frames instead of
// recursion. Replies hosted on this instance (host equal to config.GetHost())
// are skipped, as are statuses with no usable replies collection. Failures to
// dereference an individual reply or a "next" collection page are logged and
// skipped rather than returned.
//
// It returns nil once the stack has been exhausted, or an error if maxIter
// frames have been processed without exhausting it.
func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
	// Take ref to original
	ogIRI := statusIRI

	// Start log entry with fields
	l := log.WithContext(ctx).
		WithFields(kv.Fields{
			{"username", username},
			{"statusIRI", ogIRI},
		}...)

	// Log function start
	l.Trace("beginning")

	// frame represents a single stack frame when iteratively
	// dereferencing status descendants. where statusIRI and
	// statusable are of the status whose children we are to
	// descend, page is the current activity streams collection
	// page of entities we are on (as we often push a frame to
	// stack mid-paging), and itemIter is the entity iterator
	// for this activity streams collection page. while a frame
	// is in use, a non-nil itemIter always points at the item
	// that was most recently handled.
	type frame struct {
		statusIRI  *url.URL
		statusable ap.Statusable
		page       ap.CollectionPageable
		itemIter   vocab.ActivityStreamsItemsPropertyIterator
	}

	var (
		// current is the current stack frame
		current *frame

		// stack is a list of "shelved" descendant iterator
		// frames. this is pushed to when a child status frame
		// is found that we need to further iterate down, and
		// popped from into 'current' when that child's tree
		// of further descendants is exhausted.
		stack = []*frame{
			{
				// Starting input is first frame
				statusIRI:  statusIRI,
				statusable: parent,
			},
		}

		// popStack will remove and return the top frame
		// from the stack, or nil if currently empty.
		popStack = func() *frame {
			if len(stack) == 0 {
				return nil
			}

			// Get frame index
			idx := len(stack) - 1

			// Pop last frame
			frame := stack[idx]
			stack = stack[:idx]

			return frame
		}
	)

stackLoop:
	for i := 0; i < maxIter; i++ {
		// Pop next frame, nil means we are at end
		if current = popStack(); current == nil {
			return nil
		}

		if current.page == nil {
			// This is a local status, no looping to do
			if current.statusIRI.Host == config.GetHost() {
				continue stackLoop
			}

			l.Tracef("following remote status descendants: %s", current.statusIRI)

			// Look for an attached status replies (as collection)
			replies := current.statusable.GetActivityStreamsReplies()
			if replies == nil {
				continue stackLoop
			}

			// Get the status replies collection
			collection := replies.GetActivityStreamsCollection()
			if collection == nil {
				continue stackLoop
			}

			// Get the "first" property of the replies collection
			first := collection.GetActivityStreamsFirst()
			if first == nil {
				continue stackLoop
			}

			// Set the first activity stream collection page
			current.page = first.GetActivityStreamsCollectionPage()
			if current.page == nil {
				continue stackLoop
			}
		}

	pageLoop:
		for {
			// advance reports whether the iterator must be stepped
			// forward before its item is read. A non-nil iterator
			// here means this frame was shelved mid-page and still
			// points at the item that was already handled. A nil
			// iterator means a fresh page, whose Begin() iterator
			// already IS the first item and must be read as-is,
			// otherwise the first reply of every page is skipped.
			advance := current.itemIter != nil

			if !advance {
				// Get the items associated with this page
				items := current.page.GetActivityStreamsItems()
				if items == nil {
					continue stackLoop
				}

				// Start off the item iterator
				current.itemIter = items.Begin()
				if current.itemIter == nil {
					continue stackLoop
				}
			}

		itemLoop:
			for {
				if advance {
					// Step past the item that was already handled
					current.itemIter = current.itemIter.Next()
					if current.itemIter == nil {
						break itemLoop
					}
				}

				// The item under the iterator is handled by this
				// pass, so every following pass must advance first.
				advance = true

				var itemIRI *url.URL

				if iri := current.itemIter.GetIRI(); iri != nil {
					// Item is already an IRI type
					itemIRI = iri
				} else if note := current.itemIter.GetActivityStreamsNote(); note != nil {
					// Item is a note, fetch the note ID IRI
					if id := note.GetJSONLDId(); id != nil {
						itemIRI = id.GetIRI()
					}
				}

				if itemIRI == nil {
					// Unusable iter object
					continue itemLoop
				}

				if itemIRI.Host == config.GetHost() {
					// This child is one of ours,
					continue itemLoop
				}

				// Dereference the remote status found at this IRI.
				_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
				if err != nil {
					l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
					continue itemLoop
				}

				if statusable == nil {
					// Already up-to-date.
					continue itemLoop
				}

				// Put current and next frame at top of stack. The
				// child frame is popped first; 'current' resumes
				// afterwards from the item following this one.
				stack = append(stack, current, &frame{
					statusIRI:  itemIRI,
					statusable: statusable,
				})

				// Now start at top of loop
				continue stackLoop
			}

			// Items on this page are exhausted (itemIter is now nil).
			// Get the current page's "next" property
			pageNext := current.page.GetActivityStreamsNext()
			if pageNext == nil {
				continue stackLoop
			}

			// Get the "next" page property IRI
			pageNextIRI := pageNext.GetIRI()
			if pageNextIRI == nil {
				continue stackLoop
			}

			// Dereference this next collection page by its IRI
			collectionPage, err := d.dereferenceCollectionPage(ctx,
				username,
				pageNextIRI,
			)
			if err != nil {
				l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
				continue stackLoop
			}

			// Set the updated collection page
			current.page = collectionPage

			continue pageLoop
		}
	}

	return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
}