gotosocial/internal/processing/search.go
kim 6c9d8e78eb
[feature] status refetch support (#1690)
* revamp http client to not limit requests, instead use sender worker

Signed-off-by: kim <grufwub@gmail.com>

* remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping

Signed-off-by: kim <grufwub@gmail.com>

* improve batch senders to keep popping recipients until remote URL found

Signed-off-by: kim <grufwub@gmail.com>

* fix recipient looping issue

Signed-off-by: kim <grufwub@gmail.com>

* move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP

Signed-off-by: kim <grufwub@gmail.com>

* first draft of status refetching logic

Signed-off-by: kim <grufwub@gmail.com>

* fix testrig to use new federation alloc func signature

Signed-off-by: kim <grufwub@gmail.com>

* fix log format directive

Signed-off-by: kim <grufwub@gmail.com>

* add status fetched_at migration

Signed-off-by: kim <grufwub@gmail.com>

* remove unused / unchecked for error types

Signed-off-by: kim <grufwub@gmail.com>

* add back the used type...

Signed-off-by: kim <grufwub@gmail.com>

* add separate internal getStatus() function for derefThread() that doesn't recurse

Signed-off-by: kim <grufwub@gmail.com>

* improved mention and media attachment error handling

Signed-off-by: kim <grufwub@gmail.com>

* fix log and error format directives

Signed-off-by: kim <grufwub@gmail.com>

* update account deref to match status deref changes

Signed-off-by: kim <grufwub@gmail.com>

* very small code formatting change to make things clearer

Signed-off-by: kim <grufwub@gmail.com>

* add more code comments

Signed-off-by: kim <grufwub@gmail.com>

* improved code commenting

Signed-off-by: kim <grufwub@gmail.com>

* only check for required further derefs if needed

Signed-off-by: kim <grufwub@gmail.com>

* improved cache invalidation

Signed-off-by: kim <grufwub@gmail.com>

* tweak cache restarting to use a (very small) backoff

Signed-off-by: kim <grufwub@gmail.com>

* small readability changes and fixes

Signed-off-by: kim <grufwub@gmail.com>

* fix account sync issues

Signed-off-by: kim <grufwub@gmail.com>

* fix merge conflicts + update account enrichment to accept already-passed accountable

Signed-off-by: kim <grufwub@gmail.com>

* remove secondary function declaration

Signed-off-by: kim <grufwub@gmail.com>

* normalise dereferencer get status / account behaviour, fix remaining tests

Signed-off-by: kim <grufwub@gmail.com>

* fix remaining rebase conflicts, finish commenting code

Signed-off-by: kim <grufwub@gmail.com>

* appease the linter

Signed-off-by: kim <grufwub@gmail.com>

* add source file header

Signed-off-by: kim <grufwub@gmail.com>

* update to use TIMESTAMPTZ column type instead of just TIMESTAMP

Signed-off-by: kim <grufwub@gmail.com>

* don't pass in 'updated_at' to UpdateEmoji()

Signed-off-by: kim <grufwub@gmail.com>

* use new ap.Resolve{Account,Status}able() functions

Signed-off-by: kim <grufwub@gmail.com>

* remove the somewhat confusing rescoping of the same variable names

Signed-off-by: kim <grufwub@gmail.com>

* update migration file name, improved database delete error returns

Signed-off-by: kim <grufwub@gmail.com>

* formatting

Signed-off-by: kim <grufwub@gmail.com>

* improved multi-delete database functions to minimise DB calls

Signed-off-by: kim <grufwub@gmail.com>

* remove unused type

Signed-off-by: kim <grufwub@gmail.com>

* fix delete statements

Signed-off-by: kim <grufwub@gmail.com>

---------

Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 11:15:54 +02:00

295 lines
10 KiB
Go

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package processing
import (
"context"
"errors"
"fmt"
"net/url"
"strings"
"codeberg.org/gruf/go-kv"
"github.com/superseriousbusiness/gotosocial/internal/ap"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/oauth"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// Implementation note: in this function, we tend to log errors
// at debug level rather than return them. This is because the
// search has a sort of fallthrough logic: if we can't get a result
// with x search, we should try with y search rather than returning.
//
// If we get to the end and still haven't found anything, even then
// we shouldn't return an error, just return an empty search result.
//
// The only exception to this is when we get a malformed query, in
// which case we return a bad request error so the user knows they
// did something funky.
func (p *Processor) SearchGet(ctx context.Context, authed *oauth.Auth, search *apimodel.SearchQuery) (*apimodel.SearchResult, gtserror.WithCode) {
// tidy up the query and make sure it wasn't just spaces
query := strings.TrimSpace(search.Query)
if query == "" {
err := errors.New("search query was empty string after trimming space")
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
l := log.WithContext(ctx).
WithFields(kv.Fields{{"query", query}}...)
searchResult := &apimodel.SearchResult{
Accounts: []apimodel.Account{},
Statuses: []apimodel.Status{},
Hashtags: []apimodel.Tag{},
}
// currently the search will only ever return one result,
// so return nothing if the offset is greater than 0
if search.Offset > 0 {
return searchResult, nil
}
foundAccounts := []*gtsmodel.Account{}
foundStatuses := []*gtsmodel.Status{}
var foundOne bool
/*
SEARCH BY MENTION
check if the query is something like @whatever_username@example.org -- this means it's likely a remote account
*/
maybeNamestring := query
if maybeNamestring[0] != '@' {
maybeNamestring = "@" + maybeNamestring
}
if username, domain, err := util.ExtractNamestringParts(maybeNamestring); err == nil {
l.Trace("search term is a mention, looking it up...")
blocked, err := p.state.DB.IsDomainBlocked(ctx, domain)
if err != nil {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err))
}
if blocked {
l.Debug("domain is blocked")
return searchResult, nil
}
foundAccount, err := p.searchAccountByUsernameDomain(ctx, authed, username, domain, search.Resolve)
if err != nil {
var errNotRetrievable *dereferencing.ErrNotRetrievable
if !errors.As(err, &errNotRetrievable) {
// return a proper error only if it wasn't just not retrievable
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err))
}
return searchResult, nil
}
foundAccounts = append(foundAccounts, foundAccount)
foundOne = true
l.Trace("got an account by searching by mention")
}
/*
SEARCH BY URI
check if the query is a URI with a recognizable scheme and dereference it
*/
if !foundOne {
if uri, err := url.Parse(query); err == nil {
if uri.Scheme == "https" || uri.Scheme == "http" {
l.Trace("search term is a uri, looking it up...")
blocked, err := p.state.DB.IsURIBlocked(ctx, uri)
if err != nil {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err))
}
if blocked {
l.Debug("domain is blocked")
return searchResult, nil
}
// check if it's a status...
foundStatus, err := p.searchStatusByURI(ctx, authed, uri)
if err != nil {
// Check for semi-expected error types.
var (
errNotRetrievable *dereferencing.ErrNotRetrievable
errWrongType *ap.ErrWrongType
)
if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up status: %w", err))
}
} else {
foundStatuses = append(foundStatuses, foundStatus)
foundOne = true
l.Trace("got a status by searching by URI")
}
// ... or an account
if !foundOne {
foundAccount, err := p.searchAccountByURI(ctx, authed, uri, search.Resolve)
if err != nil {
// Check for semi-expected error types.
var (
errNotRetrievable *dereferencing.ErrNotRetrievable
errWrongType *ap.ErrWrongType
)
if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err))
}
} else {
foundAccounts = append(foundAccounts, foundAccount)
foundOne = true
l.Trace("got an account by searching by URI")
}
}
}
}
}
if !foundOne {
// we got nothing, we can return early
l.Trace("found nothing, returning")
return searchResult, nil
}
/*
FROM HERE ON we have our search results, it's just a matter of filtering them according to what this user is allowed to see,
and then converting them into our frontend format.
*/
for _, foundAccount := range foundAccounts {
// make sure there's no block in either direction between the account and the requester
blocked, err := p.state.DB.IsEitherBlocked(ctx, authed.Account.ID, foundAccount.ID)
if err != nil {
err = fmt.Errorf("SearchGet: error checking block between %s and %s: %s", authed.Account.ID, foundAccount.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
if blocked {
l.Tracef("block exists between %s and %s, skipping this result", authed.Account.ID, foundAccount.ID)
continue
}
apiAcct, err := p.tc.AccountToAPIAccountPublic(ctx, foundAccount)
if err != nil {
err = fmt.Errorf("SearchGet: error converting account %s to api account: %s", foundAccount.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
searchResult.Accounts = append(searchResult.Accounts, *apiAcct)
}
for _, foundStatus := range foundStatuses {
// make sure each found status is visible to the requester
visible, err := p.filter.StatusVisible(ctx, authed.Account, foundStatus)
if err != nil {
err = fmt.Errorf("SearchGet: error checking visibility of status %s for account %s: %s", foundStatus.ID, authed.Account.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
if !visible {
l.Tracef("status %s is not visible to account %s, skipping this result", foundStatus.ID, authed.Account.ID)
continue
}
apiStatus, err := p.tc.StatusToAPIStatus(ctx, foundStatus, authed.Account)
if err != nil {
err = fmt.Errorf("SearchGet: error converting status %s to api status: %s", foundStatus.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
searchResult.Statuses = append(searchResult.Statuses, *apiStatus)
}
return searchResult, nil
}
func (p *Processor) searchStatusByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL) (*gtsmodel.Status, error) {
status, _, err := p.federator.GetStatusByURI(gtscontext.SetFastFail(ctx), authed.Account.Username, uri)
return status, err
}
func (p *Processor) searchAccountByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL, resolve bool) (*gtsmodel.Account, error) {
if !resolve {
var (
account *gtsmodel.Account
err error
uriStr = uri.String()
)
// Search the database for existing account with ID URI.
account, err = p.state.DB.GetAccountByURI(ctx, uriStr)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", uriStr, err)
}
if account == nil {
// Else, search the database for existing by ID URL.
account, err = p.state.DB.GetAccountByURL(ctx, uriStr)
if err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", uriStr, err)
}
return nil, dereferencing.NewErrNotRetrievable(err)
}
}
return account, nil
}
account, _, err := p.federator.GetAccountByURI(
gtscontext.SetFastFail(ctx),
authed.Account.Username,
uri,
)
return account, err
}
func (p *Processor) searchAccountByUsernameDomain(ctx context.Context, authed *oauth.Auth, username string, domain string, resolve bool) (*gtsmodel.Account, error) {
if !resolve {
if domain == config.GetHost() || domain == config.GetAccountDomain() {
// We do local lookups using an empty domain,
// else it will fail the db search below.
domain = ""
}
// Search the database for existing account with USERNAME@DOMAIN
account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, domain)
if err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("searchAccountByUsernameDomain: error checking database for account %s@%s: %w", username, domain, err)
}
return nil, dereferencing.NewErrNotRetrievable(err)
}
return account, nil
}
account, _, err := p.federator.GetAccountByUsernameDomain(
gtscontext.SetFastFail(ctx),
authed.Account.Username,
username, domain,
)
return account, err
}