// GoToSocial // Copyright (C) GoToSocial Authors admin@gotosocial.org // SPDX-License-Identifier: AGPL-3.0-or-later // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package spam import ( "context" "errors" "net/url" "slices" "strings" "github.com/miekg/dns" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/superseriousbusiness/gotosocial/internal/util" ) // preppedMention represents a partially-parsed // mention, prepared for spam checking purposes. type preppedMention struct { *gtsmodel.Mention uri *url.URL domain string user string local bool } // StatusableOK returns no error if the given statusable looks OK, // ie., relevant to the receiver, and not spam. // // This should only be used for Creates of statusables, NOT Announces! // // If the statusable does not pass relevancy or spam checks, either // a Spam or NotRelevant error will be returned. Callers should use // gtserror.IsSpam() and gtserror.IsNotRelevant() to check for this. // // If the returned error is not nil, but neither Spam or NotRelevant, // then it's an actual database error. // // The decision is made based on the following heuristics, in order: // // 1. Receiver follow requester. Return nil. // 2. Statusable doesn't mention receiver. Return NotRelevant. // // If instance-federation-spam-filter = false, then return nil now. // Otherwise check: // // 3. Receiver is locked and is followed by requester. Return nil. // 4. Five or more people are mentioned. Return Spam. // 5. Receiver follow (requests) a mentioned account. Return nil. // 6. Statusable has a media attachment. Return Spam. // 7. Statusable contains non-mention, non-hashtag links. Return Spam. func (f *Filter) StatusableOK( ctx context.Context, receiver *gtsmodel.Account, requester *gtsmodel.Account, statusable ap.Statusable, ) error { // HEURISTIC 1: Check whether receiving account follows the requesting account. // If so, we know it's OK and don't need to do any other checks. follows, err := f.state.DB.IsFollowing(ctx, receiver.ID, requester.ID) if err != nil { return gtserror.Newf("db error checking follow status: %w", err) } if follows { // Looks fine. return nil } // HEURISTIC 2: Check whether statusable mentions the // receiver. If not, we don't want to process this message. rawMentions, _ := ap.ExtractMentions(statusable) mentions := prepMentions(ctx, rawMentions) mentioned := f.isMentioned(ctx, receiver, mentions) if !mentioned { // This is a random message fired // into our inbox, just drop it. err := errors.New("receiver does not follow requester, and is not mentioned") return gtserror.SetNotRelevant(err) } // Receiver is mentioned, but not by someone // they follow. Check if we need to do more // granular spam filtering. if !config.GetInstanceFederationSpamFilter() { // Filter is not enabled, allow it // through without further checks. return nil } // More granular spam filtering time! // // HEURISTIC 3: Does requester follow locked receiver? followedBy, err := f.lockedFollowedBy(ctx, receiver, requester) if err != nil { return gtserror.Newf("db error checking follow status: %w", err) } // If receiver is locked, and is followed // by requester, this likely means they're // interested in the message. Allow it. if followedBy { return nil } // HEURISTIC 4: How many people are mentioned? // If it's 5 or more we can assume this is spam. mentionsLen := len(mentions) if mentionsLen >= 5 { err := errors.New("status mentions 5 or more people") return gtserror.SetSpam(err) } // HEURISTIC 5: Four or fewer people are mentioned, // do we follow (request) at least one of them? // If so, we're probably interested in the message. knowsOne := f.knowsOneMentioned(ctx, receiver, mentions) if knowsOne { return nil } // HEURISTIC 6: Are there any media attachments? attachments, _ := ap.ExtractAttachments(statusable) hasAttachments := len(attachments) != 0 if hasAttachments { err := errors.New("status has attachment(s)") return gtserror.SetSpam(err) } // HEURISTIC 7: Are there any links in the post // aside from mentions and hashtags? Include the // summary/content warning when checking. hashtags, _ := ap.ExtractHashtags(statusable) hasErrantLinks := f.errantLinks(ctx, statusable, mentions, hashtags) if hasErrantLinks { err := errors.New("status has one or more non-mention, non-hashtag links") return gtserror.SetSpam(err) } // Looks OK. return nil } // prepMentions prepares a slice of mentions // for spam checking by parsing out the namestring // and targetAccountURI values, if present. func prepMentions( ctx context.Context, mentions []*gtsmodel.Mention, ) []preppedMention { var ( host = config.GetHost() accountDomain = config.GetAccountDomain() ) parsedMentions := make([]preppedMention, 0, len(mentions)) for _, mention := range mentions { // Start by just embedding // the original mention. parsedMention := preppedMention{ Mention: mention, } // Try to parse namestring if present. if mention.NameString != "" { user, domain, err := util.ExtractNamestringParts(mention.NameString) if err != nil { // Malformed mention, // just log + ignore. log.Debugf(ctx, "malformed mention namestring: %v", err, ) continue } parsedMention.domain = domain parsedMention.user = user } // Try to parse URI if present. if mention.TargetAccountURI != "" { targetURI, err := url.Parse(mention.TargetAccountURI) if err != nil { // Malformed mention, // just log + ignore. log.Debugf(ctx, "malformed mention uri: %v", err, ) continue } parsedMention.uri = targetURI // Set host from targetURI if // it wasn't set by namestring. if parsedMention.domain == "" { parsedMention.domain = targetURI.Host } } // It's a mention of a local account if the target host is us. parsedMention.local = parsedMention.domain == host || parsedMention.domain == accountDomain // Done with this one. parsedMentions = append(parsedMentions, parsedMention) } return parsedMentions } // isMentioned returns true if the // receiver is targeted by at least // one of the given mentions. func (f *Filter) isMentioned( ctx context.Context, receiver *gtsmodel.Account, mentions []preppedMention, ) bool { return slices.ContainsFunc( mentions, func(mention preppedMention) bool { // Check if receiver mentioned by URI. if accURI := mention.TargetAccountURI; accURI != "" && (accURI == receiver.URI || accURI == receiver.URL) { return true } // Check if receiver mentioned by namestring. if mention.local && strings.EqualFold(mention.user, receiver.Username) { return true } // Mention doesn't // target receiver. return false }, ) } // lockedFollowedBy returns true // if receiver account is locked, // and requester follows receiver. func (f *Filter) lockedFollowedBy( ctx context.Context, receiver *gtsmodel.Account, requester *gtsmodel.Account, ) (bool, error) { // If receiver is not locked, // return early to avoid a db call. if !*receiver.Locked { return false, nil } return f.state.DB.IsFollowing(ctx, requester.ID, receiver.ID) } // knowsOneMentioned returns true if the // receiver follows or has follow requested // at least one of the mentioned accounts. func (f *Filter) knowsOneMentioned( ctx context.Context, receiver *gtsmodel.Account, mentions []preppedMention, ) bool { return slices.ContainsFunc( mentions, func(mention preppedMention) bool { var ( acc *gtsmodel.Account err error ) // Try to get target account without // dereffing. After all, if they're not // in our db we definitely don't know them. if mention.TargetAccountURI != "" { acc, err = f.state.DB.GetAccountByURI( gtscontext.SetBarebones(ctx), mention.TargetAccountURI, ) } else if mention.user != "" { acc, err = f.state.DB.GetAccountByUsernameDomain( gtscontext.SetBarebones(ctx), mention.user, mention.domain, ) } if err != nil && !errors.Is(err, db.ErrNoEntries) { // Proper error. log.Errorf(ctx, "db error getting mentioned account: %v", err) return false } if acc == nil { // We don't know this nerd! return false } if acc.ID == receiver.ID { // This is us, doesn't count. return false } follows, err := f.state.DB.IsFollowing(ctx, receiver.ID, acc.ID) if err != nil { // Proper error. log.Errorf(ctx, "db error checking follow status: %v", err) return false } if follows { // We follow this nerd. return true } // We don't follow this nerd, but // have we requested to follow them? followRequested, err := f.state.DB.IsFollowRequested(ctx, receiver.ID, acc.ID) if err != nil { // Proper error. log.Errorf(ctx, "db error checking follow req status: %v", err) return false } return followRequested }, ) } // errantLinks returns true if any http/https // link discovered in the statusable content + cw // is not either a mention link, or a hashtag link. func (f *Filter) errantLinks( ctx context.Context, statusable ap.Statusable, mentions []preppedMention, hashtags []*gtsmodel.Tag, ) bool { // Concatenate the cw with the // content to check for links in both. cw := ap.ExtractSummary(statusable) content := ap.ExtractContent(statusable) concat := cw + " " + content.Content // Store link string alongside link // URI to avoid stringifying twice. type preppedLink struct { *url.URL str string } // Find + parse every http/https link in the status. rawLinks := regexes.LinkScheme.FindAllString(concat, -1) links := make([]preppedLink, 0, len(rawLinks)) for _, rawLink := range rawLinks { linkURI, err := url.Parse(rawLink) if err != nil { log.Debugf(ctx, "malformed link in status: %v", err, ) // Ignore bad links // for spam checking. continue } links = append(links, preppedLink{ URL: linkURI, str: rawLink, }) } // For each link in the status, try to // match it to a hashtag or a mention. // If we can't, we have an errant link. for _, link := range links { hashtagLink := slices.ContainsFunc( hashtags, func(hashtag *gtsmodel.Tag) bool { // If a link is to the href // of a hashtag, it's fine. return strings.EqualFold( link.str, hashtag.Href, ) }, ) if hashtagLink { // This link is accounted for. // Move to the next one. continue } mentionLink := slices.ContainsFunc( mentions, func(mention preppedMention) bool { // If link is straight up to the URI // of a mentioned account, it's fine. if strings.EqualFold( link.str, mention.TargetAccountURI, ) { return true } // Link might be to an account URL rather // than URI. This is a bit trickier because // we can't predict the format of such URLs, // and it's difficult to reconstruct them // while also taking account of different // host + account-domain values. // // So, just check if this link is on the same // host as the mentioned account, or at least // shares a host with it. if link.Host == mention.domain { // Same host. return true } // Shares a host if it has at least two // components from the right in common. common := dns.CompareDomainName( link.Host, mention.domain, ) return common >= 2 }, ) if mentionLink { // This link is accounted for. // Move to the next one. continue } // Not a hashtag link // or a mention link, // so it's errant. return true } // All links OK, or // no links found. return false }