From 30c191aaa03beb8bc4de4bf33db671fb98bafc96 Mon Sep 17 00:00:00 2001 From: jsgoldstein Date: Thu, 24 Aug 2023 10:40:04 -0400 Subject: [PATCH] Add new public status index (#26344) Co-authored-by: Eugen Rochko Co-authored-by: Claire --- app/chewy/accounts_index.rb | 2 +- app/chewy/public_statuses_index.rb | 50 +++++++++++++ app/chewy/statuses_index.rb | 22 +++--- .../api/v1/accounts/credentials_controller.rb | 1 + .../settings/privacy_controller.rb | 2 +- .../public_statuses_index_importer.rb | 41 ++++++++++ app/lib/search_query_transformer.rb | 49 +++++++++--- app/lib/vacuum/statuses_vacuum.rb | 9 ++- app/models/account.rb | 1 + .../concerns/account_statuses_search.rb | 44 +++++++++++ app/models/concerns/status_search_concern.rb | 54 +++++++++++++ app/models/status.rb | 37 ++------- .../activitypub/actor_serializer.rb | 8 +- app/services/batched_remove_status_service.rb | 5 +- app/services/search_service.rb | 32 +++----- app/services/statuses_search_service.rb | 75 +++++++++++++++++++ app/views/settings/privacy/show.html.haml | 3 + .../add_to_public_statuses_index_worker.rb | 15 ++++ ...emove_from_public_statuses_index_worker.rb | 15 ++++ app/workers/scheduler/indexing_scheduler.rb | 2 +- config/locales/simple_form.en.yml | 2 + lib/mastodon/cli/search.rb | 1 + spec/chewy/public_statuses_index_spec.rb | 31 ++++++++ .../public_statuses_index_importer_spec.rb | 16 ++++ spec/lib/search_query_transformer_spec.rb | 4 +- .../concerns/account_statuses_search_spec.rb | 66 ++++++++++++++++ ...dd_to_public_statuses_index_worker_spec.rb | 42 +++++++++++ ..._from_public_statuses_index_worker_spec.rb | 42 +++++++++++ 28 files changed, 584 insertions(+), 87 deletions(-) create mode 100644 app/chewy/public_statuses_index.rb create mode 100644 app/lib/importer/public_statuses_index_importer.rb create mode 100644 app/models/concerns/account_statuses_search.rb create mode 100644 app/models/concerns/status_search_concern.rb create mode 100644 app/services/statuses_search_service.rb create mode 100644 app/workers/add_to_public_statuses_index_worker.rb create mode 100644 app/workers/remove_from_public_statuses_index_worker.rb create mode 100644 spec/chewy/public_statuses_index_spec.rb create mode 100644 spec/lib/importer/public_statuses_index_importer_spec.rb create mode 100644 spec/models/concerns/account_statuses_search_spec.rb create mode 100644 spec/workers/add_to_public_statuses_index_worker_spec.rb create mode 100644 spec/workers/remove_from_public_statuses_index_worker_spec.rb diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 1f8571c09d..61e3399aa8 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -62,6 +62,6 @@ class AccountsIndex < Chewy::Index field(:last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }) field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } - field(:text, type: 'text', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } + field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } end end diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb new file mode 100644 index 0000000000..1fad5de3a1 --- /dev/null +++ b/app/chewy/public_statuses_index.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +class PublicStatusesIndex < Chewy::Index + settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { + filter: { + english_stop: { + type: 'stop', + stopwords: '_english_', + }, + + english_stemmer: { + type: 'stemmer', + language: 'english', + }, + + english_possessive_stemmer: { + type: 'stemmer', + language: 'possessive_english', + }, + }, + + analyzer: { + content: { + tokenizer: 'uax_url_email', + filter: %w( + english_possessive_stemmer + lowercase + asciifolding + cjk_width + english_stop + english_stemmer + ), + }, + }, + } + + index_scope ::Status.unscoped + .kept + .indexable + .includes(:media_attachments, :preloadable_poll, :preview_cards) + + root date_detection: false do + field(:id, type: 'keyword') + field(:account_id, type: 'long') + field(:text, type: 'text', analyzer: 'whitespace', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:language, type: 'keyword') + field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) + field(:created_at, type: 'date') + end +end diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 9f680efa52..130f8801df 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -1,23 +1,24 @@ # frozen_string_literal: true class StatusesIndex < Chewy::Index - include FormattingHelper - settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { filter: { english_stop: { type: 'stop', stopwords: '_english_', }, + english_stemmer: { type: 'stemmer', language: 'english', }, + english_possessive_stemmer: { type: 'stemmer', language: 'possessive_english', }, }, + analyzer: { content: { tokenizer: 'uax_url_email', @@ -35,7 +36,7 @@ class StatusesIndex < Chewy::Index # We do not use delete_if option here because it would call a method that we # expect to be called with crutches without crutches, causing n+1 queries - index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) + index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll, :preview_cards) crutch :mentions do |collection| data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id) @@ -63,13 +64,12 @@ class StatusesIndex < Chewy::Index end root date_detection: false do - field :id, type: 'long' - field :account_id, type: 'long' - - field :text, type: 'text', value: ->(status) { status.searchable_text } do - field :stemmed, type: 'text', analyzer: 'content' - end - - field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) } + field(:id, type: 'keyword') + field(:account_id, type: 'long') + field(:text, type: 'text', analyzer: 'whitespace', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }) + field(:language, type: 'keyword') + field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) + field(:created_at, type: 'date') end end diff --git a/app/controllers/api/v1/accounts/credentials_controller.rb b/app/controllers/api/v1/accounts/credentials_controller.rb index 7c7d70fd32..76ba758245 100644 --- a/app/controllers/api/v1/accounts/credentials_controller.rb +++ b/app/controllers/api/v1/accounts/credentials_controller.rb @@ -30,6 +30,7 @@ class Api::V1::Accounts::CredentialsController < Api::BaseController :bot, :discoverable, :hide_collections, + :indexable, fields_attributes: [:name, :value] ) end diff --git a/app/controllers/settings/privacy_controller.rb b/app/controllers/settings/privacy_controller.rb index c2648eedd8..1102c89fad 100644 --- a/app/controllers/settings/privacy_controller.rb +++ b/app/controllers/settings/privacy_controller.rb @@ -18,7 +18,7 @@ class Settings::PrivacyController < Settings::BaseController private def account_params - params.require(:account).permit(:discoverable, :unlocked, :show_collections, settings: UserSettings.keys) + params.require(:account).permit(:discoverable, :unlocked, :indexable, :show_collections, settings: UserSettings.keys) end def set_account diff --git a/app/lib/importer/public_statuses_index_importer.rb b/app/lib/importer/public_statuses_index_importer.rb new file mode 100644 index 0000000000..8e36e36f90 --- /dev/null +++ b/app/lib/importer/public_statuses_index_importer.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +class Importer::PublicStatusesIndexImporter < Importer::BaseImporter + def import! + indexable_statuses_scope.find_in_batches(batch_size: @batch_size) do |batch| + in_work_unit(batch.map(&:status_id)) do |status_ids| + bulk = ActiveRecord::Base.connection_pool.with_connection do + Chewy::Index::Import::BulkBuilder.new(index, to_index: Status.includes(:media_attachments, :preloadable_poll).where(id: status_ids)).bulk_body + end + + indexed = 0 + deleted = 0 + + bulk.map! do |entry| + if entry[:index] + indexed += 1 + else + deleted += 1 + end + entry + end + + Chewy::Index::Import::BulkRequest.new(index).perform(bulk) + + [indexed, deleted] + end + end + + wait! + end + + private + + def index + PublicStatusesIndex + end + + def indexable_statuses_scope + Status.indexable.select('"statuses"."id", COALESCE("statuses"."reblog_of_id", "statuses"."id") AS status_id') + end +end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index aef05e9d9d..dad99cbd2d 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -36,7 +36,7 @@ class SearchQueryTransformer < Parslet::Transform def clause_to_filter(clause) case clause when PrefixClause - { term: { clause.filter => clause.term } } + { clause.type => { clause.filter => clause.term } } else raise "Unexpected clause type: #{clause}" end @@ -47,12 +47,10 @@ class SearchQueryTransformer < Parslet::Transform class << self def symbol(str) case str - when '+' + when '+', nil :must when '-' :must_not - when nil - :should else raise "Unknown operator: #{str}" end @@ -81,23 +79,52 @@ class SearchQueryTransformer < Parslet::Transform end class PrefixClause - attr_reader :filter, :operator, :term + attr_reader :type, :filter, :operator, :term def initialize(prefix, term) @operator = :filter + case prefix + when 'has', 'is' + @filter = :properties + @type = :term + @term = term + when 'language' + @filter = :language + @type = :term + @term = term when 'from' @filter = :account_id - - username, domain = term.gsub(/\A@/, '').split('@') - domain = nil if TagManager.instance.local_domain?(domain) - account = Account.find_remote!(username, domain) - - @term = account.id + @type = :term + @term = account_id_from_term(term) + when 'before' + @filter = :created_at + @type = :range + @term = { lt: term } + when 'after' + @filter = :created_at + @type = :range + @term = { gt: term } + when 'during' + @filter = :created_at + @type = :range + @term = { gte: term, lte: term } else raise Mastodon::SyntaxError end end + + private + + def account_id_from_term(term) + username, domain = term.gsub(/\A@/, '').split('@') + domain = nil if TagManager.instance.local_domain?(domain) + account = Account.find_remote(username, domain) + + # If the account is not found, we want to return empty results, so return + # an ID that does not exist + account&.id || -1 + end end rule(clause: subtree(:clause)) do diff --git a/app/lib/vacuum/statuses_vacuum.rb b/app/lib/vacuum/statuses_vacuum.rb index 28c087b1c2..ad1de07380 100644 --- a/app/lib/vacuum/statuses_vacuum.rb +++ b/app/lib/vacuum/statuses_vacuum.rb @@ -20,7 +20,10 @@ class Vacuum::StatusesVacuum statuses.direct_visibility .includes(mentions: :account) .find_each(&:unlink_from_conversations!) - remove_from_search_index(statuses.ids) if Chewy.enabled? + if Chewy.enabled? + remove_from_index(statuses.ids, 'chewy:queue:StatusesIndex') + remove_from_index(statuses.ids, 'chewy:queue:PublicStatusesIndex') + end # Foreign keys take care of most associated records for us. # Media attachments will be orphaned. @@ -38,7 +41,7 @@ class Vacuum::StatusesVacuum Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false) end - def remove_from_search_index(status_ids) - with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', status_ids) } + def remove_from_index(status_ids, index) + with_redis { |redis| redis.sadd(index, status_ids) } end end diff --git a/app/models/account.rb b/app/models/account.rb index b1cb9eb5db..244f3da83d 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -82,6 +82,7 @@ class Account < ApplicationRecord include DomainMaterializable include AccountMerging include AccountSearch + include AccountStatusesSearch enum protocol: { ostatus: 0, activitypub: 1 } enum suspension_origin: { local: 0, remote: 1 }, _prefix: true diff --git a/app/models/concerns/account_statuses_search.rb b/app/models/concerns/account_statuses_search.rb new file mode 100644 index 0000000000..563a871051 --- /dev/null +++ b/app/models/concerns/account_statuses_search.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module AccountStatusesSearch + extend ActiveSupport::Concern + + included do + after_update_commit :enqueue_update_public_statuses_index, if: :saved_change_to_indexable? + after_destroy_commit :enqueue_remove_from_public_statuses_index, if: :indexable? + end + + def enqueue_update_public_statuses_index + if indexable? + enqueue_add_to_public_statuses_index + else + enqueue_remove_from_public_statuses_index + end + end + + def enqueue_add_to_public_statuses_index + return unless Chewy.enabled? + + AddToPublicStatusesIndexWorker.perform_async(id) + end + + def enqueue_remove_from_public_statuses_index + return unless Chewy.enabled? + + RemoveFromPublicStatusesIndexWorker.perform_async(id) + end + + def add_to_public_statuses_index! + return unless Chewy.enabled? + + statuses.indexable.find_in_batches do |batch| + PublicStatusesIndex.import(query: batch) + end + end + + def remove_from_public_statuses_index! + return unless Chewy.enabled? + + PublicStatusesIndex.filter(term: { account_id: id }).delete_all + end +end diff --git a/app/models/concerns/status_search_concern.rb b/app/models/concerns/status_search_concern.rb new file mode 100644 index 0000000000..21048b5682 --- /dev/null +++ b/app/models/concerns/status_search_concern.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module StatusSearchConcern + extend ActiveSupport::Concern + + included do + scope :indexable, -> { without_reblogs.where(visibility: :public).joins(:account).where(account: { indexable: true }) } + end + + def searchable_by(preloaded = nil) + ids = [] + + ids << account_id if local? + + if preloaded.nil? + ids += mentions.joins(:account).merge(Account.local).active.pluck(:account_id) + ids += favourites.joins(:account).merge(Account.local).pluck(:account_id) + ids += reblogs.joins(:account).merge(Account.local).pluck(:account_id) + ids += bookmarks.joins(:account).merge(Account.local).pluck(:account_id) + ids += poll.votes.joins(:account).merge(Account.local).pluck(:account_id) if poll.present? + else + ids += preloaded.mentions[id] || [] + ids += preloaded.favourites[id] || [] + ids += preloaded.reblogs[id] || [] + ids += preloaded.bookmarks[id] || [] + ids += preloaded.votes[id] || [] + end + + ids.uniq + end + + def searchable_text + [ + spoiler_text, + FormattingHelper.extract_status_plain_text(self), + preloadable_poll&.options&.join("\n\n"), + ordered_media_attachments.map(&:description).join("\n\n"), + ].compact.join("\n\n") + end + + def searchable_properties + [].tap do |properties| + properties << 'image' if ordered_media_attachments.any?(&:image?) + properties << 'video' if ordered_media_attachments.any?(&:video?) + properties << 'audio' if ordered_media_attachments.any?(&:audio?) + properties << 'media' if with_media? + properties << 'poll' if with_poll? + properties << 'link' if with_preview_card? + properties << 'embed' if preview_cards.any?(&:video?) + properties << 'sensitive' if sensitive? + properties << 'reply' if reply? + end + end +end diff --git a/app/models/status.rb b/app/models/status.rb index 86fd8334a2..760b8ec33e 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -37,6 +37,7 @@ class Status < ApplicationRecord include StatusSnapshotConcern include RateLimitable include StatusSafeReblogInsert + include StatusSearchConcern rate_limit by: :account, family: :statuses @@ -47,6 +48,7 @@ class Status < ApplicationRecord attr_accessor :override_timestamps update_index('statuses', :proper) + update_index('public_statuses', :proper) enum visibility: { public: 0, unlisted: 1, private: 2, direct: 3, limited: 4 }, _suffix: :visibility @@ -165,37 +167,6 @@ class Status < ApplicationRecord "v3:#{super}" end - def searchable_by(preloaded = nil) - ids = [] - - ids << account_id if local? - - if preloaded.nil? - ids += mentions.joins(:account).merge(Account.local).active.pluck(:account_id) - ids += favourites.joins(:account).merge(Account.local).pluck(:account_id) - ids += reblogs.joins(:account).merge(Account.local).pluck(:account_id) - ids += bookmarks.joins(:account).merge(Account.local).pluck(:account_id) - ids += poll.votes.joins(:account).merge(Account.local).pluck(:account_id) if poll.present? - else - ids += preloaded.mentions[id] || [] - ids += preloaded.favourites[id] || [] - ids += preloaded.reblogs[id] || [] - ids += preloaded.bookmarks[id] || [] - ids += preloaded.votes[id] || [] - end - - ids.uniq - end - - def searchable_text - [ - spoiler_text, - FormattingHelper.extract_status_plain_text(self), - preloadable_poll ? preloadable_poll.options.join("\n\n") : nil, - ordered_media_attachments.map(&:description).join("\n\n"), - ].compact.join("\n\n") - end - def to_log_human_identifier account.acct end @@ -270,6 +241,10 @@ class Status < ApplicationRecord preview_cards.any? end + def with_poll? + preloadable_poll.present? + end + def non_sensitive_with_media? !sensitive? && with_media? end diff --git a/app/serializers/activitypub/actor_serializer.rb b/app/serializers/activitypub/actor_serializer.rb index 4998d00399..31f39954fb 100644 --- a/app/serializers/activitypub/actor_serializer.rb +++ b/app/serializers/activitypub/actor_serializer.rb @@ -8,13 +8,13 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer context_extensions :manually_approves_followers, :featured, :also_known_as, :moved_to, :property_value, :discoverable, :olm, :suspended, - :memorial + :memorial, :indexable attributes :id, :type, :following, :followers, :inbox, :outbox, :featured, :featured_tags, :preferred_username, :name, :summary, :url, :manually_approves_followers, - :discoverable, :published, :memorial + :discoverable, :indexable, :published, :memorial has_one :public_key, serializer: ActivityPub::PublicKeySerializer @@ -99,6 +99,10 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer object.suspended? ? false : (object.discoverable || false) end + def indexable + object.suspended? ? false : (object.indexable || false) + end + def name object.suspended? ? object.username : (object.display_name.presence || object.username) end diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index f5cb339cdf..c54cc1d350 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -35,7 +35,10 @@ class BatchedRemoveStatusService < BaseService # Since we skipped all callbacks, we also need to manually # deindex the statuses - Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled? + if Chewy.enabled? + Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) + Chewy.strategy.current.update(PublicStatusesIndex, statuses_and_reblogs) + end return if options[:skip_side_effects] diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 30937471bd..4e1e7ea26e 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -39,25 +39,15 @@ class SearchService < BaseService end def perform_statuses_search! - definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id })) - - definition = definition.filter(term: { account_id: @options[:account_id] }) if @options[:account_id].present? - - if @options[:min_id].present? || @options[:max_id].present? - range = {} - range[:gt] = @options[:min_id].to_i if @options[:min_id].present? - range[:lt] = @options[:max_id].to_i if @options[:max_id].present? - definition = definition.filter(range: { id: range }) - end - - results = definition.limit(@limit).offset(@offset).objects.compact - account_ids = results.map(&:account_id) - account_domains = results.map(&:account_domain) - preloaded_relations = @account.relations_map(account_ids, account_domains) - - results.reject { |status| StatusFilter.new(status, @account, preloaded_relations).filtered? } - rescue Faraday::ConnectionFailed, Parslet::ParseFailed - [] + StatusesSearchService.new.call( + @query, + @account, + limit: @limit, + offset: @offset, + account_id: @options[:account_id], + min_id: @options[:min_id], + max_id: @options[:max_id] + ) end def perform_hashtags_search! @@ -114,8 +104,4 @@ class SearchService < BaseService def statuses_search? @options[:type].blank? || @options[:type] == 'statuses' end - - def parsed_query - SearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query)) - end end diff --git a/app/services/statuses_search_service.rb b/app/services/statuses_search_service.rb new file mode 100644 index 0000000000..21d6b71b7d --- /dev/null +++ b/app/services/statuses_search_service.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +class StatusesSearchService < BaseService + def call(query, account = nil, options = {}) + @query = query&.strip + @account = account + @options = options + @limit = options[:limit].to_i + @offset = options[:offset].to_i + + status_search_results + end + + private + + def status_search_results + definition = parsed_query.apply( + StatusesIndex.filter( + bool: { + should: [ + publicly_searchable, + non_publicly_searchable, + ], + + minimum_should_match: 1, + } + ) + ) + + # This is the best way to submit identical queries to multi-indexes though chewy + definition.instance_variable_get(:@parameters)[:indices].value[:indices] << PublicStatusesIndex + + results = definition.collapse(field: :id).order(_id: { order: :desc }).limit(@limit).offset(@offset).objects.compact + account_ids = results.map(&:account_id) + account_domains = results.map(&:account_domain) + preloaded_relations = @account.relations_map(account_ids, account_domains) + + results.reject { |status| StatusFilter.new(status, @account, preloaded_relations).filtered? } + rescue Faraday::ConnectionFailed, Parslet::ParseFailed + [] + end + + def publicly_searchable + { + bool: { + must_not: { + exists: { + field: 'searchable_by', + }, + }, + }, + } + end + + def non_publicly_searchable + { + bool: { + must: [ + { + exists: { + field: 'searchable_by', + }, + }, + { + term: { searchable_by: @account.id }, + }, + ], + }, + } + end + + def parsed_query + SearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query)) + end +end diff --git a/app/views/settings/privacy/show.html.haml b/app/views/settings/privacy/show.html.haml index ce31e60f06..3c14382587 100644 --- a/app/views/settings/privacy/show.html.haml +++ b/app/views/settings/privacy/show.html.haml @@ -24,6 +24,9 @@ %p.lead= t('privacy.search_hint_html') + .fields-group + = f.input :indexable, as: :boolean, wrapper: :with_label + = f.simple_fields_for :settings, current_user.settings do |ff| .fields-group = ff.input :indexable, wrapper: :with_label diff --git a/app/workers/add_to_public_statuses_index_worker.rb b/app/workers/add_to_public_statuses_index_worker.rb new file mode 100644 index 0000000000..409e5e7086 --- /dev/null +++ b/app/workers/add_to_public_statuses_index_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class AddToPublicStatusesIndexWorker + include Sidekiq::Worker + + def perform(account_id) + account = Account.find(account_id) + + return unless account.indexable? + + account.add_to_public_statuses_index! + rescue ActiveRecord::RecordNotFound + true + end +end diff --git a/app/workers/remove_from_public_statuses_index_worker.rb b/app/workers/remove_from_public_statuses_index_worker.rb new file mode 100644 index 0000000000..e108a5c209 --- /dev/null +++ b/app/workers/remove_from_public_statuses_index_worker.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class RemoveFromPublicStatusesIndexWorker + include Sidekiq::Worker + + def perform(account_id) + account = Account.find(account_id) + + return if account.indexable? + + account.remove_from_public_statuses_index! + rescue ActiveRecord::RecordNotFound + true + end +end diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb index 2868a3b715..6c770d5a8f 100644 --- a/app/workers/scheduler/indexing_scheduler.rb +++ b/app/workers/scheduler/indexing_scheduler.rb @@ -23,6 +23,6 @@ class Scheduler::IndexingScheduler end def indexes - [AccountsIndex, TagsIndex, StatusesIndex] + [AccountsIndex, TagsIndex, PublicStatusesIndex, StatusesIndex] end end diff --git a/config/locales/simple_form.en.yml b/config/locales/simple_form.en.yml index 443b7617ff..efda7b778b 100644 --- a/config/locales/simple_form.en.yml +++ b/config/locales/simple_form.en.yml @@ -6,6 +6,7 @@ en: discoverable: Your public posts and profile may be featured or recommended in various areas of Mastodon and your profile may be suggested to other users. display_name: Your full name or your fun name. fields: Your homepage, pronouns, age, anything you want. + indexable: Your public posts may appear in search results on Mastodon. People who have interacted with your posts may be able to search them regardless. note: 'You can @mention other people or #hashtags.' show_collections: People will be able to browse through your follows and followers. People that you follow will see that you follow them regardless. unlocked: People will be able to follow you without requesting approval. Uncheck if you want to review follow requests and chose whether to accept or reject new followers. @@ -143,6 +144,7 @@ en: fields: name: Label value: Content + indexable: Include public posts in search results show_collections: Show follows and followers on profile unlocked: Automatically accept new followers account_alias: diff --git a/lib/mastodon/cli/search.rb b/lib/mastodon/cli/search.rb index 41862b5b6b..481e01d8e7 100644 --- a/lib/mastodon/cli/search.rb +++ b/lib/mastodon/cli/search.rb @@ -10,6 +10,7 @@ module Mastodon::CLI InstancesIndex, AccountsIndex, TagsIndex, + PublicStatusesIndex, StatusesIndex, ].freeze diff --git a/spec/chewy/public_statuses_index_spec.rb b/spec/chewy/public_statuses_index_spec.rb new file mode 100644 index 0000000000..2f93d0ff02 --- /dev/null +++ b/spec/chewy/public_statuses_index_spec.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe PublicStatusesIndex do + describe 'Searching the index' do + before do + mock_elasticsearch_response(described_class, raw_response) + end + + it 'returns results from a query' do + results = described_class.query(match: { name: 'status' }) + + expect(results).to eq [] + end + end + + def raw_response + { + took: 3, + hits: { + hits: [ + { + _id: '0', + _score: 1.6375021, + }, + ], + }, + } + end +end diff --git a/spec/lib/importer/public_statuses_index_importer_spec.rb b/spec/lib/importer/public_statuses_index_importer_spec.rb new file mode 100644 index 0000000000..bc7c038a97 --- /dev/null +++ b/spec/lib/importer/public_statuses_index_importer_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe Importer::PublicStatusesIndexImporter do + describe 'import!' do + let(:pool) { Concurrent::FixedThreadPool.new(5) } + let(:importer) { described_class.new(batch_size: 123, executor: pool) } + + before { Fabricate(:status, account: Fabricate(:account, indexable: true)) } + + it 'indexes relevant statuses' do + expect { importer.import! }.to update_index(PublicStatusesIndex) + end + end +end diff --git a/spec/lib/search_query_transformer_spec.rb b/spec/lib/search_query_transformer_spec.rb index 1095334695..953f9acb2f 100644 --- a/spec/lib/search_query_transformer_spec.rb +++ b/spec/lib/search_query_transformer_spec.rb @@ -9,8 +9,8 @@ describe SearchQueryTransformer do it 'sets attributes' do transformer = described_class.new.apply(parser) - expect(transformer.should_clauses.first).to be_a(SearchQueryTransformer::TermClause) - expect(transformer.must_clauses.first).to be_nil + expect(transformer.should_clauses.first).to be_nil + expect(transformer.must_clauses.first).to be_a(SearchQueryTransformer::TermClause) expect(transformer.must_not_clauses.first).to be_nil expect(transformer.filter_clauses.first).to be_nil end diff --git a/spec/models/concerns/account_statuses_search_spec.rb b/spec/models/concerns/account_statuses_search_spec.rb new file mode 100644 index 0000000000..46362936f4 --- /dev/null +++ b/spec/models/concerns/account_statuses_search_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe AccountStatusesSearch do + let(:account) { Fabricate(:account, indexable: indexable) } + + before do + allow(Chewy).to receive(:enabled?).and_return(true) + end + + describe '#enqueue_update_public_statuses_index' do + before do + allow(account).to receive(:enqueue_add_to_public_statuses_index) + allow(account).to receive(:enqueue_remove_from_public_statuses_index) + end + + context 'when account is indexable' do + let(:indexable) { true } + + it 'enqueues add_to_public_statuses_index and not to remove_from_public_statuses_index' do + account.enqueue_update_public_statuses_index + expect(account).to have_received(:enqueue_add_to_public_statuses_index) + expect(account).to_not have_received(:enqueue_remove_from_public_statuses_index) + end + end + + context 'when account is not indexable' do + let(:indexable) { false } + + it 'enqueues remove_from_public_statuses_index and not to add_to_public_statuses_index' do + account.enqueue_update_public_statuses_index + expect(account).to have_received(:enqueue_remove_from_public_statuses_index) + expect(account).to_not have_received(:enqueue_add_to_public_statuses_index) + end + end + end + + describe '#enqueue_add_to_public_statuses_index' do + let(:indexable) { true } + let(:worker) { AddToPublicStatusesIndexWorker } + + before do + allow(worker).to receive(:perform_async) + end + + it 'enqueues AddToPublicStatusesIndexWorker' do + account.enqueue_add_to_public_statuses_index + expect(worker).to have_received(:perform_async).with(account.id) + end + end + + describe '#enqueue_remove_from_public_statuses_index' do + let(:indexable) { false } + let(:worker) { RemoveFromPublicStatusesIndexWorker } + + before do + allow(worker).to receive(:perform_async) + end + + it 'enqueues RemoveFromPublicStatusesIndexWorker' do + account.enqueue_remove_from_public_statuses_index + expect(worker).to have_received(:perform_async).with(account.id) + end + end +end diff --git a/spec/workers/add_to_public_statuses_index_worker_spec.rb b/spec/workers/add_to_public_statuses_index_worker_spec.rb new file mode 100644 index 0000000000..fa15072241 --- /dev/null +++ b/spec/workers/add_to_public_statuses_index_worker_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe AddToPublicStatusesIndexWorker do + describe '#perform' do + let(:account) { Fabricate(:account, indexable: indexable) } + let(:account_id) { account.id } + + before do + allow(Account).to receive(:find).with(account_id).and_return(account) unless account.nil? + allow(account).to receive(:add_to_public_statuses_index!) unless account.nil? + end + + context 'when account is indexable' do + let(:indexable) { true } + + it 'adds the account to the public statuses index' do + subject.perform(account_id) + expect(account).to have_received(:add_to_public_statuses_index!) + end + end + + context 'when account is not indexable' do + let(:indexable) { false } + + it 'does not add the account to public statuses index' do + subject.perform(account_id) + expect(account).to_not have_received(:add_to_public_statuses_index!) + end + end + + context 'when account does not exist' do + let(:account) { nil } + let(:account_id) { 999 } + + it 'does not raise an error' do + expect { subject.perform(account_id) }.to_not raise_error + end + end + end +end diff --git a/spec/workers/remove_from_public_statuses_index_worker_spec.rb b/spec/workers/remove_from_public_statuses_index_worker_spec.rb new file mode 100644 index 0000000000..43ff211eaa --- /dev/null +++ b/spec/workers/remove_from_public_statuses_index_worker_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe RemoveFromPublicStatusesIndexWorker do + describe '#perform' do + let(:account) { Fabricate(:account, indexable: indexable) } + let(:account_id) { account.id } + + before do + allow(Account).to receive(:find).with(account_id).and_return(account) unless account.nil? + allow(account).to receive(:remove_from_public_statuses_index!) unless account.nil? + end + + context 'when account is not indexable' do + let(:indexable) { false } + + it 'removes the account from public statuses index' do + subject.perform(account_id) + expect(account).to have_received(:remove_from_public_statuses_index!) + end + end + + context 'when account is indexable' do + let(:indexable) { true } + + it 'does not remove the account from public statuses index' do + subject.perform(account_id) + expect(account).to_not have_received(:remove_from_public_statuses_index!) + end + end + + context 'when account does not exist' do + let(:account) { nil } + let(:account_id) { 999 } + + it 'does not raise an error' do + expect { subject.perform(account_id) }.to_not raise_error + end + end + end +end