mirror of
https://git.pleroma.social/pleroma/pleroma.git
synced 2025-01-10 09:15:25 +00:00
[#3213] Hashtag-filtering functions in ActivityPub. Mix task for migrating hashtags to hashtags
table.
This commit is contained in:
parent
e369b1306b
commit
cbb19d0e18
3 changed files with 218 additions and 65 deletions
|
@ -4,14 +4,18 @@
|
|||
|
||||
defmodule Mix.Tasks.Pleroma.Database do
|
||||
alias Pleroma.Conversation
|
||||
alias Pleroma.Hashtag
|
||||
alias Pleroma.Maintenance
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Repo
|
||||
alias Pleroma.User
|
||||
|
||||
require Logger
|
||||
require Pleroma.Constants
|
||||
|
||||
import Ecto.Query
|
||||
import Mix.Pleroma
|
||||
|
||||
use Mix.Task
|
||||
|
||||
@shortdoc "A collection of database related tasks"
|
||||
|
@ -128,6 +132,66 @@ defmodule Mix.Tasks.Pleroma.Database do
|
|||
|> Stream.run()
|
||||
end
|
||||
|
||||
def run(["transfer_hashtags"]) do
|
||||
import Ecto.Query
|
||||
|
||||
start_pleroma()
|
||||
|
||||
from(
|
||||
object in Object,
|
||||
left_join: hashtag in assoc(object, :hashtags),
|
||||
where: is_nil(hashtag.id),
|
||||
where: fragment("(?)->>'tag' != '[]'", object.data),
|
||||
select: %{
|
||||
id: object.id,
|
||||
inserted_at: object.inserted_at,
|
||||
tag: fragment("(?)->>'tag'", object.data)
|
||||
},
|
||||
order_by: [desc: object.id]
|
||||
)
|
||||
|> Pleroma.Repo.chunk_stream(100, :batches)
|
||||
|> Stream.each(fn objects ->
|
||||
chunk_start = List.first(objects)
|
||||
chunk_end = List.last(objects)
|
||||
|
||||
Logger.info(
|
||||
"transfer_hashtags: " <>
|
||||
"#{chunk_start.id} (#{chunk_start.inserted_at}) -- " <>
|
||||
"#{chunk_end.id} (#{chunk_end.inserted_at})"
|
||||
)
|
||||
|
||||
Enum.map(
|
||||
objects,
|
||||
fn object ->
|
||||
hashtags =
|
||||
object.tag
|
||||
|> Jason.decode!()
|
||||
|> Enum.filter(&is_bitstring(&1))
|
||||
|
||||
with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
|
||||
Repo.transaction(fn ->
|
||||
for hashtag_record <- hashtag_records do
|
||||
with {:error, _} <-
|
||||
Ecto.Adapters.SQL.query(
|
||||
Repo,
|
||||
"insert into hashtags_objects(hashtag_id, object_id) values " <>
|
||||
"(#{hashtag_record.id}, #{object.id});"
|
||||
) do
|
||||
Logger.warn(
|
||||
"ERROR: could not link object #{object.id} and hashtag #{hashtag_record.id}"
|
||||
)
|
||||
end
|
||||
end
|
||||
end)
|
||||
else
|
||||
e -> Logger.warn("ERROR: could not process object #{object.id}: #{inspect(e)}")
|
||||
end
|
||||
end
|
||||
)
|
||||
end)
|
||||
|> Stream.run()
|
||||
end
|
||||
|
||||
def run(["vacuum", args]) do
|
||||
start_pleroma()
|
||||
|
||||
|
|
|
@ -660,33 +660,41 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|
|||
defp restrict_since(query, _), do: query
|
||||
|
||||
defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
||||
raise "Can't use the child object without preloading!"
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
|
||||
defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do
|
||||
from(
|
||||
[_activity, object] in query,
|
||||
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
|
||||
)
|
||||
end
|
||||
|
||||
defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
|
||||
restrict_tag_reject(query, %{tag_reject: [tag_reject]})
|
||||
end
|
||||
|
||||
defp restrict_tag_reject(query, _), do: query
|
||||
|
||||
defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
|
||||
raise "Can't use the child object without preloading!"
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
|
||||
defp restrict_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do
|
||||
from(
|
||||
[_activity, object] in query,
|
||||
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
|
||||
)
|
||||
end
|
||||
|
||||
defp restrict_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
|
||||
restrict_tag(query, %{tag: tag})
|
||||
end
|
||||
|
||||
defp restrict_tag_all(query, _), do: query
|
||||
|
||||
defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do
|
||||
raise "Can't use the child object without preloading!"
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
|
||||
|
@ -697,14 +705,80 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|
|||
end
|
||||
|
||||
defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
|
||||
from(
|
||||
[_activity, object] in query,
|
||||
where: fragment("(?)->'tag' \\? (?)", object.data, ^tag)
|
||||
)
|
||||
restrict_tag(query, %{tag: [tag]})
|
||||
end
|
||||
|
||||
defp restrict_tag(query, _), do: query
|
||||
|
||||
defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do
|
||||
if has_named_binding?(query, :thread_mute) do
|
||||
from(
|
||||
[activity, object, thread_mute] in query,
|
||||
group_by: [activity.id, object.id, thread_mute.id]
|
||||
)
|
||||
else
|
||||
from(
|
||||
[activity, object] in query,
|
||||
group_by: [activity.id, object.id]
|
||||
)
|
||||
end
|
||||
|> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
|
||||
|> having(
|
||||
[hashtag: hashtag],
|
||||
fragment("not(array_agg(?) && (?))", hashtag.name, ^tags_reject)
|
||||
)
|
||||
end
|
||||
|
||||
defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
|
||||
restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
|
||||
end
|
||||
|
||||
defp restrict_hashtag_reject_any(query, _), do: query
|
||||
|
||||
defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_hashtag_all(query, %{tag_all: tags}) when is_list(tags) do
|
||||
Enum.reduce(
|
||||
tags,
|
||||
query,
|
||||
fn tag, acc -> restrict_hashtag_any(acc, %{tag: tag}) end
|
||||
)
|
||||
end
|
||||
|
||||
defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do
|
||||
restrict_hashtag_any(query, %{tag: tag})
|
||||
end
|
||||
|
||||
defp restrict_hashtag_all(query, _), do: query
|
||||
|
||||
defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
|
||||
raise_on_missing_preload()
|
||||
end
|
||||
|
||||
defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do
|
||||
from(
|
||||
[_activity, object] in query,
|
||||
join: hashtag in assoc(object, :hashtags),
|
||||
where: hashtag.name in ^tags
|
||||
)
|
||||
end
|
||||
|
||||
defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
|
||||
restrict_hashtag_any(query, %{tag: [tag]})
|
||||
end
|
||||
|
||||
defp restrict_hashtag_any(query, _), do: query
|
||||
|
||||
defp raise_on_missing_preload do
|
||||
raise "Can't use the child object without preloading!"
|
||||
end
|
||||
|
||||
defp restrict_recipients(query, [], _user), do: query
|
||||
|
||||
defp restrict_recipients(query, recipients, nil) do
|
||||
|
@ -1088,40 +1162,51 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|
|||
skip_thread_containment: Config.get([:instance, :skip_thread_containment])
|
||||
}
|
||||
|
||||
Activity
|
||||
|> maybe_preload_objects(opts)
|
||||
|> maybe_preload_bookmarks(opts)
|
||||
|> maybe_preload_report_notes(opts)
|
||||
|> maybe_set_thread_muted_field(opts)
|
||||
|> maybe_order(opts)
|
||||
|> restrict_recipients(recipients, opts[:user])
|
||||
|> restrict_replies(opts)
|
||||
|> restrict_tag(opts)
|
||||
|> restrict_tag_reject(opts)
|
||||
|> restrict_tag_all(opts)
|
||||
|> restrict_since(opts)
|
||||
|> restrict_local(opts)
|
||||
|> restrict_actor(opts)
|
||||
|> restrict_type(opts)
|
||||
|> restrict_state(opts)
|
||||
|> restrict_favorited_by(opts)
|
||||
|> restrict_blocked(restrict_blocked_opts)
|
||||
|> restrict_muted(restrict_muted_opts)
|
||||
|> restrict_filtered(opts)
|
||||
|> restrict_media(opts)
|
||||
|> restrict_visibility(opts)
|
||||
|> restrict_thread_visibility(opts, config)
|
||||
|> restrict_reblogs(opts)
|
||||
|> restrict_pinned(opts)
|
||||
|> restrict_muted_reblogs(restrict_muted_reblogs_opts)
|
||||
|> restrict_instance(opts)
|
||||
|> restrict_announce_object_actor(opts)
|
||||
|> restrict_filtered(opts)
|
||||
|> Activity.restrict_deactivated_users()
|
||||
|> exclude_poll_votes(opts)
|
||||
|> exclude_chat_messages(opts)
|
||||
|> exclude_invisible_actors(opts)
|
||||
|> exclude_visibility(opts)
|
||||
query =
|
||||
Activity
|
||||
|> distinct([a], true)
|
||||
|> maybe_preload_objects(opts)
|
||||
|> maybe_preload_bookmarks(opts)
|
||||
|> maybe_preload_report_notes(opts)
|
||||
|> maybe_set_thread_muted_field(opts)
|
||||
|> maybe_order(opts)
|
||||
|> restrict_recipients(recipients, opts[:user])
|
||||
|> restrict_replies(opts)
|
||||
|> restrict_since(opts)
|
||||
|> restrict_local(opts)
|
||||
|> restrict_actor(opts)
|
||||
|> restrict_type(opts)
|
||||
|> restrict_state(opts)
|
||||
|> restrict_favorited_by(opts)
|
||||
|> restrict_blocked(restrict_blocked_opts)
|
||||
|> restrict_muted(restrict_muted_opts)
|
||||
|> restrict_filtered(opts)
|
||||
|> restrict_media(opts)
|
||||
|> restrict_visibility(opts)
|
||||
|> restrict_thread_visibility(opts, config)
|
||||
|> restrict_reblogs(opts)
|
||||
|> restrict_pinned(opts)
|
||||
|> restrict_muted_reblogs(restrict_muted_reblogs_opts)
|
||||
|> restrict_instance(opts)
|
||||
|> restrict_announce_object_actor(opts)
|
||||
|> restrict_filtered(opts)
|
||||
|> Activity.restrict_deactivated_users()
|
||||
|> exclude_poll_votes(opts)
|
||||
|> exclude_chat_messages(opts)
|
||||
|> exclude_invisible_actors(opts)
|
||||
|> exclude_visibility(opts)
|
||||
|
||||
if Config.get([:instance, :improved_hashtag_timeline]) do
|
||||
query
|
||||
|> restrict_hashtag_any(opts)
|
||||
|> restrict_hashtag_all(opts)
|
||||
|> restrict_hashtag_reject_any(opts)
|
||||
else
|
||||
query
|
||||
|> restrict_tag(opts)
|
||||
|> restrict_tag_reject(opts)
|
||||
|> restrict_tag_all(opts)
|
||||
end
|
||||
end
|
||||
|
||||
def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
|
||||
|
|
|
@ -199,33 +199,37 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do
|
|||
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
|
||||
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"})
|
||||
|
||||
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
|
||||
for new_timeline_enabled <- [true, false] do
|
||||
clear_config([:instance, :improved_hashtag_timeline], new_timeline_enabled)
|
||||
|
||||
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
|
||||
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
|
||||
|
||||
fetch_three =
|
||||
ActivityPub.fetch_activities([], %{
|
||||
type: "Create",
|
||||
tag: ["test", "essais"],
|
||||
tag_reject: ["reject"]
|
||||
})
|
||||
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
|
||||
|
||||
fetch_four =
|
||||
ActivityPub.fetch_activities([], %{
|
||||
type: "Create",
|
||||
tag: ["test"],
|
||||
tag_all: ["test", "reject"]
|
||||
})
|
||||
fetch_three =
|
||||
ActivityPub.fetch_activities([], %{
|
||||
type: "Create",
|
||||
tag: ["test", "essais"],
|
||||
tag_reject: ["reject"]
|
||||
})
|
||||
|
||||
[fetch_one, fetch_two, fetch_three, fetch_four] =
|
||||
Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses ->
|
||||
Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
|
||||
end)
|
||||
fetch_four =
|
||||
ActivityPub.fetch_activities([], %{
|
||||
type: "Create",
|
||||
tag: ["test"],
|
||||
tag_all: ["test", "reject"]
|
||||
})
|
||||
|
||||
assert fetch_one == [status_one, status_three]
|
||||
assert fetch_two == [status_one, status_two, status_three]
|
||||
assert fetch_three == [status_one, status_two]
|
||||
assert fetch_four == [status_three]
|
||||
[fetch_one, fetch_two, fetch_three, fetch_four] =
|
||||
Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses ->
|
||||
Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
|
||||
end)
|
||||
|
||||
assert fetch_one == [status_one, status_three]
|
||||
assert fetch_two == [status_one, status_two, status_three]
|
||||
assert fetch_three == [status_one, status_two]
|
||||
assert fetch_four == [status_three]
|
||||
end
|
||||
end
|
||||
|
||||
describe "insertion" do
|
||||
|
|
Loading…
Reference in a new issue