Merge branch 'fix/scrubber-cache' into 'develop'

[#481] Store scrubbed posts in Cachex

Closes #481

See merge request pleroma/pleroma!610
This commit is contained in:
lambda 2018-12-31 09:56:26 +00:00
commit 33e3a7ba7d
6 changed files with 70 additions and 9 deletions

View file

@ -53,6 +53,16 @@ defmodule Pleroma.Application do
], ],
id: :cachex_object id: :cachex_object
), ),
worker(
Cachex,
[
:scrubber_cache,
[
limit: 2500
]
],
id: :cachex_scrubber
),
worker( worker(
Cachex, Cachex,
[ [

View file

@ -15,8 +15,11 @@ defmodule Pleroma.HTML do
end end
def filter_tags(html, nil) do def filter_tags(html, nil) do
get_scrubbers() filter_tags(html, get_scrubbers())
|> Enum.reduce(html, fn scrubber, html -> end
def filter_tags(html, scrubbers) when is_list(scrubbers) do
Enum.reduce(scrubbers, html, fn scrubber, html ->
filter_tags(html, scrubber) filter_tags(html, scrubber)
end) end)
end end
@ -24,6 +27,40 @@ defmodule Pleroma.HTML do
def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber)
def filter_tags(html), do: filter_tags(html, nil) def filter_tags(html), do: filter_tags(html, nil)
def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
@doc """
Returns `content` scrubbed with `scrubbers`, memoised in the `:scrubber_cache` cache.

The cache key is made of the scrubber signature, `object.id` and a hash of
`content`. Several call sites cache under the same object id while passing
content they prepared themselves, so the content hash keeps one caller from
being served HTML that was scrubbed from another caller's input.

On a cache miss the content is scrubbed via `ensure_scrubbed_html/2` and stored.
"""
def get_cached_scrubbed_html_for_object(content, scrubbers, object) do
  signature = generate_scrubber_signature(scrubbers)

  # :erlang.phash2/1 is cheap compared to a full scrub and accepts any term
  # (including nil), so hashing on every lookup does not defeat the cache.
  key = "#{signature}|#{object.id}|#{:erlang.phash2(content)}"

  Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end)
end
@doc """
Cached variant of tag stripping: delegates to
`get_cached_scrubbed_html_for_object/3` with `HtmlSanitizeEx.Scrubber.StripTags`
as the only scrubber.
"""
def get_cached_stripped_html_for_object(content, object),
  do: get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object)
@doc """
Scrubs `content` with `scrubbers` through `filter_tags/2` and wraps the result
as `{:commit, html}`, the shape handed back to `Cachex.fetch!/3` by the cache
fallback in `get_cached_scrubbed_html_for_object/3`.
"""
def ensure_scrubbed_html(content, scrubbers) do
  scrubbed = filter_tags(content, scrubbers)
  {:commit, scrubbed}
end
# Builds the cache-key fragment that identifies a scrubber, or a list of
# scrubbers, together with their versions. Each scrubber contributes its module
# name followed by its version, concatenated in list order.
defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  generate_scrubber_signature([scrubber])
end

defp generate_scrubber_signature(scrubbers) do
  Enum.map_join(scrubbers, "", fn scrubber ->
    # A scrubber that does not export version/0
    # (e.g. HtmlSanitizeEx.Scrubber.StripTags) is treated as version 0.
    #
    # function_exported?/3 never loads a module by itself, so make sure the
    # scrubber is loaded first; otherwise a not-yet-loaded scrubber would be
    # reported as version 0 and could share a key with stale cache entries.
    version =
      if Code.ensure_loaded?(scrubber) and function_exported?(scrubber, :version, 0) do
        scrubber.version()
      else
        0
      end

    "#{scrubber}#{version}"
  end)
end
end end
defmodule Pleroma.HTML.Scrubber.TwitterText do defmodule Pleroma.HTML.Scrubber.TwitterText do
@ -39,6 +76,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do
require HtmlSanitizeEx.Scrubber.Meta require HtmlSanitizeEx.Scrubber.Meta
alias HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta
@doc """
Version number of this scrubber. `Pleroma.HTML` appends it to the scrubber
signature used in scrubbed-HTML cache keys.
"""
# NOTE(review): presumably meant to be bumped whenever the rules below change,
# so previously cached output stops matching - confirm.
def version, do: 0
Meta.remove_cdata_sections_before_scrub() Meta.remove_cdata_sections_before_scrub()
Meta.strip_comments() Meta.strip_comments()
@ -77,6 +118,10 @@ defmodule Pleroma.HTML.Scrubber.Default do
require HtmlSanitizeEx.Scrubber.Meta require HtmlSanitizeEx.Scrubber.Meta
alias HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta
@doc """
Version number of this scrubber. `Pleroma.HTML` appends it to the scrubber
signature used in scrubbed-HTML cache keys.
"""
# NOTE(review): presumably meant to be bumped whenever the rules below change,
# so previously cached output stops matching - confirm.
def version, do: 0
@markup Application.get_env(:pleroma, :markup) @markup Application.get_env(:pleroma, :markup)
@uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
@valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
@ -154,6 +199,10 @@ defmodule Pleroma.HTML.Transform.MediaProxy do
alias Pleroma.Web.MediaProxy alias Pleroma.Web.MediaProxy
@doc """
Version number of this transform. `Pleroma.HTML` appends it to the scrubber
signature used in scrubbed-HTML cache keys.
"""
# NOTE(review): presumably meant to be bumped whenever the rewriting rules
# below change, so previously cached output stops matching - confirm.
def version, do: 0
def before_scrub(html), do: html def before_scrub(html), do: html
def scrub_attribute("img", {"src", "http" <> target}) do def scrub_attribute("img", {"src", "http" <> target}) do

View file

@ -786,7 +786,9 @@ defmodule Pleroma.User do
Pleroma.HTML.Scrubber.TwitterText Pleroma.HTML.Scrubber.TwitterText
end end
def html_filter_policy(_), do: nil @default_scrubbers Pleroma.Config.get([:markup, :scrub_policy])
def html_filter_policy(_), do: @default_scrubbers
def get_or_fetch_by_ap_id(ap_id) do def get_or_fetch_by_ap_id(ap_id) do
user = get_by_ap_id(ap_id) user = get_by_ap_id(ap_id)

View file

@ -120,7 +120,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
content = content =
object object
|> render_content() |> render_content()
|> HTML.filter_tags(User.html_filter_policy(opts[:for])) |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity)
%{ %{
id: to_string(activity.id), id: to_string(activity.id),

View file

@ -11,11 +11,11 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do
alias Pleroma.Web.TwitterAPI.TwitterAPI alias Pleroma.Web.TwitterAPI.TwitterAPI
alias Pleroma.Web.TwitterAPI.Representers.ObjectRepresenter alias Pleroma.Web.TwitterAPI.Representers.ObjectRepresenter
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.HTML
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.User alias Pleroma.User
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.Formatter alias Pleroma.Formatter
alias Pleroma.HTML
import Ecto.Query import Ecto.Query
require Logger require Logger
@ -245,14 +245,14 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do
html = html =
content content
|> HTML.filter_tags(User.html_filter_policy(opts[:for])) |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity)
|> Formatter.emojify(object["emoji"]) |> Formatter.emojify(object["emoji"])
text = text =
if content do if content do
content content
|> String.replace(~r/<br\s?\/?>/, "\n") |> String.replace(~r/<br\s?\/?>/, "\n")
|> HTML.strip_tags() |> HTML.get_cached_stripped_html_for_object(activity)
end end
reply_parent = Activity.get_in_reply_to_activity(activity) reply_parent = Activity.get_in_reply_to_activity(activity)

View file

@ -706,10 +706,10 @@ defmodule Pleroma.UserTest do
end end
describe "per-user rich-text filtering" do describe "per-user rich-text filtering" do
test "html_filter_policy returns nil when rich-text is enabled" do test "html_filter_policy returns default policies, when rich-text is enabled" do
user = insert(:user) user = insert(:user)
assert nil == User.html_filter_policy(user) assert Pleroma.Config.get([:markup, :scrub_policy]) == User.html_filter_policy(user)
end end
test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do