Merge branch 'issue/1509' into 'develop'

[#1509]  purge media from cache after delete

See merge request pleroma/pleroma!2539
This commit is contained in:
lain 2020-05-20 10:35:36 +00:00
commit 6dd1575c64
10 changed files with 293 additions and 30 deletions

View file

@ -376,6 +376,10 @@ config :pleroma, :rich_media,
config :pleroma, :media_proxy,
enabled: false,
invalidation: [
enabled: false,
provider: Pleroma.Web.MediaProxy.Invalidation.Script
],
proxy_opts: [
redirect_on_failure: false,
max_body_length: 25 * 1_048_576,

View file

@ -249,6 +249,40 @@ This section describe PWA manifest instance-specific values. Currently this opti
* `base_url`: The base URL to access a user-uploaded file. Useful when you want to proxy the media files via another host/CDN fronts.
* `proxy_opts`: All options defined in `Pleroma.ReverseProxy` documentation, defaults to `[max_body_length: (25*1_048_576)]`.
* `whitelist`: List of domains to bypass the mediaproxy
* `invalidation`: options for remove media from cache after delete object:
* `enabled`: Enables purge cache
* `provider`: Which one of the [purge cache strategy](#purge-cache-strategy) to use.
### Purge cache strategy
#### Pleroma.Web.MediaProxy.Invalidation.Script
This strategy allow perform external bash script to purge cache.
Urls of attachments pass to script as arguments.
* `script_path`: path to external script.
Example:
```elixir
config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Script,
script_path: "./installation/nginx-cache-purge.example"
```
#### Pleroma.Web.MediaProxy.Invalidation.Http
This strategy allow perform custom http request to purge cache.
* `method`: http method. default is `purge`
* `headers`: http headers. default is empty
* `options`: request options. default is empty
Example:
```elixir
config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
method: :purge,
headers: [],
options: []
```
## Link previews

View file

@ -0,0 +1,40 @@
#!/bin/sh
# A simple shell script to delete a media from the Nginx cache.
SCRIPTNAME=${0##*/}
# NGINX cache directory
CACHE_DIRECTORY="/tmp/pleroma-media-cache"
## Return the files where the items are cached.
## $1 - the filename, can be a pattern .
## $2 - the cache directory.
## $3 - (optional) the number of parallel processes to run for grep.
get_cache_files() {
local max_parallel=${3-16}
find $2 -maxdepth 2 -type d | xargs -P $max_parallel -n 1 grep -E Rl "^KEY:.*$1" | sort -u
}
## Removes an item from the given cache zone.
## $1 - the filename, can be a pattern .
## $2 - the cache directory.
purge_item() {
for f in $(get_cache_files $1 $2); do
echo "found file: $f"
[ -f $f ] || continue
echo "Deleting $f from $2."
rm $f
done
} # purge_item
purge() {
for url in "$@"
do
echo "$SCRIPTNAME delete \`$url\` from cache ($CACHE_DIRECTORY)"
purge_item $url $CACHE_DIRECTORY
done
}
purge $1

View file

@ -9,11 +9,13 @@ defmodule Pleroma.Object do
import Ecto.Changeset
alias Pleroma.Activity
alias Pleroma.Config
alias Pleroma.Object
alias Pleroma.Object.Fetcher
alias Pleroma.ObjectTombstone
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Workers.AttachmentsCleanupWorker
require Logger
@ -188,27 +190,37 @@ defmodule Pleroma.Object do
def delete(%Object{data: %{"id" => id}} = object) do
with {:ok, _obj} = swap_object_with_tombstone(object),
deleted_activity = Activity.delete_all_by_object_ap_id(id),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),
{:ok, _} <- Cachex.del(:web_resp_cache, URI.parse(id).path) do
with true <- Pleroma.Config.get([:instance, :cleanup_attachments]) do
{:ok, _} =
Pleroma.Workers.AttachmentsCleanupWorker.enqueue("cleanup_attachments", %{
"object" => object
})
end
{:ok, _} <- invalid_object_cache(object) do
cleanup_attachments(
Config.get([:instance, :cleanup_attachments]),
%{"object" => object}
)
{:ok, object, deleted_activity}
end
end
def prune(%Object{data: %{"id" => id}} = object) do
@spec cleanup_attachments(boolean(), %{required(:object) => map()}) ::
{:ok, Oban.Job.t() | nil}
def cleanup_attachments(true, %{"object" => _} = params) do
AttachmentsCleanupWorker.enqueue("cleanup_attachments", params)
end
def cleanup_attachments(_, _), do: {:ok, nil}
def prune(%Object{data: %{"id" => _id}} = object) do
with {:ok, object} <- Repo.delete(object),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),
{:ok, _} <- Cachex.del(:web_resp_cache, URI.parse(id).path) do
{:ok, _} <- invalid_object_cache(object) do
{:ok, object}
end
end
def invalid_object_cache(%Object{data: %{"id" => id}}) do
with {:ok, true} <- Cachex.del(:object_cache, "object:#{id}") do
Cachex.del(:web_resp_cache, URI.parse(id).path)
end
end
def set_cache(%Object{data: %{"id" => ap_id}} = object) do
Cachex.put(:object_cache, "object:#{ap_id}", object)
{:ok, object}

View file

@ -0,0 +1,26 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.MediaProxy.Invalidation do
@moduledoc false
@callback purge(list(String.t()), map()) :: {:ok, String.t()} | {:error, String.t()}
alias Pleroma.Config
@spec purge(list(String.t())) :: {:ok, String.t()} | {:error, String.t()}
def purge(urls) do
[:media_proxy, :invalidation, :enabled]
|> Config.get()
|> do_purge(urls)
end
defp do_purge(true, urls) do
provider = Config.get([:media_proxy, :invalidation, :provider])
options = Config.get(provider)
provider.purge(urls, options)
end
defp do_purge(_, _), do: :ok
end

View file

@ -0,0 +1,40 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.MediaProxy.Invalidation.Http do
@moduledoc false
@behaviour Pleroma.Web.MediaProxy.Invalidation
require Logger
@impl Pleroma.Web.MediaProxy.Invalidation
def purge(urls, opts) do
method = Map.get(opts, :method, :purge)
headers = Map.get(opts, :headers, [])
options = Map.get(opts, :options, [])
Logger.debug("Running cache purge: #{inspect(urls)}")
Enum.each(urls, fn url ->
with {:error, error} <- do_purge(method, url, headers, options) do
Logger.error("Error while cache purge: url - #{url}, error: #{inspect(error)}")
end
end)
{:ok, "success"}
end
defp do_purge(method, url, headers, options) do
case Pleroma.HTTP.request(method, url, "", headers, options) do
{:ok, %{status: status} = env} when 400 <= status and status < 500 ->
{:error, env}
{:error, error} = error ->
error
_ ->
{:ok, "success"}
end
end
end

View file

@ -0,0 +1,41 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.MediaProxy.Invalidation.Script do
@moduledoc false
@behaviour Pleroma.Web.MediaProxy.Invalidation
require Logger
@impl Pleroma.Web.MediaProxy.Invalidation
def purge(urls, %{script_path: script_path} = _options) do
args =
urls
|> List.wrap()
|> Enum.uniq()
|> Enum.join(" ")
path = Path.expand(script_path)
Logger.debug("Running cache purge: #{inspect(urls)}, #{path}")
case do_purge(path, [args]) do
{result, exit_status} when exit_status > 0 ->
Logger.error("Error while cache purge: #{inspect(result)}")
{:error, inspect(result)}
_ ->
{:ok, "success"}
end
end
def purge(_, _), do: {:error, "not found script path"}
defp do_purge(path, args) do
System.cmd(path, args)
rescue
error -> {inspect(error), 1}
end
end

View file

@ -27,8 +27,20 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
base_url =
String.trim_trailing(
Pleroma.Config.get([Pleroma.Upload, :base_url], Pleroma.Web.base_url()),
"/"
)
# find all objects for copies of the attachments, name and actor doesn't matter here
delete_ids =
object_ids_and_hrefs =
from(o in Object,
where:
fragment(
@ -67,29 +79,28 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
with 1 <- count do
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
href
|> String.trim_leading("#{base_url}/#{prefix}")
|> uploader.delete_file()
base_url =
String.trim_trailing(
Pleroma.Config.get([Pleroma.Upload, :base_url], Pleroma.Web.base_url()),
"/"
)
file_path = String.trim_leading(href, "#{base_url}/#{prefix}")
uploader.delete_file(file_path)
{id, href}
else
_ -> {id, nil}
end
id
end)
from(o in Object, where: o.id in ^delete_ids)
object_ids = Enum.map(object_ids_and_hrefs, fn {id, _} -> id end)
from(o in Object, where: o.id in ^object_ids)
|> Repo.delete_all()
object_ids_and_hrefs
|> Enum.filter(fn {_, href} -> not is_nil(href) end)
|> Enum.map(&elem(&1, 1))
|> Pleroma.Web.MediaProxy.Invalidation.purge()
{:ok, :success}
end
def perform(%{"op" => "cleanup_attachments", "object" => _object}, _job), do: :ok
def perform(%{"op" => "cleanup_attachments", "object" => _object}, _job), do: {:ok, :skip}
end

View file

@ -0,0 +1,35 @@
defmodule Pleroma.Web.MediaProxy.Invalidation.HttpTest do
use ExUnit.Case
alias Pleroma.Web.MediaProxy.Invalidation
import ExUnit.CaptureLog
import Tesla.Mock
test "logs hasn't error message when request is valid" do
mock(fn
%{method: :purge, url: "http://example.com/media/example.jpg"} ->
%Tesla.Env{status: 200}
end)
refute capture_log(fn ->
assert Invalidation.Http.purge(
["http://example.com/media/example.jpg"],
%{}
) == {:ok, "success"}
end) =~ "Error while cache purge"
end
test "it write error message in logs when request invalid" do
mock(fn
%{method: :purge, url: "http://example.com/media/example1.jpg"} ->
%Tesla.Env{status: 404}
end)
assert capture_log(fn ->
assert Invalidation.Http.purge(
["http://example.com/media/example1.jpg"],
%{}
) == {:ok, "success"}
end) =~ "Error while cache purge: url - http://example.com/media/example1.jpg"
end
end

View file

@ -0,0 +1,20 @@
defmodule Pleroma.Web.MediaProxy.Invalidation.ScriptTest do
use ExUnit.Case
alias Pleroma.Web.MediaProxy.Invalidation
import ExUnit.CaptureLog
test "it logger error when script not found" do
assert capture_log(fn ->
assert Invalidation.Script.purge(
["http://example.com/media/example.jpg"],
%{script_path: "./example"}
) == {:error, "\"%ErlangError{original: :enoent}\""}
end) =~ "Error while cache purge: \"%ErlangError{original: :enoent}\""
assert Invalidation.Script.purge(
["http://example.com/media/example.jpg"],
%{}
) == {:error, "not found script path"}
end
end