From 858fd01c012a48928b55999e8209371a5049c3e6 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:40:15 -0400 Subject: [PATCH 1/7] Pleroma.HTTP: permit passing through custom Tesla Middlware for requests --- lib/pleroma/http.ex | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index ec837e509..c11317850 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -68,7 +68,9 @@ defmodule Pleroma.HTTP do adapter = Application.get_env(:tesla, :adapter) - client = Tesla.client(adapter_middlewares(adapter), adapter) + extra_middleware = options[:tesla_middleware] || [] + + client = Tesla.client(adapter_middlewares(adapter, extra_middleware), adapter) maybe_limit( fn -> @@ -102,20 +104,21 @@ defmodule Pleroma.HTTP do fun.() end - defp adapter_middlewares(Tesla.Adapter.Gun) do - [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] + defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do + [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] ++ + extra_middleware end - defp adapter_middlewares({Tesla.Adapter.Finch, _}) do - [Tesla.Middleware.FollowRedirects] + defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do + [Tesla.Middleware.FollowRedirects] ++ extra_middleware end - defp adapter_middlewares(_) do + defp adapter_middlewares(_, extra_middleware) do if Pleroma.Config.get(:env) == :test do # Emulate redirects in test env, which are handled by adapters in other environments [Tesla.Middleware.FollowRedirects] else - [] + extra_middleware end end end From 731f7b87d24bd58d5349c7d1f564cd73dbf59aa9 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:42:50 -0400 Subject: [PATCH 2/7] Pad RichMediaWorker timeout to be 2s longer than the Rich Media HTTP timeout --- config/config.exs | 2 +- lib/pleroma/web/rich_media/helpers.ex | 5 ++++- lib/pleroma/workers/rich_media_worker.ex | 10 +++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/config/config.exs b/config/config.exs index 044f951f6..a370405e0 100644 --- a/config/config.exs +++ b/config/config.exs @@ -448,7 +448,7 @@ config :pleroma, :rich_media, Pleroma.Web.RichMedia.Parsers.TwitterCard, Pleroma.Web.RichMedia.Parsers.OEmbed ], - failure_backoff: 60_000, + timeout: 5_000, ttl_setters: [ Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, Pleroma.Web.RichMedia.Parser.TTL.Opengraph diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index fba23c657..e2889b351 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -69,9 +69,12 @@ defmodule Pleroma.Web.RichMedia.Helpers do end defp http_options do + timeout = Config.get!([:rich_media, :timeout]) + [ pool: :rich_media, - max_body: Config.get([:rich_media, :max_body], 5_000_000) + max_body: Config.get([:rich_media, :max_body], 5_000_000), + tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}] ] end end diff --git a/lib/pleroma/workers/rich_media_worker.ex b/lib/pleroma/workers/rich_media_worker.ex index 30f9d9e9e..0a1c6d58c 100644 --- a/lib/pleroma/workers/rich_media_worker.ex +++ b/lib/pleroma/workers/rich_media_worker.ex @@ -3,6 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Workers.RichMediaWorker do + alias Pleroma.Config alias Pleroma.Web.RichMedia.Backfill alias Pleroma.Web.RichMedia.Card @@ -31,6 +32,13 @@ defmodule Pleroma.Workers.RichMediaWorker do end end + # There is timeout value enforced by Tesla.Middleware.Timeout + # which can be found in the RichMedia.Helpers module to allow us to detect + # a slow/infinite data stream and insert a negative cache entry for the URL + # We pad it by 2 seconds to be certain a slow connection is detected and we + # can inject a negative cache entry for the URL @impl Oban.Worker - def timeout(_job), do: :timer.seconds(5) + def timeout(_job) do + Config.get!([:rich_media, :timeout]) + :timer.seconds(2) + end end From 5a62868106465bd30be11922b4ff3b11b3c174aa Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:43:45 -0400 Subject: [PATCH 3/7] Consider errors during HTTP GET and HEAD to be unrecoverable and insert a negative cache entry This is for a normal HTTP error response or timeout while receiving the data. A hard error from a process crash, DNS lookup failure, etc should produce a different response than {:ok, %Tesla.Env{}} and the request/job will be retryable. --- lib/pleroma/web/rich_media/backfill.ex | 6 +----- lib/pleroma/workers/rich_media_worker.ex | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/web/rich_media/backfill.ex b/lib/pleroma/web/rich_media/backfill.ex index a66422e71..cf638fa29 100644 --- a/lib/pleroma/web/rich_media/backfill.ex +++ b/lib/pleroma/web/rich_media/backfill.ex @@ -36,13 +36,9 @@ defmodule Pleroma.Web.RichMedia.Backfill do :ok {:error, type} = error - when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> + when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] -> negative_cache(url_hash) error - - {:error, type} = error - when type in [:get, :head] -> - error end end diff --git a/lib/pleroma/workers/rich_media_worker.ex b/lib/pleroma/workers/rich_media_worker.ex index 0a1c6d58c..2ebf42d4f 100644 --- a/lib/pleroma/workers/rich_media_worker.ex +++ b/lib/pleroma/workers/rich_media_worker.ex @@ -20,13 +20,9 @@ defmodule Pleroma.Workers.RichMediaWorker do :ok {:error, type} - when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> + when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] -> {:cancel, type} - {:error, type} - when type in [:get, :head] -> - {:error, type} - error -> {:error, error} end From 97d488aea3ce75e52d4e6ba9a3b5e4447b535879 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:45:35 -0400 Subject: [PATCH 4/7] Fix RichMedia negative cache entries The negative cache entry was a nil value, but that is an expected response when the cache is missing an entry so it didn't work as intended. --- lib/pleroma/web/rich_media/backfill.ex | 2 +- test/pleroma/web/rich_media/backfill_test.exs | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 test/pleroma/web/rich_media/backfill_test.exs diff --git a/lib/pleroma/web/rich_media/backfill.ex b/lib/pleroma/web/rich_media/backfill.ex index cf638fa29..1cd90629f 100644 --- a/lib/pleroma/web/rich_media/backfill.ex +++ b/lib/pleroma/web/rich_media/backfill.ex @@ -64,5 +64,5 @@ defmodule Pleroma.Web.RichMedia.Backfill do defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val) defp negative_cache(key, ttl \\ :timer.minutes(15)), - do: @cachex.put(:rich_media_cache, key, nil, ttl: ttl) + do: @cachex.put(:rich_media_cache, key, :error, ttl: ttl) end diff --git a/test/pleroma/web/rich_media/backfill_test.exs b/test/pleroma/web/rich_media/backfill_test.exs new file mode 100644 index 000000000..6d221fcf5 --- /dev/null +++ b/test/pleroma/web/rich_media/backfill_test.exs @@ -0,0 +1,26 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.BackfillTest do + use Pleroma.DataCase + + alias Pleroma.Web.RichMedia.Backfill + alias Pleroma.Web.RichMedia.Card + + import Mox + + setup_all do: clear_config([:rich_media, :enabled], true) + + test "sets a negative cache entry for an error" do + url = "https://bad.example.com/" + url_hash = Card.url_to_hash(url) + + Tesla.Mock.mock(fn %{url: ^url} -> :error end) + + Pleroma.CachexMock + |> expect(:put, fn :rich_media_cache, ^url_hash, :error, ttl: _ -> {:ok, true} end) + + Backfill.run(%{"url" => url}) + end +end From 8c5a68a62e0c6615783d1c3bab549a185a650fd6 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:52:23 -0400 Subject: [PATCH 5/7] Increase Oban.Pruner max_age to 15 mins --- config/config.exs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index a370405e0..30626d4a1 100644 --- a/config/config.exs +++ b/config/config.exs @@ -580,6 +580,8 @@ config :pleroma, Pleroma.User, ], email_blacklist: [] +# The Pruner :max_age must be longer than Worker :unique +# value or it cannot enforce uniqueness. config :pleroma, Oban, repo: Pleroma.Repo, log: false, @@ -593,7 +595,7 @@ config :pleroma, Oban, search_indexing: [limit: 10, paused: true], slow: 5 ], - plugins: [Oban.Plugins.Pruner], + plugins: [{Oban.Plugins.Pruner, max_age: 900}], crontab: [ {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} From 2314ff59811a1a258bcfb7e9888dbea82405e23a Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 15:55:30 -0400 Subject: [PATCH 6/7] Harden Rich Media parsing against very slow or malicious URLs --- changelog.d/rich-media-hardening.fix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/rich-media-hardening.fix diff --git a/changelog.d/rich-media-hardening.fix b/changelog.d/rich-media-hardening.fix new file mode 100644 index 000000000..ff3dc81f3 --- /dev/null +++ b/changelog.d/rich-media-hardening.fix @@ -0,0 +1 @@ +Harden Rich Media parsing against very slow or malicious URLs From 659891921371fd3a8788089fbc96a9158f07f289 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 24 Jul 2024 16:16:37 -0400 Subject: [PATCH 7/7] Document the new timeout setting --- config/description.exs | 6 +++--- docs/configuration/cheatsheet.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/description.exs b/config/description.exs index b7d86dc63..10a6e9cdf 100644 --- a/config/description.exs +++ b/config/description.exs @@ -2101,11 +2101,11 @@ config :pleroma, :config_description, [ ] }, %{ - key: :failure_backoff, + key: :timeout, type: :integer, description: - "Amount of milliseconds after request failure, during which the request will not be retried.", - suggestions: [60_000] + "Amount of milliseconds after which the HTTP request is forcibly terminated.", + suggestions: [5_000] } ] }, diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md index 9c5659988..ab0d1c78d 100644 --- a/docs/configuration/cheatsheet.md +++ b/docs/configuration/cheatsheet.md @@ -436,7 +436,7 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http, * `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`. * `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"]. * `parsers`: list of Rich Media parsers. -* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried. +* `timeout`: Amount of milliseconds after which the HTTP request is forcibly terminated. ## HTTP server