mirror of
https://git.pleroma.social/pleroma/pleroma.git
synced 2024-12-23 00:26:30 +00:00
Merge branch 'rich-media-db' into 'develop'
RichMedia refactor See merge request pleroma/pleroma!4057
This commit is contained in:
commit
8eea4f58c7
30 changed files with 993 additions and 582 deletions
1
changelog.d/card-endpoint.remove
Normal file
1
changelog.d/card-endpoint.remove
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Mastodon API: Remove deprecated GET /api/v1/statuses/:id/card endpoint https://github.com/mastodon/mastodon/pull/11213
|
1
changelog.d/rich_media_refactor.change
Normal file
1
changelog.d/rich_media_refactor.change
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
|
|
@ -428,7 +428,11 @@ config :pleroma, :rich_media,
|
||||||
Pleroma.Web.RichMedia.Parsers.OEmbed
|
Pleroma.Web.RichMedia.Parsers.OEmbed
|
||||||
],
|
],
|
||||||
failure_backoff: 60_000,
|
failure_backoff: 60_000,
|
||||||
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
|
ttl_setters: [
|
||||||
|
Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl,
|
||||||
|
Pleroma.Web.RichMedia.Parser.TTL.Opengraph
|
||||||
|
],
|
||||||
|
max_body: 5_000_000
|
||||||
|
|
||||||
config :pleroma, :media_proxy,
|
config :pleroma, :media_proxy,
|
||||||
enabled: false,
|
enabled: false,
|
||||||
|
@ -575,7 +579,8 @@ config :pleroma, Oban,
|
||||||
attachments_cleanup: 1,
|
attachments_cleanup: 1,
|
||||||
new_users_digest: 1,
|
new_users_digest: 1,
|
||||||
mute_expire: 5,
|
mute_expire: 5,
|
||||||
search_indexing: 10
|
search_indexing: 10,
|
||||||
|
rich_media_expiration: 2
|
||||||
],
|
],
|
||||||
plugins: [Oban.Plugins.Pruner],
|
plugins: [Oban.Plugins.Pruner],
|
||||||
crontab: [
|
crontab: [
|
||||||
|
|
|
@ -61,7 +61,8 @@ config :tesla, adapter: Tesla.Mock
|
||||||
config :pleroma, :rich_media,
|
config :pleroma, :rich_media,
|
||||||
enabled: false,
|
enabled: false,
|
||||||
ignore_hosts: [],
|
ignore_hosts: [],
|
||||||
ignore_tld: ["local", "localdomain", "lan"]
|
ignore_tld: ["local", "localdomain", "lan"],
|
||||||
|
max_body: 2_000_000
|
||||||
|
|
||||||
config :pleroma, :instance,
|
config :pleroma, :instance,
|
||||||
multi_factor_authentication: [
|
multi_factor_authentication: [
|
||||||
|
@ -174,6 +175,8 @@ config :pleroma, Pleroma.Uploaders.Uploader, timeout: 1_000
|
||||||
|
|
||||||
config :pleroma, Pleroma.Emoji.Loader, test_emoji: true
|
config :pleroma, Pleroma.Emoji.Loader, test_emoji: true
|
||||||
|
|
||||||
|
config :pleroma, Pleroma.Web.RichMedia.Backfill, provider: Pleroma.Web.RichMedia.Backfill
|
||||||
|
|
||||||
if File.exists?("./config/test.secret.exs") do
|
if File.exists?("./config/test.secret.exs") do
|
||||||
import_config "test.secret.exs"
|
import_config "test.secret.exs"
|
||||||
else
|
else
|
||||||
|
|
|
@ -65,20 +65,16 @@ defmodule Pleroma.HTML do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc """
Returns the `href` of the first anchor in the object's HTML content that is
not a mention, hashtag, attachment or `rel="tag"` link.

Returns `nil` when no such anchor exists, when that anchor has no `href`, or
when the object carries no binary `"content"`.
"""
@spec extract_first_external_url_from_object(Pleroma.Object.t()) :: String.t() | nil
def extract_first_external_url_from_object(%{data: %{"content" => html}})
    when is_binary(html) do
  # Only the first matching anchor is considered; later links are ignored.
  first_link =
    html
    |> Floki.parse_fragment!()
    |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
    |> Enum.take(1)

  first_link
  |> Floki.attribute("href")
  |> List.first()
end

def extract_first_external_url_from_object(_object), do: nil
|
||||||
end
|
end
|
||||||
|
|
|
@ -147,9 +147,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|
||||||
# Splice in the child object if we have one.
|
# Splice in the child object if we have one.
|
||||||
activity = Maps.put_if_present(activity, :object, object)
|
activity = Maps.put_if_present(activity, :object, object)
|
||||||
|
|
||||||
ConcurrentLimiter.limit(Pleroma.Web.RichMedia.Helpers, fn ->
|
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
|
|
||||||
end)
|
|
||||||
|
|
||||||
# Add local posts to search index
|
# Add local posts to search index
|
||||||
if local, do: Pleroma.Search.add_to_index(activity)
|
if local, do: Pleroma.Search.add_to_index(activity)
|
||||||
|
@ -177,7 +175,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|
||||||
id: "pleroma:fakeid"
|
id: "pleroma:fakeid"
|
||||||
}
|
}
|
||||||
|
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||||
{:ok, activity}
|
{:ok, activity}
|
||||||
|
|
||||||
{:remote_limit_pass, _} ->
|
{:remote_limit_pass, _} ->
|
||||||
|
|
|
@ -227,9 +227,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
ConcurrentLimiter.limit(Pleroma.Web.RichMedia.Helpers, fn ->
|
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
|
|
||||||
end)
|
|
||||||
|
|
||||||
Pleroma.Search.add_to_index(Map.put(activity, :object, object))
|
Pleroma.Search.add_to_index(Map.put(activity, :object, object))
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
|
||||||
when action in [
|
when action in [
|
||||||
:index,
|
:index,
|
||||||
:show,
|
:show,
|
||||||
:card,
|
|
||||||
:context,
|
:context,
|
||||||
:show_history,
|
:show_history,
|
||||||
:show_source
|
:show_source
|
||||||
|
@ -473,21 +472,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc "GET /api/v1/statuses/:id/card"
|
|
||||||
@deprecated "https://github.com/tootsuite/mastodon/pull/11213"
|
|
||||||
def card(
|
|
||||||
%{assigns: %{user: user}, private: %{open_api_spex: %{params: %{id: status_id}}}} = conn,
|
|
||||||
_
|
|
||||||
) do
|
|
||||||
with %Activity{} = activity <- Activity.get_by_id(status_id),
|
|
||||||
true <- Visibility.visible_for_user?(activity, user) do
|
|
||||||
data = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
render(conn, "card.json", data)
|
|
||||||
else
|
|
||||||
_ -> render_error(conn, :not_found, "Record not found")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
@doc "GET /api/v1/statuses/:id/favourited_by"
|
@doc "GET /api/v1/statuses/:id/favourited_by"
|
||||||
def favourited_by(
|
def favourited_by(
|
||||||
%{assigns: %{user: user}, private: %{open_api_spex: %{params: %{id: id}}}} = conn,
|
%{assigns: %{user: user}, private: %{open_api_spex: %{params: %{id: id}}}} = conn,
|
||||||
|
|
|
@ -21,6 +21,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
||||||
alias Pleroma.Web.MastodonAPI.StatusView
|
alias Pleroma.Web.MastodonAPI.StatusView
|
||||||
alias Pleroma.Web.MediaProxy
|
alias Pleroma.Web.MediaProxy
|
||||||
alias Pleroma.Web.PleromaAPI.EmojiReactionController
|
alias Pleroma.Web.PleromaAPI.EmojiReactionController
|
||||||
|
alias Pleroma.Web.RichMedia.Card
|
||||||
|
|
||||||
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
|
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
|
||||||
|
|
||||||
|
@ -29,9 +30,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
||||||
# pagination is restricted to 40 activities at a time
|
# pagination is restricted to 40 activities at a time
|
||||||
# Fire-and-forget prefetch: spawns one unlinked process per activity that
# calls `Card.get_by_activity/1`. Always returns `:ok`.
defp fetch_rich_media_for_activities(activities) do
  Enum.each(activities, &spawn(Card, :get_by_activity, [&1]))
end
|
||||||
|
|
||||||
|
@ -113,9 +112,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
||||||
# To do: check AdminAPIControllerTest on the reasons behind nil activities in the list
|
# To do: check AdminAPIControllerTest on the reasons behind nil activities in the list
|
||||||
activities = Enum.filter(opts.activities, & &1)
|
activities = Enum.filter(opts.activities, & &1)
|
||||||
|
|
||||||
# Start fetching rich media before doing anything else, so that later calls to get the cards
|
# Start prefetching rich media before doing anything else
|
||||||
# only block for timeout in the worst case, as opposed to
|
|
||||||
# length(activities_with_links) * timeout
|
|
||||||
fetch_rich_media_for_activities(activities)
|
fetch_rich_media_for_activities(activities)
|
||||||
replied_to_activities = get_replied_to_activities(activities)
|
replied_to_activities = get_replied_to_activities(activities)
|
||||||
quoted_activities = get_quoted_activities(activities)
|
quoted_activities = get_quoted_activities(activities)
|
||||||
|
@ -364,7 +361,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
||||||
|
|
||||||
summary = object.data["summary"] || ""
|
summary = object.data["summary"] || ""
|
||||||
|
|
||||||
card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity))
|
card =
|
||||||
|
case Card.get_by_activity(activity) do
|
||||||
|
%Card{} = result -> render("card.json", result)
|
||||||
|
_ -> nil
|
||||||
|
end
|
||||||
|
|
||||||
url =
|
url =
|
||||||
if user.local do
|
if user.local do
|
||||||
|
@ -567,15 +568,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
|
def render("card.json", %Card{fields: rich_media}) do
|
||||||
page_url_data = URI.parse(page_url)
|
page_url_data = URI.parse(rich_media["url"])
|
||||||
|
|
||||||
page_url_data =
|
|
||||||
if is_binary(rich_media["url"]) do
|
|
||||||
URI.merge(page_url_data, URI.parse(rich_media["url"]))
|
|
||||||
else
|
|
||||||
page_url_data
|
|
||||||
end
|
|
||||||
|
|
||||||
page_url = page_url_data |> to_string
|
page_url = page_url_data |> to_string
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ defmodule Pleroma.Web.PleromaAPI.Chat.MessageReferenceView do
|
||||||
alias Pleroma.User
|
alias Pleroma.User
|
||||||
alias Pleroma.Web.CommonAPI.Utils
|
alias Pleroma.Web.CommonAPI.Utils
|
||||||
alias Pleroma.Web.MastodonAPI.StatusView
|
alias Pleroma.Web.MastodonAPI.StatusView
|
||||||
|
alias Pleroma.Web.RichMedia.Card
|
||||||
|
|
||||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||||
|
|
||||||
|
@ -23,6 +24,12 @@ defmodule Pleroma.Web.PleromaAPI.Chat.MessageReferenceView do
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
) do
|
) do
|
||||||
|
card =
|
||||||
|
case Card.get_by_object(object) do
|
||||||
|
%Card{} = card_data -> StatusView.render("card.json", card_data)
|
||||||
|
_ -> nil
|
||||||
|
end
|
||||||
|
|
||||||
%{
|
%{
|
||||||
id: id |> to_string(),
|
id: id |> to_string(),
|
||||||
content: chat_message["content"],
|
content: chat_message["content"],
|
||||||
|
@ -34,11 +41,7 @@ defmodule Pleroma.Web.PleromaAPI.Chat.MessageReferenceView do
|
||||||
chat_message["attachment"] &&
|
chat_message["attachment"] &&
|
||||||
StatusView.render("attachment.json", attachment: chat_message["attachment"]),
|
StatusView.render("attachment.json", attachment: chat_message["attachment"]),
|
||||||
unread: unread,
|
unread: unread,
|
||||||
card:
|
card: card
|
||||||
StatusView.render(
|
|
||||||
"card.json",
|
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_object(object)
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|> put_idempotency_key()
|
|> put_idempotency_key()
|
||||||
end
|
end
|
||||||
|
|
101
lib/pleroma/web/rich_media/backfill.ex
Normal file
101
lib/pleroma/web/rich_media/backfill.ex
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Web.RichMedia.Backfill do
  @moduledoc """
  Fetches rich media metadata for a URL, stores it as a
  `Pleroma.Web.RichMedia.Card` and updates `:rich_media_cache`.

  `start/1` hands the job to the configured provider
  (`Pleroma.Web.RichMedia.Backfill.Task` unless overridden in the config),
  which is expected to call `run/1`.
  """

  alias Pleroma.Web.RichMedia.Card
  alias Pleroma.Web.RichMedia.Parser
  alias Pleroma.Web.RichMedia.Parser.TTL
  alias Pleroma.Workers.RichMediaExpirationWorker

  require Logger

  @backfiller Pleroma.Config.get([__MODULE__, :provider], Pleroma.Web.RichMedia.Backfill.Task)
  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  @max_attempts 3
  @retry 5_000

  @doc """
  Starts a backfill for `args.url`.

  Adds `:attempt` (set to 1) and `:url_hash` to `args` and passes the result
  to the provider's `run/1`. An optional `:activity_id` in `args` makes a
  successful run stream the activity out again.
  """
  def start(%{url: url} = args) when is_binary(url) do
    url_hash = Card.url_to_hash(url)

    args =
      args
      |> Map.put(:attempt, 1)
      |> Map.put(:url_hash, url_hash)

    @backfiller.run(args)
  end

  @doc """
  Parses `url` and persists the resulting card.

  Invalid metadata, an oversized body or an unsupported content type write a
  `nil` entry to the cache without retrying. Any other result is retried
  after sleeping `@retry * attempt` milliseconds; once `@max_attempts` is
  exceeded a `nil` entry with a 15 minute TTL is written instead.
  """
  def run(%{url: url, url_hash: url_hash, attempt: attempt} = args)
      when attempt <= @max_attempts do
    case Parser.parse(url) do
      {:ok, fields} ->
        {:ok, card} = Card.create(url, fields)

        maybe_schedule_expiration(url, fields)

        if Map.has_key?(args, :activity_id) do
          stream_update(args)
        end

        warm_cache(url_hash, card)

      {:error, {:invalid_metadata, fields}} ->
        Logger.debug("Rich media incomplete or invalid metadata for #{url}: #{inspect(fields)}")
        negative_cache(url_hash)

      {:error, :body_too_large} ->
        Logger.error("Rich media error for #{url}: :body_too_large")
        negative_cache(url_hash)

      {:error, {:content_type, type}} ->
        Logger.debug("Rich media error for #{url}: :content_type is #{type}")
        negative_cache(url_hash)

      e ->
        Logger.debug("Rich media error for #{url}: #{inspect(e)}")

        # Linear backoff before the next attempt.
        :timer.sleep(@retry * attempt)

        run(%{args | attempt: attempt + 1})
    end
  end

  # Reached once `attempt` exceeds @max_attempts.
  def run(%{url: url, url_hash: url_hash}) do
    Logger.debug("Rich media failure for #{url}")

    negative_cache(url_hash, :timer.minutes(15))
  end

  # Schedules a RichMediaExpirationWorker job at the unix timestamp returned
  # by the TTL setters. The guard is `is_integer/1` because
  # `DateTime.from_unix!/1` does not accept floats; any other TTL result is
  # ignored.
  defp maybe_schedule_expiration(url, fields) do
    case TTL.process(fields, url) do
      {:ok, ttl} when is_integer(ttl) ->
        timestamp = DateTime.from_unix!(ttl)

        RichMediaExpirationWorker.new(%{"url" => url}, scheduled_at: timestamp)
        |> Oban.insert()

      _ ->
        :ok
    end
  end

  # Re-streams the activity that triggered the backfill.
  defp stream_update(%{activity_id: activity_id}) do
    Pleroma.Activity.get_by_id(activity_id)
    |> Pleroma.Activity.normalize()
    |> Pleroma.Web.ActivityPub.ActivityPub.stream_out()
  end

  defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val)

  # Stores `nil` under the key; `ttl` is passed through to the cache as-is.
  defp negative_cache(key, ttl \\ nil), do: @cachex.put(:rich_media_cache, key, nil, ttl: ttl)
end
|
||||||
|
|
||||||
|
defmodule Pleroma.Web.RichMedia.Backfill.Task do
  @moduledoc """
  Backfill provider that runs `Pleroma.Web.RichMedia.Backfill.run/1` in a
  task started under `Pleroma.TaskSupervisor`.
  """

  alias Pleroma.Web.RichMedia.Backfill

  @doc """
  Starts a task under `Pleroma.TaskSupervisor` that calls `Backfill.run(args)`.

  `args` must contain `:url_hash`.
  """
  def run(args) do
    opts = [name: {:global, {:rich_media, args.url_hash}}]

    Task.Supervisor.start_child(Pleroma.TaskSupervisor, Backfill, :run, [args], opts)
  end
end
|
157
lib/pleroma/web/rich_media/card.ex
Normal file
157
lib/pleroma/web/rich_media/card.ex
Normal file
|
@ -0,0 +1,157 @@
|
||||||
|
defmodule Pleroma.Web.RichMedia.Card do
  @moduledoc """
  Database-backed rich media card, keyed by the SHA-256 hash of the page URL.

  Lookups go through `:rich_media_cache`. On the non-draft paths a miss
  starts a `Pleroma.Web.RichMedia.Backfill` and returns `nil`.
  """

  use Ecto.Schema
  import Ecto.Changeset
  import Ecto.Query

  alias Pleroma.Activity
  alias Pleroma.HTML
  alias Pleroma.Object
  alias Pleroma.Repo
  alias Pleroma.Web.RichMedia.Backfill
  alias Pleroma.Web.RichMedia.Parser

  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)

  @type t :: %__MODULE__{}

  schema "rich_media_card" do
    field(:url_hash, :binary)
    field(:fields, :map)

    timestamps()
  end

  @doc false
  def changeset(card, attrs) do
    card
    |> cast(attrs, [:url_hash, :fields])
    |> validate_required([:url_hash, :fields])
    |> unique_constraint(:url_hash)
  end

  @doc """
  Inserts the card for `url`, replacing `fields` if a row with the same
  `url_hash` already exists. `"url"` is added to `fields` when missing.
  """
  @spec create(String.t(), map()) :: {:ok, t()} | {:error, Ecto.Changeset.t()}
  def create(url, fields) do
    url_hash = url_to_hash(url)

    fields = Map.put_new(fields, "url", url)

    %__MODULE__{}
    |> changeset(%{url_hash: url_hash, fields: fields})
    |> Repo.insert(on_conflict: {:replace, [:fields]}, conflict_target: :url_hash)
  end

  @doc """
  Deletes the card stored for `url`, if any, and evicts its cache entry.

  The row is looked up directly in the database instead of through
  `get_by_url/1`: that function returns `:error` while rich media is disabled
  and, on a cache miss, commits the found card back into `:rich_media_cache`,
  which would leave the just-deleted card cached.
  """
  @spec delete(String.t()) :: {:ok, Ecto.Schema.t()} | {:error, Ecto.Changeset.t()} | :ok
  def delete(url) do
    url_hash = url_to_hash(url)

    stored =
      __MODULE__
      |> where(url_hash: ^url_hash)
      |> Repo.one()

    result =
      case stored do
        %__MODULE__{} = card -> Repo.delete(card)
        nil -> :ok
      end

    # Evict last so nothing done above can repopulate the entry.
    @cachex.del(:rich_media_cache, url_hash)

    result
  end

  @doc """
  Looks the card for `url` up through `:rich_media_cache`, falling back to the
  database on a miss. Found cards are committed to the cache; misses are not.

  Returns `:error` when `[:rich_media, :enabled]` is falsy and `nil` for a
  `nil` url.
  """
  @spec get_by_url(String.t() | nil) :: t() | nil | :error
  def get_by_url(url) when is_binary(url) do
    if @config_impl.get([:rich_media, :enabled]) do
      url_hash = url_to_hash(url)

      @cachex.fetch!(:rich_media_cache, url_hash, fn _ ->
        result =
          __MODULE__
          |> where(url_hash: ^url_hash)
          |> Repo.one()

        case result do
          %__MODULE__{} = card -> {:commit, card}
          _ -> {:ignore, nil}
        end
      end)
    else
      :error
    end
  end

  def get_by_url(nil), do: nil

  @doc """
  Returns the card for `url`, or `nil` after starting a backfill when none is
  available. `backfill_opts` is passed to `Backfill.start/1` with `:url` added.
  Returns `nil` without backfilling when `get_by_url/1` reports `:error`.
  """
  @spec get_or_backfill_by_url(String.t(), map()) :: t() | nil
  def get_or_backfill_by_url(url, backfill_opts \\ %{}) do
    case get_by_url(url) do
      %__MODULE__{} = card ->
        card

      nil ->
        backfill_opts = Map.put(backfill_opts, :url, url)

        Backfill.start(backfill_opts)

        nil

      :error ->
        nil
    end
  end

  @doc """
  Returns the card for the first external URL found in `object`, starting a
  backfill when no card is available yet.
  """
  @spec get_by_object(Object.t()) :: t() | nil | :error
  def get_by_object(object) do
    case HTML.extract_first_external_url_from_object(object) do
      nil -> nil
      url -> get_or_backfill_by_url(url)
    end
  end

  @doc """
  Returns the card for the first external URL of the activity's object.

  For the fake/draft activity (`id: "pleroma:fakeid"`) a missing card is
  parsed and created synchronously. For any other activity the URL is
  resolved through the scrubber cache and a missing card triggers a backfill
  tagged with the activity id.
  """
  @spec get_by_activity(Activity.t()) :: t() | nil | :error
  # Fake/Draft activity
  def get_by_activity(%Activity{id: "pleroma:fakeid"} = activity) do
    with %Object{} = object <- Object.normalize(activity, fetch: false),
         url when not is_nil(url) <- HTML.extract_first_external_url_from_object(object) do
      case get_by_url(url) do
        # Cache hit
        %__MODULE__{} = card ->
          card

        # Cache miss, but fetch for rendering the Draft
        _ ->
          with {:ok, fields} <- Parser.parse(url),
               {:ok, card} <- create(url, fields) do
            card
          else
            _ -> nil
          end
      end
    else
      _ ->
        nil
    end
  end

  def get_by_activity(activity) do
    with %Object{} = object <- Object.normalize(activity, fetch: false),
         {_, nil} <- {:cached, get_cached_url(object, activity.id)} do
      nil
    else
      {:cached, url} ->
        get_or_backfill_by_url(url, %{activity_id: activity.id})

      _ ->
        :error
    end
  end

  @doc "Returns the lowercase hex SHA-256 digest of `url`."
  @spec url_to_hash(String.t()) :: String.t()
  def url_to_hash(url) do
    :crypto.hash(:sha256, url) |> Base.encode16(case: :lower)
  end

  # Extracts the object's first external URL once per activity and keeps it in
  # :scrubber_cache under "URL|<activity id>"; the key is registered with
  # Activity.HTML.add_cache_key_for/2.
  defp get_cached_url(object, activity_id) do
    key = "URL|#{activity_id}"

    @cachex.fetch!(:scrubber_cache, key, fn _ ->
      url = HTML.extract_first_external_url_from_object(object)
      Activity.HTML.add_cache_key_for(activity_id, key)

      {:commit, url}
    end)
  end
end
|
|
@ -3,65 +3,13 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.Helpers do
|
defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
alias Pleroma.Activity
|
alias Pleroma.Config
|
||||||
alias Pleroma.HTML
|
|
||||||
alias Pleroma.Object
|
|
||||||
alias Pleroma.Web.RichMedia.Parser
|
|
||||||
|
|
||||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
|
||||||
|
|
||||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
|
||||||
|
|
||||||
@options [
|
|
||||||
pool: :media,
|
|
||||||
max_body: 2_000_000,
|
|
||||||
recv_timeout: 2_000
|
|
||||||
]
|
|
||||||
|
|
||||||
def fetch_data_for_object(object) do
|
|
||||||
with true <- @config_impl.get([:rich_media, :enabled]),
|
|
||||||
{:ok, page_url} <-
|
|
||||||
HTML.extract_first_external_url_from_object(object),
|
|
||||||
{:ok, rich_media} <- Parser.parse(page_url) do
|
|
||||||
%{page_url: page_url, rich_media: rich_media}
|
|
||||||
else
|
|
||||||
_ -> %{}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
|
|
||||||
with true <- @config_impl.get([:rich_media, :enabled]),
|
|
||||||
%Object{} = object <- Object.normalize(activity, fetch: false) do
|
|
||||||
if object.data["fake"] do
|
|
||||||
fetch_data_for_object(object)
|
|
||||||
else
|
|
||||||
key = "URL|#{activity.id}"
|
|
||||||
|
|
||||||
@cachex.fetch!(:scrubber_cache, key, fn _ ->
|
|
||||||
result = fetch_data_for_object(object)
|
|
||||||
|
|
||||||
cond do
|
|
||||||
match?(%{page_url: _, rich_media: _}, result) ->
|
|
||||||
Activity.HTML.add_cache_key_for(activity.id, key)
|
|
||||||
{:commit, result}
|
|
||||||
|
|
||||||
true ->
|
|
||||||
{:ignore, %{}}
|
|
||||||
end
|
|
||||||
end)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
_ -> %{}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetch_data_for_activity(_), do: %{}
|
|
||||||
|
|
||||||
def rich_media_get(url) do
|
def rich_media_get(url) do
|
||||||
headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
|
headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
|
||||||
|
|
||||||
head_check =
|
head_check =
|
||||||
case Pleroma.HTTP.head(url, headers, @options) do
|
case Pleroma.HTTP.head(url, headers, http_options()) do
|
||||||
# If the HEAD request didn't reach the server for whatever reason,
|
# If the HEAD request didn't reach the server for whatever reason,
|
||||||
# we assume the GET that comes right after won't either
|
# we assume the GET that comes right after won't either
|
||||||
{:error, _} = e ->
|
{:error, _} = e ->
|
||||||
|
@ -76,7 +24,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
:ok
|
:ok
|
||||||
end
|
end
|
||||||
|
|
||||||
with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, @options)
|
with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, http_options())
|
||||||
end
|
end
|
||||||
|
|
||||||
defp check_content_type(headers) do
|
defp check_content_type(headers) do
|
||||||
|
@ -92,12 +40,13 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@max_body @options[:max_body]
|
|
||||||
defp check_content_length(headers) do
|
defp check_content_length(headers) do
|
||||||
|
max_body = Keyword.get(http_options(), :max_body)
|
||||||
|
|
||||||
case List.keyfind(headers, "content-length", 0) do
|
case List.keyfind(headers, "content-length", 0) do
|
||||||
{_, maybe_content_length} ->
|
{_, maybe_content_length} ->
|
||||||
case Integer.parse(maybe_content_length) do
|
case Integer.parse(maybe_content_length) do
|
||||||
{content_length, ""} when content_length <= @max_body -> :ok
|
{content_length, ""} when content_length <= max_body -> :ok
|
||||||
{_, ""} -> {:error, :body_too_large}
|
{_, ""} -> {:error, :body_too_large}
|
||||||
_ -> :ok
|
_ -> :ok
|
||||||
end
|
end
|
||||||
|
@ -106,4 +55,11 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
||||||
:ok
|
:ok
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Options passed to Pleroma.HTTP for rich media requests. The body size limit
# is read from [:rich_media, :max_body] on every call (default 5_000_000).
defp http_options do
  body_limit = Config.get([:rich_media, :max_body], 5_000_000)

  [pool: :media, max_body: body_limit]
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -5,134 +5,28 @@
|
||||||
defmodule Pleroma.Web.RichMedia.Parser do
|
defmodule Pleroma.Web.RichMedia.Parser do
|
||||||
require Logger
|
require Logger
|
||||||
|
|
||||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
|
||||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
||||||
|
|
||||||
defp parsers do
|
defp parsers do
|
||||||
Pleroma.Config.get([:rich_media, :parsers])
|
Pleroma.Config.get([:rich_media, :parsers])
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc """
Validates `url`, fetches and parses the page, and returns `{:ok, data}` with
`"url"` set to the requested URL.

A `nil` url returns `nil`. Any non-matching result of the validation or
parsing step is returned unchanged.
"""
@spec parse(String.t() | nil) :: {:ok, map()} | {:error, any()} | nil
def parse(nil), do: nil

def parse(url) do
  with :ok <- validate_page_url(url),
       {:ok, data} <- parse_url(url) do
    data = Map.put(data, "url", url)
    {:ok, data}
  end
end
|
||||||
|
|
||||||
defp get_cached_or_parse(url) do
|
defp parse_url(url) do
|
||||||
case @cachex.fetch(:rich_media_cache, url, fn ->
|
|
||||||
case parse_url(url) do
|
|
||||||
{:ok, _} = res ->
|
|
||||||
{:commit, res}
|
|
||||||
|
|
||||||
{:error, reason} = e ->
|
|
||||||
# Unfortunately we have to log errors here, instead of doing that
|
|
||||||
# along with ttl setting at the bottom. Otherwise we can get log spam
|
|
||||||
# if more than one process was waiting for the rich media card
|
|
||||||
# while it was generated. Ideally we would set ttl here as well,
|
|
||||||
# so we don't override it number_of_waiters_on_generation
|
|
||||||
# times, but one, obviously, can't set ttl for not-yet-created entry
|
|
||||||
# and Cachex doesn't support returning ttl from the fetch callback.
|
|
||||||
log_error(url, reason)
|
|
||||||
{:commit, e}
|
|
||||||
end
|
|
||||||
end) do
|
|
||||||
{action, res} when action in [:commit, :ok] ->
|
|
||||||
case res do
|
|
||||||
{:ok, _data} = res ->
|
|
||||||
res
|
|
||||||
|
|
||||||
{:error, reason} = e ->
|
|
||||||
if action == :commit, do: set_error_ttl(url, reason)
|
|
||||||
e
|
|
||||||
end
|
|
||||||
|
|
||||||
{:error, e} ->
|
|
||||||
{:error, {:cachex_error, e}}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp set_error_ttl(_url, :body_too_large), do: :ok
|
|
||||||
defp set_error_ttl(_url, {:content_type, _}), do: :ok
|
|
||||||
|
|
||||||
# The TTL is not set for the errors above, since they are unlikely to change
|
|
||||||
# with time
|
|
||||||
|
|
||||||
defp set_error_ttl(url, _reason) do
|
|
||||||
ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000)
|
|
||||||
@cachex.expire(:rich_media_cache, url, ttl)
|
|
||||||
:ok
|
|
||||||
end
|
|
||||||
|
|
||||||
defp log_error(url, {:invalid_metadata, data}) do
|
|
||||||
Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp log_error(url, reason) do
|
|
||||||
Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end)
|
|
||||||
end
|
|
||||||
|
|
||||||
@doc """
|
|
||||||
Set the rich media cache based on the expiration time of image.
|
|
||||||
|
|
||||||
Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
|
|
||||||
|
|
||||||
## Example
|
|
||||||
|
|
||||||
defmodule MyModule do
|
|
||||||
@behaviour Pleroma.Web.RichMedia.Parser.TTL
|
|
||||||
def ttl(data, url) do
|
|
||||||
image_url = Map.get(data, :image)
|
|
||||||
# do some parsing in the url and get the ttl of the image
|
|
||||||
# and return ttl is unix time
|
|
||||||
parse_ttl_from_url(image_url)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
Define the module in the config
|
|
||||||
|
|
||||||
config :pleroma, :rich_media,
|
|
||||||
ttl_setters: [MyModule]
|
|
||||||
"""
|
|
||||||
@spec set_ttl_based_on_image(map(), String.t()) ::
|
|
||||||
{:ok, integer() | :noop} | {:error, :no_key}
|
|
||||||
def set_ttl_based_on_image(data, url) do
|
|
||||||
case get_ttl_from_image(data, url) do
|
|
||||||
ttl when is_number(ttl) ->
|
|
||||||
ttl = ttl * 1000
|
|
||||||
|
|
||||||
case @cachex.expire_at(:rich_media_cache, url, ttl) do
|
|
||||||
{:ok, true} -> {:ok, ttl}
|
|
||||||
{:ok, false} -> {:error, :no_key}
|
|
||||||
end
|
|
||||||
|
|
||||||
_ ->
|
|
||||||
{:ok, :noop}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp get_ttl_from_image(data, url) do
|
|
||||||
[:rich_media, :ttl_setters]
|
|
||||||
|> Pleroma.Config.get()
|
|
||||||
|> Enum.reduce({:ok, nil}, fn
|
|
||||||
module, {:ok, _ttl} ->
|
|
||||||
module.ttl(data, url)
|
|
||||||
|
|
||||||
_, error ->
|
|
||||||
error
|
|
||||||
end)
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse_url(url) do
|
|
||||||
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
|
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
|
||||||
{:ok, html} <- Floki.parse_document(html) do
|
{:ok, html} <- Floki.parse_document(html) do
|
||||||
html
|
html
|
||||||
|> maybe_parse()
|
|> maybe_parse()
|
||||||
|> Map.put("url", url)
|
|
||||||
|> clean_parsed_data()
|
|> clean_parsed_data()
|
||||||
|> check_parsed_data()
|
|> check_parsed_data()
|
||||||
end
|
end
|
||||||
|
|
|
@ -4,4 +4,17 @@
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.Parser.TTL do
  @moduledoc """
  Behaviour for modules that derive an expiration time for rich media data,
  plus `process/2`, which runs the modules configured under
  `[:rich_media, :ttl_setters]`.
  """

  @callback ttl(map(), String.t()) :: integer() | nil

  @doc """
  Calls `ttl/2` on each configured setter in order and returns `{:ok, ttl}`
  for the first numeric result, or `{:ok, nil}` when no setter yields one
  (including when no setters are configured).
  """
  @spec process(map(), String.t()) :: {:ok, integer() | nil}
  def process(data, url) do
    [:rich_media, :ttl_setters]
    # Default to an empty list so a missing setting does not crash the reduce.
    |> Pleroma.Config.get([])
    |> Enum.reduce_while({:ok, nil}, fn module, acc ->
      case module.ttl(data, url) do
        ttl when is_number(ttl) -> {:halt, {:ok, ttl}}
        _ -> {:cont, acc}
      end
    end)
  end
end
|
||||||
|
|
|
@ -7,7 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
|
||||||
|
|
||||||
@impl true
|
@impl true
|
||||||
def ttl(data, _url) do
|
def ttl(data, _url) do
|
||||||
image = Map.get(data, :image)
|
image = Map.get(data, "image")
|
||||||
|
|
||||||
if aws_signed_url?(image) do
|
if aws_signed_url?(image) do
|
||||||
image
|
image
|
||||||
|
@ -15,14 +15,15 @@ defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
|
||||||
|> format_query_params()
|
|> format_query_params()
|
||||||
|> get_expiration_timestamp()
|
|> get_expiration_timestamp()
|
||||||
else
|
else
|
||||||
{:error, "Not aws signed url #{inspect(image)}"}
|
nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns true only when `image` is a URL on an amazonaws.com host whose query
# string carries `X-Amz-Expires`; returns false for any other non-empty binary.
defp aws_signed_url?(image) when is_binary(image) and image != "" do
  %URI{host: host, query: query} = URI.parse(image)

  # `URI.parse/1` leaves `host` and `query` as nil when the URL lacks them, and
  # `String.contains?/2` only accepts binaries, so both are checked first.
  # Without the `query` check, an amazonaws.com URL with no query string raised.
  is_binary(host) and is_binary(query) and
    String.contains?(host, "amazonaws.com") and
    String.contains?(query, "X-Amz-Expires")
end
|
||||||
|
|
||||||
defp aws_signed_url?(_), do: nil
|
defp aws_signed_url?(_), do: nil
|
||||||
|
|
20
lib/pleroma/web/rich_media/parser/ttl/opengraph.ex
Normal file
20
lib/pleroma/web/rich_media/parser/ttl/opengraph.ex
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Web.RichMedia.Parser.TTL.Opengraph do
  @moduledoc """
  TTL setter that reads the `"ttl"` field of parsed rich media data.
  """

  @behaviour Pleroma.Web.RichMedia.Parser.TTL

  @doc """
  Turns the `"ttl"` field (an integer given as a string) into an absolute Unix
  timestamp by adding it to the current UTC time in seconds.

  Returns `nil` when the field is absent, is not a binary, or is not a plain
  base-10 integer string.
  """
  @impl true
  def ttl(%{"ttl" => ttl_string}, _url) when is_binary(ttl_string) do
    # Require the whole string to be consumed so that values such as "600abc"
    # are rejected, exactly as `String.to_integer/1` would reject them, without
    # resorting to a catch-all `rescue`.
    case Integer.parse(ttl_string) do
      {seconds, ""} -> DateTime.to_unix(DateTime.utc_now()) + seconds
      _ -> nil
    end
  end

  def ttl(_, _), do: nil
end
|
|
@ -768,7 +768,6 @@ defmodule Pleroma.Web.Router do
|
||||||
get("/statuses", StatusController, :index)
|
get("/statuses", StatusController, :index)
|
||||||
get("/statuses/:id", StatusController, :show)
|
get("/statuses/:id", StatusController, :show)
|
||||||
get("/statuses/:id/context", StatusController, :context)
|
get("/statuses/:id/context", StatusController, :context)
|
||||||
get("/statuses/:id/card", StatusController, :card)
|
|
||||||
get("/statuses/:id/favourited_by", StatusController, :favourited_by)
|
get("/statuses/:id/favourited_by", StatusController, :favourited_by)
|
||||||
get("/statuses/:id/reblogged_by", StatusController, :reblogged_by)
|
get("/statuses/:id/reblogged_by", StatusController, :reblogged_by)
|
||||||
get("/statuses/:id/history", StatusController, :show_history)
|
get("/statuses/:id/history", StatusController, :show_history)
|
||||||
|
|
15
lib/pleroma/workers/rich_media_expiration_worker.ex
Normal file
15
lib/pleroma/workers/rich_media_expiration_worker.ex
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Workers.RichMediaExpirationWorker do
  @moduledoc """
  Oban worker on the `:rich_media_expiration` queue.

  Each job passes its `"url"` argument to `Pleroma.Web.RichMedia.Card.delete/1`.
  """

  use Oban.Worker, queue: :rich_media_expiration

  alias Pleroma.Web.RichMedia.Card

  @impl Oban.Worker
  def perform(%Job{args: %{"url" => card_url}}), do: Card.delete(card_url)
end
|
|
@ -0,0 +1,14 @@
|
||||||
|
defmodule Pleroma.Repo.Migrations.CreateRichMediaCard do
  use Ecto.Migration

  # Creates the `rich_media_card` table with a binary `url_hash`, a `fields`
  # map and timestamps, then adds a unique index on `url_hash`.
  def change do
    create table(:rich_media_card) do
      add :url_hash, :bytea
      add :fields, :map

      timestamps()
    end

    create unique_index(:rich_media_card, [:url_hash])
  end
end
|
392
test/fixtures/rich_media/reddit.html
vendored
Normal file
392
test/fixtures/rich_media/reddit.html
vendored
Normal file
File diff suppressed because one or more lines are too long
|
@ -202,7 +202,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
})
|
})
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
{:ok, url} = HTML.extract_first_external_url_from_object(object)
|
url = HTML.extract_first_external_url_from_object(object)
|
||||||
assert url == "https://github.com/komeiji-satori/Dress"
|
assert url == "https://github.com/komeiji-satori/Dress"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -217,7 +217,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
})
|
})
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
{:ok, url} = HTML.extract_first_external_url_from_object(object)
|
url = HTML.extract_first_external_url_from_object(object)
|
||||||
|
|
||||||
assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
|
assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
})
|
})
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
{:ok, url} = HTML.extract_first_external_url_from_object(object)
|
url = HTML.extract_first_external_url_from_object(object)
|
||||||
|
|
||||||
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
|
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
|
||||||
end
|
end
|
||||||
|
@ -249,7 +249,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
})
|
})
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
{:ok, url} = HTML.extract_first_external_url_from_object(object)
|
url = HTML.extract_first_external_url_from_object(object)
|
||||||
|
|
||||||
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
|
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
|
||||||
end
|
end
|
||||||
|
@ -261,7 +261,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
|
|
||||||
assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
|
assert nil == HTML.extract_first_external_url_from_object(object)
|
||||||
end
|
end
|
||||||
|
|
||||||
test "skips attachment links" do
|
test "skips attachment links" do
|
||||||
|
@ -275,7 +275,7 @@ defmodule Pleroma.HTMLTest do
|
||||||
|
|
||||||
object = Object.normalize(activity, fetch: false)
|
object = Object.normalize(activity, fetch: false)
|
||||||
|
|
||||||
assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
|
assert nil == HTML.extract_first_external_url_from_object(object)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -329,62 +329,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do
|
||||||
assert real_status == fake_status
|
assert real_status == fake_status
|
||||||
end
|
end
|
||||||
|
|
||||||
test "fake statuses' preview card is not cached", %{conn: conn} do
|
|
||||||
Pleroma.StaticStubbedConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
Tesla.Mock.mock_global(fn
|
|
||||||
env ->
|
|
||||||
apply(HttpRequestMock, :request, [env])
|
|
||||||
end)
|
|
||||||
|
|
||||||
conn1 =
|
|
||||||
conn
|
|
||||||
|> put_req_header("content-type", "application/json")
|
|
||||||
|> post("/api/v1/statuses", %{
|
|
||||||
"status" => "https://example.com/ogp",
|
|
||||||
"preview" => true
|
|
||||||
})
|
|
||||||
|
|
||||||
conn2 =
|
|
||||||
conn
|
|
||||||
|> put_req_header("content-type", "application/json")
|
|
||||||
|> post("/api/v1/statuses", %{
|
|
||||||
"status" => "https://example.com/twitter-card",
|
|
||||||
"preview" => true
|
|
||||||
})
|
|
||||||
|
|
||||||
assert %{"card" => %{"title" => "The Rock"}} = json_response_and_validate_schema(conn1, 200)
|
|
||||||
|
|
||||||
assert %{"card" => %{"title" => "Small Island Developing States Photo Submission"}} =
|
|
||||||
json_response_and_validate_schema(conn2, 200)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "posting a status with OGP link preview", %{conn: conn} do
|
|
||||||
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
|
||||||
|
|
||||||
Pleroma.StaticStubbedConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
conn =
|
|
||||||
conn
|
|
||||||
|> put_req_header("content-type", "application/json")
|
|
||||||
|> post("/api/v1/statuses", %{
|
|
||||||
"status" => "https://example.com/ogp"
|
|
||||||
})
|
|
||||||
|
|
||||||
assert %{"id" => id, "card" => %{"title" => "The Rock"}} =
|
|
||||||
json_response_and_validate_schema(conn, 200)
|
|
||||||
|
|
||||||
assert Activity.get_by_id(id)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "posting a direct status", %{conn: conn} do
|
test "posting a direct status", %{conn: conn} do
|
||||||
user2 = insert(:user)
|
user2 = insert(:user)
|
||||||
content = "direct cofe @#{user2.nickname}"
|
content = "direct cofe @#{user2.nickname}"
|
||||||
|
@ -1699,91 +1643,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "cards" do
|
|
||||||
setup do
|
|
||||||
Pleroma.StaticStubbedConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
oauth_access(["read:statuses"])
|
|
||||||
end
|
|
||||||
|
|
||||||
test "returns rich-media card", %{conn: conn, user: user} do
|
|
||||||
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
|
||||||
|
|
||||||
{:ok, activity} = CommonAPI.post(user, %{status: "https://example.com/ogp"})
|
|
||||||
|
|
||||||
card_data = %{
|
|
||||||
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
|
||||||
"provider_name" => "example.com",
|
|
||||||
"provider_url" => "https://example.com",
|
|
||||||
"title" => "The Rock",
|
|
||||||
"type" => "link",
|
|
||||||
"url" => "https://example.com/ogp",
|
|
||||||
"description" =>
|
|
||||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
|
||||||
"pleroma" => %{
|
|
||||||
"opengraph" => %{
|
|
||||||
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
|
||||||
"title" => "The Rock",
|
|
||||||
"type" => "video.movie",
|
|
||||||
"url" => "https://example.com/ogp",
|
|
||||||
"description" =>
|
|
||||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
response =
|
|
||||||
conn
|
|
||||||
|> get("/api/v1/statuses/#{activity.id}/card")
|
|
||||||
|> json_response_and_validate_schema(200)
|
|
||||||
|
|
||||||
assert response == card_data
|
|
||||||
|
|
||||||
# works with private posts
|
|
||||||
{:ok, activity} =
|
|
||||||
CommonAPI.post(user, %{status: "https://example.com/ogp", visibility: "direct"})
|
|
||||||
|
|
||||||
response_two =
|
|
||||||
conn
|
|
||||||
|> get("/api/v1/statuses/#{activity.id}/card")
|
|
||||||
|> json_response_and_validate_schema(200)
|
|
||||||
|
|
||||||
assert response_two == card_data
|
|
||||||
end
|
|
||||||
|
|
||||||
test "replaces missing description with an empty string", %{conn: conn, user: user} do
|
|
||||||
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
|
||||||
|
|
||||||
{:ok, activity} = CommonAPI.post(user, %{status: "https://example.com/ogp-missing-data"})
|
|
||||||
|
|
||||||
response =
|
|
||||||
conn
|
|
||||||
|> get("/api/v1/statuses/#{activity.id}/card")
|
|
||||||
|> json_response_and_validate_schema(:ok)
|
|
||||||
|
|
||||||
assert response == %{
|
|
||||||
"type" => "link",
|
|
||||||
"title" => "Pleroma",
|
|
||||||
"description" => "",
|
|
||||||
"image" => nil,
|
|
||||||
"provider_name" => "example.com",
|
|
||||||
"provider_url" => "https://example.com",
|
|
||||||
"url" => "https://example.com/ogp-missing-data",
|
|
||||||
"pleroma" => %{
|
|
||||||
"opengraph" => %{
|
|
||||||
"title" => "Pleroma",
|
|
||||||
"type" => "website",
|
|
||||||
"url" => "https://example.com/ogp-missing-data"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
test "bookmarks" do
|
test "bookmarks" do
|
||||||
bookmarks_uri = "/api/v1/bookmarks"
|
bookmarks_uri = "/api/v1/bookmarks"
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
|
||||||
alias Pleroma.Web.CommonAPI
|
alias Pleroma.Web.CommonAPI
|
||||||
alias Pleroma.Web.MastodonAPI.AccountView
|
alias Pleroma.Web.MastodonAPI.AccountView
|
||||||
alias Pleroma.Web.MastodonAPI.StatusView
|
alias Pleroma.Web.MastodonAPI.StatusView
|
||||||
|
alias Pleroma.Web.RichMedia.Card
|
||||||
|
|
||||||
require Bitwise
|
require Bitwise
|
||||||
|
|
||||||
|
@ -732,56 +733,55 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
|
||||||
|
|
||||||
describe "rich media cards" do
|
describe "rich media cards" do
|
||||||
test "a rich media card without a site name renders correctly" do
|
test "a rich media card without a site name renders correctly" do
|
||||||
page_url = "http://example.com"
|
page_url = "https://example.com"
|
||||||
|
|
||||||
card = %{
|
{:ok, card} =
|
||||||
url: page_url,
|
Card.create(page_url, %{image: page_url <> "/example.jpg", title: "Example website"})
|
||||||
image: page_url <> "/example.jpg",
|
|
||||||
title: "Example website"
|
|
||||||
}
|
|
||||||
|
|
||||||
%{provider_name: "example.com"} =
|
%{provider_name: "example.com"} = StatusView.render("card.json", card)
|
||||||
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "a rich media card without a site name or image renders correctly" do
|
test "a rich media card without a site name or image renders correctly" do
|
||||||
page_url = "http://example.com"
|
page_url = "https://example.com"
|
||||||
|
|
||||||
card = %{
|
fields = %{
|
||||||
url: page_url,
|
"url" => page_url,
|
||||||
title: "Example website"
|
"title" => "Example website"
|
||||||
}
|
}
|
||||||
|
|
||||||
%{provider_name: "example.com"} =
|
{:ok, card} = Card.create(page_url, fields)
|
||||||
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
|
|
||||||
|
%{provider_name: "example.com"} = StatusView.render("card.json", card)
|
||||||
end
|
end
|
||||||
|
|
||||||
test "a rich media card without an image renders correctly" do
|
test "a rich media card without an image renders correctly" do
|
||||||
page_url = "http://example.com"
|
page_url = "https://example.com"
|
||||||
|
|
||||||
card = %{
|
fields = %{
|
||||||
url: page_url,
|
"url" => page_url,
|
||||||
site_name: "Example site name",
|
"site_name" => "Example site name",
|
||||||
title: "Example website"
|
"title" => "Example website"
|
||||||
}
|
}
|
||||||
|
|
||||||
%{provider_name: "example.com"} =
|
{:ok, card} = Card.create(page_url, fields)
|
||||||
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
|
|
||||||
|
%{provider_name: "example.com"} = StatusView.render("card.json", card)
|
||||||
end
|
end
|
||||||
|
|
||||||
test "a rich media card with all relevant data renders correctly" do
|
test "a rich media card with all relevant data renders correctly" do
|
||||||
page_url = "http://example.com"
|
page_url = "https://example.com"
|
||||||
|
|
||||||
card = %{
|
fields = %{
|
||||||
url: page_url,
|
"url" => page_url,
|
||||||
site_name: "Example site name",
|
"site_name" => "Example site name",
|
||||||
title: "Example website",
|
"title" => "Example website",
|
||||||
image: page_url <> "/example.jpg",
|
"image" => page_url <> "/example.jpg",
|
||||||
description: "Example description"
|
"description" => "Example description"
|
||||||
}
|
}
|
||||||
|
|
||||||
%{provider_name: "example.com"} =
|
{:ok, card} = Card.create(page_url, fields)
|
||||||
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
|
|
||||||
|
%{provider_name: "example.com"} = StatusView.render("card.json", card)
|
||||||
end
|
end
|
||||||
|
|
||||||
test "a rich media card has all media proxied" do
|
test "a rich media card has all media proxied" do
|
||||||
|
@ -791,25 +791,25 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
|
||||||
ConfigMock
|
ConfigMock
|
||||||
|> stub_with(Pleroma.Test.StaticConfig)
|
|> stub_with(Pleroma.Test.StaticConfig)
|
||||||
|
|
||||||
page_url = "http://example.com"
|
page_url = "https://example.com"
|
||||||
|
|
||||||
card = %{
|
fields = %{
|
||||||
url: page_url,
|
"url" => page_url,
|
||||||
site_name: "Example site name",
|
"site_name" => "Example site name",
|
||||||
title: "Example website",
|
"title" => "Example website",
|
||||||
image: page_url <> "/example.jpg",
|
"image" => page_url <> "/example.jpg",
|
||||||
audio: page_url <> "/example.ogg",
|
"audio" => page_url <> "/example.ogg",
|
||||||
video: page_url <> "/example.mp4",
|
"video" => page_url <> "/example.mp4",
|
||||||
description: "Example description"
|
"description" => "Example description"
|
||||||
}
|
}
|
||||||
|
|
||||||
strcard = for {k, v} <- card, into: %{}, do: {to_string(k), v}
|
{:ok, card} = Card.create(page_url, fields)
|
||||||
|
|
||||||
%{
|
%{
|
||||||
provider_name: "example.com",
|
provider_name: "example.com",
|
||||||
image: image,
|
image: image,
|
||||||
pleroma: %{opengraph: og}
|
pleroma: %{opengraph: og}
|
||||||
} = StatusView.render("card.json", %{page_url: page_url, rich_media: strcard})
|
} = StatusView.render("card.json", card)
|
||||||
|
|
||||||
assert String.match?(image, ~r/\/proxy\//)
|
assert String.match?(image, ~r/\/proxy\//)
|
||||||
assert String.match?(og["image"], ~r/\/proxy\//)
|
assert String.match?(og["image"], ~r/\/proxy\//)
|
||||||
|
|
|
@ -9,7 +9,6 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
|
||||||
alias Pleroma.Chat
|
alias Pleroma.Chat
|
||||||
alias Pleroma.Chat.MessageReference
|
alias Pleroma.Chat.MessageReference
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.StaticStubbedConfigMock
|
|
||||||
alias Pleroma.UnstubbedConfigMock, as: ConfigMock
|
alias Pleroma.UnstubbedConfigMock, as: ConfigMock
|
||||||
alias Pleroma.Web.ActivityPub.ActivityPub
|
alias Pleroma.Web.ActivityPub.ActivityPub
|
||||||
alias Pleroma.Web.CommonAPI
|
alias Pleroma.Web.CommonAPI
|
||||||
|
@ -18,6 +17,8 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
|
||||||
import Mox
|
import Mox
|
||||||
import Pleroma.Factory
|
import Pleroma.Factory
|
||||||
|
|
||||||
|
setup do: clear_config([:rich_media, :enabled], true)
|
||||||
|
|
||||||
test "it displays a chat message" do
|
test "it displays a chat message" do
|
||||||
user = insert(:user)
|
user = insert(:user)
|
||||||
recipient = insert(:user)
|
recipient = insert(:user)
|
||||||
|
@ -62,16 +63,7 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
|
||||||
assert match?([%{shortcode: "firefox"}], chat_message[:emojis])
|
assert match?([%{shortcode: "firefox"}], chat_message[:emojis])
|
||||||
assert chat_message[:idempotency_key] == "123"
|
assert chat_message[:idempotency_key] == "123"
|
||||||
|
|
||||||
StaticStubbedConfigMock
|
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
Tesla.Mock.mock_global(fn
|
|
||||||
%{url: "https://example.com/ogp"} ->
|
|
||||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
|
|
||||||
end)
|
|
||||||
|
|
||||||
{:ok, activity} =
|
{:ok, activity} =
|
||||||
CommonAPI.post_chat_message(recipient, user, "gkgkgk https://example.com/ogp",
|
CommonAPI.post_chat_message(recipient, user, "gkgkgk https://example.com/ogp",
|
||||||
|
|
71
test/pleroma/web/rich_media/card_test.exs
Normal file
71
test/pleroma/web/rich_media/card_test.exs
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2024 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Web.RichMedia.CardTest do
  use Pleroma.DataCase, async: true

  alias Pleroma.UnstubbedConfigMock, as: ConfigMock
  alias Pleroma.Web.CommonAPI
  alias Pleroma.Web.RichMedia.Card

  import Mox
  import Pleroma.Factory
  import Tesla.Mock

  setup do
    # Route every outgoing HTTP request through the shared request mock.
    mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)

    stub_with(ConfigMock, Pleroma.Test.StaticConfig)

    :ok
  end

  setup do: clear_config([:rich_media, :enabled], true)

  test "crawls URL in activity" do
    url = "https://example.com/ogp"
    url_hash = Card.url_to_hash(url)
    author = insert(:user)

    {:ok, activity} =
      CommonAPI.post(author, %{status: "[test](#{url})", content_type: "text/markdown"})

    assert %Card{url_hash: ^url_hash, fields: _} = Card.get_by_activity(activity)
  end

  test "recrawls URLs on status edits/updates" do
    original_url = "https://google.com/"
    updated_url = "https://yahoo.com/"
    original_url_hash = Card.url_to_hash(original_url)
    updated_url_hash = Card.url_to_hash(updated_url)

    author = insert(:user)
    {:ok, activity} = CommonAPI.post(author, %{status: "I like this site #{original_url}"})

    # Force a backfill
    Card.get_by_activity(activity)

    assert %Card{url_hash: ^original_url_hash, fields: _} = Card.get_by_activity(activity)

    {:ok, _} = CommonAPI.update(author, activity, %{status: "I like this site #{updated_url}"})

    # Reload so the lookup below sees the edited status.
    activity = Pleroma.Activity.get_by_id(activity.id)

    # Force a backfill
    Card.get_by_activity(activity)

    assert %Card{url_hash: ^updated_url_hash, fields: _} = Card.get_by_activity(activity)
  end
end
|
|
@ -1,137 +0,0 @@
|
||||||
# Pleroma: A lightweight social networking server
|
|
||||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.HelpersTest do
|
|
||||||
use Pleroma.DataCase, async: false
|
|
||||||
|
|
||||||
alias Pleroma.StaticStubbedConfigMock, as: ConfigMock
|
|
||||||
alias Pleroma.Web.CommonAPI
|
|
||||||
alias Pleroma.Web.RichMedia.Helpers
|
|
||||||
|
|
||||||
import Mox
|
|
||||||
import Pleroma.Factory
|
|
||||||
import Tesla.Mock
|
|
||||||
|
|
||||||
setup do
|
|
||||||
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
|
||||||
|
|
||||||
ConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> false
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|> stub(:get, fn
|
|
||||||
path, default -> Pleroma.Test.StaticConfig.get(path, default)
|
|
||||||
end)
|
|
||||||
|
|
||||||
:ok
|
|
||||||
end
|
|
||||||
|
|
||||||
test "refuses to crawl incomplete URLs" do
|
|
||||||
user = insert(:user)
|
|
||||||
|
|
||||||
{:ok, activity} =
|
|
||||||
CommonAPI.post(user, %{
|
|
||||||
status: "[test](example.com/ogp)",
|
|
||||||
content_type: "text/markdown"
|
|
||||||
})
|
|
||||||
|
|
||||||
ConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "refuses to crawl malformed URLs" do
|
|
||||||
user = insert(:user)
|
|
||||||
|
|
||||||
{:ok, activity} =
|
|
||||||
CommonAPI.post(user, %{
|
|
||||||
status: "[test](example.com[]/ogp)",
|
|
||||||
content_type: "text/markdown"
|
|
||||||
})
|
|
||||||
|
|
||||||
ConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "crawls valid, complete URLs" do
|
|
||||||
user = insert(:user)
|
|
||||||
|
|
||||||
{:ok, activity} =
|
|
||||||
CommonAPI.post(user, %{
|
|
||||||
status: "[test](https://example.com/ogp)",
|
|
||||||
content_type: "text/markdown"
|
|
||||||
})
|
|
||||||
|
|
||||||
ConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
assert %{page_url: "https://example.com/ogp", rich_media: _} =
|
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "recrawls URLs on updates" do
|
|
||||||
original_url = "https://google.com/"
|
|
||||||
updated_url = "https://yahoo.com/"
|
|
||||||
|
|
||||||
Pleroma.StaticStubbedConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
user = insert(:user)
|
|
||||||
{:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})
|
|
||||||
|
|
||||||
assert match?(
|
|
||||||
%{page_url: ^original_url, rich_media: _},
|
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
)
|
|
||||||
|
|
||||||
{:ok, _} = CommonAPI.update(user, activity, %{status: "I like this site #{updated_url}"})
|
|
||||||
|
|
||||||
activity = Pleroma.Activity.get_by_id(activity.id)
|
|
||||||
|
|
||||||
assert match?(
|
|
||||||
%{page_url: ^updated_url, rich_media: _},
|
|
||||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
test "refuses to crawl URLs of private network from posts" do
|
|
||||||
user = insert(:user)
|
|
||||||
|
|
||||||
{:ok, activity} =
|
|
||||||
CommonAPI.post(user, %{status: "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"})
|
|
||||||
|
|
||||||
{:ok, activity2} = CommonAPI.post(user, %{status: "https://10.111.10.1/notice/9kCP7V"})
|
|
||||||
{:ok, activity3} = CommonAPI.post(user, %{status: "https://172.16.32.40/notice/9kCP7V"})
|
|
||||||
{:ok, activity4} = CommonAPI.post(user, %{status: "https://192.168.10.40/notice/9kCP7V"})
|
|
||||||
{:ok, activity5} = CommonAPI.post(user, %{status: "https://pleroma.local/notice/9kCP7V"})
|
|
||||||
|
|
||||||
ConfigMock
|
|
||||||
|> stub(:get, fn
|
|
||||||
[:rich_media, :enabled] -> true
|
|
||||||
path -> Pleroma.Test.StaticConfig.get(path)
|
|
||||||
end)
|
|
||||||
|
|
||||||
assert %{} == Helpers.fetch_data_for_activity(activity)
|
|
||||||
assert %{} == Helpers.fetch_data_for_activity(activity2)
|
|
||||||
assert %{} == Helpers.fetch_data_for_activity(activity3)
|
|
||||||
assert %{} == Helpers.fetch_data_for_activity(activity4)
|
|
||||||
assert %{} == Helpers.fetch_data_for_activity(activity5)
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -3,8 +3,22 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
|
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
|
||||||
# Relies on Cachex, needs to be synchronous
|
use Pleroma.DataCase, async: false
|
||||||
use Pleroma.DataCase
|
use Oban.Testing, repo: Pleroma.Repo
|
||||||
|
|
||||||
|
import Mox
|
||||||
|
|
||||||
|
alias Pleroma.UnstubbedConfigMock, as: ConfigMock
|
||||||
|
alias Pleroma.Web.RichMedia.Card
|
||||||
|
|
||||||
|
setup do
|
||||||
|
ConfigMock
|
||||||
|
|> stub_with(Pleroma.Test.StaticConfig)
|
||||||
|
|
||||||
|
clear_config([:rich_media, :enabled], true)
|
||||||
|
|
||||||
|
:ok
|
||||||
|
end
|
||||||
|
|
||||||
test "s3 signed url is parsed correct for expiration time" do
|
test "s3 signed url is parsed correct for expiration time" do
|
||||||
url = "https://pleroma.social/amz"
|
url = "https://pleroma.social/amz"
|
||||||
|
@ -43,26 +57,29 @@ defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
|
||||||
<meta name="twitter:site" content="Pleroma" />
|
<meta name="twitter:site" content="Pleroma" />
|
||||||
<meta name="twitter:title" content="Pleroma" />
|
<meta name="twitter:title" content="Pleroma" />
|
||||||
<meta name="twitter:description" content="Pleroma" />
|
<meta name="twitter:description" content="Pleroma" />
|
||||||
<meta name="twitter:image" content="#{Map.get(metadata, :image)}" />
|
<meta name="twitter:image" content="#{Map.get(metadata, "image")}" />
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Tesla.Mock.mock(fn
|
Tesla.Mock.mock(fn
|
||||||
%{
|
%{
|
||||||
method: :get,
|
method: :get,
|
||||||
url: "https://pleroma.social/amz"
|
url: ^url
|
||||||
} ->
|
} ->
|
||||||
%Tesla.Env{status: 200, body: body}
|
%Tesla.Env{status: 200, body: body}
|
||||||
|
|
||||||
|
%{method: :head} ->
|
||||||
|
%Tesla.Env{status: 200}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
Cachex.put(:rich_media_cache, url, metadata)
|
Card.get_or_backfill_by_url(url)
|
||||||
|
|
||||||
Pleroma.Web.RichMedia.Parser.set_ttl_based_on_image(metadata, url)
|
assert_enqueued(worker: Pleroma.Workers.RichMediaExpirationWorker, args: %{"url" => url})
|
||||||
|
|
||||||
{:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)
|
[%Oban.Job{scheduled_at: scheduled_at}] = all_enqueued()
|
||||||
|
|
||||||
# as there is delay in setting and pulling the data from cache we ignore 1 second
|
timestamp_dt = Timex.parse!(timestamp, "{ISO:Basic:Z}")
|
||||||
# make it 2 seconds for flakyness
|
|
||||||
assert_in_delta(valid_till * 1000, cache_ttl, 2000)
|
assert DateTime.diff(scheduled_at, timestamp_dt) == valid_till
|
||||||
end
|
end
|
||||||
|
|
||||||
defp construct_s3_url(timestamp, valid_till) do
|
defp construct_s3_url(timestamp, valid_till) do
|
||||||
|
@ -71,11 +88,11 @@ defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrlTest do
|
||||||
|
|
||||||
defp construct_metadata(timestamp, valid_till, url) do
|
defp construct_metadata(timestamp, valid_till, url) do
|
||||||
%{
|
%{
|
||||||
image: construct_s3_url(timestamp, valid_till),
|
"image" => construct_s3_url(timestamp, valid_till),
|
||||||
site: "Pleroma",
|
"site" => "Pleroma",
|
||||||
title: "Pleroma",
|
"title" => "Pleroma",
|
||||||
description: "Pleroma",
|
"description" => "Pleroma",
|
||||||
url: url
|
"url" => url
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
41
test/pleroma/web/rich_media/parser/ttl/opengraph_test.exs
Normal file
41
test/pleroma/web/rich_media/parser/ttl/opengraph_test.exs
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2024 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Web.RichMedia.Parser.TTL.OpengraphTest do
  use Pleroma.DataCase
  use Oban.Testing, repo: Pleroma.Repo

  import Mox

  alias Pleroma.UnstubbedConfigMock, as: ConfigMock
  alias Pleroma.Web.RichMedia.Card

  setup do
    stub_with(ConfigMock, Pleroma.Test.StaticConfig)

    clear_config([:rich_media, :enabled], true)

    :ok
  end

  test "OpenGraph TTL value is honored" do
    url = "https://reddit.com/r/somepost"

    # GET on the page URL serves the reddit fixture; any HEAD request succeeds.
    Tesla.Mock.mock(fn
      %{method: :get, url: ^url} ->
        %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/reddit.html")}

      %{method: :head} ->
        %Tesla.Env{status: 200}
    end)

    Card.get_or_backfill_by_url(url)

    # Backfilling the card is expected to schedule an expiration job for the URL.
    assert_enqueued(worker: Pleroma.Workers.RichMediaExpirationWorker, args: %{"url" => url})
  end
end
|
|
@ -3,7 +3,7 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
defmodule Pleroma.Web.RichMedia.ParserTest do
|
defmodule Pleroma.Web.RichMedia.ParserTest do
|
||||||
use Pleroma.DataCase, async: false
|
use Pleroma.DataCase
|
||||||
|
|
||||||
alias Pleroma.Web.RichMedia.Parser
|
alias Pleroma.Web.RichMedia.Parser
|
||||||
|
|
||||||
|
@ -104,4 +104,27 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
|
||||||
test "does a HEAD request to check if the body is html" do
|
test "does a HEAD request to check if the body is html" do
|
||||||
assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file")
|
assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "refuses to crawl incomplete URLs" do
|
||||||
|
url = "example.com/ogp"
|
||||||
|
assert :error == Parser.parse(url)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "refuses to crawl malformed URLs" do
|
||||||
|
url = "example.com[]/ogp"
|
||||||
|
assert :error == Parser.parse(url)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "refuses to crawl URLs of private network from posts" do
|
||||||
|
[
|
||||||
|
"http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO",
|
||||||
|
"https://10.111.10.1/notice/9kCP7V",
|
||||||
|
"https://172.16.32.40/notice/9kCP7V",
|
||||||
|
"https://192.168.10.40/notice/9kCP7V",
|
||||||
|
"https://pleroma.local/notice/9kCP7V"
|
||||||
|
]
|
||||||
|
|> Enum.each(fn url ->
|
||||||
|
assert :error == Parser.parse(url)
|
||||||
|
end)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue