Fix atom leak in Rich Media Parser

This commit is contained in:
Egor Kislitsyn 2020-06-09 21:49:24 +04:00 committed by rinpatch
parent e2793744c5
commit 520367d6fd
7 changed files with 91 additions and 102 deletions

View file

@ -377,8 +377,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
page_url_data = URI.parse(page_url) page_url_data = URI.parse(page_url)
page_url_data = page_url_data =
if rich_media[:url] != nil do if is_binary(rich_media["url"]) do
URI.merge(page_url_data, URI.parse(rich_media[:url])) URI.merge(page_url_data, URI.parse(rich_media["url"]))
else else
page_url_data page_url_data
end end
@ -386,11 +386,9 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
page_url = page_url_data |> to_string page_url = page_url_data |> to_string
image_url = image_url =
if rich_media[:image] != nil do if is_binary(rich_media["image"]) do
URI.merge(page_url_data, URI.parse(rich_media[:image])) URI.merge(page_url_data, URI.parse(rich_media["image"]))
|> to_string |> to_string
else
nil
end end
%{ %{
@ -399,8 +397,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
provider_url: page_url_data.scheme <> "://" <> page_url_data.host, provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
url: page_url, url: page_url,
image: image_url |> MediaProxy.url(), image: image_url |> MediaProxy.url(),
title: rich_media[:title] || "", title: rich_media["title"] || "",
description: rich_media[:description] || "", description: rich_media["description"] || "",
pleroma: %{ pleroma: %{
opengraph: rich_media opengraph: rich_media
} }

View file

@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser
@spec validate_page_url(any()) :: :ok | :error @spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
@ -18,8 +18,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|> parse_uri(page_url) |> parse_uri(page_url)
end end
defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority}) defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
when scheme == "https" and not is_nil(authority) do when is_binary(authority) do
cond do cond do
host in Config.get([:rich_media, :ignore_hosts], []) -> host in Config.get([:rich_media, :ignore_hosts], []) ->
:error :error

View file

@ -91,7 +91,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
html html
|> parse_html() |> parse_html()
|> maybe_parse() |> maybe_parse()
|> Map.put(:url, url) |> Map.put("url", url)
|> clean_parsed_data() |> clean_parsed_data()
|> check_parsed_data() |> check_parsed_data()
rescue rescue
@ -111,8 +111,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
end) end)
end end
defp check_parsed_data(%{title: title} = data) defp check_parsed_data(%{"title" => title} = data)
when is_binary(title) and byte_size(title) > 0 do when is_binary(title) and title != "" do
{:ok, data} {:ok, data}
end end
@ -123,11 +123,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp clean_parsed_data(data) do defp clean_parsed_data(data) do
data data
|> Enum.reject(fn {key, val} -> |> Enum.reject(fn {key, val} ->
with {:ok, _} <- Jason.encode(%{key => val}) do not match?({:ok, _}, Jason.encode(%{key => val}))
false
else
_ -> true
end
end) end)
|> Map.new() |> Map.new()
end end

View file

@ -29,19 +29,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
{_tag, attributes, _children} = html_node {_tag, attributes, _children} = html_node
data = data =
Enum.into(attributes, %{}, fn {name, value} -> Map.new(attributes, fn {name, value} ->
{name, String.trim_leading(value, "#{prefix}:")} {name, String.trim_leading(value, "#{prefix}:")}
end) end)
%{String.to_atom(data[key_name]) => data[value_name]} %{data[key_name] => data[value_name]}
end end
defp maybe_put_title(%{title: _} = meta, _), do: meta defp maybe_put_title(%{"title" => _} = meta, _), do: meta
defp maybe_put_title(meta, html) when meta != %{} do defp maybe_put_title(meta, html) when meta != %{} do
case get_page_title(html) do case get_page_title(html) do
"" -> meta "" -> meta
title -> Map.put_new(meta, :title, title) title -> Map.put_new(meta, "title", title)
end end
end end

View file

@ -5,7 +5,7 @@
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
def parse(html, _data) do def parse(html, _data) do
with elements = [_ | _] <- get_discovery_data(html), with elements = [_ | _] <- get_discovery_data(html),
{:ok, oembed_url} <- get_oembed_url(elements), oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do {:ok, oembed_data} <- get_oembed_data(oembed_url) do
{:ok, oembed_data} {:ok, oembed_data}
else else
@ -17,19 +17,13 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
html |> Floki.find("link[type='application/json+oembed']") html |> Floki.find("link[type='application/json+oembed']")
end end
defp get_oembed_url(nodes) do defp get_oembed_url([{"link", attributes, _children} | _]) do
{"link", attributes, _children} = nodes |> hd() Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
{:ok, Enum.into(attributes, %{})["href"]}
end end
defp get_oembed_data(url) do defp get_oembed_data(url) do
{:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media]) with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
Jason.decode(json)
{:ok, data} = Jason.decode(json) end
data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
{:ok, data}
end end
end end

View file

@ -60,19 +60,19 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
test "doesn't just add a title" do test "doesn't just add a title" do
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") == assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
{:error, {:error,
"Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"} "Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"}
end end
test "parses ogp" do test "parses ogp" do
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") == assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
{:ok, {:ok,
%{ %{
image: "http://ia.media-imdb.com/images/rock.jpg", "image" => "http://ia.media-imdb.com/images/rock.jpg",
title: "The Rock", "title" => "The Rock",
description: "description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
type: "video.movie", "type" => "video.movie",
url: "http://example.com/ogp" "url" => "http://example.com/ogp"
}} }}
end end
@ -80,12 +80,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") == assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
{:ok, {:ok,
%{ %{
image: "http://ia.media-imdb.com/images/rock.jpg", "image" => "http://ia.media-imdb.com/images/rock.jpg",
title: "The Rock (1996)", "title" => "The Rock (1996)",
description: "description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
type: "video.movie", "type" => "video.movie",
url: "http://example.com/ogp-missing-title" "url" => "http://example.com/ogp-missing-title"
}} }}
end end
@ -93,12 +93,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") == assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
{:ok, {:ok,
%{ %{
card: "summary", "card" => "summary",
site: "@flickr", "site" => "@flickr",
image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
title: "Small Island Developing States Photo Submission", "title" => "Small Island Developing States Photo Submission",
description: "View the album on Flickr.", "description" => "View the album on Flickr.",
url: "http://example.com/twitter-card" "url" => "http://example.com/twitter-card"
}} }}
end end
@ -106,27 +106,28 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") == assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
{:ok, {:ok,
%{ %{
author_name: "bees", "author_name" => "bees",
author_url: "https://www.flickr.com/photos/bees/", "author_url" => "https://www.flickr.com/photos/bees/",
cache_age: 3600, "cache_age" => 3600,
flickr_type: "photo", "flickr_type" => "photo",
height: "768", "height" => "768",
html: "html" =>
"<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>", "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
license: "All Rights Reserved", "license" => "All Rights Reserved",
license_id: 0, "license_id" => 0,
provider_name: "Flickr", "provider_name" => "Flickr",
provider_url: "https://www.flickr.com/", "provider_url" => "https://www.flickr.com/",
thumbnail_height: 150, "thumbnail_height" => 150,
thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg", "thumbnail_url" =>
thumbnail_width: 150, "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
title: "Bacon Lollys", "thumbnail_width" => 150,
type: "photo", "title" => "Bacon Lollys",
url: "http://example.com/oembed", "type" => "photo",
version: "1.0", "url" => "http://example.com/oembed",
web_page: "https://www.flickr.com/photos/bees/2362225867/", "version" => "1.0",
web_page_short_url: "https://flic.kr/p/4AK2sc", "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
width: "1024" "web_page_short_url" => "https://flic.kr/p/4AK2sc",
"width" => "1024"
}} }}
end end

View file

@ -19,11 +19,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
assert TwitterCard.parse(html, %{}) == assert TwitterCard.parse(html, %{}) ==
{:ok, {:ok,
%{ %{
"app:id:googleplay": "com.nytimes.android", "app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay": "NYTimes", "app:name:googleplay" => "NYTimes",
"app:url:googleplay": "nytimes://reader/id/100000006583622", "app:url:googleplay" => "nytimes://reader/id/100000006583622",
site: nil, "site" => nil,
title: "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times" "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
}} }}
end end
@ -36,15 +36,15 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
assert TwitterCard.parse(html, %{}) == assert TwitterCard.parse(html, %{}) ==
{:ok, {:ok,
%{ %{
card: "summary_large_image", "card" => "summary_large_image",
description: "description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
image: "image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"image:alt": "", "image:alt" => "",
title: "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
url: "url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}} }}
end end
@ -57,19 +57,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
assert TwitterCard.parse(html, %{}) == assert TwitterCard.parse(html, %{}) ==
{:ok, {:ok,
%{ %{
"app:id:googleplay": "com.nytimes.android", "app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay": "NYTimes", "app:name:googleplay" => "NYTimes",
"app:url:googleplay": "nytimes://reader/id/100000006583622", "app:url:googleplay" => "nytimes://reader/id/100000006583622",
card: "summary_large_image", "card" => "summary_large_image",
description: "description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
image: "image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"image:alt": "", "image:alt" => "",
site: nil, "site" => nil,
title: "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
url: "url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}} }}
end end
@ -86,11 +86,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
assert TwitterCard.parse(html, %{}) == assert TwitterCard.parse(html, %{}) ==
{:ok, {:ok,
%{ %{
site: "@atlasobscura", "site" => "@atlasobscura",
title: "title" =>
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura", "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
card: "summary_large_image", "card" => "summary_large_image",
image: image_path "image" => image_path
}} }}
end end
@ -102,12 +102,12 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
assert TwitterCard.parse(html, %{}) == assert TwitterCard.parse(html, %{}) ==
{:ok, {:ok,
%{ %{
site: nil, "site" => nil,
title: "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
"app:id:googleplay": "com.nytimes.android", "app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay": "NYTimes", "app:name:googleplay" => "NYTimes",
"app:url:googleplay": "nytimes://reader/id/100000006583622" "app:url:googleplay" => "nytimes://reader/id/100000006583622"
}} }}
end end
end end