diff --git a/config/description.exs b/config/description.exs index 6c13bde31..f317c4e34 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3523,5 +3523,27 @@ config :pleroma, :config_description, [ suggestion: [100_000] } ] + }, + %{ + group: :pleroma, + key: Pleroma.Language.LanguageDetector, + type: :group, + description: "Language detection providers", + children: [ + %{ + key: :provider, + type: :module, + suggestions: [ + Pleroma.Language.LanguageDetector.Fasttext + ] + }, + %{ + group: {:subgroup, Pleroma.Language.LanguageDetector.Fasttext}, + key: :model, + label: "fastText language detection model", + type: :string, + suggestions: ["/usr/share/fasttext/lid.176.bin"] + } + ] } ] diff --git a/lib/pleroma/application_requirements.ex b/lib/pleroma/application_requirements.ex index 819245481..ff22f835b 100644 --- a/lib/pleroma/application_requirements.ex +++ b/lib/pleroma/application_requirements.ex @@ -188,7 +188,27 @@ defmodule Pleroma.ApplicationRequirements do false end - if Enum.all?([preview_proxy_commands_status | filter_commands_statuses], & &1) do + language_detector_commands_status = + if Pleroma.Language.LanguageDetector.missing_dependencies() == [] do + true + else + Logger.error( + "The following dependencies required by the currently enabled " <> + "language detection provider are not installed: " <> + inspect(Pleroma.Language.LanguageDetector.missing_dependencies()) + ) + + false + end + + if Enum.all?( + [ + preview_proxy_commands_status, + language_detector_commands_status + | filter_commands_statuses + ], + & &1 + ) do :ok else {:error, diff --git a/lib/pleroma/language/language_detector.ex b/lib/pleroma/language/language_detector.ex new file mode 100644 index 000000000..3901a8b90 --- /dev/null +++ b/lib/pleroma/language/language_detector.ex @@ -0,0 +1,34 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Language.LanguageDetector do + @words_threshold 4 + + def missing_dependencies do + provider = get_provider() + + if provider do + provider.missing_dependencies() + else + nil + end + end + + def detect(text) do + provider = get_provider() + + {:ok, text} = text |> FastSanitize.strip_tags() + word_count = text |> String.split(~r/\s+/) |> Enum.count() + + if word_count < @words_threshold or !provider or !provider.configured? do + nil + else + provider.detect(text) + end + end + + defp get_provider() do + Pleroma.Config.get([__MODULE__, :provider]) + end +end diff --git a/lib/pleroma/language/language_detector/fasttext.ex b/lib/pleroma/language/language_detector/fasttext.ex new file mode 100644 index 000000000..d479d2125 --- /dev/null +++ b/lib/pleroma/language/language_detector/fasttext.ex @@ -0,0 +1,47 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Language.LanguageDetector.Fasttext do + import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1] + + alias Pleroma.Language.LanguageDetector.Provider + + @behaviour Provider + + @impl Provider + def missing_dependencies do + if Pleroma.Utils.command_available?("fasttext") do + [] + else + ["fasttext"] + end + end + + @impl Provider + def configured?, do: not_empty_string(get_model()) + + @impl Provider + def detect(text) do + text_path = Path.join(System.tmp_dir!(), "fasttext-#{Ecto.UUID.generate()}") + + File.write(text_path, text) + + detected_language = + case System.cmd("fasttext", ["predict", get_model(), text_path]) do + {"__label__" <> language, _} -> + language |> String.trim() + + _ -> + nil + end + + File.rm(text_path) + + detected_language + end + + defp get_model do + Pleroma.Config.get([__MODULE__, :model]) + end +end diff --git a/lib/pleroma/language/language_detector/provider.ex b/lib/pleroma/language/language_detector/provider.ex new file mode 100644 index 000000000..08e7c8eef --- /dev/null +++ b/lib/pleroma/language/language_detector/provider.ex @@ -0,0 +1,11 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Language.LanguageDetector.Provider do + @callback missing_dependencies() :: [String.t()] + + @callback configured?() :: boolean() + + @callback detect(text :: String.t()) :: String.t() | nil +end diff --git a/lib/pleroma/web/common_api/activity_draft.ex b/lib/pleroma/web/common_api/activity_draft.ex index 4b7d28f5c..7728c6bcb 100644 --- a/lib/pleroma/web/common_api/activity_draft.ex +++ b/lib/pleroma/web/common_api/activity_draft.ex @@ -5,6 +5,7 @@ defmodule Pleroma.Web.CommonAPI.ActivityDraft do alias Pleroma.Activity alias Pleroma.Conversation.Participation + alias Pleroma.Language.LanguageDetector alias Pleroma.Object alias Pleroma.Web.ActivityPub.Builder alias Pleroma.Web.ActivityPub.Visibility @@ -241,13 +242,15 @@ defmodule Pleroma.Web.CommonAPI.ActivityDraft do end defp language(draft) do - language = draft.params[:language] + language = + with language <- draft.params[:language], + true <- good_locale_code?(language) do + language + else + _ -> LanguageDetector.detect(draft.full_payload) + end - if good_locale_code?(language) do - %__MODULE__{draft | language: language} - else - draft - end + %__MODULE__{draft | language: language} end defp object(draft) do