diff --git a/config/description.exs b/config/description.exs
index 6c13bde31..f317c4e34 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -3523,5 +3523,27 @@ config :pleroma, :config_description, [
suggestion: [100_000]
}
]
+ },
+  %{
+    group: :pleroma,
+    key: Pleroma.Language.LanguageDetector,
+    type: :group,
+    description: "Language detection providers",
+    children: [
+      %{
+        key: :provider,
+        type: :module,
+        description: "Module used to detect the language of a post",
+        suggestions: [Pleroma.Language.LanguageDetector.Fasttext]
+      },
+      %{
+        group: {:subgroup, Pleroma.Language.LanguageDetector.Fasttext},
+        key: :model,
+        label: "fastText language detection model",
+        type: :string,
+        description: "Path to the model file passed to `fasttext predict`",
+        suggestions: ["/usr/share/fasttext/lid.176.bin"]
+      }
+    ]
}
]
diff --git a/lib/pleroma/application_requirements.ex b/lib/pleroma/application_requirements.ex
index 819245481..ff22f835b 100644
--- a/lib/pleroma/application_requirements.ex
+++ b/lib/pleroma/application_requirements.ex
@@ -188,7 +188,27 @@ defmodule Pleroma.ApplicationRequirements do
false
end
- if Enum.all?([preview_proxy_commands_status | filter_commands_statuses], & &1) do
+    language_detector_commands_status =
+      case Pleroma.Language.LanguageDetector.missing_dependencies() do
+        # nil means no provider is enabled, so nothing can be missing.
+        missing when missing in [nil, []] -> true
+
+        missing ->
+          Logger.error(
+            "The following dependencies required by the currently enabled " <>
+              "language detection provider are not installed: " <> inspect(missing)
+          )
+          false
+      end
+
+ if Enum.all?(
+ [
+ preview_proxy_commands_status,
+ language_detector_commands_status
+ | filter_commands_statuses
+ ],
+ & &1
+ ) do
:ok
else
{:error,
diff --git a/lib/pleroma/language/language_detector.ex b/lib/pleroma/language/language_detector.ex
new file mode 100644
index 000000000..3901a8b90
--- /dev/null
+++ b/lib/pleroma/language/language_detector.ex
@@ -0,0 +1,58 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Language.LanguageDetector do
+  @moduledoc """
+  Entry point for language detection.
+
+  Delegates to the provider module stored under the `:provider` key of this
+  module's configuration.
+  """
+
+  # Texts with fewer words than this are never handed to the provider.
+  @words_threshold 4
+
+  @doc """
+  Lists the dependencies the configured provider reports as missing.
+
+  Returns `[]` when no provider is configured, so the result can always be
+  compared against the empty list.
+  """
+  @spec missing_dependencies() :: [String.t()]
+  def missing_dependencies do
+    provider = get_provider()
+
+    if provider, do: provider.missing_dependencies(), else: []
+  end
+
+  @doc """
+  Detects the language of `text` once its HTML tags are stripped.
+
+  Returns `nil` when no provider is set, when the provider is not configured,
+  or when the stripped text has fewer than four words. Otherwise returns the
+  result of the provider's `detect/1`.
+  """
+  @spec detect(String.t()) :: String.t() | nil
+  def detect(text) do
+    provider = get_provider()
+
+    # Check the provider first so nothing is sanitized while detection is off.
+    if provider && provider.configured?() do
+      {:ok, stripped} = FastSanitize.strip_tags(text)
+
+      if enough_words?(stripped), do: provider.detect(stripped)
+    end
+  end
+
+  # `trim: true` keeps leading/trailing whitespace from counting as a word.
+  defp enough_words?(text) do
+    word_count = text |> String.split(~r/\s+/, trim: true) |> length()
+
+    word_count >= @words_threshold
+  end
+
+  defp get_provider do
+    Pleroma.Config.get([__MODULE__, :provider])
+  end
+end
diff --git a/lib/pleroma/language/language_detector/fasttext.ex b/lib/pleroma/language/language_detector/fasttext.ex
new file mode 100644
index 000000000..d479d2125
--- /dev/null
+++ b/lib/pleroma/language/language_detector/fasttext.ex
@@ -0,0 +1,52 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Language.LanguageDetector.Fasttext do
+  @moduledoc """
+  Language detection provider backed by the `fasttext` command-line tool.
+
+  The model file passed to `fasttext predict` is read from the `:model` key of
+  this module's configuration.
+  """
+
+  import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1]
+
+  alias Pleroma.Language.LanguageDetector.Provider
+
+  @behaviour Provider
+
+  @impl Provider
+  def missing_dependencies do
+    if Pleroma.Utils.command_available?("fasttext"), do: [], else: ["fasttext"]
+  end
+
+  @impl Provider
+  def configured?, do: not_empty_string(get_model())
+
+  @impl Provider
+  def detect(text) do
+    text_path = Path.join(System.tmp_dir!(), "fasttext-#{Ecto.UUID.generate()}")
+
+    try do
+      # `fasttext predict` labels every input line, so feed it a single line.
+      with :ok <- File.write(text_path, String.replace(text, ~r/\s+/, " ")),
+           {output, 0} <- System.cmd("fasttext", ["predict", get_model(), text_path]) do
+        parse_label(output)
+      else
+        _ -> nil
+      end
+    after
+      # Runs even if writing fails or the command raises, so the file never leaks.
+      File.rm(text_path)
+    end
+  end
+
+  # Returns the language code of the first predicted label, or nil if there is none.
+  defp parse_label("__label__" <> rest), do: rest |> String.split() |> List.first()
+  defp parse_label(_output), do: nil
+
+  defp get_model do
+    Pleroma.Config.get([__MODULE__, :model])
+  end
+end
diff --git a/lib/pleroma/language/language_detector/provider.ex b/lib/pleroma/language/language_detector/provider.ex
new file mode 100644
index 000000000..08e7c8eef
--- /dev/null
+++ b/lib/pleroma/language/language_detector/provider.ex
@@ -0,0 +1,18 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Language.LanguageDetector.Provider do
+  @moduledoc """
+  Behaviour implemented by language detection providers.
+  """
+
+  @doc "Tells whether the provider has the configuration it needs."
+  @callback configured?() :: boolean()
+
+  @doc "Lists the dependencies of the provider that are not available."
+  @callback missing_dependencies() :: [String.t()]
+
+  @doc "Returns the language detected in `text`, or `nil`."
+  @callback detect(text :: String.t()) :: String.t() | nil
+end
diff --git a/lib/pleroma/web/common_api/activity_draft.ex b/lib/pleroma/web/common_api/activity_draft.ex
index 4b7d28f5c..7728c6bcb 100644
--- a/lib/pleroma/web/common_api/activity_draft.ex
+++ b/lib/pleroma/web/common_api/activity_draft.ex
@@ -5,6 +5,7 @@
defmodule Pleroma.Web.CommonAPI.ActivityDraft do
alias Pleroma.Activity
alias Pleroma.Conversation.Participation
+ alias Pleroma.Language.LanguageDetector
alias Pleroma.Object
alias Pleroma.Web.ActivityPub.Builder
alias Pleroma.Web.ActivityPub.Visibility
@@ -241,13 +242,15 @@ defmodule Pleroma.Web.CommonAPI.ActivityDraft do
end
defp language(draft) do
- language = draft.params[:language]
+    # A valid client-supplied locale wins; otherwise one is detected from the payload.
+    requested = draft.params[:language]
+    language =
+      case good_locale_code?(requested) do
+        true -> requested
+        _ -> LanguageDetector.detect(draft.full_payload)
+      end
- if good_locale_code?(language) do
- %__MODULE__{draft | language: language}
- else
- draft
- end
+    %__MODULE__{draft | language: language}
end
defp object(draft) do