mirror of
https://git.pleroma.social/pleroma/pleroma.git
synced 2024-09-26 13:30:04 +00:00
Language detection
Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
parent
03d4e7eecc
commit
32994bb9c3
6 changed files with 144 additions and 7 deletions
|
@ -3523,5 +3523,27 @@ config :pleroma, :config_description, [
|
||||||
suggestion: [100_000]
|
suggestion: [100_000]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
%{
|
||||||
|
group: :pleroma,
|
||||||
|
key: Pleroma.Language.LanguageDetector,
|
||||||
|
type: :group,
|
||||||
|
description: "Language detection providers",
|
||||||
|
children: [
|
||||||
|
%{
|
||||||
|
key: :provider,
|
||||||
|
type: :module,
|
||||||
|
suggestions: [
|
||||||
|
Pleroma.Language.LanguageDetector.Fasttext
|
||||||
|
]
|
||||||
|
},
|
||||||
|
%{
|
||||||
|
group: {:subgroup, Pleroma.Language.LanguageDetector.Fasttext},
|
||||||
|
key: :model,
|
||||||
|
label: "fastText language detection model",
|
||||||
|
type: :string,
|
||||||
|
suggestions: ["/usr/share/fasttext/lid.176.bin"]
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -188,7 +188,27 @@ defmodule Pleroma.ApplicationRequirements do
|
||||||
false
|
false
|
||||||
end
|
end
|
||||||
|
|
||||||
if Enum.all?([preview_proxy_commands_status | filter_commands_statuses], & &1) do
|
language_detector_commands_status =
|
||||||
|
if Pleroma.Language.LanguageDetector.missing_dependencies() == [] do
|
||||||
|
true
|
||||||
|
else
|
||||||
|
Logger.error(
|
||||||
|
"The following dependencies required by the currently enabled " <>
|
||||||
|
"language detection provider are not installed: " <>
|
||||||
|
inspect(Pleroma.Language.LanguageDetector.missing_dependencies())
|
||||||
|
)
|
||||||
|
|
||||||
|
false
|
||||||
|
end
|
||||||
|
|
||||||
|
if Enum.all?(
|
||||||
|
[
|
||||||
|
preview_proxy_commands_status,
|
||||||
|
language_detector_commands_status
|
||||||
|
| filter_commands_statuses
|
||||||
|
],
|
||||||
|
& &1
|
||||||
|
) do
|
||||||
:ok
|
:ok
|
||||||
else
|
else
|
||||||
{:error,
|
{:error,
|
||||||
|
|
34
lib/pleroma/language/language_detector.ex
Normal file
34
lib/pleroma/language/language_detector.ex
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Language.LanguageDetector do
  @moduledoc """
  Entry point for language detection.

  The actual work is delegated to the provider module stored in the
  configuration under `[Pleroma.Language.LanguageDetector, :provider]`.
  When no provider is set, detection is a no-op.
  """

  # Texts with fewer whitespace-separated words than this are never sent to the provider.
  @words_threshold 4

  @doc """
  Returns the dependencies the configured provider reports as missing.

  Returns `[]` when no provider is configured, so the result is always a list
  and a `== []` check succeeds on instances that do not use language detection.
  """
  @spec missing_dependencies() :: [String.t()]
  def missing_dependencies do
    provider = get_provider()

    if provider do
      provider.missing_dependencies()
    else
      []
    end
  end

  @doc """
  Tries to detect the language of `text`.

  HTML tags are stripped first. Returns `nil` without calling the provider when
  the stripped text has fewer than #{@words_threshold} words, when no provider
  is set, or when the provider reports that it is not configured. Otherwise
  returns whatever the provider's `detect/1` returns.
  """
  @spec detect(String.t()) :: String.t() | nil
  def detect(text) do
    provider = get_provider()

    {:ok, text} = FastSanitize.strip_tags(text)

    # `trim: true` drops the empty strings produced by leading/trailing
    # whitespace (and by an empty text), which would otherwise count as words.
    word_count = text |> String.split(~r/\s+/, trim: true) |> Enum.count()

    if word_count < @words_threshold or !provider or !provider.configured?() do
      nil
    else
      provider.detect(text)
    end
  end

  # Reads the provider module from the configuration; `nil` when unset.
  defp get_provider do
    Pleroma.Config.get([__MODULE__, :provider])
  end
end
|
47
lib/pleroma/language/language_detector/fasttext.ex
Normal file
47
lib/pleroma/language/language_detector/fasttext.ex
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Language.LanguageDetector.Fasttext do
  @moduledoc """
  Language detection provider that shells out to the `fasttext` command.

  The model file path is read from the configuration under
  `[Pleroma.Language.LanguageDetector.Fasttext, :model]`.
  """

  import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1]

  alias Pleroma.Language.LanguageDetector.Provider

  @behaviour Provider

  @doc """
  Returns `["fasttext"]` when the `fasttext` command is not available, `[]` otherwise.
  """
  @impl Provider
  def missing_dependencies do
    if Pleroma.Utils.command_available?("fasttext") do
      []
    else
      ["fasttext"]
    end
  end

  @doc """
  Returns whether a non-empty model path is configured.
  """
  @impl Provider
  def configured?, do: not_empty_string(get_model())

  @doc """
  Runs `fasttext predict` on `text` and returns the predicted language label.

  The text is written to a temporary file that is removed afterwards, even if
  running the command raises. Returns `nil` when the file cannot be written or
  when the command output does not start with `__label__`.
  """
  @impl Provider
  def detect(text) do
    text_path = Path.join(System.tmp_dir!(), "fasttext-#{Ecto.UUID.generate()}")

    # `fasttext predict` emits one prediction per input line, so the text is
    # collapsed onto a single line to get exactly one label back.
    single_line_text = String.replace(text, ~r/\s+/, " ")

    try do
      with :ok <- File.write(text_path, single_line_text),
           {"__label__" <> language, _exit_status} <-
             System.cmd("fasttext", ["predict", get_model(), text_path]) do
        String.trim(language)
      else
        _ -> nil
      end
    after
      # Best-effort cleanup; the file may not exist if the write failed.
      File.rm(text_path)
    end
  end

  # Reads the configured model path; `nil` when unset.
  defp get_model do
    Pleroma.Config.get([__MODULE__, :model])
  end
end
|
11
lib/pleroma/language/language_detector/provider.ex
Normal file
11
lib/pleroma/language/language_detector/provider.ex
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Language.LanguageDetector.Provider do
  @moduledoc """
  Behaviour that language detection providers implement.
  """

  @doc """
  Returns the names of dependencies the provider considers missing, as strings.
  """
  @callback missing_dependencies() :: [String.t()]

  @doc """
  Returns a boolean telling whether the provider is configured.
  """
  @callback configured?() :: boolean()

  @doc """
  Takes `text` and returns a string result, or `nil`.
  """
  @callback detect(text :: String.t()) :: nil | String.t()
end
|
|
@ -5,6 +5,7 @@
|
||||||
defmodule Pleroma.Web.CommonAPI.ActivityDraft do
|
defmodule Pleroma.Web.CommonAPI.ActivityDraft do
|
||||||
alias Pleroma.Activity
|
alias Pleroma.Activity
|
||||||
alias Pleroma.Conversation.Participation
|
alias Pleroma.Conversation.Participation
|
||||||
|
alias Pleroma.Language.LanguageDetector
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.Web.ActivityPub.Builder
|
alias Pleroma.Web.ActivityPub.Builder
|
||||||
alias Pleroma.Web.ActivityPub.Visibility
|
alias Pleroma.Web.ActivityPub.Visibility
|
||||||
|
@ -241,13 +242,15 @@ defmodule Pleroma.Web.CommonAPI.ActivityDraft do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp language(draft) do
|
defp language(draft) do
|
||||||
language = draft.params[:language]
|
language =
|
||||||
|
with language <- draft.params[:language],
|
||||||
|
true <- good_locale_code?(language) do
|
||||||
|
language
|
||||||
|
else
|
||||||
|
_ -> LanguageDetector.detect(draft.full_payload)
|
||||||
|
end
|
||||||
|
|
||||||
if good_locale_code?(language) do
|
%__MODULE__{draft | language: language}
|
||||||
%__MODULE__{draft | language: language}
|
|
||||||
else
|
|
||||||
draft
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp object(draft) do
|
defp object(draft) do
|
||||||
|
|
Loading…
Reference in a new issue