diff --git a/lib/pleroma/language/language_detector.ex b/lib/pleroma/language/language_detector.ex index b19eb4571..0be69d220 100644 --- a/lib/pleroma/language/language_detector.ex +++ b/lib/pleroma/language/language_detector.ex @@ -15,10 +15,18 @@ defmodule Pleroma.Language.LanguageDetector do end end + # Strip tags from text, etc. + defp prepare_text(text) do + text + |> Floki.parse_fragment!() + |> Floki.filter_out(".h-card, .mention, .hashtag, .u-url, .quote-inline, .recipients-inline, code, pre") + |> Floki.text() + end + def detect(text) do provider = get_provider() - {:ok, text} = text |> FastSanitize.strip_tags() + text = prepare_text(text) word_count = text |> String.split(~r/\s+/) |> Enum.count() if word_count < @words_threshold or !provider or !provider.configured? do