From 7edd75ff935d96cd8dcb03a6df1e9eed06ac56f3 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Tue, 15 Oct 2024 15:39:14 +0200 Subject: [PATCH] [refactor] translation engines: common interface --- searx/engines/deepl.py | 14 +++--- searx/engines/dictzone.py | 21 +++++---- searx/engines/libretranslate.py | 13 +++--- searx/engines/lingva.py | 43 ++++++++++++------- searx/engines/mozhi.py | 24 +++++------ searx/engines/translated.py | 28 ++++++------ .../templates/simple/answerers/translate.html | 38 ++++++++++++++++ searx/templates/simple/results.html | 22 ++++++---- 8 files changed, 125 insertions(+), 78 deletions(-) create mode 100644 searx/templates/simple/answerers/translate.html diff --git a/searx/engines/deepl.py b/searx/engines/deepl.py index ce2109138..484f56ec4 100644 --- a/searx/engines/deepl.py +++ b/searx/engines/deepl.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Deepl translation engine""" -from json import loads - about = { "website": 'https://deepl.com', "wikidata_id": 'Q43968444', @@ -41,16 +39,14 @@ def request(_query, params): def response(resp): results = [] - result = loads(resp.text) - translations = result['translations'] - infobox = "
" + result = resp.json() - for translation in translations: - infobox += f"
{translation['text']}
" + if not result.get('translations'): + return results - infobox += "
" + translations = [{'text': translation['text']} for translation in result['translations']] - results.append({'answer': infobox}) + results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations}) return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 4a9c4811e..acd682911 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -3,7 +3,6 @@ Dictzone """ -from urllib.parse import urljoin from lxml import html from searx.utils import eval_xpath @@ -33,11 +32,10 @@ def request(query, params): # pylint: disable=unused-argument def response(resp): - results = [] - dom = html.fromstring(resp.text) - for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]): + translations = [] + for result in eval_xpath(dom, results_xpath)[1:]: try: from_result, to_results_raw = eval_xpath(result, './td') except: # pylint: disable=bare-except @@ -49,12 +47,17 @@ def response(resp): if t.strip(): to_results.append(to_result.text_content()) - results.append( + translations.append( { - 'url': urljoin(str(resp.url), '?%d' % k), - 'title': from_result.text_content(), - 'content': '; '.join(to_results), + 'text': f"{from_result.text_content()} - {'; '.join(to_results)}", } ) - return results + if translations: + result = { + 'answer': translations[0]['text'], + 'translations': translations, + 'answer_type': 'translations', + } + + return [result] diff --git a/searx/engines/libretranslate.py b/searx/engines/libretranslate.py index a62d88d8a..d9b9cf2f9 100644 --- a/searx/engines/libretranslate.py +++ b/searx/engines/libretranslate.py @@ -24,7 +24,7 @@ def request(_query, params): request_url = random.choice(base_url) if isinstance(base_url, list) else base_url params['url'] = f"{request_url}/translate" - args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']} + args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': 
params['query'], 'alternatives': 3} if api_key: args['api_key'] = api_key params['data'] = dumps(args) @@ -42,12 +42,11 @@ def response(resp): json_resp = resp.json() text = json_resp.get('translatedText') - from_lang = resp.search_params["from_lang"][1] - to_lang = resp.search_params["to_lang"][1] - query = resp.search_params["query"] - req_url = resp.search_params["req_url"] + if not text: + return results - if text: - results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"}) + translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])] + + results.append({'answer': text, 'answer_type': 'translations', 'translations': translations}) return results diff --git a/searx/engines/lingva.py b/searx/engines/lingva.py index bf35deed4..ecebe4587 100644 --- a/searx/engines/lingva.py +++ b/searx/engines/lingva.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Lingva (alternative Google Translate frontend)""" -from json import loads - about = { "website": 'https://lingva.ml', "wikidata_id": None, @@ -29,7 +27,7 @@ def request(_query, params): def response(resp): results = [] - result = loads(resp.text) + result = resp.json() info = result["info"] from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1]) @@ -38,27 +36,40 @@ def response(resp): if 'definitions' in info: # pylint: disable=too-many-nested-blocks for definition in info['definitions']: - if 'list' in definition: - for item in definition['list']: - if 'synonyms' in item: - for synonym in item['synonyms']: - results.append({"suggestion": from_to_prefix + synonym}) + for item in definition.get('list', []): + for synonym in item.get('synonyms', []): + results.append({"suggestion": from_to_prefix + synonym}) - infobox = "" + data = [] + + for definition in info['definitions']: + for translation in definition['list']: + data.append( + { + 'text': result['translation'], + 
'definitions': [translation['definition']] if translation['definition'] else [], + 'examples': [translation['example']] if translation['example'] else [], + 'synonyms': translation['synonyms'], + } + ) for translation in info["extraTranslations"]: for word in translation["list"]: - infobox += f"
{word['word']}
" + data.append( + { + 'text': word['word'], + 'definitions': word['meanings'], + } + ) - for meaning in word["meanings"]: - infobox += f"
{meaning}
" - - infobox += "
" + if not data and result['translation']: + data.append({'text': result['translation']}) results.append( { - 'infobox': result["translation"], - 'content': infobox, + 'answer': data[0]['text'], + 'answer_type': 'translations', + 'translations': data, } ) diff --git a/searx/engines/mozhi.py b/searx/engines/mozhi.py index 63c90084e..a36bfbec8 100644 --- a/searx/engines/mozhi.py +++ b/searx/engines/mozhi.py @@ -4,7 +4,6 @@ import random import re from urllib.parse import urlencode -from flask_babel import gettext about = { "website": 'https://codeberg.org/aryak/mozhi', @@ -35,30 +34,27 @@ def request(_query, params): def response(resp): translation = resp.json() - infobox = "" + data = {'text': translation['translated-text'], 'definitions': [], 'examples': []} if translation['target_transliteration'] and not re.match( re_transliteration_unsupported, translation['target_transliteration'] ): - infobox = f"{translation['target_transliteration']}" + data['transliteration'] = translation['target_transliteration'] if translation['word_choices']: for word in translation['word_choices']: - infobox += f"
{word['word']}: {word['definition']}
" + if word.get('definition'): + data['definitions'].append(word['definition']) - if word['examples_target']: - for example in word['examples_target']: - infobox += f"
{re.sub(r'<|>', '', example)}
" - infobox += f"
{re.sub(r'<|>', '', example)}
" + for example in word.get('examples_target', []): + data['examples'].append(re.sub(r"<|>", "", example).lstrip('- ')) - infobox += "
" - - if translation['source_synonyms']: - infobox += f"
{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}
" + data['synonyms'] = translation.get('source_synonyms', []) result = { - 'infobox': translation['translated-text'], - 'content': infobox, + 'answer': translation['translated-text'], + 'answer_type': 'translations', + 'translations': [data], } return [result] diff --git a/searx/engines/translated.py b/searx/engines/translated.py index ea8c081dc..190707a95 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -35,18 +35,16 @@ def request(query, params): # pylint: disable=unused-argument def response(resp): - results = [] - results.append( - { - 'url': web_url.format( - from_lang=resp.search_params['from_lang'][2], - to_lang=resp.search_params['to_lang'][2], - query=resp.search_params['query'], - ), - 'title': '[{0}-{1}] {2}'.format( - resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query'] - ), - 'content': resp.json()['responseData']['translatedText'], - } - ) - return results + json_resp = resp.json() + text = json_resp['responseData']['translatedText'] + + alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text] + translations = [{'text': translation} for translation in [text] + alternatives] + + result = { + 'answer': translations[0]['text'], + 'answer_type': 'translations', + 'translations': translations, + } + + return [result] diff --git a/searx/templates/simple/answerers/translate.html b/searx/templates/simple/answerers/translate.html new file mode 100644 index 000000000..a1c38eecf --- /dev/null +++ b/searx/templates/simple/answerers/translate.html @@ -0,0 +1,38 @@ +
+{% for translation in translations %} + {% if loop.index > 1 %} +
+ {% endif %} +

{{ translation.text }}

+ {% if translation.transliteration %} + {{ translation.transliteration }} + {% endif %} {% if translation.definitions %} +
+
{{ _('Definitions') }}
+
    + {% for definition in translation.definitions %} +
  • {{ definition }}
  • + {% endfor %} +
      +
+ {% endif %} {% if translation.examples %} +
+
{{ _('Examples') }}
+
    + {% for example in translation.examples %} +
  • {{ example }}
  • + {% endfor %} +
+
+ {% endif %} {% if translation.synonyms %} +
+
{{ _('Synonyms') }}
+
    + {% for synonym in translation.synonyms %} +
  • {{ synonym }}
  • + {% endfor %} +
+
+ {% endif %} +{% endfor %} +
diff --git a/searx/templates/simple/results.html b/searx/templates/simple/results.html index fbc41a17a..4567be1c1 100644 --- a/searx/templates/simple/results.html +++ b/searx/templates/simple/results.html @@ -23,14 +23,20 @@