From 02fc4147ce745325ff25146a8085a915a5d3cacd Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Wed, 16 Dec 2020 16:20:03 +0100 Subject: [PATCH] [mod] dictzone, translated, currency_convert: use engine_type online_currency and online_dictionnary --- docs/dev/engine_overview.rst | 66 ++++++++++++++++++++++--------- searx/engines/currency_convert.py | 42 +------------------- searx/engines/dictzone.py | 24 +++-------- searx/engines/translated.py | 26 ++---------- 4 files changed, 59 insertions(+), 99 deletions(-) diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 12e62eb61..0d2c00e87 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -37,15 +37,16 @@ settings. However, the standard way is the following: engine file ----------- -======================= =========== =========================================== +======================= =========== ======================================================== argument type information -======================= =========== =========================================== +======================= =========== ======================================================== categories list pages, in which the engine is working paging boolean support multible pages language_support boolean support language choosing time_range_support boolean support search time range -engine_type str ``online`` by default, ``offline`` -======================= =========== =========================================== +engine_type str ``online`` by default, other possible values are + ``offline``, ``online_dictionnary``, ``online_currency`` +======================= =========== ======================================================== .. _engine settings: @@ -111,22 +112,49 @@ passed arguments These arguments can be used to construct the search query. Furthermore, parameters with default value can be redefined for special purposes. 
+If the ``engine_type`` is ``online``: + +====================== ============== ======================================================================== +argument type default-value, information +====================== ============== ======================================================================== +url str ``''`` +method str ``'GET'`` +headers set ``{}`` +data set ``{}`` +cookies set ``{}`` +verify bool ``True`` +headers.User-Agent str a random User-Agent +category str current category, like ``'general'`` +safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict) +time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year`` +pageno int current pagenumber +language str specific language code like ``'en_US'``, or ``'all'`` if unspecified +====================== ============== ======================================================================== + + +If the ``engine_type`` is ``online_dictionnary``, in addition to the ``online`` arguments: + ====================== ============ ======================================================================== argument type default-value, information ====================== ============ ======================================================================== -url string ``''`` -method string ``'GET'`` -headers set ``{}`` -data set ``{}`` -cookies set ``{}`` -verify boolean ``True`` -headers.User-Agent string a random User-Agent -category string current category, like ``'general'`` -started datetime current date-time -pageno int current pagenumber -language string specific language code like ``'en_US'``, or ``'all'`` if unspecified +from_lang str specific language code like ``'en_US'`` +to_lang str specific language code like ``'en_US'`` +query str the text query without the languages ====================== ============ ======================================================================== +If the ``engine_type`` is ``online_currency``, in addition to the ``online`` arguments: + 
+====================== ============ ======================================================================== +argument type default-value, information +====================== ============ ======================================================================== +amount float the amount to convert +from str ISO 4217 code +to str ISO 4217 code +from_name str currency name +to_name str currency name +====================== ============ ======================================================================== + + parsed arguments ---------------- @@ -137,12 +165,12 @@ request: =================== =========== ========================================================================== argument type information =================== =========== ========================================================================== -url string requested url -method string HTTP request method +url str requested url +method str HTTP request method headers set HTTP header information -data set HTTP data information (parsed if ``method != 'GET'``) +data set HTTP data information cookies set HTTP cookies -verify boolean Performing SSL-Validity check +verify bool Performing SSL-Validity check max_redirects int maximum redirects, hard limit soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300 diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 87e21d0af..7098dd3c7 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,54 +1,16 @@ import json -import re -import unicodedata -from searx.data import CURRENCIES # NOQA +engine_type = 'online_currency' categories = [] url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' weight = 100 -parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) 
([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) https_support = True -def normalize_name(name): - name = name.lower().replace('-', ' ').rstrip('s') - name = re.sub(' +', ' ', name) - return unicodedata.normalize('NFKD', name).lower() - - -def name_to_iso4217(name): - global CURRENCIES - - name = normalize_name(name) - currency = CURRENCIES['names'].get(name, [name]) - return currency[0] - - -def iso4217_to_name(iso4217, language): - global CURRENCIES - - return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) - - def request(query, params): - m = parser_re.match(query) - if not m: - # wrong query - return params - amount, from_currency, to_currency = m.groups() - amount = float(amount) - from_currency = name_to_iso4217(from_currency.strip()) - to_currency = name_to_iso4217(to_currency.strip()) - - params['url'] = url.format(from_currency, to_currency) - params['amount'] = amount - params['from'] = from_currency - params['to'] = to_currency - params['from_name'] = iso4217_to_name(from_currency, 'en') - params['to_name'] = iso4217_to_name(to_currency, 'en') - + params['url'] = url.format(params['from'], params['to']) return params diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 727eb6598..5e6f688a1 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -9,36 +9,24 @@ @parse url, title, content """ -import re from urllib.parse import urljoin from lxml import html -from searx.utils import is_valid_lang, eval_xpath +from searx.utils import eval_xpath + +engine_type = 'online_dictionnary' categories = ['general'] url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 -parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' https_support = True def request(query, params): - m = parser_re.match(query) - if not m: - return params - - from_lang, to_lang, query = m.groups() - - from_lang = is_valid_lang(from_lang) - to_lang = is_valid_lang(to_lang) - - if 
not from_lang or not to_lang: - return params - - params['url'] = url.format(from_lang=from_lang[2], - to_lang=to_lang[2], - query=query) + params['url'] = url.format(from_lang=params['from_lang'][2], + to_lang=params['to_lang'][2], + query=params['query']) return params diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 75b8b5f42..2706e3617 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -8,44 +8,26 @@ @stable yes @parse url, title, content """ -import re -from searx.utils import is_valid_lang +engine_type = 'online_dictionnary' categories = ['general'] url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 https_support = True -parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) api_key = '' def request(query, params): - m = parser_re.match(query) - if not m: - return params - - from_lang, to_lang, query = m.groups() - - from_lang = is_valid_lang(from_lang) - to_lang = is_valid_lang(to_lang) - - if not from_lang or not to_lang: - return params - if api_key: key_form = '&key=' + api_key else: key_form = '' - params['url'] = url.format(from_lang=from_lang[1], - to_lang=to_lang[1], - query=query, + params['url'] = url.format(from_lang=params['from_lang'][1], + to_lang=params['to_lang'][1], + query=params['query'], key=key_form) - params['query'] = query - params['from_lang'] = from_lang - params['to_lang'] = to_lang - return params