diff --git a/searx/engines/mojeek.py b/searx/engines/mojeek.py index 6aaca021b..df2302e8b 100644 --- a/searx/engines/mojeek.py +++ b/searx/engines/mojeek.py @@ -1,12 +1,15 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Mojeek (general, images, news)""" +from typing import TYPE_CHECKING + from datetime import datetime from urllib.parse import urlencode from lxml import html from dateutil.relativedelta import relativedelta from searx.utils import eval_xpath, eval_xpath_list, extract_text +from searx.enginelib.traits import EngineTraits about = { 'website': 'https://mojeek.com', @@ -42,6 +45,18 @@ news_url_xpath = './/h2/a/@href' news_title_xpath = './/h2/a' news_content_xpath = './/p[@class="s"]' +language_param = 'lb' +region_param = 'arc' + +_delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'} + +if TYPE_CHECKING: + import logging + + logger = logging.getLogger() + +traits: EngineTraits + def init(_): if search_type not in ('', 'images', 'news'): @@ -53,13 +68,16 @@ def request(query, params): 'q': query, 'safe': min(params['safesearch'], 1), 'fmt': search_type, + language_param: traits.get_language(params['searxng_locale'], traits.custom['language_all']), + region_param: traits.get_region(params['searxng_locale'], traits.custom['region_all']), } if search_type == '': args['s'] = 10 * (params['pageno'] - 1) if params['time_range'] and search_type != 'images': - args["since"] = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).strftime("%Y%m%d") + kwargs = {_delta_kwargs[params['time_range']]: 1} + args["since"] = (datetime.now() - relativedelta(**kwargs)).strftime("%Y%m%d") # type: ignore logger.debug(args["since"]) params['url'] = f"{base_url}/search?{urlencode(args)}" @@ -94,7 +112,7 @@ def _image_results(dom): 'template': 'images.html', 'url': extract_text(eval_xpath(result, image_url_xpath)), 'title': extract_text(eval_xpath(result, image_title_xpath)), - 'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)), + 'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)), # type: ignore 'content': '', } ) @@ -130,3 +148,31 @@ def response(resp): return _news_results(dom) raise ValueError(f"Invalid search type {search_type}") + + +def fetch_traits(engine_traits: EngineTraits): + # pylint: disable=import-outside-toplevel + from searx import network + from searx.locales import get_official_locales, region_tag + from babel import Locale, UnknownLocaleError + import contextlib + + resp = network.get(base_url + "/preferences", headers={'Accept-Language': 'en-US,en;q=0.5'}) + dom = html.fromstring(resp.text) # type: ignore + + languages = eval_xpath_list(dom, f'//select[@name="{language_param}"]/option/@value') + + engine_traits.custom['language_all'] = languages[0] + + for code in languages[1:]: + with contextlib.suppress(UnknownLocaleError): + locale = Locale(code) + engine_traits.languages[locale.language] = code + + regions = eval_xpath_list(dom, f'//select[@name="{region_param}"]/option/@value') + + engine_traits.custom['region_all'] = regions[1] + + for code in regions[2:]: + for locale in get_official_locales(code, engine_traits.languages): + engine_traits.regions[region_tag(locale)] = code diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py index ef5dc607c..e31a143ea 100755 --- a/searxng_extra/update/update_engine_traits.py +++ b/searxng_extra/update/update_engine_traits.py @@ -101,7 +101,7 @@ def fetch_traits_map(): def filter_locales(traits_map: EngineTraitsMap): """Filter language & region tags by a threshold.""" - min_eng_per_region = 15 + min_eng_per_region = 18 min_eng_per_lang = 20 _ = {} diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py index 37cc187da..cf22563e0 100644 --- a/tests/unit/test_locales.py +++ b/tests/unit/test_locales.py @@ -32,12 +32,10 @@ class TestLocales(SearxTestCase): @parameterized.expand( [ - ('ca-es', 'ca-ES'), ('de-at', 'de-AT'), ('de-de', 'de-DE'), ('en-UK', 'en-GB'), ('fr-be', 'fr-BE'), - ('fr-be', 'fr-BE'), ('fr-ca', 'fr-CA'), ('fr-ch', 'fr-CH'), ('zh-cn', 'zh-CN'),