From 057e9bc1d1314e6f776274ea94cad75d05065c96 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 1 Jan 2023 15:50:21 +0100 Subject: [PATCH] [mod] SepiaSearch: re-engineered & upgrade to data_type: traits_v1 - fetch_traits() SepiaSearch and Peertube are using identical languages. Replace module's dictionary `supported_languages` by `engine.traits.languages` (data_type: `traits_v1`). - fixed code to pass pylint - request(): add argument boostLanguages - response(): is replaced by peertube's video_response() function, which adds metadata from channel name, host & tags Signed-off-by: Markus Heiser --- docs/src/searx.engines.peertube.rst | 8 ++ searx/data/engine_traits.json | 30 ++++++++ searx/engines/sepiasearch.py | 112 ++++++++++++---------------- 3 files changed, 84 insertions(+), 66 deletions(-) diff --git a/docs/src/searx.engines.peertube.rst b/docs/src/searx.engines.peertube.rst index 757d9c742..8e1576ea0 100644 --- a/docs/src/searx.engines.peertube.rst +++ b/docs/src/searx.engines.peertube.rst @@ -17,3 +17,11 @@ Peertube Video .. automodule:: searx.engines.peertube :members: + +.. _sepiasearch engine: + +SepiaSearch +=========== + +.. 
automodule:: searx.engines.sepiasearch + :members: diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index e1c1a5a46..f0e6ef045 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3301,6 +3301,36 @@ }, "supported_languages": {} }, + "sepiasearch": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, + "regions": {}, + "supported_languages": {} + }, "startpage": { "all_locale": null, "custom": {}, diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 9c45d6c43..72157b253 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -1,70 +1,80 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - SepiaSearch (Videos) +# lint: pylint +"""SepiaSearch uses the same languages as :py:obj:`Peertube +<searx.engines.peertube>` and the response is identical to the response from the +peertube engines. 
+ """ -from json import loads -from dateutil import parser, relativedelta +from typing import TYPE_CHECKING + from urllib.parse import urlencode from datetime import datetime -# about +from searx.engines.peertube import fetch_traits # pylint: disable=unused-import +from searx.engines.peertube import ( + # pylint: disable=unused-import + video_response, + safesearch_table, + time_range_table, +) +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + about = { + # pylint: disable=line-too-long "website": 'https://sepiasearch.org', "wikidata_id": None, - "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', "use_official_api": True, "require_api_key": False, "results": 'JSON', } +# engine dependent config categories = ['videos'] paging = True + +base_url = 'https://sepiasearch.org' + time_range_support = True safesearch = True -supported_languages = [ - # fmt: off - 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el', - 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt', - 'sv', 'pl', 'fi', 'ru' - # fmt: on -] -base_url = 'https://sepiasearch.org/api/v1/search/videos' - -safesearch_table = {0: 'both', 1: 'false', 2: 'false'} - -time_range_table = { - 'day': relativedelta.relativedelta(), - 'week': relativedelta.relativedelta(weeks=-1), - 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1), -} - - -def minute_to_hm(minute): - if isinstance(minute, int): - return "%d:%02d" % (divmod(minute, 60)) - return None def request(query, params): + """Assemble request for the SepiaSearch API""" + + if not query: + return False + + # eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], None) + 
params['url'] = ( - base_url - + '?' + base_url.rstrip("/") + + "/api/v1/search/videos?" + urlencode( { 'search': query, 'start': (params['pageno'] - 1) * 10, 'count': 10, - 'sort': '-match', + # -createdAt: sort by date descending / createdAt: date ascending + 'sort': '-match', # sort by *match descending* 'nsfw': safesearch_table[params['safesearch']], } ) ) - language = params['language'].split('-')[0] - if language in supported_languages: - params['url'] += '&languageOneOf[]=' + language + if eng_lang is not None: + params['url'] += '&languageOneOf[]=' + eng_lang + params['url'] += '&boostLanguages[]=' + eng_lang + if params['time_range'] in time_range_table: time = datetime.now().date() + time_range_table[params['time_range']] params['url'] += '&startDate=' + time.isoformat() @@ -73,34 +83,4 @@ def request(query, params): def response(resp): - results = [] - - search_results = loads(resp.text) - - if 'data' not in search_results: - return [] - - for result in search_results['data']: - title = result['name'] - content = result['description'] - thumbnail = result['thumbnailUrl'] - publishedDate = parser.parse(result['publishedAt']) - author = result.get('account', {}).get('displayName') - length = minute_to_hm(result.get('duration')) - url = result['url'] - - results.append( - { - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'iframe_src': result.get('embedUrl'), - 'thumbnail': thumbnail, - } - ) - - return results + return video_response(resp)