From 04cfce2eb845c0dfc7d578a6fb3c44cb546b2028 Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Mon, 16 Oct 2023 10:45:09 +0200
Subject: [PATCH] [feat] yep: support for images and news (and safesearch)

---
 searx/engines/yep.py | 90 ++++++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml   | 30 ++++++++-------
 2 files changed, 106 insertions(+), 14 deletions(-)
 create mode 100644 searx/engines/yep.py

diff --git a/searx/engines/yep.py b/searx/engines/yep.py
new file mode 100644
index 000000000..c3cb65c7b
--- /dev/null
+++ b/searx/engines/yep.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Yep (general, images, news)
+"""
+
+from datetime import datetime
+from urllib.parse import urlencode
+from searx.utils import html_to_text
+
+about = {
+    'website': 'https://yep.com/',
+    # docs.developer.yelp.com documents Yelp, an unrelated service; no official
+    # API documentation for Yep is known.
+    'official_api_documentation': None,
+    'use_official_api': False,
+    'require_api_key': False,
+    'results': 'JSON',
+}
+
+base_url = "https://api.yep.com"
+search_type = "web"  # 'web', 'images', 'news'
+
+safesearch = True
+safesearch_map = {0: 'off', 1: 'moderate', 2: 'strict'}
+
+
+def request(query, params):
+    """Store the Yep search URL and the Referer header for ``query`` in ``params``."""
+    args = {
+        'client': 'web',
+        'no_correct': 'false',
+        'q': query,
+        'safeSearch': safesearch_map[params['safesearch']],
+        'type': search_type,
+    }
+    params['url'] = f"{base_url}/fs/2/search?{urlencode(args)}"
+    params['headers']['Referer'] = 'https://yep.com/'
+    return params
+
+
+def _web_result(result):
+    """Map one web hit to a result dict; the snippet is converted to plain text."""
+    return {
+        'url': result['url'],
+        'title': result['title'],
+        'content': html_to_text(result['snippet']),
+    }
+
+
+def _images_result(result):
+    """Map one image hit to a result dict for the ``images.html`` template."""
+    return {
+        'template': 'images.html',
+        'url': result['host_page'],
+        'title': result.get('title', ''),
+        'content': '',
+        'img_src': result['image_id'],
+        'thumbnail_src': result['src'],
+    }
+
+
+def _news_result(result):
+    """Map one news hit to a result dict including its publication date."""
+    return {
+        'url': result['url'],
+        'title': result['title'],
+        'content': html_to_text(result['snippet']),
+        # keep only 'YYYY-MM-DDTHH:MM:SS'; anything after the seconds is dropped
+        'publishedDate': datetime.strptime(result['first_seen'][:19], '%Y-%m-%dT%H:%M:%S'),
+    }
+
+
+def response(resp):
+    """Convert the JSON reply into a list of result dicts.
+
+    Raises ``ValueError`` if ``search_type`` is not 'web', 'images' or 'news'.
+    """
+    parsers = {
+        'web': _web_result,
+        'images': _images_result,
+        'news': _news_result,
+    }
+    # Validate the configuration once, before reading the payload, so that a
+    # wrong ``search_type`` is reported even when no results are returned.
+    parse = parsers.get(search_type)
+    if parse is None:
+        raise ValueError(f"Unsupported yep search type: {search_type}")
+
+    # The result list sits under 'results' in the second element of the reply.
+    return [parse(result) for result in resp.json()[1]['results']]
diff --git a/searx/settings.yml b/searx/settings.yml
index be420528f..1e9f0855b 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -484,23 +484,25 @@ engines:
       website: https://crowdview.ai/
 
   - name: yep
-    engine: json_engine
+    engine: yep
     shortcut: yep
     categories: general
+    search_type: web
+    disabled: true
+
+  - name: yep images
+    engine: yep
+    shortcut: yepi
+    categories: images
+    search_type: images
+    disabled: true
+
+  - name: yep news
+    engine: yep
+    shortcut: yepn
+    categories: news
+    search_type: news
     disabled: true
-    paging: false
-    content_html_to_text: true
-    title_html_to_text: true
-    search_url: https://api.yep.com/fs/1/?type=web&q={query}&no_correct=false&limit=100
-    results_query: 1/results
-    title_query: title
-    url_query: url
-    content_query: snippet
-    about:
-      website: https://yep.com
-      use_official_api: false
-      require_api_key: false
-      results: JSON
 
   - name: curlie
     engine: xpath