[feat] yep: support for images and news (and safesearch)

This commit is contained in:
Bnyro 2023-10-16 10:45:09 +02:00 committed by Markus Heiser
parent ee2675c999
commit 04cfce2eb8
2 changed files with 95 additions and 14 deletions

79
searx/engines/yep.py Normal file
View file

@ -0,0 +1,79 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Yep (general, images, news)
"""
from datetime import datetime
from urllib.parse import urlencode
from searx.utils import html_to_text
# Engine metadata: describes the upstream service and how results are fetched.
about = {
    'website': 'https://yep.com/',
    # Yep (yep.com) is not Yelp: Yelp's developer docs do not describe the
    # endpoint this engine queries.  No official API documentation is
    # referenced, consistent with 'use_official_api' being False.
    'official_api_documentation': None,
    'use_official_api': False,
    'require_api_key': False,
    'results': 'JSON',
}
# Root of the Yep JSON API; the search path and query string are appended
# to it when a request is built.
base_url = "https://api.yep.com"

# Which kind of search to run: 'web', 'images' or 'news'.
search_type = "web"

# NOTE(review): presumably the flag telling searx that this engine honours
# the safesearch setting -- confirm against the engine loader.
safesearch = True

# Translates the numeric safesearch level into the string sent to Yep as the
# ``safeSearch`` query argument.
safesearch_map = {
    0: 'off',
    1: 'moderate',
    2: 'strict',
}
def request(query, params):
    """Fill ``params`` with the URL and headers of a Yep search request.

    The query string carries the search terms, the configured
    ``search_type`` and the safesearch level, the latter translated from
    ``params['safesearch']`` through ``safesearch_map``.

    ``params`` is modified in place and also returned.
    """
    query_string = urlencode(
        {
            'client': 'web',
            'no_correct': 'false',
            'q': query,
            'safeSearch': safesearch_map[params['safesearch']],
            'type': search_type,
        }
    )
    params['url'] = f"{base_url}/fs/2/search?{query_string}"
    params['headers']['Referer'] = 'https://yep.com/'
    return params
def _web_result(result):
    """Build a result dict from one entry of a Yep web search.

    ``result`` must provide the keys ``url``, ``title`` and ``snippet``;
    the snippet is passed through ``html_to_text`` before being used as
    the result content.
    """
    entry = {'url': result['url'], 'title': result['title']}
    entry['content'] = html_to_text(result['snippet'])
    return entry
def _images_result(result):
return {
'template': 'images.html',
'url': result['host_page'],
'title': result.get('title', ''),
'content': '',
'img_src': result['image_id'],
'thumbnail_src': result['src'],
}
def _news_result(result):
    """Build a result dict from one entry of a Yep news search.

    ``result`` must provide the keys ``url``, ``title``, ``snippet`` and
    ``first_seen``.  The snippet is passed through ``html_to_text``; the
    publication date is parsed from ``first_seen`` into a naive
    ``datetime``.
    """
    entry = {'url': result['url'], 'title': result['title']}
    entry['content'] = html_to_text(result['snippet'])
    # Only the leading 'YYYY-MM-DDTHH:MM:SS' part (19 characters) is parsed;
    # whatever follows in the timestamp is discarded.
    seen_at = result['first_seen'][:19]
    entry['publishedDate'] = datetime.strptime(seen_at, '%Y-%m-%dT%H:%M:%S')
    return entry
def response(resp):
    """Convert a Yep API response into a list of result dicts.

    The entries are read from ``resp.json()[1]['results']`` and converted
    by the helper matching the module-level ``search_type``.

    Raises ``ValueError`` when an entry has to be converted and
    ``search_type`` is none of ``web``, ``images`` or ``news``.
    """
    converters = {
        "web": _web_result,
        "images": _images_result,
        "news": _news_result,
    }
    results = []
    for item in resp.json()[1]['results']:
        # The lookup stays inside the loop so that an unsupported search
        # type is only reported when there is at least one entry to convert.
        convert = converters.get(search_type)
        if convert is None:
            raise ValueError(f"Unsupported yep search type: {search_type}")
        results.append(convert(item))
    return results

View file

@ -484,23 +484,25 @@ engines:
website: https://crowdview.ai/
- name: yep
engine: json_engine
engine: yep
shortcut: yep
categories: general
search_type: web
disabled: true
- name: yep images
engine: yep
shortcut: yepi
categories: images
search_type: images
disabled: true
- name: yep news
engine: yep
shortcut: yepn
categories: news
search_type: news
disabled: true
paging: false
content_html_to_text: true
title_html_to_text: true
search_url: https://api.yep.com/fs/1/?type=web&q={query}&no_correct=false&limit=100
results_query: 1/results
title_query: title
url_query: url
content_query: snippet
about:
website: https://yep.com
use_official_api: false
require_api_key: false
results: JSON
- name: curlie
engine: xpath