From 08d08475fc555bcb6dd272b67680097ac5829714 Mon Sep 17 00:00:00 2001
From: Zhijie He
Date: Tue, 4 Mar 2025 21:55:17 +0800
Subject: [PATCH] [feat] engines: add www.chinaso.com

Co-authored-by: Bnyro
---
 searx/engines/chinaso.py | 156 +++++++++++++++++++++++++++++++++++++++
 searx/settings.yml       |  18 +++++
 2 files changed, 174 insertions(+)
 create mode 100644 searx/engines/chinaso.py

diff --git a/searx/engines/chinaso.py b/searx/engines/chinaso.py
new file mode 100644
index 000000000..509006aa3
--- /dev/null
+++ b/searx/engines/chinaso.py
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""ChinaSo: A search engine from ChinaSo."""
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+from searx.exceptions import SearxEngineAPIException
+from searx.utils import html_to_text
+
+about = {
+    "website": "https://www.chinaso.com/",
+    "wikidata_id": "Q10846064",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+paging = True
+time_range_support = True
+results_per_page = 10
+categories = []
+chinaso_category = 'news'
+"""ChinaSo supports news, videos, images search.
+
+- ``news``: search for news
+- ``videos``: search for videos
+- ``images``: search for images
+"""
+
+time_range_dict = {'day': '24h', 'week': '1w', 'month': '1m', 'year': '1y'}
+
+base_url = "https://www.chinaso.com"
+
+
+def init(_):
+    """Validate the configured ``chinaso_category``.
+
+    Raises :py:obj:`SearxEngineAPIException` for anything other than ``news``,
+    ``videos`` or ``images``.
+    """
+    if chinaso_category not in ('news', 'videos', 'images'):
+        raise SearxEngineAPIException(f"Unsupported category: {chinaso_category}")
+
+
+def request(query, params):
+    """Build the request URL for ``chinaso_category`` and store it in ``params["url"]``.
+
+    ``params["pageno"]`` drives the paging arguments.  The ``stime`` / ``etime``
+    arguments are only added when ``params["time_range"]`` has an entry in
+    ``time_range_dict``.
+    """
+    query_params = {"q": query}
+
+    time_range = time_range_dict.get(params['time_range'])
+    if time_range:
+        query_params["stime"] = time_range
+        query_params["etime"] = 'now'
+
+    # images and videos share the same offset based paging arguments, news is paged by page number
+    offset_paging = {'start_index': (params["pageno"] - 1) * results_per_page, 'rn': results_per_page}
+    category_config = {
+        'news': {'endpoint': '/v5/general/v1/web/search', 'params': {'pn': params["pageno"], 'ps': results_per_page}},
+        'images': {'endpoint': '/v5/general/v1/search/image', 'params': offset_paging},
+        'videos': {'endpoint': '/v5/general/v1/search/video', 'params': offset_paging},
+    }
+    config = category_config[chinaso_category]
+
+    query_params.update(config['params'])
+    params["url"] = f"{base_url}{config['endpoint']}?{urlencode(query_params)}"
+
+    return params
+
+
+def response(resp):
+    """Decode the JSON body of ``resp`` and hand it to the parser of ``chinaso_category``.
+
+    Raises :py:obj:`SearxEngineAPIException` when ``resp.json()`` fails.
+    """
+    try:
+        data = resp.json()
+    except Exception as e:
+        raise SearxEngineAPIException(f"Invalid response: {e}") from e
+
+    parsers = {'news': parse_news, 'images': parse_images, 'videos': parse_videos}
+
+    return parsers[chinaso_category](data)
+
+
+def _get_entries(data, key):
+    """Return the non-empty result list found at ``data["data"][key]``.
+
+    Raises :py:obj:`SearxEngineAPIException` when the payload is not shaped as
+    expected (including a JSON ``null`` for ``data``) or the list is empty.
+    """
+    container = data.get("data") if isinstance(data, dict) else None
+    entries = container.get(key) if isinstance(container, dict) else None
+    if not entries:
+        raise SearxEngineAPIException("Invalid response")
+    return entries
+
+
+def _parse_timestamp(value):
+    """Convert an epoch timestamp (number or numeric string) to a ``datetime``.
+
+    Returns ``None`` for a missing or unusable value, so one malformed entry
+    does not abort parsing of the whole response.
+    """
+    if not value:
+        return None
+    try:
+        return datetime.fromtimestamp(int(value))
+    except (ValueError, TypeError, OverflowError, OSError):
+        # OverflowError / OSError: timestamp outside the range the platform supports
+        return None
+
+
+def parse_news(data):
+    """Parse a news search response into a list of result dicts."""
+    return [
+        {
+            'title': html_to_text(entry["title"]),
+            'url': entry["url"],
+            'content': html_to_text(entry["snippet"]),
+            'publishedDate': _parse_timestamp(entry.get("timestamp")),
+        }
+        for entry in _get_entries(data, "data")
+    ]
+
+
+def parse_images(data):
+    """Parse an image search response into a list of ``images.html`` result dicts."""
+    return [
+        {
+            'url': entry["web_url"],
+            'title': html_to_text(entry["title"]),
+            'content': html_to_text(entry["ImageInfo"]),
+            'template': 'images.html',
+            'img_src': entry["url"].replace("http://", "https://"),
+            'thumbnail_src': entry["largeimage"].replace("http://", "https://"),
+        }
+        for entry in _get_entries(data, "arrRes")
+    ]
+
+
+def parse_videos(data):
+    """Parse a video search response into a list of ``videos.html`` result dicts."""
+    return [
+        {
+            'url': entry["url"],
+            'title': html_to_text(entry["raw_title"]),
+            'template': 'videos.html',
+            'publishedDate': _parse_timestamp(entry.get("VideoPubDate")),
+            'thumbnail': entry["image_src"].replace("http://", "https://"),
+        }
+        for entry in _get_entries(data, "arrRes")
+    ]
diff --git a/searx/settings.yml b/searx/settings.yml
index 6aafaeb63..8a5069736 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -573,6 +573,24 @@ engines:
     # to show premium or plus results too:
     # skip_premium: false
 
+  - name: chinaso news
+    chinaso_category: news
+    engine: chinaso
+    shortcut: chinaso
+    disabled: true
+
+  - name: chinaso images
+    chinaso_category: images
+    engine: chinaso
+    shortcut: chinasoi
+    disabled: true
+
+  - name: chinaso videos
+    chinaso_category: videos
+    engine: chinaso
+    shortcut: chinasov
+    disabled: true
+
   - name: cloudflareai
     engine: cloudflareai
     shortcut: cfai