diff --git a/searx/engines/baidu.py b/searx/engines/baidu.py
new file mode 100644
index 000000000..6aa0716fd
--- /dev/null
+++ b/searx/engines/baidu.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Baidu_
+
+.. _Baidu: https://www.baidu.com
+"""
+
+# There exists a https://github.com/ohblue/baidu-serp-api/
+# but we don't use it here (maybe we can learn from it).
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+from searx.exceptions import SearxEngineAPIException
+
+about = {
+    "website": "https://www.baidu.com",
+    "wikidata_id": "Q14772",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+paging = True
+categories = ["general"]
+base_url = "https://www.baidu.com/s"
+results_per_page = 10
+
+
+def request(query, params):
+    """Build the search URL for ``query`` and store it in ``params["url"]``."""
+    keyword = query.strip()
+
+    query_params = {
+        "wd": keyword,
+        "rn": results_per_page,
+        # pageno - 1 so that page 1 maps to offset 0.
+        "pn": (params["pageno"] - 1) * results_per_page,
+        "tn": "json",
+    }
+
+    params["url"] = f"{base_url}?{urlencode(query_params)}"
+    return params
+
+
+def response(resp):
+    """Convert Baidu's JSON answer into a list of result dicts.
+
+    Entries without a title or URL are skipped.  A ``time`` value that cannot
+    be converted leaves ``publishedDate`` set to ``None``.
+
+    Raises ``SearxEngineAPIException`` when the body is not JSON or does not
+    contain a ``feed.entry`` list.
+    """
+    try:
+        data = resp.json()
+    except Exception as e:
+        raise SearxEngineAPIException(f"Invalid response: {e}") from e
+
+    # Check container types too: a JSON ``null``, string or list at either
+    # level would otherwise escape as TypeError instead of an API error.
+    feed = data.get("feed") if isinstance(data, dict) else None
+    entries = feed.get("entry") if isinstance(feed, dict) else None
+    if not isinstance(entries, list):
+        raise SearxEngineAPIException("Invalid response")
+
+    results = []
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        if not entry.get("title") or not entry.get("url"):
+            continue
+
+        published_date = None
+        if entry.get("time"):
+            try:
+                published_date = datetime.fromtimestamp(entry["time"])
+            except (ValueError, TypeError, OverflowError, OSError):
+                # fromtimestamp() raises OverflowError / OSError when the
+                # value is outside the range the platform supports.
+                published_date = None
+
+        results.append(
+            {
+                "title": entry["title"],
+                "url": entry["url"],
+                "content": entry.get("abs", ""),
+                "publishedDate": published_date,
+                # "source": entry.get('source')
+            }
+        )
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index c0b740e99..45728424c 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -501,6 +501,11 @@ engines:
     shortcut: bc
     categories: music
 
+  - name: baidu
+    engine: baidu
+    shortcut: baidu
+    disabled: false
+
   - name: wikipedia
     engine: wikipedia
     shortcut: wp