From e25d1c7288417a5a3773e0b592900156b246234b Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Tue, 8 Aug 2023 10:54:48 +0200
Subject: [PATCH] [feat] engine: implementation of German news, Tagesschau

Co-authored-by: Markus Heiser
---
 searx/engines/tagesschau.py | 101 ++++++++++++++++++++++++++++++++++++
 searx/settings.yml          |   5 ++
 2 files changed, 106 insertions(+)
 create mode 100644 searx/engines/tagesschau.py

diff --git a/searx/engines/tagesschau.py b/searx/engines/tagesschau.py
new file mode 100644
index 000000000..4a36747c8
--- /dev/null
+++ b/searx/engines/tagesschau.py
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""ARD: `Tagesschau API`_
+
+The Tagesschau is a news program of the ARD. Via the `Tagesschau API`_, current
+news and media reports are available in JSON format. The `Bundesstelle für Open
+Data`_ offers an `OpenAPI`_ portal at bundDEV_ where APIs are documented and can
+be tested.
+
+This SearXNG engine uses the `/api2u/search`_ API.
+
+.. _/api2u/search: http://tagesschau.api.bund.dev/
+.. _bundDEV: https://bund.dev/apis
+.. _Bundesstelle für Open Data: https://github.com/bundesAPI
+.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
+.. _OpenAPI: https://swagger.io/specification/
+
+"""
+from typing import TYPE_CHECKING
+
+from datetime import datetime
+from urllib.parse import urlencode
+import re
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+about = {
+    'website': "https://tagesschau.de",
+    'wikidata_id': "Q703907",
+    'official_api_documentation': None,
+    'use_official_api': True,
+    'require_api_key': False,
+    'results': 'JSON',
+    'language': 'de',
+}
+categories = ['general', 'news']
+paging = True
+
+results_per_page = 10
+base_url = "https://www.tagesschau.de"
+
+
+def request(query, params):
+    args = {
+        'searchText': query,
+        'pageSize': results_per_page,
+        'resultPage': params['pageno'] - 1,
+    }
+
+    params['url'] = f"{base_url}/api2u/search?{urlencode(args)}"
+
+    return params
+
+
+def response(resp):
+    results = []
+
+    json = resp.json()
+
+    for item in json['searchResults']:
+        item_type = item.get('type')
+        if item_type in ('story', 'webview'):
+            results.append(_story(item))
+        elif item_type == 'video':
+            results.append(_video(item))
+        else:
+            logger.error("unknown result type: %s", item_type)
+
+    return results
+
+
+def _story(item):
+    return {
+        'title': item['title'],
+        'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
+        'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
+        'content': item['firstSentence'],
+        'url': item['shareURL'],
+    }
+
+
+def _video(item):
+    video_url = item['streams']['h264s']
+    title = item['title']
+
+    if "_vapp.mxf" in title:
+        title = title.replace("_vapp.mxf", "")
+        title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
+
+    return {
+        'template': 'videos.html',
+        'title': title,
+        'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
+        'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
+        'content': item.get('firstSentence', ''),
+        'iframe_src': video_url,
+        'url': video_url,
+    }
diff --git a/searx/settings.yml b/searx/settings.yml
index 256fb15a9..e3f3421fc 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1431,6 +1431,11 @@ engines:
   #     WHERE title LIKE :wildcard OR description LIKE :wildcard
   #     ORDER BY duration DESC
 
+  - name: tagesschau
+    engine: tagesschau
+    shortcut: ts
+    disabled: true
+
   # Requires Tor
   - name: torch
     engine: xpath
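
Reviewer note: to poke at the upstream API outside of SearXNG, the standalone sketch below mirrors what request() and response() in the patch do, namely build the /api2u/search URL and pull out the fields the engine maps into results. It is only an illustration: the endpoint, query parameters, and JSON field names (searchResults, title, firstSentence, shareURL, date) are taken from the engine code above, while the requests dependency, the search() helper, and the example query are assumptions made for the demo.

# Standalone sketch, not part of the patch: query the Tagesschau search API
# directly and print the fields the engine maps into SearXNG results.
# Assumes the `requests` package; endpoint and JSON field names come from the
# engine code in the patch, everything else is illustrative.
from urllib.parse import urlencode

import requests

BASE_URL = "https://www.tagesschau.de"


def search(query, page=1, page_size=10):
    # Same query parameters as the engine's request(); the API counts pages from 0.
    args = {
        'searchText': query,
        'pageSize': page_size,
        'resultPage': page - 1,
    }
    url = f"{BASE_URL}/api2u/search?{urlencode(args)}"
    data = requests.get(url, timeout=10).json()

    results = []
    for item in data.get('searchResults', []):
        # Mirror the fields _story() picks up; video items additionally carry
        # stream URLs under item['streams'].
        results.append({
            'type': item.get('type'),
            'title': item.get('title'),
            'content': item.get('firstSentence'),
            'url': item.get('shareURL'),
            'date': item.get('date'),
        })
    return results


if __name__ == '__main__':
    for result in search("Bundestag"):
        print(f"[{result['type']}] {result['title']}: {result['url']}")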