From f02d9b523d802748fa45f97696930ce4d45d231e Mon Sep 17 00:00:00 2001 From: Bnyro Date: Thu, 25 Apr 2024 18:21:47 +0200 Subject: [PATCH] [feat] wikimedia commons: support for videos, audio and other files --- searx/engines/wikicommons.py | 44 +++++++++++++++++++++++++++++++----- searx/settings.yml | 22 ++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/searx/engines/wikicommons.py b/searx/engines/wikicommons.py index 0bb140abc..a229d24b6 100644 --- a/searx/engines/wikicommons.py +++ b/searx/engines/wikicommons.py @@ -3,6 +3,8 @@ """ +import datetime + from urllib.parse import urlencode # about @@ -14,6 +16,8 @@ about = { "require_api_key": False, "results": 'JSON', } +categories = ['images'] +search_type = 'images' base_url = "https://commons.wikimedia.org" search_prefix = ( @@ -29,17 +33,29 @@ search_prefix = ( paging = True number_of_results = 10 +search_types = { + 'images': 'bitmap|drawing', + 'videos': 'video', + 'audio': 'audio', + 'files': 'multimedia|office|archive|3d', +} + def request(query, params): language = 'en' if params['language'] != 'all': language = params['language'].split('-')[0] + if search_type not in search_types: + raise ValueError(f"Unsupported search type: {search_type}") + + filetype = search_types[search_type] + args = { 'uselang': language, 'gsrlimit': number_of_results, 'gsroffset': number_of_results * (params["pageno"] - 1), - 'gsrsearch': "filetype:bitmap|drawing " + query, + 'gsrsearch': f"filetype:{filetype} {query}", } params["url"] = f"{base_url}/w/api.php{search_prefix}&{urlencode(args, safe=':|')}" @@ -52,7 +68,6 @@ def response(resp): if not json.get("query", {}).get("pages"): return results - for item in json["query"]["pages"].values(): imageinfo = item["imageinfo"][0] title = item["title"].replace("File:", "").rsplit('.', 1)[0] @@ -60,11 +75,28 @@ def response(resp): 'url': imageinfo["descriptionurl"], 'title': title, 'content': item["snippet"], - 'img_src': imageinfo["url"], - 'resolution': f'{imageinfo["width"]} x {imageinfo["height"]}', - 'thumbnail_src': imageinfo["thumburl"], - 'template': 'images.html', } + + if search_type == "images": + result['template'] = 'images.html' + result['img_src'] = imageinfo["url"] + result['thumbnail_src'] = imageinfo["thumburl"] + result['resolution'] = f'{imageinfo["width"]} x {imageinfo["height"]}' + else: + result['thumbnail'] = imageinfo["thumburl"] + + if search_type == "videos": + result['template'] = 'videos.html' + if imageinfo.get('duration'): + result['length'] = datetime.timedelta(seconds=int(imageinfo['duration'])) + result['iframe_src'] = imageinfo['url'] + elif search_type == "files": + result['template'] = 'files.html' + result['metadata'] = imageinfo['mime'] + result['size'] = imageinfo['size'] + elif search_type == "audio": + result['iframe_src'] = imageinfo['url'] + results.append(result) return results diff --git a/searx/settings.yml b/searx/settings.yml index ce725d7e7..a147b1b4d 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1910,6 +1910,28 @@ engines: engine: wikicommons shortcut: wc categories: images + search_type: images + number_of_results: 10 + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + search_type: videos + number_of_results: 10 + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + search_type: audio + number_of_results: 10 + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + search_type: files number_of_results: 10 - name: wolframalpha