diff --git a/AUTHORS.rst b/AUTHORS.rst index 39a70f655..7e97c48c2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -59,3 +59,4 @@ generally made searx better: - Thomas Renard @threnard - Pydo ``_ - Athemis ``_ +- Stefan Antoni `` diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index cc200a0d1..5275351f1 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -34,7 +34,8 @@ engine_dir = dirname(realpath(__file__)) engines = {} categories = {'general': []} -_initialized = False + +languages = loads(open(engine_dir + '/../data/engines_languages.json').read()) engine_shortcuts = {} engine_default_args = {'paging': False, @@ -214,13 +215,7 @@ def get_engines_stats(): ] -if 'engines' not in settings or not settings['engines']: - logger.error('No engines found. Edit your settings.yml') - exit(2) - -languages = loads(open(engine_dir + '/../data/engines_languages.json').read()) - -for engine_data in settings['engines']: - engine = load_engine(engine_data) - if engine is not None: +def initialize_engines(engine_list): + for engine_data in engine_list: + engine = load_engine(engine_data) engines[engine.name] = engine diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index ddacd1a61..49c6a5d50 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -72,9 +72,9 @@ def response(resp): 'content': ''.join(result.xpath('.//div[@class="st"]//text()')), } - img = result.xpath('.//img/@src')[0] - if img and not img.startswith('data'): - r['img_src'] = img + imgs = result.xpath('.//img/@src') + if len(imgs) and not imgs[0].startswith('data'): + r['img_src'] = imgs[0] results.append(r) diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py new file mode 100644 index 000000000..91c264498 --- /dev/null +++ b/searx/engines/searx_engine.py @@ -0,0 +1,57 @@ +""" + Searx (all) + + @website https://github.com/asciimoo/searx + @provide-api yes 
(https://asciimoo.github.io/searx/dev/search_api.html) + + @using-api yes + @results JSON + @stable yes (using api) + @parse url, title, content +""" + +from json import loads +from searx.engines import categories as searx_categories + + +categories = searx_categories.keys() + +# search-url +instance_urls = [] +instance_index = 0 + + +# do search-request +def request(query, params): + global instance_index + params['url'] = instance_urls[instance_index % len(instance_urls)] + params['method'] = 'POST' + + instance_index += 1 + + params['data'] = { + 'q': query, + 'pageno': params['pageno'], + 'language': params['language'], + 'time_range': params['time_range'], + 'category': params['category'], + 'format': 'json' + } + + return params + + +# get response from search-request +def response(resp): + + response_json = loads(resp.text) + results = response_json['results'] + + for i in ('answers', 'infoboxes'): + results.extend(response_json[i]) + + results.extend({'suggestion': s} for s in response_json['suggestions']) + + results.append({'number_of_results': response_json['number_of_results']}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index eee2903b6..a475433a9 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -13,6 +13,7 @@ server: secret_key : "ultrasecretkey" # change this! base_url : False # Set custom base_url.
Possible values: False or "https://your.custom.host/location/" image_proxy : False # Proxying image results through searx + http_protocol_version : "1.0" # 1.0 and 1.1 are supported ui: themes_path : "" # Custom ui themes path - leave it blank if you didn't change @@ -91,6 +92,17 @@ engines: disabled : True shortcut : bb + - name : ccc-tv + engine : xpath + paging : False + search_url : https://media.ccc.de/search/?q={query} + url_xpath : //div[@class="caption"]/h3/a/@href + title_xpath : //div[@class="caption"]/h3/a/text() + content_xpath : //div[@class="caption"]/h4/@title + categories : videos + disabled : True + shortcut : c3tv + - name : crossref engine : json_engine paging : True @@ -154,6 +166,18 @@ engines: shortcut : ddg disabled : True + - name : etymonline + engine : xpath + paging : True + search_url : http://etymonline.com/?search={query}&p={pageno} + url_xpath : //dt/a[1]/@href + title_xpath : //dt + content_xpath : //dd + suggestion_xpath : //a[@class="crossreference"] + first_page_num : 0 + shortcut : et + disabled : True + # api-key required: http://www.faroo.com/hp/api/api.html#key # - name : faroo # engine : faroo @@ -430,6 +454,14 @@ engines: shortcut : scc disabled : True +# - name : searx +# engine : searx_engine +# shortcut : se +# instance_urls : +# - http://127.0.0.1:8888/ +# - ... +# disabled : True + - name : spotify engine : spotify shortcut : stf diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index 7d2701449..dbaf2fd52 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -13,6 +13,7 @@ server: secret_key : "ultrasecretkey" # change this! 
base_url : False image_proxy : False + http_protocol_version : "1.0" ui: themes_path : "" diff --git a/searx/webapp.py b/searx/webapp.py index b124aa75d..1682015cf 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -53,7 +53,7 @@ from flask_babel import Babel, gettext, format_date, format_decimal from flask.json import jsonify from searx import settings, searx_dir, searx_debug from searx.engines import ( - categories, engines, get_engines_stats, engine_shortcuts + categories, engines, engine_shortcuts, get_engines_stats, initialize_engines ) from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, @@ -81,7 +81,7 @@ except ImportError: # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler -WSGIRequestHandler.protocol_version = "HTTP/1.1" +WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) static_path, templates_path, themes =\ get_themes(settings['ui']['themes_path'] @@ -769,6 +769,9 @@ def page_not_found(e): def run(): + if not searx_debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true": + initialize_engines(settings['engines']) + app.run( debug=searx_debug, use_debugger=searx_debug,