mirror of
https://github.com/searxng/searxng.git
synced 2025-01-03 15:08:41 +00:00
Created new plugin type custom_results. Added new plugin bang_redirect (#2027)
* Made first attempt at the bangs redirects plugin. * It redirects. But in a messy way via javascript. * First version with custom plugin * Added a help page and a operator to see all the bangs available. * Changed to .format because of support * Changed to .format because of support * Removed : in params * Fixed path to json file and changed bang operator * Changed bang operator back to & * Made first attempt at the bangs redirects plugin. * It redirects. But in a messy way via javascript. * First version with custom plugin * Added a help page and a operator to see all the bangs available. * Changed to .format because of support * Changed to .format because of support * Removed : in params * Fixed path to json file and changed bang operator * Changed bang operator back to & * Refactored getting search query. Also changed bang operator to ! and is now working. * Removed prints * Removed temporary bangs_redirect.js file. Updated plugin documentation * Added unit test for the bangs plugin * Fixed a unit test and added 2 more for bangs plugin * Changed back to default settings.yml * Added myself to AUTHORS.rst * Refacored working of custom plugin. * Refactored _get_bangs_data from list to dict to improve search speed. * Decoupled bangs plugin from webserver with redirect_url * Refactored bangs unit tests * Fixed unit test bangs. Removed dubbel parsing in bangs.py * Removed a dumb print statement * Refactored bangs plugin to core engine. * Removed bangs plugin. * Refactored external bangs unit tests from plugin to core. * Removed custom_results/bangs documentation from plugins.rst * Added newline in settings.yml so the PR stays clean. 
* Changed searx/plugins/__init__.py back to the old file * Removed newline search.py * Refactored get_external_bang_operator from utils to external_bang.py * Removed unnecessary import form test_plugins.py * Removed _parseExternalBang and _isExternalBang from query.py * Removed get_external_bang_operator since it was not necessary * Simplified external_bang.py * Simplified external_bang.py * Moved external_bangs unit tests to test_webapp.py. Fixed return in search with external_bang * Refactored query parsing to unicode to support python2 * Refactored query parsing to unicode to support python2 * Refactored bangs plugin to core engine. * Refactored search parameter to search_query in external_bang.py
This commit is contained in:
parent
c21220c671
commit
4829a76aae
10 changed files with 75458 additions and 6 deletions
|
@ -124,3 +124,4 @@ generally made searx better:
|
||||||
- @CaffeinatedTech
|
- @CaffeinatedTech
|
||||||
- Robin Schneider @ypid
|
- Robin Schneider @ypid
|
||||||
- @splintah
|
- @splintah
|
||||||
|
- Lukas van den Berk @lukasvdberk
|
||||||
|
|
|
@ -30,6 +30,14 @@ Example plugin
|
||||||
ctx['search'].suggestions.add('example')
|
ctx['search'].suggestions.add('example')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
Register your plugin
|
||||||
|
====================
|
||||||
|
|
||||||
|
To enable your plugin, register it in
|
||||||
|
``searx/plugins/__init__.py``.
|
||||||
|
Then, at the bottom of that file, add your plugin like this:
|
||||||
|
``plugins.register(name_of_python_file)``
|
||||||
|
|
||||||
Plugin entry points
|
Plugin entry points
|
||||||
===================
|
===================
|
||||||
|
|
||||||
|
|
75351
searx/data/bangs.json
Normal file
75351
searx/data/bangs.json
Normal file
File diff suppressed because it is too large
Load diff
43
searx/external_bang.py
Normal file
43
searx/external_bang.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
import io
import json
from os.path import join

try:
    from urllib.parse import quote_plus  # Python 3
except ImportError:
    from urllib import quote_plus  # Python 2

from searx import searx_dir
|
||||||
|
|
||||||
|
# The bang definitions come from
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
# and were converted to JSON with
# https://pseitz.github.io/toml-to-json-online-converter/
# NOTE: get_bang_url is the only function of this module meant to be used from outside.

# Maps every trigger (for example "yt") to the definition of its bang.
# The "triggers" list itself is left out of the stored definition.
bangs_data = {}
# io.open with an explicit encoding behaves the same on Python 2 and Python 3;
# the builtin open() would decode with the locale's preferred encoding on Python 3.
with io.open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
    for bang in json.load(json_file)['bang']:
        for trigger in bang["triggers"]:
            # every trigger gets its own copy of the definition
            bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
|
||||||
|
|
||||||
|
|
||||||
|
def get_bang_url(search_query):
    """
    Build the redirect url for a search that carries an external bang.

    :param search_query: This is a search_query object. Its ``external_bang`` attribute holds the
                         bang trigger (or nothing) and its ``query`` attribute holds the UTF-8
                         encoded search terms.
    :return: None if no bang was supplied, the bang is unknown or the query is empty,
             else a string of the redirect url.
    """
    if not search_query.external_bang:
        return None

    query = search_query.query.decode('utf-8', 'ignore')
    bang = _get_bang(search_query.external_bang)
    if not bang or not query:
        return None

    # TODO add region support.
    bang_url = bang["regions"]["default"]

    # The search terms are user input: percent-encode them so that spaces, '&', '#', '?' or
    # non-ASCII characters cannot break or alter the target url.
    # quote_plus gets UTF-8 bytes because the Python 2 version can not handle non-ASCII unicode.
    return bang_url.replace("{{{term}}}", quote_plus(query.encode('utf-8')))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_bang(user_bang):
    """
    Look up the definition that belongs to a bang trigger.

    :param user_bang: The bang as parsed from the user query, without the operator. For example yt
    :return: The dict stored in bangs_data for this trigger (see bangs.json for the structure),
             or None if the trigger is unknown.
    """
    try:
        return bangs_data[user_bang]
    except KeyError:
        # unknown trigger
        return None
|
|
@ -44,10 +44,11 @@ class RawTextQuery(object):
|
||||||
self.engines = []
|
self.engines = []
|
||||||
self.languages = []
|
self.languages = []
|
||||||
self.timeout_limit = None
|
self.timeout_limit = None
|
||||||
|
self.external_bang = None
|
||||||
self.specific = False
|
self.specific = False
|
||||||
|
|
||||||
# parse query, if tags are set, which
|
# parse query, if tags are set, which
|
||||||
# change the search engine or search-language
|
# change the search engine or search-language
|
||||||
def parse_query(self):
|
def parse_query(self):
|
||||||
self.query_parts = []
|
self.query_parts = []
|
||||||
|
|
||||||
|
@ -120,6 +121,11 @@ class RawTextQuery(object):
|
||||||
self.languages.append(lang)
|
self.languages.append(lang)
|
||||||
parse_next = True
|
parse_next = True
|
||||||
|
|
||||||
|
# external bang
|
||||||
|
if query_part[0:2] == "!!":
|
||||||
|
self.external_bang = query_part[2:]
|
||||||
|
parse_next = True
|
||||||
|
continue
|
||||||
# this force a engine or category
|
# this force a engine or category
|
||||||
if query_part[0] == '!' or query_part[0] == '?':
|
if query_part[0] == '!' or query_part[0] == '?':
|
||||||
prefix = query_part[1:].replace('-', ' ').replace('_', ' ')
|
prefix = query_part[1:].replace('-', ' ').replace('_', ' ')
|
||||||
|
@ -178,7 +184,7 @@ class SearchQuery(object):
|
||||||
"""container for all the search parameters (query, language, etc...)"""
|
"""container for all the search parameters (query, language, etc...)"""
|
||||||
|
|
||||||
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
|
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
|
||||||
timeout_limit=None, preferences=None):
|
timeout_limit=None, preferences=None, external_bang=None):
|
||||||
self.query = query.encode('utf-8')
|
self.query = query.encode('utf-8')
|
||||||
self.engines = engines
|
self.engines = engines
|
||||||
self.categories = categories
|
self.categories = categories
|
||||||
|
@ -188,6 +194,7 @@ class SearchQuery(object):
|
||||||
self.time_range = None if time_range in ('', 'None', None) else time_range
|
self.time_range = None if time_range in ('', 'None', None) else time_range
|
||||||
self.timeout_limit = timeout_limit
|
self.timeout_limit = timeout_limit
|
||||||
self.preferences = preferences
|
self.preferences = preferences
|
||||||
|
self.external_bang = external_bang
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str(self.query) + ";" + str(self.engines)
|
return str(self.query) + ";" + str(self.engines)
|
||||||
|
|
|
@ -138,6 +138,7 @@ class ResultContainer(object):
|
||||||
self.paging = False
|
self.paging = False
|
||||||
self.unresponsive_engines = set()
|
self.unresponsive_engines = set()
|
||||||
self.timings = []
|
self.timings = []
|
||||||
|
self.redirect_url = None
|
||||||
|
|
||||||
def extend(self, engine_name, results):
|
def extend(self, engine_name, results):
|
||||||
for result in list(results):
|
for result in list(results):
|
||||||
|
|
|
@ -20,6 +20,8 @@ import sys
|
||||||
import threading
|
import threading
|
||||||
from time import time
|
from time import time
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import six
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
import requests.exceptions
|
import requests.exceptions
|
||||||
import searx.poolrequests as requests_lib
|
import searx.poolrequests as requests_lib
|
||||||
|
@ -27,6 +29,7 @@ from searx.engines import (
|
||||||
categories, engines, settings
|
categories, engines, settings
|
||||||
)
|
)
|
||||||
from searx.answerers import ask
|
from searx.answerers import ask
|
||||||
|
from searx.external_bang import get_bang_url
|
||||||
from searx.utils import gen_useragent
|
from searx.utils import gen_useragent
|
||||||
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
|
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
|
||||||
from searx.results import ResultContainer
|
from searx.results import ResultContainer
|
||||||
|
@ -54,6 +57,7 @@ else:
|
||||||
else:
|
else:
|
||||||
logger.critical('outgoing.max_request_timeout if defined has to be float')
|
logger.critical('outgoing.max_request_timeout if defined has to be float')
|
||||||
from sys import exit
|
from sys import exit
|
||||||
|
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@ -397,15 +401,16 @@ def get_search_query_from_webapp(preferences, form):
|
||||||
if (engine.name, categ) not in disabled_engines)
|
if (engine.name, categ) not in disabled_engines)
|
||||||
|
|
||||||
query_engines = deduplicate_query_engines(query_engines)
|
query_engines = deduplicate_query_engines(query_engines)
|
||||||
|
external_bang = raw_text_query.external_bang
|
||||||
|
|
||||||
return (SearchQuery(query, query_engines, query_categories,
|
return (SearchQuery(query, query_engines, query_categories,
|
||||||
query_lang, query_safesearch, query_pageno,
|
query_lang, query_safesearch, query_pageno,
|
||||||
query_time_range, query_timeout, preferences),
|
query_time_range, query_timeout, preferences,
|
||||||
|
external_bang=external_bang),
|
||||||
raw_text_query)
|
raw_text_query)
|
||||||
|
|
||||||
|
|
||||||
class Search(object):
|
class Search(object):
|
||||||
|
|
||||||
"""Search information container"""
|
"""Search information container"""
|
||||||
|
|
||||||
def __init__(self, search_query):
|
def __init__(self, search_query):
|
||||||
|
@ -419,6 +424,14 @@ class Search(object):
|
||||||
def search(self):
|
def search(self):
|
||||||
global number_of_searches
|
global number_of_searches
|
||||||
|
|
||||||
|
# Check if there is an external bang. If it is valid, the search stops here and only the redirect url is returned.
|
||||||
|
if self.search_query.external_bang:
|
||||||
|
self.result_container.redirect_url = get_bang_url(self.search_query)
|
||||||
|
|
||||||
|
# This means there was a valid bang and the
|
||||||
|
# rest of the search does not need to be continued
|
||||||
|
if isinstance(self.result_container.redirect_url, six.string_types):
|
||||||
|
return self.result_container
|
||||||
# start time
|
# start time
|
||||||
start_time = time()
|
start_time = time()
|
||||||
|
|
||||||
|
@ -521,7 +534,6 @@ class Search(object):
|
||||||
|
|
||||||
|
|
||||||
class SearchWithPlugins(Search):
|
class SearchWithPlugins(Search):
|
||||||
|
|
||||||
"""Similar to the Search class but call the plugins."""
|
"""Similar to the Search class but call the plugins."""
|
||||||
|
|
||||||
def __init__(self, search_query, ordered_plugin_list, request):
|
def __init__(self, search_query, ordered_plugin_list, request):
|
||||||
|
|
|
@ -575,7 +575,9 @@ def index():
|
||||||
search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
|
search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
|
||||||
# search = Search(search_query) # without plugins
|
# search = Search(search_query) # without plugins
|
||||||
search = SearchWithPlugins(search_query, request.user_plugins, request)
|
search = SearchWithPlugins(search_query, request.user_plugins, request)
|
||||||
|
|
||||||
result_container = search.search()
|
result_container = search.search()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# log exception
|
# log exception
|
||||||
logger.exception('search error')
|
logger.exception('search error')
|
||||||
|
@ -592,6 +594,10 @@ def index():
|
||||||
if number_of_results < result_container.results_length():
|
if number_of_results < result_container.results_length():
|
||||||
number_of_results = 0
|
number_of_results = 0
|
||||||
|
|
||||||
|
# check for an external bang
|
||||||
|
if result_container.redirect_url:
|
||||||
|
return redirect(result_container.redirect_url)
|
||||||
|
|
||||||
# UI
|
# UI
|
||||||
advanced_search = request.form.get('advanced_search', None)
|
advanced_search = request.form.get('advanced_search', None)
|
||||||
|
|
||||||
|
@ -665,6 +671,7 @@ def index():
|
||||||
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
|
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
|
||||||
response.headers.add('Content-Disposition', cont_disp)
|
response.headers.add('Content-Disposition', cont_disp)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
elif output_format == 'rss':
|
elif output_format == 'rss':
|
||||||
response_rss = render(
|
response_rss = render(
|
||||||
'opensearch_response_rss.xml',
|
'opensearch_response_rss.xml',
|
||||||
|
|
|
@ -110,3 +110,24 @@ class SearchTestCase(SearxTestCase):
|
||||||
search = searx.search.Search(search_query)
|
search = searx.search.Search(search_query)
|
||||||
results = search.search()
|
results = search.search()
|
||||||
self.assertEquals(results.results_length(), 1)
|
self.assertEquals(results.results_length(), 1)
|
||||||
|
|
||||||
|
def test_external_bang(self):
    """Only a search query that carries an external bang must produce a redirect url."""

    def build_query(text, **extra):
        # Both cases use the same engines, categories and preferences;
        # only the query text and the optional external bang differ.
        return searx.query.SearchQuery(text,
                                       [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
                                       ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
                                       preferences=Preferences(['oscar'], ['general'], engines, []),
                                       **extra)

    search = searx.search.Search(build_query('yes yes', external_bang="yt"))
    results = search.search()
    # For checking if the user redirected with the youtube external bang
    self.assertIsNotNone(results.redirect_url)

    search = searx.search.Search(build_query('youtube never gonna give you up'))
    results = search.search()
    # This should not redirect
    self.assertIsNone(results.redirect_url)
|
||||||
|
|
|
@ -56,7 +56,8 @@ class ViewsTestCase(SearxTestCase):
|
||||||
results=test_results,
|
results=test_results,
|
||||||
results_number=lambda: 3,
|
results_number=lambda: 3,
|
||||||
results_length=lambda: len(test_results),
|
results_length=lambda: len(test_results),
|
||||||
get_timings=lambda: timings)
|
get_timings=lambda: timings,
|
||||||
|
redirect_url=None)
|
||||||
|
|
||||||
self.setattr4test(Search, 'search', search_mock)
|
self.setattr4test(Search, 'search', search_mock)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue