[enh] search language support init

This commit is contained in:
asciimoo 2014-01-31 04:35:23 +01:00
parent 90a9342286
commit 2a788c8f29
9 changed files with 162 additions and 13 deletions

View file

@ -53,8 +53,14 @@ if not 'engines' in settings or not settings['engines']:
for engine_data in settings['engines']: for engine_data in settings['engines']:
engine_name = engine_data['engine'] engine_name = engine_data['engine']
engine = load_module(engine_name + '.py') engine = load_module(engine_name + '.py')
if not hasattr(engine, 'paging'): if not hasattr(engine, 'paging'):
engine.paging = False engine.paging = False
if not hasattr(engine, 'language_support'):
#engine.language_support = False
engine.language_support = True
for param_name in engine_data: for param_name in engine_data:
if param_name == 'engine': if param_name == 'engine':
continue continue
@ -158,7 +164,7 @@ def score_results(results):
return sorted(results, key=itemgetter('score'), reverse=True) return sorted(results, key=itemgetter('score'), reverse=True)
def search(query, request, selected_engines, pageno=1): def search(query, request, selected_engines, pageno=1, lang='all'):
global engines, categories, number_of_searches global engines, categories, number_of_searches
requests = [] requests = []
results = {} results = {}
@ -176,11 +182,15 @@ def search(query, request, selected_engines, pageno=1):
if pageno > 1 and not engine.paging: if pageno > 1 and not engine.paging:
continue continue
if lang != 'all' and not engine.language_support:
continue
request_params = default_request_params() request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent request_params['headers']['User-Agent'] = user_agent
request_params['category'] = selected_engine['category'] request_params['category'] = selected_engine['category']
request_params['started'] = datetime.now() request_params['started'] = datetime.now()
request_params['pageno'] = pageno request_params['pageno'] = pageno
request_params['language'] = lang
request_params = engine.request(query, request_params) request_params = engine.request(query, request_params)
callback = make_callback( callback = make_callback(

View file

@ -4,16 +4,22 @@ from cgi import escape
base_url = 'http://www.bing.com/' base_url = 'http://www.bing.com/'
search_string = 'search?{query}&first={offset}' search_string = 'search?{query}&first={offset}'
locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
paging = True paging = True
language_support = True
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
search_path = search_string.format( search_path = search_string.format(
query=urlencode({'q': query, 'setmkt': locale}), query=urlencode({'q': query, 'setmkt': language}),
offset=offset) offset=offset)
params['cookies']['SRCHHPGUSR'] = \
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
#if params['category'] == 'images': #if params['category'] == 'images':
# params['url'] = base_url + 'images/' + search_path # params['url'] = base_url + 'images/' + search_path
params['url'] = base_url + search_path params['url'] = base_url + search_path

View file

@ -5,16 +5,21 @@ from json import loads
categories = ['general'] categories = ['general']
paging = True
url = 'https://ajax.googleapis.com/' url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/web?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
paging = True
language_support = True
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 8 offset = (params['pageno'] - 1) * 8
language = 'en-US'
if params['language'] != 'all':
language = params['language'].replace('_', '-')
params['url'] = search_url.format(offset=offset, params['url'] = search_url.format(offset=offset,
query=urlencode({'q': query})) query=urlencode({'q': query}),
language=language)
return params return params

View file

@ -0,0 +1,30 @@
from json import loads
from urllib import urlencode, quote
# Site root of the Wikipedia edition; {language} is filled in per request.
url = 'https://{language}.wikipedia.org/'
# MediaWiki search API endpoint, built on top of the site root above.
# {query} receives the url-encoded search parameter and {offset} the index
# of the first hit to return.
search_url = (url
              + 'w/api.php?action=query&list=search&{query}'
              '&srprop=timestamp&format=json&sroffset={offset}')
# Upper bound on the number of hits turned into results by response().
number_of_results = 10
# This engine can restrict its results to a selected search language.
language_support = True
def request(query, params):
    """Fill in the Wikipedia search API URL for *query*.

    Reads ``params['pageno']`` and ``params['language']``, overwrites
    ``params['language']`` with the bare language prefix that was used as
    the Wikipedia subdomain, stores the request URL in ``params['url']``
    and returns the same ``params`` dict.
    """
    # Pages are 1-based and ten hits wide.
    first_hit = 10 * (params['pageno'] - 1)

    requested = params['language']
    # 'all' falls back to English; otherwise keep only the part before the
    # underscore (e.g. 'de_AT' -> 'de') since it names the subdomain.
    language = 'en' if requested == 'all' else requested.split('_')[0]

    # response() later reads resp.search_params['language'] to build result
    # links -- presumably this same dict, so the reduced code is kept here.
    params['language'] = language

    params['url'] = search_url.format(
        language=language,
        offset=first_hit,
        query=urlencode({'srsearch': query}))
    return params
def response(resp):
    """Turn a MediaWiki search API reply into a list of result dicts.

    ``resp.text`` is parsed as JSON and the hits under ``query.search`` are
    read (an empty list when either key is absent).  At most
    ``number_of_results`` hits are converted; each result carries the
    article ``url`` and its ``title``.
    """
    payload = loads(resp.text)
    hits = payload.get('query', {}).get('search', [])

    results = []
    # int() tolerates a non-integer limit -- presumably for values
    # overridden from the engine settings; verify against the loader.
    for hit in hits[:int(number_of_results)]:
        # Same language edition that request() queried.
        wiki_root = url.format(language=resp.search_params['language'])
        # Article paths use underscores for spaces and must be percent-
        # encoded from their UTF-8 bytes.
        page_name = quote(hit['title'].replace(' ', '_').encode('utf-8'))
        results.append({'url': wiki_root + 'wiki/' + page_name,
                        'title': hit['title']})
    return results

59
searx/languages.py Normal file
View file

@ -0,0 +1,59 @@
# Search languages offered to the user.
# Each entry is (locale code, language name, country/region name).
# The locale code uses an underscore separator; the bing and google engines
# replace it with '-' and the wikipedia engine keeps only the part before it.
# NOTE(review): "sl_SL" is not the ISO 3166 country code for Slovenia ("SI");
# presumably it mirrors an upstream engine market list -- confirm before
# changing, since the code is also stored in the user's 'language' cookie.
language_codes = (
("ar_XA", "Arabic", "Arabia"),
("bg_BG", "Bulgarian", "Bulgaria"),
("cs_CZ", "Czech", "Czech Republic"),
("de_DE", "German", "Germany"),
("da_DK", "Danish", "Denmark"),
("de_AT", "German", "Austria"),
("de_CH", "German", "Switzerland"),
("el_GR", "Greek", "Greece"),
("en_AU", "English", "Australia"),
("en_CA", "English", "Canada"),
("en_GB", "English", "United Kingdom"),
("en_ID", "English", "Indonesia"),
("en_IE", "English", "Ireland"),
("en_IN", "English", "India"),
("en_MY", "English", "Malaysia"),
("en_NZ", "English", "New Zealand"),
("en_PH", "English", "Philippines"),
("en_SG", "English", "Singapore"),
("en_US", "English", "United States"),
("en_XA", "English", "Arabia"),
("en_ZA", "English", "South Africa"),
("es_AR", "Spanish", "Argentina"),
("es_CL", "Spanish", "Chile"),
("es_ES", "Spanish", "Spain"),
("es_MX", "Spanish", "Mexico"),
("es_US", "Spanish", "United States"),
("es_XL", "Spanish", "Latin America"),
("et_EE", "Estonian", "Estonia"),
("fi_FI", "Finnish", "Finland"),
("fr_BE", "French", "Belgium"),
("fr_CA", "French", "Canada"),
("fr_CH", "French", "Switzerland"),
("fr_FR", "French", "France"),
("he_IL", "Hebrew", "Israel"),
("hr_HR", "Croatian", "Croatia"),
("hu_HU", "Hungarian", "Hungary"),
("it_IT", "Italian", "Italy"),
("ja_JP", "Japanese", "Japan"),
("ko_KR", "Korean", "Korea"),
("lt_LT", "Lithuanian", "Lithuania"),
("lv_LV", "Latvian", "Latvia"),
("nb_NO", "Norwegian", "Norway"),
("nl_BE", "Dutch", "Belgium"),
("nl_NL", "Dutch", "Netherlands"),
("pl_PL", "Polish", "Poland"),
("pt_BR", "Portuguese", "Brazil"),
("pt_PT", "Portuguese", "Portugal"),
("ro_RO", "Romanian", "Romania"),
("ru_RU", "Russian", "Russia"),
("sk_SK", "Slovak", "Slovak Republic"),
("sl_SL", "Slovenian", "Slovenia"),
("sv_SE", "Swedish", "Sweden"),
("th_TH", "Thai", "Thailand"),
("tr_TR", "Turkish", "Turkey"),
("uk_UA", "Ukrainian", "Ukraine"),
("zh_CN", "Chinese", "China"),
("zh_HK", "Chinese", "Hong Kong SAR"),
("zh_TW", "Chinese", "Taiwan"))

View file

@ -7,8 +7,7 @@ server:
engines: engines:
- name : wikipedia - name : wikipedia
engine : mediawiki engine : wikipedia
url : https://en.wikipedia.org/
number_of_results : 1 number_of_results : 1
paging : False paging : False

View file

@ -152,7 +152,7 @@ tr:hover td { background: #DDDDDD; }
#results { margin: 10px; padding: 0; margin-bottom: 20px; } #results { margin: 10px; padding: 0; margin-bottom: 20px; }
#sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; } #sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; }
#suggestions span { display: block; margin: 0 2px 10px 2px; padding: 0; } #suggestions span { display: block; margin: 0 2px 2px 2px; padding: 0; }
#suggestions form { display: block; } #suggestions form { display: block; }
#suggestions input { padding: 2px 6px; margin: 2px 4px; font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; } #suggestions input { padding: 2px 6px; margin: 2px 4px; font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; }
@ -177,6 +177,7 @@ tr:hover td { background: #DDDDDD; }
} }
#apis { #apis {
margin-top: 8px;
clear: both; clear: both;
} }

View file

@ -11,6 +11,17 @@
{% include 'categories.html' %} {% include 'categories.html' %}
</p> </p>
</fieldset> </fieldset>
<fieldset>
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes %}
<option value={{ lang_id }} {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name}} ({{ country_name }})</option>
{% endfor %}
</select>
</p>
</fieldset>
<fieldset> <fieldset>
<legend>{{ _('Interface language') }}</legend> <legend>{{ _('Interface language') }}</legend>
<p> <p>

View file

@ -29,6 +29,7 @@ from searx import settings, searx_dir
from searx.engines import search, categories, engines, get_engines_stats from searx.engines import search, categories, engines, get_engines_stats
from searx.utils import UnicodeWriter from searx.utils import UnicodeWriter
from searx.utils import highlight_content, html_to_text from searx.utils import highlight_content, html_to_text
from searx.languages import language_codes
from flask.ext.babel import Babel from flask.ext.babel import Babel
@ -117,6 +118,11 @@ def parse_query(query):
@app.route('/', methods=['GET', 'POST']) @app.route('/', methods=['GET', 'POST'])
def index(): def index():
paging = False paging = False
lang = 'all'
if request.cookies.get('language')\
and request.cookies['language'] in (x[0] for x in language_codes):
lang = request.cookies['language']
if request.method == 'POST': if request.method == 'POST':
request_data = request.form request_data = request.form
@ -159,7 +165,11 @@ def index():
'name': x.name} 'name': x.name}
for x in categories[categ]) for x in categories[categ])
results, suggestions = search(query, request, selected_engines, pageno) results, suggestions = search(query,
request,
selected_engines,
pageno,
lang)
for result in results: for result in results:
if not paging and engines[result['engine']].paging: if not paging and engines[result['engine']].paging:
@ -232,6 +242,11 @@ def list_engines():
@app.route('/preferences', methods=['GET', 'POST']) @app.route('/preferences', methods=['GET', 'POST'])
def preferences(): def preferences():
lang = None
if request.cookies.get('language')\
and request.cookies['language'] in (x[0] for x in language_codes):
lang = request.cookies['language']
if request.method == 'POST': if request.method == 'POST':
selected_categories = [] selected_categories = []
@ -244,6 +259,10 @@ def preferences():
selected_categories.append(category) selected_categories.append(category)
elif pd_name == 'locale' and pd in settings['locales']: elif pd_name == 'locale' and pd in settings['locales']:
locale = pd locale = pd
elif pd_name == 'language' and (pd == 'all' or
pd in (x[0] for
x in language_codes)):
lang = pd
resp = make_response(redirect('/')) resp = make_response(redirect('/'))
@ -254,6 +273,13 @@ def preferences():
max_age=60 * 60 * 24 * 7 * 4 max_age=60 * 60 * 24 * 7 * 4
) )
if lang:
# cookie max age: 4 weeks
resp.set_cookie(
'language', lang,
max_age=60 * 60 * 24 * 7 * 4
)
if selected_categories: if selected_categories:
# cookie max age: 4 weeks # cookie max age: 4 weeks
resp.set_cookie( resp.set_cookie(
@ -263,7 +289,9 @@ def preferences():
return resp return resp
return render('preferences.html', return render('preferences.html',
locales=settings['locales'], locales=settings['locales'],
current_locale=get_locale()) current_locale=get_locale(),
current_language=lang or 'all',
language_codes=language_codes)
@app.route('/stats', methods=['GET']) @app.route('/stats', methods=['GET'])