Add time range search support to the Yahoo engine

This commit is contained in:
Noemi Vanyi 2016-07-17 18:42:30 +02:00
parent 3a9c3fbd68
commit 93c0c49e9a
4 changed files with 35 additions and 11 deletions

View file

@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
'shortcut': '-', 'shortcut': '-',
'disabled': False, 'disabled': False,
'suspend_end_time': 0, 'suspend_end_time': 0,
'continuous_errors': 0} 'continuous_errors': 0,
'time_range_support': False}
def load_module(filename): def load_module(filename):

View file

@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
categories = ['general'] categories = ['general']
paging = True paging = True
language_support = True language_support = True
time_range_support = True
# search-url # search-url
base_url = 'https://search.yahoo.com/' base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
# specific xpath variables # specific xpath variables
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@ -32,6 +34,9 @@ title_xpath = './/h3/a'
content_xpath = './/div[@class="compText aAbs"]' content_xpath = './/div[@class="compText aAbs"]'
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
# Yahoo query-string values for each supported time range.
# Each entry is [value for the `age` parameter, value for the `btf` parameter],
# in the order they are substituted into search_url_with_time.
time_range_dict = {'day': ['1d', 'd'],
'week': ['1w', 'w'],
'month': ['1m', 'm']}
# remove yahoo-specific tracking-url # remove yahoo-specific tracking-url
def parse_url(url_string): def parse_url(url_string):
@ -51,18 +56,30 @@ def parse_url(url_string):
return unquote(url_string[start:end]) return unquote(url_string[start:end])
def _get_url(query, offset, language, time_range):
    """Build the Yahoo search URL for one request.

    :param query: search terms, URL-encoded here as the ``p`` parameter
    :param offset: value substituted for the ``b`` (result offset) parameter
    :param language: language code appended to ``vl=lang_``
    :param time_range: key of ``time_range_dict`` ('day', 'week', 'month'),
        or a falsy/unknown value for an unrestricted search
    :returns: absolute URL string starting with ``base_url``
    """
    encoded_query = urlencode({'p': query})

    # time_range is passed through from the request parameters, so it may
    # hold a value this engine does not know.  Only index the mapping with
    # known keys; anything else falls back to the unrestricted search
    # instead of raising KeyError.
    if time_range in time_range_dict:
        age, btf = time_range_dict[time_range]
        return base_url + search_url_with_time.format(offset=offset,
                                                      query=encoded_query,
                                                      lang=language,
                                                      age=age,
                                                      btf=btf)

    return base_url + search_url.format(offset=offset,
                                        query=encoded_query,
                                        lang=language)
def _get_language(params):
if params['language'] == 'all':
return 'en'
return params['language'].split('_')[0]
# do search-request # do search-request
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
language = _get_language(params)
if params['language'] == 'all': params['url'] = _get_url(query, offset, language, params['time_range'])
language = 'en'
else:
language = params['language'].split('_')[0]
params['url'] = base_url + search_url.format(offset=offset,
query=urlencode({'p': query}),
lang=language)
# TODO required? # TODO required?
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\ params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\

View file

@ -138,6 +138,7 @@ class Search(object):
self.paging = False self.paging = False
self.pageno = 1 self.pageno = 1
self.lang = 'all' self.lang = 'all'
self.time_range = None
# set blocked engines # set blocked engines
self.disabled_engines = request.preferences.engines.get_disabled() self.disabled_engines = request.preferences.engines.get_disabled()
@ -178,9 +179,9 @@ class Search(object):
if len(query_obj.languages): if len(query_obj.languages):
self.lang = query_obj.languages[-1] self.lang = query_obj.languages[-1]
self.engines = query_obj.engines self.time_range = self.request_data.get('time_range')
self.categories = [] self.engines = query_obj.engines
# if engines are calculated from query, # if engines are calculated from query,
# set categories by using that informations # set categories by using that informations
@ -279,6 +280,9 @@ class Search(object):
if self.lang != 'all' and not engine.language_support: if self.lang != 'all' and not engine.language_support:
continue continue
if self.time_range and not engine.time_range_support:
continue
# set default request parameters # set default request parameters
request_params = default_request_params() request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent request_params['headers']['User-Agent'] = user_agent
@ -293,6 +297,7 @@ class Search(object):
# 0 = None, 1 = Moderate, 2 = Strict # 0 = None, 1 = Moderate, 2 = Strict
request_params['safesearch'] = request.preferences.get_value('safesearch') request_params['safesearch'] = request.preferences.get_value('safesearch')
request_params['time_range'] = self.time_range
# update request parameters dependent on # update request parameters dependent on
# search-engine (contained in engines folder) # search-engine (contained in engines folder)

View file

@ -459,6 +459,7 @@ def index():
paging=search.paging, paging=search.paging,
number_of_results=format_decimal(number_of_results), number_of_results=format_decimal(number_of_results),
pageno=search.pageno, pageno=search.pageno,
time_range=search.time_range,
base_url=get_base_url(), base_url=get_base_url(),
suggestions=search.result_container.suggestions, suggestions=search.result_container.suggestions,
answers=search.result_container.answers, answers=search.result_container.answers,