[mod] results.py: code clean up (#2140)

This commit is contained in:
Alexandre Flament 2020-09-08 10:09:11 +02:00 committed by GitHub
parent f0ca1c3483
commit d0f9778c2a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -3,6 +3,7 @@ import sys
from collections import defaultdict from collections import defaultdict
from operator import itemgetter from operator import itemgetter
from threading import RLock from threading import RLock
from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.url_utils import urlparse, unquote from searx.url_utils import urlparse, unquote
@ -127,9 +128,11 @@ def result_score(result):
class ResultContainer(object): class ResultContainer(object):
"""docstring for ResultContainer""" """docstring for ResultContainer"""
__slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url'
def __init__(self): def __init__(self):
super(ResultContainer, self).__init__() super(ResultContainer, self).__init__()
self.results = defaultdict(list)
self._merged_results = [] self._merged_results = []
self.infoboxes = [] self.infoboxes = []
self.suggestions = set() self.suggestions = set()
@ -143,51 +146,40 @@ class ResultContainer(object):
self.redirect_url = None self.redirect_url = None
def extend(self, engine_name, results): def extend(self, engine_name, results):
standard_result_count = 0
for result in list(results): for result in list(results):
result['engine'] = engine_name result['engine'] = engine_name
if 'suggestion' in result: if 'suggestion' in result:
self.suggestions.add(result['suggestion']) self.suggestions.add(result['suggestion'])
results.remove(result)
elif 'answer' in result: elif 'answer' in result:
self.answers[result['answer']] = result self.answers[result['answer']] = result
results.remove(result)
elif 'correction' in result: elif 'correction' in result:
self.corrections.add(result['correction']) self.corrections.add(result['correction'])
results.remove(result)
elif 'infobox' in result: elif 'infobox' in result:
self._merge_infobox(result) self._merge_infobox(result)
results.remove(result)
elif 'number_of_results' in result: elif 'number_of_results' in result:
self._number_of_results.append(result['number_of_results']) self._number_of_results.append(result['number_of_results'])
results.remove(result) else:
# standard result (url, title, content)
if 'url' in result and not isinstance(result['url'], basestring):
logger.debug('result: invalid URL: %s', str(result))
elif 'title' in result and not isinstance(result['title'], basestring):
logger.debug('result: invalid title: %s', str(result))
elif 'content' in result and not isinstance(result['content'], basestring):
logger.debug('result: invalid content: %s', str(result))
else:
self._merge_result(result, standard_result_count + 1)
standard_result_count += 1
if engine_name in engines: if engine_name in engines:
with RLock(): with RLock():
engines[engine_name].stats['search_count'] += 1 engines[engine_name].stats['search_count'] += 1
engines[engine_name].stats['result_count'] += len(results) engines[engine_name].stats['result_count'] += standard_result_count
if not results: if not self.paging and standard_result_count > 0 and engine_name in engines\
return and engines[engine_name].paging:
self.results[engine_name].extend(results)
if not self.paging and engine_name in engines and engines[engine_name].paging:
self.paging = True self.paging = True
for i, result in enumerate(results):
if 'url' in result and not isinstance(result['url'], basestring):
continue
try:
result['url'] = result['url'].decode('utf-8')
except:
pass
if 'title' in result and not isinstance(result['title'], basestring):
continue
if 'content' in result and not isinstance(result['content'], basestring):
continue
position = i + 1
self._merge_result(result, position)
def _merge_infobox(self, infobox): def _merge_infobox(self, infobox):
add_infobox = True add_infobox = True
infobox_id = infobox.get('id', None) infobox_id = infobox.get('id', None)