mirror of
https://github.com/searxng/searxng.git
synced 2024-12-12 20:36:27 +00:00
[enh] use longest title and test get_ordered_results()
This commit is contained in:
parent
94aafc83a6
commit
6948689d2a
2 changed files with 55 additions and 17 deletions
|
@ -12,7 +12,6 @@ from searx import logger
|
|||
from searx.engines import engines
|
||||
from searx.metrics import histogram_observe, counter_add, count_error
|
||||
|
||||
|
||||
# Characters to ignore (per the constant's name) when measuring the length of
# a result's text: , ; : ! ? . / \ space ( ) _ and -
# The hyphen is deliberately the LAST character of the class so that it is a
# literal.  Written as ``)-_`` it formed a range from ')' to '_' which also
# matched digits, upper-case ASCII letters and ``* + < = > @ [ ] ^``.
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()_-]', re.M | re.U)
|
||||
# Matches a run of one or more consecutive spaces, tabs or newlines.
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
|
||||
|
@ -133,7 +132,7 @@ def result_score(result, priority):
|
|||
weight = 1.0
|
||||
|
||||
for result_engine in result['engines']:
|
||||
if hasattr(engines[result_engine], 'weight'):
|
||||
if hasattr(engines.get(result_engine), 'weight'):
|
||||
weight *= float(engines[result_engine].weight)
|
||||
|
||||
weight *= len(result['positions'])
|
||||
|
@ -332,10 +331,14 @@ class ResultContainer:
|
|||
return None
|
||||
|
||||
def __merge_duplicated_http_result(self, duplicated, result, position):
|
||||
# using content with more text
|
||||
# use content with more text
|
||||
if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
|
||||
duplicated['content'] = result['content']
|
||||
|
||||
# use title with more text
|
||||
if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
|
||||
duplicated['title'] = result['title']
|
||||
|
||||
# merge all result's parameters not found in duplicate
|
||||
for key in result.keys():
|
||||
if not duplicated.get(key):
|
||||
|
@ -347,7 +350,7 @@ class ResultContainer:
|
|||
# add engine to list of result-engines
|
||||
duplicated['engines'].add(result['engine'])
|
||||
|
||||
# using https if possible
|
||||
# use https if possible
|
||||
if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
|
||||
duplicated['url'] = result['parsed_url'].geturl()
|
||||
duplicated['parsed_url'] = result['parsed_url']
|
||||
|
|
|
@ -2,9 +2,26 @@
|
|||
# pylint: disable=missing-module-docstring
|
||||
|
||||
from searx.results import ResultContainer
|
||||
from searx.engines import load_engines
|
||||
from tests import SearxTestCase
|
||||
|
||||
|
||||
def make_test_engine_dict(**kwargs) -> dict:
    """Return an engine settings dict for use in tests.

    A fresh dict of default values is created on every call and then
    overridden / extended with the given keyword arguments (for example
    ``name``, ``engine`` or ``shortcut``).
    """
    test_engine = {
        # fmt: off
        'name': None,
        'engine': None,
        'categories': 'general',
        'shortcut': 'dummy',
        'timeout': 3.0,
        'tokens': [],
        # fmt: on
    }

    test_engine.update(**kwargs)
    return test_engine
|
||||
|
||||
|
||||
def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs):
|
||||
result = {
|
||||
# fmt: off
|
||||
|
@ -19,23 +36,41 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', eng
|
|||
|
||||
|
||||
class ResultContainerTestCase(SearxTestCase):  # pylint: disable=missing-class-docstring
    def setUp(self) -> None:
        # Load the three engines that test_result_merge_by_title refers to and
        # give every test its own, initially empty, ResultContainer.
        stract_engine = make_test_engine_dict(name="stract", engine="stract", shortcut="stra")
        duckduckgo_engine = make_test_engine_dict(name="duckduckgo", engine="duckduckgo", shortcut="ddg")
        mojeek_engine = make_test_engine_dict(name="mojeek", engine="mojeek", shortcut="mjk")

        load_engines([stract_engine, duckduckgo_engine, mojeek_engine])

        self.container = ResultContainer()

    def tearDown(self):
        # Reload with an empty engine list -- presumably this undoes the
        # registration done in setUp so other test cases are not affected.
        load_engines([])

    def test_empty(self):
        # A container that received nothing yields no ordered results.
        self.assertEqual(self.container.get_ordered_results(), [])

    def test_one_result(self):
        self.container.extend('wikipedia', [fake_result()])

        self.assertEqual(self.container.results_length(), 1)

    def test_one_suggestion(self):
        # A suggestion is collected separately and is not counted as a result.
        self.container.extend('wikipedia', [fake_result(suggestion=True)])

        self.assertEqual(len(self.container.suggestions), 1)
        self.assertEqual(self.container.results_length(), 0)

    def test_result_merge(self):
        # The same URL reported by two engines is merged into one result; the
        # second, different URL stays a result of its own.
        self.container.extend('wikipedia', [fake_result()])
        self.container.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')])

        self.assertEqual(self.container.results_length(), 2)

    def test_result_merge_by_title(self):
        # Three engines return the same URL with different titles; the merged
        # result is expected to carry the longest one.
        self.container.extend('stract', [fake_result(engine='stract', title='short title')])
        self.container.extend('duckduckgo', [fake_result(engine='duckduckgo', title='normal title')])
        self.container.extend('mojeek', [fake_result(engine='mojeek', title='this long long title')])

        self.assertEqual(self.container.get_ordered_results()[0].get('title', ''), 'this long long title')
|
||||
|
|
Loading…
Reference in a new issue