mirror of
https://github.com/searxng/searxng.git
synced 2025-01-03 15:08:41 +00:00
[fix] google engine - "some results are invalids: invalid content"
Fix google issues listed in `/stats?engine=google` with the message:: some results are invalids: invalid content The log is:: DEBUG searx : result: invalid content: {'url': 'https://de.wikipedia.org/wiki/Foo', 'title': 'Foo - Wikipedia', 'content': None, 'engine': 'google'} WARNING searx.engines.google : ErrorContext('searx/search/processors/abstract.py', 111, 'result_container.extend(self.engine_name, search_results)', None, 'some results are invalids: invalid content', ()) True Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
f0102a95c9
commit
1a0760c10a
1 changed files with 7 additions and 5 deletions
|
@ -353,20 +353,22 @@ def response(resp):
|
||||||
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
|
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
|
||||||
if title_tag is None:
|
if title_tag is None:
|
||||||
# this not one of the common google results *section*
|
# this not one of the common google results *section*
|
||||||
logger.debug('ingoring <div class="g" ../> section: missing title')
|
logger.debug('ingoring item from the result_xpath list: missing title')
|
||||||
continue
|
continue
|
||||||
title = extract_text(title_tag)
|
title = extract_text(title_tag)
|
||||||
url = eval_xpath_getindex(result, href_xpath, 0, None)
|
url = eval_xpath_getindex(result, href_xpath, 0, None)
|
||||||
if url is None:
|
if url is None:
|
||||||
continue
|
continue
|
||||||
content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
|
content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
|
||||||
|
if content is None:
|
||||||
|
logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.debug('add link to results: %s', title)
|
||||||
results.append({'url': url, 'title': title, 'content': content})
|
results.append({'url': url, 'title': title, 'content': content})
|
||||||
|
|
||||||
except Exception as e: # pylint: disable=broad-except
|
except Exception as e: # pylint: disable=broad-except
|
||||||
logger.error(e, exc_info=True)
|
logger.error(e, exc_info=True)
|
||||||
# from lxml import etree
|
|
||||||
# logger.debug(etree.tostring(result, pretty_print=True))
|
|
||||||
# import pdb
|
|
||||||
# pdb.set_trace()
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
|
|
Loading…
Reference in a new issue