mirror of
https://github.com/searxng/searxng.git
synced 2024-12-20 16:26:33 +00:00
[fix] html tag removal
This commit is contained in:
parent
ba0f818e89
commit
59eeeaab87
1 changed file with 2 additions and 1 deletion
|
@@ -2,6 +2,7 @@ from lxml import html
|
||||||
from urllib import urlencode, unquote
|
from urllib import urlencode, unquote
|
||||||
from urlparse import urlparse, urljoin
|
from urlparse import urlparse, urljoin
|
||||||
from lxml.etree import _ElementStringResult
|
from lxml.etree import _ElementStringResult
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
search_url = None
|
search_url = None
|
||||||
url_xpath = None
|
url_xpath = None
|
||||||
|
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
|
||||||
return ''.join(xpath_results)
|
return ''.join(xpath_results)
|
||||||
else:
|
else:
|
||||||
# it's a element
|
# it's a element
|
||||||
return xpath_results.text_content()
|
return html_to_text(xpath_results.text_content())
|
||||||
|
|
||||||
|
|
||||||
def extract_url(xpath_results):
|
def extract_url(xpath_results):
|
||||||
|
|
Loading…
Reference in a new issue