[mod] stackoverflow & yandex: detect CAPTCHA response

This commit is contained in:
Alexandre Flament 2020-12-03 13:23:19 +01:00
parent 7905d41487
commit fa909c7c02
2 changed files with 12 additions and 2 deletions

View file

@@ -10,9 +10,10 @@
@parse url, title, content
"""
from urllib.parse import urlencode, urljoin
from urllib.parse import urlencode, urljoin, urlparse
from lxml import html
from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException
# engine dependent config
categories = ['it']
@@ -37,6 +38,10 @@ def request(query, params):
# get response from search-request
def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/nocaptcha'):
raise SearxEngineCaptchaException()
results = []
dom = html.fromstring(resp.text)

View file

@@ -9,9 +9,10 @@
@parse url, title, content
"""
from urllib.parse import urlencode
from urllib.parse import urlencode, urlparse
from lxml import html
from searx import logger
from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine')
@@ -47,6 +48,10 @@ def request(query, params):
# get response from search-request
def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/showcaptcha'):
raise SearxEngineCaptchaException()
dom = html.fromstring(resp.text)
results = []