Mirror of https://github.com/searxng/searxng.git, synced 2024-11-26 21:01:01 +00:00
[mod] stackoverflow & yandex: detect CAPTCHA response
This commit is contained in:
parent 7905d41487
commit fa909c7c02
2 changed files with 12 additions and 2 deletions
|
@@ -10,9 +10,10 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode, urljoin
|
||||
from urllib.parse import urlencode, urljoin, urlparse
|
||||
from lxml import html
|
||||
from searx.utils import extract_text
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
# engine dependent config
|
||||
categories = ['it']
|
||||
|
@@ -37,6 +38,10 @@ def request(query, params):
|
|||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
resp_url = urlparse(resp.url)
|
||||
if resp_url.path.startswith('/nocaptcha'):
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
|
|
@@ -9,9 +9,10 @@
|
|||
@parse url, title, content
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from lxml import html
|
||||
from searx import logger
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
logger = logger.getChild('yandex engine')
|
||||
|
||||
|
@@ -47,6 +48,10 @@ def request(query, params):
|
|||
|
||||
# get response from search-request
|
||||
def response(resp):
|
||||
resp_url = urlparse(resp.url)
|
||||
if resp_url.path.startswith('/showcaptcha'):
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
results = []
|
||||
|
||||
|
|
Loading…
Reference in a new issue