From 55fee1e45d55063b107924e5a866783428383780 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Tue, 28 Sep 2021 19:01:04 +0200
Subject: [PATCH 1/5] [mod] engines - add Stack Exchange API v2.3

Signed-off-by: Markus Heiser
---
 searx/engines/stackexchange.py | 64 ++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 searx/engines/stackexchange.py

diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py
new file mode 100644
index 000000000..e4092c26f
--- /dev/null
+++ b/searx/engines/stackexchange.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Stack Exchange API v2.3
+
+* https://api.stackexchange.com/
+
+"""
+
+from json import loads
+from urllib.parse import urlencode
+
+about = {
+    "website": 'https://stackexchange.com',
+    "wikidata_id": 'Q3495447',
+    "official_api_documentation": 'https://api.stackexchange.com/docs',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+paging = True
+pagesize = 10
+
+api_site = 'stackoverflow'
+api_sort= 'activity'
+api_order = 'desc'
+
+# https://api.stackexchange.com/docs/advanced-search
+search_api = 'https://api.stackexchange.com/2.3/search/advanced?'
+
+def request(query, params):
+
+    args = urlencode({
+        'q' : query,
+        'page' : params['pageno'],
+        'pagesize' : pagesize,
+        'site' : api_site,
+        'sort' : api_sort,
+        'order': 'desc',
+    })
+    params['url'] = search_api + args
+
+    return params
+
+def response(resp):
+
+    results = []
+    json_data = loads(resp.text)
+
+    for result in json_data['items']:
+
+        content = "[%s]" % ", ".join(result['tags'])
+        content += " %s" % result['owner']['display_name']
+        if result['is_answered']:
+            content += ' // is answered'
+        content += " // score: %s" % result['score']
+
+        results.append({
+            'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
+            'title': result['title'],
+            'content': content,
+        })
+
+    return results

From b62851559b235b3fa7e833749f8d10597b7de6f2 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Tue, 28 Sep 2021 19:02:57 +0200
Subject: [PATCH 2/5] [mod] replace old stackoverflow engine by Stack Exchange API v2.3

Signed-off-by: Markus Heiser
---
 searx/engines/stackoverflow.py | 64 ----------------------------------
 searx/settings.yml             |  4 ++-
 2 files changed, 3 insertions(+), 65 deletions(-)
 delete mode 100644 searx/engines/stackoverflow.py

diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
deleted file mode 100644
index 8fc2cdb3a..000000000
--- a/searx/engines/stackoverflow.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Stackoverflow (IT)
-"""
-
-from urllib.parse import urlencode, urljoin
-from lxml import html
-from searx.utils import extract_text
-from searx.exceptions import SearxEngineCaptchaException
-
-# about
-about = {
-    "website": 'https://stackoverflow.com/',
-    "wikidata_id": 'Q549037',
-    "official_api_documentation": 'https://api.stackexchange.com/docs',
-    "use_official_api": False,
-    "require_api_key": False,
-    "results": 'HTML',
-}
-
-# engine dependent config
-categories = ['it']
-paging = True
-
-# search-url
-url = 'https://stackoverflow.com/'
-search_url = url + 'search?{query}&page={pageno}'
-
-# specific xpath variables
-results_xpath = '//div[contains(@class,"question-summary")]'
-link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-content_xpath = './/div[@class="excerpt"]'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    if resp.url.path.startswith('/nocaptcha'):
-        raise SearxEngineCaptchaException()
-
-    results = []
-
-    dom = html.fromstring(resp.text)
-
-    # parse results
-    for result in dom.xpath(results_xpath):
-        link = result.xpath(link_xpath)[0]
-        href = urljoin(url, link.attrib.get('href'))
-        title = extract_text(link)
-        content = extract_text(result.xpath(content_xpath))
-
-        # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': content})
-
-    # return results
-    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 3fefb62fc..a56028774 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1125,8 +1125,10 @@ engines:
     shortcut: sc
 
   - name: stackoverflow
-    engine: stackoverflow
+    engine: stackexchange
     shortcut: st
+    api_site: 'stackoverflow'
+    categories: it
 
   - name: searchcode code
     engine: searchcode_code

From 29eb06ab166c3d83512e12ec805e23ca5500b8e2 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Tue, 28 Sep 2021 19:17:26 +0200
Subject: [PATCH 3/5] [mod] engines - add askubuntu.com (Stack Exchange API)

Signed-off-by: Markus Heiser
---
 searx/settings.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/searx/settings.yml b/searx/settings.yml
index a56028774..d6e09066d 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1130,6 +1130,12 @@ engines:
     api_site: 'stackoverflow'
     categories: it
 
+  - name: askubuntu
+    engine: stackexchange
+    shortcut: ubuntu
+    api_site: 'askubuntu'
+    categories: it
+
   - name: searchcode code
     engine: searchcode_code
     shortcut: scc

From 5efe77bdf5dac8e8b14cbd6be39325eb519af79a Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Tue, 28 Sep 2021 19:20:13 +0200
Subject: [PATCH 4/5] [mod] engines - add superuser.com (Stack Exchange API)

Signed-off-by: Markus Heiser
---
 searx/settings.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/searx/settings.yml b/searx/settings.yml
index d6e09066d..207024192 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1136,6 +1136,12 @@ engines:
     api_site: 'askubuntu'
     categories: it
 
+  - name: superuser
+    engine: stackexchange
+    shortcut: su
+    api_site: 'superuser'
+    categories: it
+
   - name: searchcode code
     engine: searchcode_code
     shortcut: scc

From ecb3912bd000bddd10841775ecada538386818c5 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Wed, 29 Sep 2021 08:08:18 +0200
Subject: [PATCH 5/5] [fix] engine stackexchange - decode HTML entities in title & content

Signed-off-by: Markus Heiser
---
 searx/engines/stackexchange.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py
index e4092c26f..34cba687c 100644
--- a/searx/engines/stackexchange.py
+++ b/searx/engines/stackexchange.py
@@ -6,6 +6,7 @@
 
 """
 
+import html
 from json import loads
 from urllib.parse import urlencode
 
@@ -57,8 +58,8 @@ def response(resp):
 
         results.append({
             'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
-            'title': result['title'],
-            'content': content,
+            'title': html.unescape(result['title']),
+            'content': html.unescape(content),
         })
 
     return results