diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py
new file mode 100644
index 000000000..34cba687c
--- /dev/null
+++ b/searx/engines/stackexchange.py
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Stack Exchange API v2.3
+
+* https://api.stackexchange.com/
+
+"""
+
+import html
+from json import loads
+from urllib.parse import urlencode
+
+about = {
+    "website": 'https://stackexchange.com',
+    "wikidata_id": 'Q3495447',
+    "official_api_documentation": 'https://api.stackexchange.com/docs',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+paging = True
+pagesize = 10
+
+api_site = 'stackoverflow'
+api_sort = 'activity'
+api_order = 'desc'
+
+# https://api.stackexchange.com/docs/advanced-search
+search_api = 'https://api.stackexchange.com/2.3/search/advanced?'
+
+def request(query, params):
+
+    args = urlencode({
+        'q': query,
+        'page': params['pageno'],
+        'pagesize': pagesize,
+        'site': api_site,
+        'sort': api_sort,
+        'order': api_order,
+    })
+    params['url'] = search_api + args
+
+    return params
+
+def response(resp):
+
+    results = []
+    json_data = loads(resp.text)
+
+    for result in json_data['items']:
+
+        content = "[%s]" % ", ".join(result['tags'])
+        content += " %s" % result['owner']['display_name']
+        if result['is_answered']:
+            content += ' // is answered'
+        content += " // score: %s" % result['score']
+
+        results.append({
+            'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
+            'title': html.unescape(result['title']),
+            'content': html.unescape(content),
+        })
+
+    return results
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
deleted file mode 100644
index 8fc2cdb3a..000000000
--- a/searx/engines/stackoverflow.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Stackoverflow (IT)
-"""
-
-from urllib.parse import urlencode, urljoin
-from lxml import html
-from searx.utils import extract_text
-from searx.exceptions import SearxEngineCaptchaException
-
-# about
-about = {
-    "website": 'https://stackoverflow.com/',
-    "wikidata_id": 'Q549037',
-    "official_api_documentation": 'https://api.stackexchange.com/docs',
-    "use_official_api": False,
-    "require_api_key": False,
-    "results": 'HTML',
-}
-
-# engine dependent config
-categories = ['it']
-paging = True
-
-# search-url
-url = 'https://stackoverflow.com/'
-search_url = url + 'search?{query}&page={pageno}'
-
-# specific xpath variables
-results_xpath = '//div[contains(@class,"question-summary")]'
-link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-content_xpath = './/div[@class="excerpt"]'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    if resp.url.path.startswith('/nocaptcha'):
-        raise SearxEngineCaptchaException()
-
-    results = []
-
-    dom = html.fromstring(resp.text)
-
-    # parse results
-    for result in dom.xpath(results_xpath):
-        link = result.xpath(link_xpath)[0]
-        href = urljoin(url, link.attrib.get('href'))
-        title = extract_text(link)
-        content = extract_text(result.xpath(content_xpath))
-
-        # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': content})
-
-    # return results
-    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 3fefb62fc..207024192 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1125,8 +1125,22 @@ engines:
     shortcut: sc
 
   - name: stackoverflow
-    engine: stackoverflow
+    engine: stackexchange
     shortcut: st
+    api_site: 'stackoverflow'
+    categories: it
+
+  - name: askubuntu
+    engine: stackexchange
+    shortcut: ubuntu
+    api_site: 'askubuntu'
+    categories: it
+
+  - name: superuser
+    engine: stackexchange
+    shortcut: su
+    api_site: 'superuser'
+    categories: it
 
   - name: searchcode code
     engine: searchcode_code
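
For review purposes, here is a minimal smoke-test sketch (not part of the patch) for the new engine. It loads searx/engines/stackexchange.py directly by file path from the repository root, so the rest of the searx package does not have to be importable, builds the request URL the way searx would for page 1, and feeds a made-up FakeResponse through response(). FakeResponse and its JSON payload are assumptions for illustration only; the real payload comes from api.stackexchange.com.

# Minimal sketch, not part of the patch: exercise request()/response() standalone.
# Assumes it is run from the repository root so the engine file path resolves.
import importlib.util
import json

spec = importlib.util.spec_from_file_location(
    'stackexchange', 'searx/engines/stackexchange.py')
stackexchange = importlib.util.module_from_spec(spec)
spec.loader.exec_module(stackexchange)

# Build the outgoing request the way searx would for page 1 of a query.
params = stackexchange.request('list comprehension', {'pageno': 1})
print(params['url'])
# e.g. https://api.stackexchange.com/2.3/search/advanced?q=list+comprehension&page=1&pagesize=10&site=stackoverflow&sort=activity&order=desc


class FakeResponse:
    """Made-up stand-in for the HTTP response object searx hands to response()."""
    text = json.dumps({
        'items': [{
            'tags': ['python'],
            'owner': {'display_name': 'someone'},
            'is_answered': True,
            'score': 42,
            'question_id': 123456,
            'title': 'How do list comprehensions work?',
        }]
    })


for item in stackexchange.response(FakeResponse()):
    print(item['url'], '-', item['title'], '-', item['content'])

These two calls are all searx performs per query, so the sketch also documents which JSON fields the engine depends on: items, tags, owner.display_name, is_answered, score, question_id and title.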