"""Stack Overflow search engine: builds the search URL and scrapes results.

Origin: commit e467a24843b62ff444b20e81a7b22b0cc5783a9e by asciimoo
(Thu, 17 Oct 2013), "[enh] stackoverflow added",
file searx/engines/stackoverflow.py.
"""

try:  # Python 3
    from urllib.parse import quote_plus, urljoin
except ImportError:  # Python 2
    from urllib import quote_plus
    from urlparse import urljoin

base_url = 'http://stackoverflow.com/'
search_url = base_url + 'search?q='


def request(query, params):
    """Store the Stack Overflow search URL for *query* in ``params['url']``.

    Args:
        query: The search terms, as text or bytes.
        params: Mutable mapping of request parameters; updated in place.

    Returns:
        The same ``params`` object, with its ``'url'`` key set.
    """
    # Encode text to UTF-8 first: on Python 2 the quoting functions raise
    # KeyError for non-ASCII unicode input; on Python 3 bytes are accepted.
    if not isinstance(query, bytes):
        query = query.encode('utf-8')
    # quote_plus turns spaces into '+' and escapes a literal '+' as '%2B',
    # so a query such as "c++" is not decoded as "c" plus two spaces.
    params['url'] = search_url + quote_plus(query)
    return params


def response(resp):
    """Extract search results from a Stack Overflow result page.

    Args:
        resp: Response object exposing the page HTML as ``resp.text``.

    Returns:
        A list of dicts with the keys ``'url'``, ``'title'`` and
        ``'content'``, one per result that carries a usable link.
    """
    # Imported here so that building request URLs does not require lxml.
    from lxml import html

    results = []
    text = resp.text
    # Nothing to parse: return no results rather than handing an empty
    # document to the parser.
    if not text or not text.strip():
        return results

    dom = html.fromstring(text)
    for result in dom.xpath('//div[@class="question-summary search-result"]'):
        links = result.xpath('.//div[@class="result-link"]//a')
        # Skip a malformed entry instead of failing the whole page.
        if not links:
            continue
        link = links[0]
        href = link.attrib.get('href')
        if not href:
            continue
        # hrefs may be relative; resolve them against the site root.
        url = urljoin(base_url, href)
        title = ' '.join(link.xpath('.//text()'))
        content = ' '.join(result.xpath('.//div[@class="excerpt"]//text()'))
        results.append({'url': url, 'title': title, 'content': content})
    return results