rewrite duckduckgo engine and add comments

2024-11-23 11:21:00 +00:00 · 2014-09-02 17:14:57 +02:00 · 2014-09-02 17:14:57 +02:00 · e6e4de8ba0
commit e6e4de8ba0
parent 3d61d9b930
2 changed files with 37 additions and 36 deletions
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@ -1,24 +1,48 @@
 ## DuckDuckGo (Web)
 # 
 # @website     https://duckduckgo.com/
 # @provide-api yes (https://duckduckgo.com/api),
 #              but it does not return all results of the search site
 # 
 # @using-api   no
 # @results     HTML (using search portal)
 # @stable      no (HTML can change)
 # @parse       url, title, content
 #
 # @todo        rewrite to api
 # @todo        language support
 from urllib import urlencode
 from lxml.html import fromstring
 from searx.utils import html_to_text
-url = 'https://duckduckgo.com/html?{query}&s={offset}'
+# engine dependent config
 # NOTE(review): presumably read by the searx engine loader to decide
 # which search categories use this engine -- consumer not visible here
 categories = ['general']
 # NOTE(review): presumably tells searx that this engine supports paging;
 # request() reads the page number from params['pageno']
 paging = True
 # sent by request() as the value of the 'l' query parameter
 locale = 'us-en'
 # search-url: request() fills {query} with the urlencoded query string
 # and {offset} with the result offset of the requested page
 url = 'https://duckduckgo.com/html?{query}&s={offset}'
-def request(query, params):
+# specific xpath variables
    offset = (params['pageno'] - 1) * 30
    q = urlencode({'q': query,
                   'l': locale})
    params['url'] = url.format(query=q, offset=offset)
    return params
 def response(resp):
 # matches <div> elements whose class attribute equals this exact string
 # (presumably one per search result; the loop using it is not shown here)
 result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 # relative to one result node: href attribute(s) of <a class="large">;
 # response() takes the last match as the result url
 url_xpath = './/a[@class="large"]/@href'
 # relative to one result node: all text nodes below <a class="large">;
 # response() joins them to build the title
 title_xpath = './/a[@class="large"]//text()'
 # relative to one result node: all text nodes below <div class="snippet">;
 # response() joins them to build the content
 content_xpath = './/div[@class="snippet"]//text()'
 # do search-request
 def request(query, params):
    """Build the search url for the requested page and store it in params.

    query  -- search terms, sent as the 'q' query parameter
    params -- request parameter dict; 'pageno' is read, 'url' is written

    Returns the same params dict with params['url'] set.
    """
    # page 1 starts at offset 0, every further page advances by 30
    page_index = params['pageno'] - 1
    start = page_index * 30

    # 'q' carries the query, 'l' the module-level locale setting
    encoded_query = urlencode({'q': query, 'l': locale})

    params['url'] = url.format(query=encoded_query, offset=start)
    return params
 # get response from search-request
 def response(resp):
    results = []
    doc = fromstring(resp.text)
@ -28,38 +52,17 @@ def response(resp):
            res_url = r.xpath(url_xpath)[-1]
        except:
            continue
        if not res_url:
            continue
        title = html_to_text(''.join(r.xpath(title_xpath)))
        content = html_to_text(''.join(r.xpath(content_xpath)))
        # append result
        results.append({'title': title,
                        'content': content,
                        'url': res_url})
    return results
 #from json import loads
 #search_url = url + 'd.js?{query}&p=1&s={offset}'
 #
 #paging = True
 #
 #
 #def request(query, params):
 #    offset = (params['pageno'] - 1) * 30
 #    q = urlencode({'q': query,
 #                   'l': locale})
 #    params['url'] = search_url.format(query=q, offset=offset)
 #    return params
 #
 #
 #def response(resp):
 #    results = []
 #    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
 #    for r in search_res:
 #        if not r.get('t'):
 #            continue
 #        results.append({'title': r['t'],
 #                       'content': html_to_text(r['a']),
 #                       'url': r['u']})
    # return results
    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -37,7 +37,6 @@ engines:
  - name : deviantart
    engine : deviantart
    categories : images
    shortcut : da
    timeout: 3.0
@ -47,7 +46,6 @@ engines:
  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
    shortcut : ddg
 # down - website is under criminal investigation by the UK