From f7bdd827c4cfd92fe182d5806f1e2c35352feed9 Mon Sep 17 00:00:00 2001 From: Jonas Zohren Date: Wed, 13 Feb 2019 00:37:29 +0100 Subject: [PATCH] [enh] adds apkmirror search engine --- searx/engines/apkmirror.py | 61 ++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 6 ++++ 2 files changed, 67 insertions(+) create mode 100644 searx/engines/apkmirror.py diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py new file mode 100644 index 000000000..f2ee12b29 --- /dev/null +++ b/searx/engines/apkmirror.py @@ -0,0 +1,61 @@ +""" + APK Mirror + + @website https://www.apkmirror.com + + @using-api no + @results HTML + @stable no (HTML can change) + @parse url, title, thumbnail_src +""" + +from lxml import html +from searx.engines.xpath import extract_text +from searx.url_utils import urlencode + +# engine dependent config +categories = ['it'] +paging = True + +# I am not 100% certain about this, as apkmirror appears to be a wordpress site, +# which might support time_range searching. If you want to implement it, go ahead. 
time_range_support = False

# search-url
base_url = 'https://www.apkmirror.com'
search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'


# do search-request
def request(query, params):
    """Fill in the URL of the search request.

    :param query: search terms; url-encoded as the ``s`` query parameter.
    :param params: request parameters; ``params['pageno']`` is read to
        select the result page and ``params['url']`` is written.
    :returns: the same ``params`` dict, with ``url`` set.
    """
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=urlencode({'s': query}))
    return params


# get response from search-request
def response(resp):
    """Parse the result page into a list of result dicts.

    :param resp: response object; only ``resp.text`` (the HTML) is read.
    :returns: list of dicts with the keys ``url`` and ``title`` and, when
        the row has an image with a ``src``, ``thumbnail_src``.

    Rows without a usable title link are skipped, so a single malformed
    row does not abort parsing of the whole page.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('.//div[@id="content"]/div[@class="listWidget"]/div[@class="appRow"]'):

        # a row is only usable if it has a title link with a target
        links = result.xpath('.//h5/a')
        if not links:
            continue
        link = links[0]
        href = link.attrib.get('href')
        if not href:
            continue

        res = {
            # '#downloads' is a fragment; presumably it jumps to the
            # download section of the release page
            'url': base_url + href + '#downloads',
            'title': extract_text(link)
        }

        # the thumbnail is optional: leave the key out when the row has
        # no image instead of failing on the missing element/attribute
        images = result.xpath('.//img')
        src = images[0].attrib.get('src') if images else None
        if src:
            # swap the 32px size parameters for 64px ones
            res['thumbnail_src'] = base_url + src.replace('&w=32&h=32', '&w=64&h=64')

        # append result
        results.append(res)

    # return results
    return results


# ---------------------------------------------------------------------------
# Remainder of the original patch, kept for reference: the engine is
# registered in searx/settings.yml with "disabled: True".
# (Indentation was lost in this copy; the YAML nesting below is reconstructed.)
#
# diff --git a/searx/settings.yml b/searx/settings.yml
# index ff7782b65..3851f49d5 100644
# --- a/searx/settings.yml
# +++ b/searx/settings.yml
# @@ -49,6 +49,12 @@ outgoing: # communication with search engines
#  # - 1.1.1.2
#
#  engines:
# +  - name: apk mirror
# +    engine: apkmirror
# +    timeout: 4.0
# +    shortcut: apkm
# +    disabled: True
# +
#    - name : arch linux wiki
#      engine : archlinux
#      shortcut : al