[fix] engine - kickass update url, fix parsing, use multiple mirrors

This commit is contained in:
jazzzooo 2023-09-24 09:20:12 -07:00 committed by Markus Heiser
parent 0c39064d60
commit 1a66d74673
2 changed files with 38 additions and 67 deletions

View file

@ -1,16 +1,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""" # lint: pylint
Kickass Torrent (Videos, Music, Files) """Kickass Torrent (Videos, Music, Files)"""
"""
import random
from operator import itemgetter
from urllib.parse import quote
from lxml import html from lxml import html
from operator import itemgetter from searx.utils import (
from urllib.parse import quote, urljoin eval_xpath,
from searx.utils import extract_text, get_torrent_size, convert_str_to_int eval_xpath_getindex,
eval_xpath_list,
extract_text,
get_torrent_size,
int_or_zero,
)
# about
about = { about = {
"website": 'https://kickass.so', "website": 'https://kickasstorrents.to',
"wikidata_id": 'Q17062285', "wikidata_id": 'Q17062285',
"official_api_documentation": None, "official_api_documentation": None,
"use_official_api": False, "use_official_api": False,
@ -18,80 +25,39 @@ about = {
"results": 'HTML', "results": 'HTML',
} }
# engine dependent config
categories = ['files']
paging = True

# base_url can be overwritten by a list of URLs in the settings.yml; when a
# list is configured, request() picks one of the mirrors at random for each
# query.  Either form is accepted: a single URL string or a list of strings.
base_url = 'https://kickasstorrents.to'
def request(query, params):
    """Build the search request URL for ``query``.

    ``base_url`` may be a single URL or a list of mirror URLs; when it is a
    list, one mirror is chosen at random for this request.  The chosen mirror
    is stored in ``params['base_url']`` so that ``response`` can prefix the
    relative result links with the same host that served the page.

    :param query: the search terms; URL-quoted before being put in the path.
    :param params: request parameters; ``params['pageno']`` is read, and
        ``params['base_url']`` and ``params['url']`` are written.
    :return: the same ``params`` dict.
    """
    mirror = random.choice(base_url) if isinstance(base_url, list) else base_url
    # A mirror configured with a trailing slash would otherwise yield
    # "//usearch/..." here and "//<href>" when result links are built.
    params['base_url'] = mirror.rstrip('/')
    params['url'] = f"{params['base_url']}/usearch/{quote(query)}/{params['pageno']}/"
    return params
def response(resp):
    """Parse a search result page into a list of torrent results.

    Every row of the result table after the first one is turned into a dict
    using the ``torrent.html`` template with the keys ``url``, ``title``,
    ``content``, ``seed``, ``leech`` and ``filesize``.  Rows that carry no
    main link are skipped.

    :param resp: the HTTP response; ``resp.text`` is parsed as HTML and
        ``resp.search_params['base_url']`` (set by ``request``) is used as the
        prefix for the result links.
    :return: the results sorted by seeder count, highest first; an empty list
        when the page contains no result table.
    """
    results = []
    dom = html.fromstring(resp.text)

    search_res = eval_xpath_list(dom, '//table[contains(@class, "data")]//tr', None)
    # covers both "no value" and "no rows matched"
    if not search_res:
        return []

    link_xpath = './/a[contains(@class, "cellMainLink")]'
    mirror = resp.search_params['base_url']

    # the first row is skipped (presumably the table header -- TODO confirm)
    for tag in search_res[1:]:
        url = eval_xpath_getindex(tag, link_xpath + '/@href', 0, None)
        if url is None:
            # No main link in this row: there is nothing to point the result
            # at, and concatenating None to the mirror would raise TypeError.
            continue

        # get_torrent_size() is called with exactly two values (size, unit);
        # an empty or differently shaped cell must not abort the whole page.
        size_parts = (extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')) or '').split()
        filesize = get_torrent_size(*size_parts) if len(size_parts) == 2 else None

        results.append(
            {
                'template': 'torrent.html',
                'url': mirror + url,
                'title': extract_text(eval_xpath(tag, link_xpath)),
                'content': extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]')),
                'seed': int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]'))),
                'leech': int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]'))),
                'filesize': filesize,
            }
        )

    # results sorted by seeder count
    return sorted(results, key=itemgetter('seed'), reverse=True)

View file

@ -912,9 +912,14 @@ engines:
- name: kickass - name: kickass
engine: kickass engine: kickass
base_url:
- https://kickasstorrents.to
- https://kickasstorrents.cr
- https://kickasstorrent.cr
- https://kickass.sx
- https://kat.am
shortcut: kc shortcut: kc
timeout: 4.0 timeout: 4.0
disabled: true
- name: lemmy communities - name: lemmy communities
engine: lemmy engine: lemmy