[feat] engines: add www.acfun.cn

This commit is contained in:
Zhijie He 2025-03-05 18:48:58 +08:00 committed by Markus Heiser
parent 8fe4904619
commit 066aabc112
2 changed files with 113 additions and 0 deletions

108
searx/engines/acfun.py Normal file
View file

@ -0,0 +1,108 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Acfun search engine for searxng"""
from urllib.parse import urlencode
import re
import json
from datetime import datetime, timedelta
from lxml import html
from searx.utils import extract_text
# Engine metadata: the site being searched and how results are obtained
# (scraped from HTML, no official API, no API key).
about = {
"website": "https://www.acfun.cn/",
"wikidata_id": "Q3077675",
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
# Engine configuration: results belong to the "videos" category and paging is
# enabled (request() forwards the page number as the ``pCursor`` parameter).
categories = ["videos"]
paging = True
# Base URL shared by the search, video-page and embedded-player links that
# this module builds.
base_url = "https://www.acfun.cn"
def request(query, params):
    """Fill in the search URL for *query* and hand *params* back.

    The search term is sent as ``keyword`` and the requested page number
    (``params["pageno"]``) as ``pCursor``.  *params* is modified in place and
    also returned.
    """
    encoded_args = urlencode({"keyword": query, "pCursor": params["pageno"]})
    params["url"] = f"{base_url}/search?{encoded_args}"
    return params
def _iter_pagelet_payloads(text):
    """Yield every JSON object passed to ``bigPipe.onPageletArrive(`` in *text*.

    The argument is decoded with ``json.JSONDecoder.raw_decode`` starting right
    after the opening parenthesis, so the decoder itself decides where the
    object ends.  A regex such as ``\\{.*?\\}\\);`` would instead stop at the
    first ``});`` sequence, even when that sequence sits inside the embedded
    HTML string, and the truncated payload would then fail to parse.

    Arguments that are not valid JSON, or that decode to something other than
    an object, are skipped.
    """
    marker = "bigPipe.onPageletArrive("
    decoder = json.JSONDecoder()
    cursor = text.find(marker)
    while cursor != -1:
        start = cursor + len(marker)
        try:
            payload, resume = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a JSON argument: keep scanning right after this marker.
            resume = start
        else:
            if isinstance(payload, dict):
                yield payload
        cursor = text.find(marker, resume)


def response(resp):
    """Collect video results from the pagelets embedded in the search page.

    Each ``bigPipe.onPageletArrive({...})`` call found in ``resp.text`` carries
    a JSON object whose ``html`` member holds a fragment of markup.  Every
    ``div`` whose class contains ``search-video`` inside such a fragment is
    passed to ``extract_video_data``; entries with a title and a URL are kept.

    Returns a list of result dicts, empty when nothing usable was found.
    """
    results = []
    for payload in _iter_pagelet_payloads(resp.text):
        raw_html = payload.get("html", "")
        # Skip pagelets without markup.  Blank or non-string values are
        # filtered here because lxml refuses to parse an empty document.
        if not isinstance(raw_html, str) or not raw_html.strip():
            continue
        tree = html.fromstring(raw_html)
        for video_block in tree.xpath('//div[contains(@class, "search-video")]'):
            video_info = extract_video_data(video_block)
            if video_info and video_info["title"] and video_info["url"]:
                results.append(video_info)
    return results
def _parse_duration(text):
    """Turn a ``MM:SS`` or ``HH:MM:SS`` label into a ``timedelta``.

    In the two-field form the minutes may exceed 59 (``"75:30"``), which
    ``datetime.strptime(..., "%M:%S")`` would reject.  Returns ``None`` when
    the label does not look like a duration.
    """
    try:
        fields = [int(part) for part in text.strip().split(":")]
    except ValueError:
        return None
    if len(fields) not in (2, 3) or any(value < 0 for value in fields):
        return None
    *leading, seconds = fields
    if seconds >= 60:
        return None
    if len(leading) == 2:
        hours, minutes = leading
        if minutes >= 60:
            return None
    else:
        hours, minutes = 0, leading[0]
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)


def extract_video_data(video_block):
    """Build one result dict from a ``search-video`` element.

    The video id and title are read from the JSON stored in the element's
    ``data-exposure-log`` attribute; creation date, cover image, duration and
    description are read from child elements.

    Returns a dict with the keys ``title``, ``url``, ``content``,
    ``thumbnail``, ``length``, ``publishedDate`` and ``iframe_src``, or
    ``None`` when the attribute is missing, is not a JSON object, or carries
    no ``content_id`` (no usable link could be built without it).
    """
    # Extraction is best-effort: a malformed block is dropped, never raised.
    try:
        video_data = json.loads(video_block.get('data-exposure-log'))
        if not isinstance(video_data, dict):
            return None
        content_id = video_data.get("content_id", "")
        if not content_id:
            # Without an id both links would end in a bare ".../ac".
            return None
        title = video_data.get("title", "")
        url = f"{base_url}/v/ac{content_id}"
        iframe_src = f"{base_url}/player/ac{content_id}"

        create_time = extract_text(video_block.xpath('.//span[contains(@class, "info__create-time")]'))
        # The cover image is optional: indexing an empty xpath result would
        # raise IndexError and abort processing of the whole result page.
        cover_sources = video_block.xpath('.//div[contains(@class, "video__cover")]/a/img/@src')
        video_cover = extract_text(cover_sources[0]) if cover_sources else ""
        video_duration = extract_text(video_block.xpath('.//span[contains(@class, "video__duration")]'))
        video_intro = extract_text(video_block.xpath('.//div[contains(@class, "video__main__intro")]'))

        published_date = None
        if create_time:
            try:
                published_date = datetime.strptime(create_time.strip(), "%Y-%m-%d")
            except (ValueError, TypeError):
                # Any other date format is left unset.
                pass

        length = _parse_duration(video_duration) if video_duration else None

        return {
            "title": title,
            "url": url,
            "content": video_intro,
            "thumbnail": video_cover,
            "length": length,
            "publishedDate": published_date,
            "iframe_src": iframe_src,
        }
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError):
        return None

View file

@ -352,6 +352,11 @@ engines:
shortcut: 9g
disabled: true
- name: acfun
engine: acfun
shortcut: acf
disabled: true
- name: adobe stock
engine: adobe_stock
shortcut: asi