[feat] add Sogou WeChat article search support

This commit is contained in:
Zhijie He 2025-03-02 11:18:30 +08:00 committed by Bnyro
parent 97aa5a779b
commit 76f52b5b45
4 changed files with 88 additions and 3 deletions

View file

@ -18,7 +18,6 @@ about = {
# Engine Configuration
categories = ["general"]
paging = True
max_page = 10
time_range_support = True
# Time-range keyword -> ``inttime_*`` token, one entry per supported range.
time_range_dict = {
    'day': 'inttime_day',
    'week': 'inttime_week',
    'month': 'inttime_month',
    'year': 'inttime_year',
}

View file

@ -3,7 +3,7 @@
import json
import re
from urllib.parse import quote_plus
from urllib.parse import urlencode
# about
about = {
@ -16,12 +16,18 @@ about = {
# engine dependent config
categories = ["images"]  # category label declared for this engine
paging = True  # request() turns params["pageno"] into the `start` offset
base_url = "https://pic.sogou.com"


def request(query, params):
    """Fill in ``params["url"]`` for a Sogou picture search and return *params*.

    Args:
        query: The raw search terms; they are URL-encoded here.
        params: Request parameter dict. ``params["pageno"]`` (1-based) is read
            and ``params["url"]`` is written.

    Returns:
        The same *params* dict, mutated in place.
    """
    query_params = {
        "query": query,
        # The offset advances by 48 for every page after the first.
        "start": (params["pageno"] - 1) * 48,
    }
    # The URL is built exactly once; an earlier un-paged assignment that was
    # immediately overwritten has been dropped.
    params["url"] = f"{base_url}/pics?{urlencode(query_params)}"
    return params

View file

@ -0,0 +1,75 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou-WeChat search engine for retrieving WeChat Article from Sogou"""
from urllib.parse import urlencode
from datetime import datetime
import re
from lxml import html
from searx.utils import extract_text
# Metadata
about = {
    "website": "https://weixin.sogou.com/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine Configuration
categories = ["news"]
paging = True  # request() forwards params["pageno"] as the `page` argument
# Base URL
base_url = "https://weixin.sogou.com"


def request(query, params):
    """Point ``params["url"]`` at the Sogou WeChat search page for *query*.

    Args:
        query: The raw search terms; they are URL-encoded here.
        params: Request parameter dict. ``params["pageno"]`` is read and
            ``params["url"]`` is written.

    Returns:
        The same *params* dict, mutated in place.
    """
    # NOTE(review): type=2 presumably selects article results -- confirm.
    encoded_args = urlencode({"query": query, "page": params["pageno"], "type": 2})
    params["url"] = f"{base_url}/weixin?{encoded_args}"
    return params
# Captures the digits passed to the inline ``timeConvert('...')`` script call.
_TIME_CONVERT_RE = re.compile(r"timeConvert\('(\d+)'\)")


def _parse_published_date(script_text):
    """Return the datetime for a ``timeConvert('<digits>')`` snippet, else None."""
    if not script_text:
        return None
    found = _TIME_CONVERT_RE.search(script_text)
    if found is None:
        return None
    try:
        return datetime.fromtimestamp(int(found.group(1)))
    except (OverflowError, OSError, ValueError):
        # The digits come from remote HTML and are unbounded; an out-of-range
        # value must not abort parsing of the remaining results.
        return None


def response(resp):
    """Parse a Sogou WeChat result page into a list of result dicts.

    Args:
        resp: Response object whose ``text`` attribute holds the HTML page.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``content``,
        ``thumbnail`` and ``publishedDate``. ``publishedDate`` is ``None`` when
        no usable ``timeConvert('...')`` value is found. Items lacking a title
        or a URL are omitted.
    """
    dom = html.fromstring(resp.text)
    results = []

    for item in dom.xpath('//li[contains(@id, "sogou_vr_")]'):
        title = extract_text(item.xpath('.//h3/a'))
        url = extract_text(item.xpath('.//h3/a/@href'))
        # Check before touching ``url`` so an empty value is never dereferenced
        # and no further XPath work is done for an item that will be dropped.
        if not title or not url:
            continue

        # Redirect links are site-relative; make them absolute.
        if url.startswith("/link?url="):
            url = f"{base_url}{url}"

        # Try the exact class first, then any class list containing it.
        content = extract_text(item.xpath('.//p[@class="txt-info"]'))
        if not content:
            content = extract_text(item.xpath('.//p[contains(@class, "txt-info")]'))

        thumbnail = extract_text(item.xpath('.//div[@class="img-box"]/a/img/@src'))
        # Protocol-relative image URLs get an explicit https scheme.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = f"https:{thumbnail}"

        script_text = extract_text(item.xpath('.//script[contains(text(), "timeConvert")]'))

        results.append(
            {
                "title": title,
                "url": url,
                "content": content,
                "thumbnail": thumbnail,
                "publishedDate": _parse_published_date(script_text),
            }
        )

    return results

View file

@ -1726,6 +1726,11 @@ engines:
shortcut: sogouv
disabled: true
- name: sogou wechat
engine: sogou_wechat
shortcut: sogouw
disabled: true
- name: soundcloud
engine: soundcloud
shortcut: sc