[refactor] duration strings: move parsing logic to utils.py

This commit is contained in:
Bnyro 2025-03-20 21:16:37 +01:00 committed by Markus Heiser
parent c28d35c7fc
commit 4dfc47584d
5 changed files with 42 additions and 42 deletions

View file

@ -56,18 +56,6 @@ def request(query, params):
return params
# Format the video duration
def format_duration(duration):
    """Normalize a ``MM:SS`` duration string.

    Returns the duration rendered as zero-padded ``MM:SS`` when it lies in
    the range ``0 <= total seconds < 3600``, an empty string when the total
    falls outside that range, and ``None`` when the value is not a string of
    exactly two integer fields separated by a colon.
    """
    if not isinstance(duration, str) or ":" not in duration:
        return None

    try:
        minutes, seconds = map(int, duration.split(":"))
    except ValueError:
        # more than two fields (e.g. H:MM:SS) or non-numeric parts
        return None

    total_seconds = minutes * 60 + seconds
    if not 0 <= total_seconds < 3600:
        return ""

    # str(timedelta) renders "H:MM:SS"; below one hour that is "0:MM:SS",
    # so dropping the first two characters leaves "MM:SS"
    return str(timedelta(seconds=total_seconds))[2:]
def response(resp):
search_res = resp.json()
@ -83,7 +71,12 @@ def response(resp):
unix_date = item["pubdate"]
formatted_date = datetime.fromtimestamp(unix_date)
formatted_duration = format_duration(item["duration"])
# the duration only seems to be valid if the video is less than 60 mins
duration = utils.parse_duration_string(item["duration"])
if duration and duration > timedelta(minutes=60):
duration = None
iframe_url = f"https://player.bilibili.com/player.html?aid={video_id}&high_quality=1&autoplay=false&danmaku=0"
results.append(
@ -93,7 +86,7 @@ def response(resp):
"content": description,
"author": author,
"publishedDate": formatted_date,
"length": formatted_duration,
"length": duration,
"thumbnail": thumbnail,
"iframe_src": iframe_url,
"template": "videos.html",

View file

@ -2,9 +2,10 @@
"""iQiyi: A search engine for retrieving videos from iQiyi."""
from urllib.parse import urlencode
from datetime import datetime, timedelta
from datetime import datetime
from searx.exceptions import SearxEngineAPIException
from searx.utils import parse_duration_string
about = {
"website": "https://www.iqiyi.com/",
@ -55,20 +56,7 @@ def response(resp):
except (ValueError, TypeError):
pass
length = None
subscript_content = album_info.get("subscriptContent")
if subscript_content:
try:
time_parts = subscript_content.split(":")
if len(time_parts) == 2:
minutes, seconds = map(int, time_parts)
length = timedelta(minutes=minutes, seconds=seconds)
elif len(time_parts) == 3:
hours, minutes, seconds = map(int, time_parts)
length = timedelta(hours=hours, minutes=minutes, seconds=seconds)
except (ValueError, TypeError):
pass
length = parse_duration_string(album_info.get("subscriptionContent"))
results.append(
{
'url': album_info.get("pageUrl", "").replace("http://", "https://"),

View file

@ -6,7 +6,7 @@
import re
from urllib.parse import urlencode
from datetime import datetime
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
@ -50,12 +50,6 @@ safesearch = True
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
def minute_to_hm(minute):
    """Render an integer as ``<quotient>:<remainder>`` of a division by 60.

    The remainder is zero-padded to two digits.  Any value that is not an
    ``int`` yields ``None``.
    """
    if not isinstance(minute, int):
        return None
    quotient, remainder = divmod(minute, 60)
    return f"{quotient:d}:{remainder:02d}"
def request(query, params):
"""Assemble request for the Peertube API"""
@ -117,13 +111,17 @@ def video_response(resp):
if x
]
duration = result.get('duration')
if duration:
duration = timedelta(seconds=duration)
results.append(
{
'url': result['url'],
'title': result['name'],
'content': html_to_text(result.get('description') or ''),
'author': result.get('account', {}).get('displayName'),
'length': minute_to_hm(result.get('duration')),
'length': duration,
'views': humanize_number(result['views']),
'template': 'videos.html',
'publishedDate': parse(result['publishedAt']),

View file

@ -73,7 +73,7 @@ Implementations
from urllib.parse import urlencode, urlparse
from searx import locales
from searx.network import get
from searx.utils import gen_useragent, html_to_text
from searx.utils import gen_useragent, html_to_text, parse_duration_string
about = {
"website": "https://presearch.io",
@ -270,7 +270,7 @@ def response(resp):
'url': item.get('link'),
'content': item.get('description', ''),
'thumbnail': item.get('image'),
'length': item.get('duration'),
'length': parse_duration_string(item.get('duration')),
}
)

View file

@ -1,7 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Utility functions for the engines
"""
"""Utility functions for the engines"""
from __future__ import annotations
@ -18,6 +16,7 @@ from random import choice
from html.parser import HTMLParser
from html import escape
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
from datetime import timedelta
from markdown_it import MarkdownIt
from lxml import html
@ -831,3 +830,25 @@ def js_variable_to_python(js_variable):
s = s.replace(chr(1), ':')
# load the JSON and return the result
return json.loads(s)
def parse_duration_string(duration_str: str) -> timedelta | None:
    """Parse a time string in format MM:SS or HH:MM:SS and convert it to a `timedelta` object.

    Returns None if the provided value is not a string, is empty, or doesn't
    match any of the formats.
    """
    # callers may hand over the result of ``dict.get()``, which can be None
    if not isinstance(duration_str, str):
        return None

    time_parts = duration_str.strip().split(":")
    if len(time_parts) == 2:
        # MM:SS --> init hours to 0
        time_parts.insert(0, "00")
    elif len(time_parts) != 3:
        # empty string, a bare number, or too many fields
        return None

    try:
        hours, minutes, seconds = map(int, time_parts)
        return timedelta(hours=hours, minutes=minutes, seconds=seconds)
    except (ValueError, TypeError, OverflowError):
        # non-numeric field, or a value too large for a timedelta
        return None