searxng/searx/engines/piped.py
Markus Heiser 203f1f0928 [fix] engine piped: 'invalid content'
SearXNG does not allow a None value in the content field of a result item.

If the key (shortDescription, uploaderName) in the JSON response from piped
exists but is set to None, SearXNG ignores this result item::

  DEBUG   searx    : result: invalid content: { ..,  'content': None,  ..}

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2023-08-03 16:23:36 +02:00

165 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""An alternative privacy-friendly YouTube frontend which is efficient by
design. `Pipeds architecture`_ consists of 3 components:
- :py:obj:`backend <backend_url>`
- :py:obj:`frontend <frontend_url>`
- proxy
.. _Pipeds architecture: https://docs.piped.video/docs/architecture/
Configuration
=============
The :py:obj:`backend_url` and :py:obj:`frontend_url` has to be set in the engine
named `piped` and are used by all piped engines
.. code:: yaml
- name: piped
engine: piped
piped_filter: videos
...
frontend_url: https://..
backend_url:
- https://..
- https://..
- name: piped.music
engine: piped
network: piped
shortcut: ppdm
piped_filter: music_songs
...
Known Quirks
============
The implementation to support :py:obj:`paging <searx.enginelib.Engine.paging>`
is based on the *nextpage* method of Piped's REST API / the :py:obj:`frontend
API <frontend_url>`. This feature is *next page driven* and plays well with the
:ref:`infinite_scroll <settings ui>` setting in SearXNG but it does not really
fit into SearXNG's UI to select a page by number.
Implementations
===============
"""
from __future__ import annotations
import time
import random
from urllib.parse import urlencode
import datetime
from dateutil import parser
# about
about = {
"website": 'https://github.com/TeamPiped/Piped/',
"wikidata_id": 'Q107565255',
"official_api_documentation": 'https://docs.piped.video/docs/api-documentation/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = []
paging = True
# search-url
backend_url: list | str = "https://pipedapi.kavin.rocks"
"""Piped-Backend_: The core component behind Piped. The value is an URL or a
list of URLs. In the latter case instance will be selected randomly. For a
complete list of offical instances see Piped-Instances (`JSON
<https://piped-instances.kavin.rocks/>`__)
.. _Piped-Instances: https://github.com/TeamPiped/Piped/wiki/Instances
.. _Piped-Backend: https://github.com/TeamPiped/Piped-Backend
"""
frontend_url: str = "https://piped.video"
"""Piped-Frontend_: URL to use as link and for embeds.
.. _Piped-Frontend: https://github.com/TeamPiped/Piped
"""
piped_filter = 'all'
"""Content filter ``music_songs`` or ``videos``"""
def _backend_url() -> str:
from searx.engines import engines # pylint: disable=import-outside-toplevel
url = engines['piped'].backend_url # type: ignore
if isinstance(url, list):
url = random.choice(url)
return url
def _frontend_url() -> str:
from searx.engines import engines # pylint: disable=import-outside-toplevel
return engines['piped'].frontend_url # type: ignore
def request(query, params):
args = {
'q': query,
'filter': piped_filter,
}
path = "/search"
if params['pageno'] > 1:
# don't use nextpage when user selected to jump back to page 1
nextpage = params['engine_data'].get('nextpage')
if nextpage:
path = "/nextpage/search"
args['nextpage'] = nextpage
params["url"] = _backend_url() + f"{path}?" + urlencode(args)
return params
def response(resp):
results = []
json = resp.json()
for result in json["items"]:
publishedDate = parser.parse(time.ctime(result.get("uploaded", 0) / 1000))
item = {
# the api url differs from the frontend, hence use piped.video as default
"url": _frontend_url() + result.get("url", ""),
"title": result.get("title", ""),
"publishedDate": publishedDate,
"iframe_src": _frontend_url() + '/embed' + result.get("url", ""),
}
if piped_filter == 'videos':
item["template"] = "videos.html"
# if the value of shortDescription set, but is None, return empty string
item["content"] = result.get("shortDescription", "") or ""
item["thumbnail"] = result.get("thumbnail", "")
elif piped_filter == 'music_songs':
item["template"] = "default.html"
item["img_src"] = result.get("thumbnail", "")
item["content"] = result.get("uploaderName", "") or ""
length = result.get("duration")
if length:
item["length"] = datetime.timedelta(seconds=length)
results.append(item)
results.append(
{
"engine_data": json["nextpage"],
"key": "nextpage",
}
)
return results