mirror of https://github.com/searxng/searxng.git (synced 2024-11-25 04:11:00 +00:00)

commit cbf1e90979: add get_embeded_stream_url to searx.utils
parent f07ab6deb0

5 changed files with 56 additions and 14 deletions
@@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
 from urllib.parse import (
     urlencode,
     urlparse,
-    parse_qs,
 )

 from dateutil import parser
@@ -137,6 +136,7 @@ from searx.utils import (
     eval_xpath_list,
     eval_xpath_getindex,
     js_variable_to_python,
+    get_embeded_stream_url,
 )
 from searx.enginelib.traits import EngineTraits

@@ -311,7 +311,7 @@ def _parse_search(resp):
         # In my tests a video tag in the WEB search was most often not a
         # video, except the ones from youtube ..

-        iframe_src = _get_iframe_src(url)
+        iframe_src = get_embeded_stream_url(url)
         if iframe_src:
             item['iframe_src'] = iframe_src
             item['template'] = 'videos.html'
@@ -328,15 +328,6 @@ def _parse_search(resp):
     return result_list


-def _get_iframe_src(url):
-    parsed_url = urlparse(url)
-    if parsed_url.path == '/watch' and parsed_url.query:
-        video_id = parse_qs(parsed_url.query).get('v', [])  # type: ignore
-        if video_id:
-            return 'https://www.youtube-nocookie.com/embed/' + video_id[0]  # type: ignore
-    return None
-
-
 def _parse_news(json_resp):
     result_list = []

@@ -392,7 +383,7 @@ def _parse_videos(json_resp):
         if result['thumbnail'] is not None:
             item['thumbnail'] = result['thumbnail']['src']

-        iframe_src = _get_iframe_src(url)
+        iframe_src = get_embeded_stream_url(url)
         if iframe_src:
             item['iframe_src'] = iframe_src

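A quick sanity sketch, not part of the commit itself: the shared helper reproduces what the removed engine-local _get_iframe_src returned for YouTube watch URLs. The video id below is a made-up placeholder.

from searx.utils import get_embeded_stream_url

# hypothetical video id, for illustration only
url = 'https://www.youtube.com/watch?v=abc123'
print(get_embeded_stream_url(url))
# prints: https://www.youtube-nocookie.com/embed/abc123
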
@@ -7,6 +7,7 @@ DuckDuckGo Extra (images, videos, news)
 from datetime import datetime
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
+from searx.utils import get_embeded_stream_url

 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
 from searx.engines.duckduckgo import (
@@ -108,7 +109,7 @@ def _video_result(result):
         'title': result['title'],
         'content': result['description'],
         'thumbnail': result['images'].get('small') or result['images'].get('medium'),
-        'iframe_src': result['embed_url'],
+        'iframe_src': get_embeded_stream_url(result['content']),
         'source': result['provider'],
         'length': result['duration'],
         'metadata': result.get('uploader'),

@@ -34,6 +34,7 @@ from searx.engines.google import (
     detect_google_sorry,
 )
 from searx.enginelib.traits import EngineTraits
+from searx.utils import get_embeded_stream_url

 if TYPE_CHECKING:
     import logging
@@ -125,6 +126,7 @@ def response(resp):
                 'content': content,
                 'author': pub_info,
                 'thumbnail': thumbnail,
+                'iframe_src': get_embeded_stream_url(url),
                 'template': 'videos.html',
             }
         )

@@ -61,6 +61,7 @@ from searx.utils import (
     eval_xpath,
     eval_xpath_list,
     extract_text,
+    get_embeded_stream_url,
 )

 traits: EngineTraits
@@ -303,6 +304,7 @@ def parse_web_api(resp):
                 'title': title,
                 'url': res_url,
                 'content': content,
+                'iframe_src': get_embeded_stream_url(res_url),
                 'publishedDate': pub_date,
                 'thumbnail': thumbnail,
                 'template': 'videos.html',

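The engine-side changes above all follow the same pattern: pass the result URL the engine already parsed to the shared helper and attach the return value as iframe_src on a videos.html item. A minimal sketch of that pattern, with hypothetical names (build_video_item, url, title, content) standing in for whatever each engine extracts:

from searx.utils import get_embeded_stream_url

def build_video_item(url, title, content):
    # hypothetical helper; the real engines build this dict inline in their response parsers
    return {
        'url': url,
        'title': title,
        'content': content,
        'iframe_src': get_embeded_stream_url(url),  # None when the service is not recognized
        'template': 'videos.html',
    }
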
@@ -17,7 +17,7 @@ from os.path import splitext, join
 from random import choice
 from html.parser import HTMLParser
 from html import escape
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin, urlparse, parse_qs, urlencode
 from markdown_it import MarkdownIt

 from lxml import html
@@ -615,6 +615,52 @@ def _get_fasttext_model() -> "fasttext.FastText._FastText":  # type: ignore
     return _FASTTEXT_MODEL


+def get_embeded_stream_url(url):
+    """
+    Converts a standard video URL into its embed format. Supported services include Youtube,
+    Facebook, Instagram, TikTok, and Dailymotion.
+    """
+    parsed_url = urlparse(url)
+    iframe_src = None
+
+    # YouTube
+    if parsed_url.netloc in ['www.youtube.com', 'youtube.com'] and parsed_url.path == '/watch' and parsed_url.query:
+        video_id = parse_qs(parsed_url.query).get('v', [])
+        if video_id:
+            iframe_src = 'https://www.youtube-nocookie.com/embed/' + video_id[0]
+
+    # Facebook
+    elif parsed_url.netloc in ['www.facebook.com', 'facebook.com']:
+        encoded_href = urlencode({'href': url})
+        iframe_src = 'https://www.facebook.com/plugins/video.php?allowfullscreen=true&' + encoded_href
+
+    # Instagram
+    elif parsed_url.netloc in ['www.instagram.com', 'instagram.com'] and parsed_url.path.startswith('/p/'):
+        if parsed_url.path.endswith('/'):
+            iframe_src = url + 'embed'
+        else:
+            iframe_src = url + '/embed'
+
+    # TikTok
+    elif (
+        parsed_url.netloc in ['www.tiktok.com', 'tiktok.com']
+        and parsed_url.path.startswith('/@')
+        and '/video/' in parsed_url.path
+    ):
+        path_parts = parsed_url.path.split('/video/')
+        video_id = path_parts[1]
+        iframe_src = 'https://www.tiktok.com/embed/' + video_id
+
+    # Dailymotion
+    elif parsed_url.netloc in ['www.dailymotion.com', 'dailymotion.com'] and parsed_url.path.startswith('/video/'):
+        path_parts = parsed_url.path.split('/')
+        if len(path_parts) == 3:
+            video_id = path_parts[2]
+            iframe_src = 'https://www.dailymotion.com/embed/video/' + video_id
+
+    return iframe_src
+
+
 def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]:
     """Detect the language of the ``text`` parameter.

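A rough usage sketch of the new helper, exercising each branch of get_embeded_stream_url; the ids, usernames, and paths below are placeholders, not real videos:

from searx.utils import get_embeded_stream_url

examples = [
    'https://www.youtube.com/watch?v=abc123',         # -> https://www.youtube-nocookie.com/embed/abc123
    'https://www.instagram.com/p/abc123/',            # -> https://www.instagram.com/p/abc123/embed
    'https://www.tiktok.com/@someuser/video/123',     # -> https://www.tiktok.com/embed/123
    'https://www.dailymotion.com/video/abc123',       # -> https://www.dailymotion.com/embed/video/abc123
    'https://www.facebook.com/someuser/videos/123/',  # -> facebook.com/plugins/video.php URL with the original href encoded
    'https://example.org/clip/1',                     # -> None (unsupported service)
]

for url in examples:
    print(url, '->', get_embeded_stream_url(url))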