def scrap_out_thumbs_src(dom):
    """Collect thumbnail source URLs from inline ``google.ldi`` scripts.

    Every ``<script>`` element whose content contains ``google.ldi={`` is
    searched for entries of the form ``"dimg_35":"https://i.ytimg.c...."``.

    Relies on the module-level helpers ``eval_xpath_list``, ``_re`` and
    ``logger``.

    :param dom: parsed HTML document (or element) to search.
    :returns: dict mapping the image id (e.g. ``'dimg_35'``) to its URL, with
        the ``\\u003d`` and ``\\u0026`` escapes replaced by ``=`` and ``&``.
        When an id occurs more than once the last occurrence wins.
    """
    thumb_name = 'dimg_'
    # "dimg_35":"https://i.ytimg.c....",
    # The pattern is loop-invariant, so look it up once.
    id_url_re = _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)')

    ret_val = {}
    for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
        # ``.text`` can be None (no leading text node); treat that as empty
        # instead of letting ``findall`` raise a TypeError.
        script_text = script.text or ''
        for img_id, url in id_url_re.findall(script_text):
            # The URLs are embedded in a JavaScript string literal, where
            # '=' and '&' show up as unicode escapes.
            url = url.replace(r'\u003d', '=')
            url = url.replace(r'\u0026', '&')
            ret_val[img_id] = url

    logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
    return ret_val