mirror of
https://github.com/searxng/searxng.git
synced 2024-12-27 11:40:29 +00:00
[mod] add/modify image fetching for bing_news, qwant and twitter engines
This commit is contained in:
parent
4cffd78650
commit
f5128c7cb9
4 changed files with 28 additions and 11 deletions
|
@ -112,12 +112,11 @@ def response(resp):
|
|||
|
||||
# append result
|
||||
if thumbnail is not None:
|
||||
results.append({'template': 'videos.html',
|
||||
'url': url,
|
||||
results.append({'url': url,
|
||||
'title': title,
|
||||
'publishedDate': publishedDate,
|
||||
'content': content,
|
||||
'thumbnail': thumbnail})
|
||||
'img_src': thumbnail})
|
||||
else:
|
||||
results.append({'url': url,
|
||||
'title': title,
|
||||
|
|
|
@ -96,14 +96,27 @@ def response(resp):
|
|||
'thumbnail_src': thumbnail_src,
|
||||
'img_src': img_src})
|
||||
|
||||
elif (category_to_keyword.get(categories[0], '') == 'news' or
|
||||
category_to_keyword.get(categories[0], '') == 'social'):
|
||||
elif category_to_keyword.get(categories[0], '') == 'social':
|
||||
published_date = datetime.fromtimestamp(result['date'], None)
|
||||
|
||||
img_src = result.get('img', None)
|
||||
results.append({'url': res_url,
|
||||
'title': title,
|
||||
'publishedDate': published_date,
|
||||
'content': content})
|
||||
'content': content,
|
||||
'img_src': img_src})
|
||||
|
||||
elif category_to_keyword.get(categories[0], '') == 'news':
|
||||
published_date = datetime.fromtimestamp(result['date'], None)
|
||||
media = result.get('media', [])
|
||||
if len(media) > 0:
|
||||
img_src = media[0].get('pict', {}).get('url', None)
|
||||
else:
|
||||
img_src = None
|
||||
results.append({'url': res_url,
|
||||
'title': title,
|
||||
'publishedDate': published_date,
|
||||
'content': content,
|
||||
'img_src': img_src})
|
||||
|
||||
return results
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ search_url = base_url + 'search?'
|
|||
|
||||
# specific xpath variables
|
||||
results_xpath = '//li[@data-item-type="tweet"]'
|
||||
avatar_xpath = './/img[contains(@class, "avatar")]/@src'
|
||||
link_xpath = './/small[@class="time"]//a'
|
||||
title_xpath = './/span[contains(@class, "username")]'
|
||||
content_xpath = './/p[contains(@class, "tweet-text")]'
|
||||
|
@ -57,6 +58,8 @@ def response(resp):
|
|||
try:
|
||||
link = tweet.xpath(link_xpath)[0]
|
||||
content = extract_text(tweet.xpath(content_xpath)[0])
|
||||
img_src = tweet.xpath(avatar_xpath)[0]
|
||||
img_src = img_src.replace('_bigger', '_normal')
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
@ -71,12 +74,14 @@ def response(resp):
|
|||
results.append({'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'img_src': img_src,
|
||||
'publishedDate': publishedDate})
|
||||
else:
|
||||
# append result
|
||||
results.append({'url': url,
|
||||
'title': title,
|
||||
'content': content})
|
||||
'content': content,
|
||||
'img_src': img_src})
|
||||
|
||||
# return results
|
||||
return results
|
||||
|
|
|
@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], 'Title')
|
||||
self.assertEqual(results[0]['url'], 'http://url.of.article/')
|
||||
self.assertEqual(results[0]['content'], 'Article Content')
|
||||
self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
|
||||
self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
|
||||
self.assertEqual(results[1]['title'], 'Another Title')
|
||||
self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
|
||||
self.assertEqual(results[1]['content'], 'Another Article Content')
|
||||
self.assertNotIn('thumbnail', results[1])
|
||||
self.assertNotIn('img_src', results[1])
|
||||
|
||||
html = """<?xml version="1.0" encoding="utf-8" ?>
|
||||
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
||||
|
@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], 'Title')
|
||||
self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
|
||||
self.assertEqual(results[0]['content'], 'Article Content')
|
||||
self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
|
||||
self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
|
||||
|
||||
html = """<?xml version="1.0" encoding="utf-8" ?>
|
||||
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
|
||||
|
|
Loading…
Reference in a new issue