From cb4a3fe598707fc42f86ea3f7bcf517dcd4db660 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Sat, 17 Jan 2015 19:21:09 +0100 Subject: [PATCH 1/4] Add thumbnails in image results - Modify engines to create/fetch a URL for the thumbnails - Modify themes to show thumbnails instead of full images. In Courgette, the result is not very beautiful. Should we change it? --- searx/engines/500px.py | 8 +++++++- searx/engines/bing_images.py | 4 ++++ searx/engines/deviantart.py | 9 +++++++-- searx/engines/flickr-noapi.py | 9 +++++++++ searx/engines/flickr.py | 11 ++++++++++- searx/engines/google_images.py | 3 +++ .../templates/courgette/result_templates/images.html | 2 +- searx/templates/default/result_templates/images.html | 2 +- searx/templates/oscar/result_templates/images.html | 4 ++-- 9 files changed, 44 insertions(+), 8 deletions(-) diff --git a/searx/engines/500px.py b/searx/engines/500px.py index 3b95619a1..f25678c24 100644 --- a/searx/engines/500px.py +++ b/searx/engines/500px.py @@ -14,6 +14,7 @@ from urllib import urlencode from urlparse import urljoin from lxml import html +import re # engine dependent config categories = ['images'] @@ -37,20 +38,25 @@ def response(resp): results = [] dom = html.fromstring(resp.text) + regex = re.compile('3\.jpg.*$') # parse results for result in dom.xpath('//div[@class="photo"]'): link = result.xpath('.//a')[0] url = urljoin(base_url, link.attrib.get('href')) title = result.xpath('.//div[@class="title"]//text()')[0] - img_src = link.xpath('.//img')[0].attrib['src'] + thumbnail_src = link.xpath('.//img')[0].attrib['src'] + # To have a bigger thumbnail, uncomment the next line + #thumbnail_src = regex.sub('4.jpg', thumbnail_src) content = result.xpath('.//div[@class="info"]//text()')[0] + img_src = regex.sub('2048.jpg', thumbnail_src) # append result results.append({'url': url, 'title': title, 'img_src': img_src, 'content': content, + 'thumbnail_src': thumbnail_src, 'template': 'images.html'}) # return results diff --git 
a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 6c5e49bc9..e1dda0b1f 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -25,6 +25,7 @@ paging = True # search-url base_url = 'https://www.bing.com/' search_string = 'images/search?{query}&count=10&first={offset}' +thumb_url = "http://ts1.mm.bing.net/th?id={ihk}" # do search-request @@ -63,6 +64,8 @@ def response(resp): yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m'))) title = link.attrib.get('t1') + ihk = link.attrib.get('ihk') + #url = 'http://' + link.attrib.get('t3') url = yaml_data.get('surl') img_src = yaml_data.get('imgurl') @@ -72,6 +75,7 @@ def response(resp): 'url': url, 'title': title, 'content': '', + 'thumbnail_src': thumb_url.format(ihk=ihk), 'img_src': img_src}) # TODO stop parsing if 10 images are found diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index d436e8163..2c6661cef 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -6,13 +6,14 @@ # @using-api no (TODO, rewrite to api) # @results HTML # @stable no (HTML can change) -# @parse url, title, thumbnail, img_src +# @parse url, title, thumbnail_src, img_src # # @todo rewrite to api from urllib import urlencode from urlparse import urljoin from lxml import html +import re # engine dependent config categories = ['images'] @@ -42,6 +43,8 @@ def response(resp): return [] dom = html.fromstring(resp.text) + + regex = re.compile('\/200H\/') # parse results for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): @@ -49,12 +52,14 @@ def response(resp): url = urljoin(base_url, link.attrib.get('href')) title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa title = ''.join(title_links[0].xpath('.//text()')) - img_src = link.xpath('.//img')[0].attrib['src'] + thumbnail_src = link.xpath('.//img')[0].attrib['src'] + img_src = regex.sub('/', thumbnail_src) # append result results.append({'url': url, 'title': title, 
'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'template': 'images.html'}) # return results diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py index 89dd2ee5f..fdd8bc3eb 100644 --- a/searx/engines/flickr-noapi.py +++ b/searx/engines/flickr-noapi.py @@ -71,6 +71,14 @@ def response(resp): if 'id' not in photo['owner']: continue +# For a bigger thumbnail, keep only the url_z, not the url_n + if 'n' in photo['sizes']: + thumbnail_src = photo['sizes']['n']['displayUrl'] + elif 'z' in photo['sizes']: + thumbnail_src = photo['sizes']['z']['displayUrl'] + else: + thumbnail_src = img_src + url = build_flickr_url(photo['owner']['id'], photo['id']) title = photo.get('title', '') @@ -89,6 +97,7 @@ def response(resp): results.append({'url': url, 'title': title, 'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'content': content, 'template': 'images.html'}) diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 4dadd80a6..4040236e1 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -23,7 +23,7 @@ api_key = None url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ '&api_key={api_key}&{text}&sort=relevance' +\ - '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\ + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\ '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' @@ -65,6 +65,14 @@ def response(resp): else: continue +# For a bigger thumbnail, keep only the url_z, not the url_n + if 'url_n' in photo: + thumbnail_src = photo['url_n'] + elif 'url_z' in photo: + thumbnail_src = photo['url_z'] + else: + thumbnail_src = img_src + url = build_flickr_url(photo['owner'], photo['id']) title = photo['title'] @@ -80,6 +88,7 @@ def response(resp): results.append({'url': url, 'title': title, 'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'content': content, 'template': 'images.html'}) diff --git 
a/searx/engines/google_images.py b/searx/engines/google_images.py index 79fac3fb0..c08279660 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -43,15 +43,18 @@ def response(resp): # parse results for result in search_res['responseData']['results']: + print result href = result['originalContextUrl'] title = result['title'] if not result['url']: continue + thumbnail_src = result['tbUrl'] # append result results.append({'url': href, 'title': title, 'content': '', + 'thumbnail_src': thumbnail_src, 'img_src': unquote(result['url']), 'template': 'images.html'}) diff --git a/searx/templates/courgette/result_templates/images.html b/searx/templates/courgette/result_templates/images.html index bf2a686ac..6a4d7ae83 100644 --- a/searx/templates/courgette/result_templates/images.html +++ b/searx/templates/courgette/result_templates/images.html @@ -1,6 +1,6 @@

- {{ result.title|striptags }} + {{ result.title|striptags }} {{ _('original context') }}

diff --git a/searx/templates/default/result_templates/images.html b/searx/templates/default/result_templates/images.html index d6a0f84a1..7f209030f 100644 --- a/searx/templates/default/result_templates/images.html +++ b/searx/templates/default/result_templates/images.html @@ -1,6 +1,6 @@

- {{ result.title|striptags }} + {{ result.title|striptags }} {{ _('original context') }}

diff --git a/searx/templates/oscar/result_templates/images.html b/searx/templates/oscar/result_templates/images.html index 155c24153..7051bb737 100644 --- a/searx/templates/oscar/result_templates/images.html +++ b/searx/templates/oscar/result_templates/images.html @@ -1,7 +1,7 @@ {% from 'oscar/macros.html' import draw_favicon %} - {{ result.title|striptags }} + {{ result.title|striptags }}