From b9ada93b3ade2b4268bdc898e2c67b156b4dba92 Mon Sep 17 00:00:00 2001 From: Denis Wernert Date: Mon, 8 Oct 2018 14:56:20 +0200 Subject: [PATCH] Removes what looks like tracking parameters --- searx/engines/unsplash.py | 20 ++++++++++++++++---- tests/unit/engines/test_unsplash.py | 6 +++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 04a943297..2e8d6fdfc 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -10,7 +10,7 @@ @parse url, title, img_src, thumbnail_src """ -from searx.url_utils import urlencode +from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl from json import loads url = 'https://unsplash.com/' @@ -20,6 +20,18 @@ page_size = 20 paging = True +def clean_url(url): + parsed = urlparse(url) + query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] + + return urlunparse((parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + urlencode(query), + parsed.fragment)) + + def request(query, params): params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) return params @@ -32,9 +44,9 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: results.append({'template': 'images.html', - 'url': result['links']['html'], - 'thumbnail_src': result['urls']['thumb'], - 'img_src': result['urls']['raw'], + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), 'title': result['description'], 'content': ''}) return results diff --git a/tests/unit/engines/test_unsplash.py b/tests/unit/engines/test_unsplash.py index cb9e683c4..4501de906 100644 --- a/tests/unit/engines/test_unsplash.py +++ b/tests/unit/engines/test_unsplash.py @@ -32,7 +32,7 @@ class TestUnsplashEngine(SearxTestCase): self.assertEqual(result[0]['title'], 'low angle photography of swimming penguin') self.assertEqual(result[0]['url'], 'https://unsplash.com/photos/FY8d721UO_4') self.assertEqual(result[0]['thumbnail_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80\ -&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=a9b9e56e63efc6f4611a87ce7e9a48f8') - self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5\ -&ixid=eyJhcHBfaWQiOjEyMDd9&s=095c5fc319c5a77c705f49ad63e0f195') +&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max') + self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c\ +?ixlib=rb-0.3.5') self.assertEqual(result[0]['content'], '')