[fix] log messages from: google-images, news, scholar, videos

- HTTP header Accept-Language --> lang_info['headers']['Accept-Language']
- remove obsolete query_url log messages which is already logged by
  httpx._client:HTTP request

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2021-06-11 16:31:50 +02:00
parent 1ac3961336
commit 2ac3e5b20b
4 changed files with 9 additions and 16 deletions

View file

@@ -103,6 +103,8 @@ def request(query, params):
# pylint: disable=undefined-variable # pylint: disable=undefined-variable
params, supported_languages, language_aliases, False params, supported_languages, language_aliases, False
) )
logger.debug(
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
'q': query, 'q': query,
@@ -117,11 +119,8 @@ def request(query, params):
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
if params['safesearch']: if params['safesearch']:
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
logger.debug("query_url --> %s", query_url)
params['url'] = query_url params['url'] = query_url
logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
params['headers'].update(lang_info['headers']) params['headers'].update(lang_info['headers'])
params['headers']['Accept'] = ( params['headers']['Accept'] = (
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

View file

@@ -85,6 +85,8 @@ def request(query, params):
# pylint: disable=undefined-variable # pylint: disable=undefined-variable
params, supported_languages, language_aliases, False params, supported_languages, language_aliases, False
) )
logger.debug(
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
# google news has only one domain # google news has only one domain
lang_info['subdomain'] = 'news.google.com' lang_info['subdomain'] = 'news.google.com'
@@ -107,11 +109,8 @@ def request(query, params):
'oe': "utf8", 'oe': "utf8",
'gl': lang_info['country'], 'gl': lang_info['country'],
}) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded }) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded
logger.debug("query_url --> %s", query_url)
params['url'] = query_url params['url'] = query_url
logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
params['headers'].update(lang_info['headers']) params['headers'].update(lang_info['headers'])
params['headers']['Accept'] = ( params['headers']['Accept'] = (
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

View file

@@ -77,12 +77,11 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 offset = (params['pageno'] - 1) * 10
lang_info = get_lang_info( lang_info = get_lang_info(
# pylint: disable=undefined-variable # pylint: disable=undefined-variable
# params, {}, language_aliases
params, supported_languages, language_aliases, False params, supported_languages, language_aliases, False
) )
logger.debug(
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
# subdomain is: scholar.google.xy # subdomain is: scholar.google.xy
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
@@ -95,11 +94,8 @@ def request(query, params):
}) })
query_url += time_range_url(params) query_url += time_range_url(params)
logger.debug("query_url --> %s", query_url)
params['url'] = query_url params['url'] = query_url
logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
params['headers'].update(lang_info['headers']) params['headers'].update(lang_info['headers'])
params['headers']['Accept'] = ( params['headers']['Accept'] = (
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

View file

@@ -121,6 +121,8 @@ def request(query, params):
# pylint: disable=undefined-variable # pylint: disable=undefined-variable
params, supported_languages, language_aliases, False params, supported_languages, language_aliases, False
) )
logger.debug(
"HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({
'q': query, 'q': query,
@@ -134,11 +136,8 @@ def request(query, params):
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
if params['safesearch']: if params['safesearch']:
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
logger.debug("query_url --> %s", query_url)
params['url'] = query_url params['url'] = query_url
logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
params['headers'].update(lang_info['headers']) params['headers'].update(lang_info['headers'])
params['headers']['Accept'] = ( params['headers']['Accept'] = (
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'