mirror of
https://github.com/searxng/searxng.git
synced 2024-11-27 05:11:03 +00:00
Merge pull request #104 from dalf/master
[enh] add infoboxes and answers, [fix] when two results are merged, really use the content with more text
This commit is contained in:
commit
67b69619ba
12 changed files with 618 additions and 130 deletions
|
@ -38,16 +38,14 @@ def response(resp):
|
||||||
except:
|
except:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
title = '{0} {1} in {2} is {3}'.format(
|
answer = '{0} {1} = {2} {3} (1 {1} = {4} {3})'.format(
|
||||||
resp.search_params['ammount'],
|
resp.search_params['ammount'],
|
||||||
resp.search_params['from'],
|
resp.search_params['from'],
|
||||||
|
resp.search_params['ammount'] * conversion_rate,
|
||||||
resp.search_params['to'],
|
resp.search_params['to'],
|
||||||
resp.search_params['ammount'] * conversion_rate
|
conversion_rate
|
||||||
)
|
)
|
||||||
|
|
||||||
content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
|
|
||||||
conversion_rate,
|
|
||||||
resp.search_params['to'])
|
|
||||||
now_date = datetime.now().strftime('%Y%m%d')
|
now_date = datetime.now().strftime('%Y%m%d')
|
||||||
url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa
|
url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa
|
||||||
url = url.format(
|
url = url.format(
|
||||||
|
@ -56,6 +54,7 @@ def response(resp):
|
||||||
resp.search_params['from'].lower(),
|
resp.search_params['from'].lower(),
|
||||||
resp.search_params['to'].lower()
|
resp.search_params['to'].lower()
|
||||||
)
|
)
|
||||||
results.append({'title': title, 'content': content, 'url': url})
|
|
||||||
|
results.append({'answer' : answer, 'url': url})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -1,10 +1,25 @@
|
||||||
import json
|
import json
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
from lxml import html
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
|
||||||
url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
|
url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||||
|
|
||||||
|
def result_to_text(url, text, htmlResult):
    """Return a short label for one DuckDuckGo topic entry.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred; when the markup is missing or contains no link, ``text`` is
    returned unchanged.

    :param url: URL of the entry (currently unused, kept for the callers).
    :param text: plain-text fallback label.
    :param htmlResult: HTML fragment of the entry, may be ``None`` or empty.
    :returns: the label to use as a suggestion.
    """
    # TODO : remove result ending with "Meaning" or "Category"
    if not htmlResult:
        # Callers pass ``.get('Result', None)``; parsing ``None`` or an
        # empty string would raise instead of falling back to ``text``.
        return text

    dom = html.fromstring(htmlResult)
    links = dom.xpath('//a')
    if len(links) >= 1:
        return extract_text(links[0])
    return text
|
||||||
|
|
||||||
|
def html_to_text(htmlFragment):
    """Parse ``htmlFragment`` with lxml and return its extracted text."""
    return extract_text(html.fromstring(htmlFragment))
|
||||||
|
|
||||||
def request(query, params):
    """Store the DuckDuckGo API URL for ``query`` in ``params['url']``.

    Returns the same ``params`` dict that was passed in.
    """
    # TODO add kl={locale}
    encoded_query = urlencode({'q': query})
    params['url'] = url.format(query=encoded_query)
    return params
|
||||||
|
|
||||||
|
@ -12,12 +27,104 @@ def request(query, params):
|
||||||
def response(resp):
    """Convert a DuckDuckGo instant-answer JSON reply into result dicts.

    The returned list may contain, in this order:

    * ``{'answer': ...}`` when the reply has an ``Answer``;
    * ``{'title': ..., 'url': ...}`` for every entry of ``Results``;
    * ``{'suggestion': ...}`` for every flat related topic;
    * one ``{'infobox': ...}`` dict when the reply has a ``Heading``.

    Missing or ``null`` fields are treated as empty instead of raising.

    :param resp: response object exposing the JSON body as ``resp.text``.
    :returns: list of result dicts.
    """
    search_res = json.loads(resp.text)
    results = []

    content = ''
    heading = search_res.get('Heading') or ''
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer')
    if answer:
        results.append({'answer': html_to_text(answer)})

    # infobox content: definition followed by abstract
    content = content + (search_res.get('Definition') or '')
    content = content + (search_res.get('Abstract') or '')

    # image
    image = search_res.get('Image') or None

    # attributes; only a dict can carry a 'content' list
    infobox = search_res.get('Infobox')
    if isinstance(infobox, dict):
        for info in infobox.get('content') or []:
            attributes.append({'label': info.get('label'),
                               'value': info.get('value')})

    # urls
    for ddg_result in search_res.get('Results') or []:
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics; the key may be absent or null, so default to a list
    for ddg_result in search_res.get('RelatedTopics') or []:
        if 'FirstURL' in ddg_result:
            suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                        ddg_result.get('Text', None),
                                        ddg_result.get('Result', None))
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''),
                                  'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics') or []:
                suggestion = result_to_text(topic_result.get('FirstURL', None),
                                            topic_result.get('Text', None),
                                            topic_result.get('Result', None))
                if suggestion != heading:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL') or ''
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'),
                     'url': abstractURL})

    # definition (takes precedence over the abstract as infobox id)
    definitionURL = search_res.get('DefinitionURL') or ''
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definitionURL})

    # entity
    entity = search_res.get('Entity', None)
    # TODO continent / country / department / location / waterfall /
    #      mountain range : link to map search, get weather, nearby locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television / media franchise :
    #      links to IMDB / rottentomatoes (or scrape result)
    # TODO music : link to musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
    # TODO company : ??
    # TODO software / os : ??
    # TODO software engineer : ??
    # TODO prepared food : ??
    # TODO website : ??
    # TODO performing art : ??
    # TODO programming language : ??
    # TODO file format : ??

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'
        results.append({
            'infobox': heading,
            'id': infobox_id,
            'entity': entity,
            'content': content,
            'img_src': image,
            'attributes': attributes,
            'urls': urls,
            'relatedTopics': relatedTopics
        })

    return results
|
||||||
|
|
221
searx/engines/wikidata.py
Normal file
221
searx/engines/wikidata.py
Normal file
|
@ -0,0 +1,221 @@
|
||||||
|
import json
|
||||||
|
from requests import get
|
||||||
|
from urllib import urlencode
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Number of search hits requested from Wikidata (sent as ``srlimit``).
resultCount=2
# Search endpoint; ``{query}`` receives the urlencoded search parameters.
urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
# Entity-detail endpoint (wbgetentities); ``{query}`` receives ids and languages.
urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
# OpenStreetMap link template filled in by get_geolink().
urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
|
||||||
|
|
||||||
|
def request(query, params):
    """Store the Wikidata search URL for ``query`` in ``params['url']``.

    At most ``resultCount`` hits are requested. Returns the same ``params``
    dict that was passed in.
    """
    # The debug ``print`` of the URL was removed: it wrote every user query
    # to stdout.
    params['url'] = urlSearch.format(
        query=urlencode({'srsearch': query, 'srlimit': resultCount}))
    return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Turn a Wikidata search reply into infobox/result dicts.

    The entity ids found in the search reply are fetched in a single
    ``wbgetentities`` request and each one is converted by ``getDetail``.

    :param resp: response object exposing ``resp.text`` (JSON body) and
        ``resp.search_params['language']``.
    :returns: list of result dicts, empty when the search had no hits.
    """
    results = []
    search_res = json.loads(resp.text)

    wikidata_ids = set()
    for r in search_res.get('query', {}).get('search', []):
        title = r.get('title', '')
        if title:
            wikidata_ids.add(title)

    if not wikidata_ids:
        # Nothing to look up: skip the second HTTP round trip entirely.
        return results

    language = resp.search_params['language'].split('_')[0]
    if language == 'all':
        language = 'en'
    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                            'languages': language + '|en'}))

    # NOTE(review): this is a blocking request without a timeout, issued
    # from inside the response callback - confirm this is acceptable.
    htmlresponse = get(url)
    jsonresponse = json.loads(htmlresponse.content)
    for wikidata_id in wikidata_ids:
        results = results + getDetail(jsonresponse, wikidata_id, language)

    return results
|
||||||
|
|
||||||
|
def getDetail(jsonresponse, wikidata_id, language):
    """Build the result dicts for one Wikidata entity.

    :param jsonresponse: decoded ``wbgetentities`` reply.
    :param wikidata_id: id of the entity to describe (key of ``entities``).
    :param language: preferred language code; English is the fallback.
    :returns: list holding an optional ``{'title', 'url'}`` result for the
        official website followed by one ``{'infobox': ...}`` dict.
    """
    result = jsonresponse.get('entities', {}).get(wikidata_id, {})

    labels = result.get('labels', {})
    title = labels.get(language, {}).get('value', None)
    if title is None:
        title = labels.get('en', {}).get('value', wikidata_id)

    results = []
    urls = []
    attributes = []

    descriptions = result.get('descriptions', {})
    description = descriptions.get(language, {}).get('value', '')
    if description == '':
        description = descriptions.get('en', {}).get('value', '')

    claims = result.get('claims', {})
    official_website = get_string(claims, 'P856', None)
    if official_website is not None:
        urls.append({'title': 'Official site', 'url': official_website})
        results.append({'title': title, 'url': official_website})

    # The English Wikipedia link doubles as the infobox id.
    wikipedia_en_link = get_wikilink(result, 'enwiki')

    # Localised link first (when not English), then the English one.
    for label, site in (('Wikipedia', 'wiki'),
                        ('Wiki voyage', 'wikivoyage'),
                        ('Wikiquote', 'wikiquote')):
        if language != 'en':
            add_url(urls, label + ' (' + language + ')',
                    get_wikilink(result, language + site))
        add_url(urls, label + ' (en)', get_wikilink(result, 'en' + site))

    add_url(urls, 'Commons wiki', get_wikilink(result, 'commonswiki'))

    add_url(urls, 'Location', get_geolink(claims, 'P625', None))

    add_url(urls, 'Wikidata',
            'https://www.wikidata.org/wiki/' + wikidata_id + '?uselang=' + language)

    # MusicBrainz ids: work, artist, release group, label.
    # Not handled yet: P982 area, P1407 series, P1004 place, P1330 instrument.
    for prop, kind in (('P435', 'work'),
                       ('P434', 'artist'),
                       ('P436', 'release-group'),
                       ('P966', 'label')):
        musicbrainz_id = get_string(claims, prop)
        if musicbrainz_id is not None:
            add_url(urls, 'MusicBrainz',
                    'http://musicbrainz.org/' + kind + '/' + musicbrainz_id)

    for label, getter, prop in (('Postal code(s)', get_string, 'P281'),
                                ('Date of birth', get_time, 'P569'),
                                ('Date of death', get_time, 'P570')):
        value = getter(claims, prop, None)
        if value is not None:
            attributes.append({'label': label, 'value': value})

    results.append({
        'infobox': title,
        'id': wikipedia_en_link,
        'content': description,
        'attributes': attributes,
        'urls': urls
    })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def add_url(urls, title, url):
    """Append ``{'title': title, 'url': url}`` to ``urls`` unless ``url`` is None."""
    if url is not None:
        urls.append({'title': title, 'url': url})
|
||||||
|
|
||||||
|
|
||||||
|
def get_mainsnak(claims, propertyName):
    """Return the ``mainsnak`` of the first statement of ``propertyName``.

    :param claims: mapping from property id to a list of statements.
    :param propertyName: property id to look up, e.g. ``'P625'``.
    :returns: the ``mainsnak`` value, or ``None`` when the property has no
        statements or the first statement has no ``mainsnak``.
    """
    statements = claims.get(propertyName, {})
    if not statements:
        return None
    return statements[0].get('mainsnak', None)
|
||||||
|
|
||||||
|
|
||||||
|
def get_string(claims, propertyName, defaultValue=None):
    """Return the values of ``propertyName`` joined with ``', '``.

    Statements without a value (no ``datavalue``, or an empty ``value``) are
    skipped, so they no longer add empty items to the joined string.

    :param claims: mapping from property id to a list of statements.
    :param propertyName: property id to look up, e.g. ``'P856'``.
    :param defaultValue: returned when no value is available.
    :returns: the joined string, or ``defaultValue``.
    """
    statements = claims.get(propertyName, {})
    if not statements:
        return defaultValue

    values = []
    for statement in statements:
        datavalue = statement.get('mainsnak', {}).get('datavalue', {})
        if datavalue is None:
            continue
        # NOTE(review): values are assumed to be strings (string / url /
        # external-id properties); other shapes would break the join below.
        value = datavalue.get('value', '')
        if value:
            values.append(value)

    if not values:
        return defaultValue
    return ', '.join(values)
|
||||||
|
|
||||||
|
|
||||||
|
def get_time(claims, propertyName, defaultValue=None):
    """Return the ``time`` strings of ``propertyName`` joined with ``', '``.

    Statements without a value are skipped. Previously such a statement
    raised ``AttributeError`` because the missing value defaulted to a
    string that was then used like a dict.

    :param claims: mapping from property id to a list of statements.
    :param propertyName: property id to look up, e.g. ``'P569'``.
    :param defaultValue: returned when no time value is available.
    :returns: the joined raw time strings, or ``defaultValue``.
    """
    statements = claims.get(propertyName, {})
    if not statements:
        return defaultValue

    times = []
    for statement in statements:
        datavalue = statement.get('mainsnak', {}).get('datavalue', {})
        if datavalue is None:
            continue
        value = datavalue.get('value') or {}
        time_value = value.get('time', '')
        if time_value:
            times.append(time_value)

    if not times:
        return defaultValue
    return ', '.join(times)
|
||||||
|
|
||||||
|
|
||||||
|
def get_geolink(claims, propertyName, defaultValue=''):
    """Return an OpenStreetMap URL for a coordinate property.

    :param claims: mapping from property id to a list of statements.
    :param propertyName: property id to look up, e.g. ``'P625'``.
    :param defaultValue: returned when the property is missing or its first
        statement is not of datatype ``globe-coordinate``.
    :returns: ``urlMap`` filled with latitude, longitude and a zoom level.
    """
    mainsnak = get_mainsnak(claims, propertyName)
    if mainsnak is None:
        return defaultValue

    if mainsnak.get('datatype', '') != 'globe-coordinate':
        return defaultValue

    value = mainsnak.get('datavalue', {}).get('value', {})

    precision = value.get('precision', 0.0002)
    if precision is None:
        # A null precision is treated as "very precise" (zoom 19 below).
        precision = 0.0002

    # there is no zoom information, deduce from precision (error prone)
    # samples :
    # 13 --> 5
    # 1 --> 6
    # 0.016666666666667 --> 9
    # 0.00027777777777778 --> 19
    # wolframalpha : quadratic fit
    #   { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
    #   14.1186-8.8322 x+0.625447 x^2
    # NOTE(review): the code uses 15 as constant term, not 14.1186 - confirm.
    if precision < 0.0003:
        zoom = 19
    else:
        zoom = int(15 - precision*8.8322 + precision*precision*0.625447)
        # The fitted parabola is negative between the sample points and
        # grows without bound past them; keep the zoom within 0..19.
        zoom = max(0, min(19, zoom))

    url = urlMap.replace('{latitude}', str(value.get('latitude', 0)))
    url = url.replace('{longitude}', str(value.get('longitude', 0)))
    url = url.replace('{zoom}', str(zoom))

    return url
|
||||||
|
|
||||||
|
|
||||||
|
def get_wikilink(result, wikiid):
    """Return the https URL of the sitelink ``wikiid`` of an entity.

    :param result: entity dict containing an optional ``sitelinks`` mapping.
    :param wikiid: site id such as ``'enwiki'`` or ``'commonswiki'``.
    :returns: the URL with its scheme forced to https (``http://`` and
        protocol-relative ``//`` prefixes are rewritten), or ``None`` when
        the entity has no such sitelink.
    """
    url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
    if url is None:
        return url
    if url.startswith('http://'):
        # Rewrite only the scheme, not later occurrences inside the URL.
        url = 'https://' + url[len('http://'):]
    elif url.startswith('//'):
        url = 'https:' + url
    return url
|
113
searx/search.py
113
searx/search.py
|
@ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import grequests
|
import grequests
|
||||||
|
import re
|
||||||
from itertools import izip_longest, chain
|
from itertools import izip_longest, chain
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
@ -38,17 +39,14 @@ def default_request_params():
|
||||||
|
|
||||||
|
|
||||||
# create a callback wrapper for the search engine results
|
# create a callback wrapper for the search engine results
|
||||||
def make_callback(engine_name, results, suggestions, callback, params):
|
def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
|
||||||
|
|
||||||
# creating a callback wrapper for the search engine results
|
# creating a callback wrapper for the search engine results
|
||||||
def process_callback(response, **kwargs):
|
def process_callback(response, **kwargs):
|
||||||
cb_res = []
|
cb_res = []
|
||||||
response.search_params = params
|
response.search_params = params
|
||||||
|
|
||||||
# update stats with current page-load-time
|
# callback
|
||||||
engines[engine_name].stats['page_load_time'] += \
|
|
||||||
(datetime.now() - params['started']).total_seconds()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
search_results = callback(response)
|
search_results = callback(response)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
@ -61,6 +59,7 @@ def make_callback(engine_name, results, suggestions, callback, params):
|
||||||
engine_name, str(e))
|
engine_name, str(e))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# add results
|
||||||
for result in search_results:
|
for result in search_results:
|
||||||
result['engine'] = engine_name
|
result['engine'] = engine_name
|
||||||
|
|
||||||
|
@ -70,14 +69,37 @@ def make_callback(engine_name, results, suggestions, callback, params):
|
||||||
suggestions.add(result['suggestion'])
|
suggestions.add(result['suggestion'])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# if it is an answer, add it to list of answers
|
||||||
|
if 'answer' in result:
|
||||||
|
answers.add(result['answer'])
|
||||||
|
continue
|
||||||
|
|
||||||
|
# if it is an infobox, add it to list of infoboxes
|
||||||
|
if 'infobox' in result:
|
||||||
|
infoboxes.append(result)
|
||||||
|
continue
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
cb_res.append(result)
|
cb_res.append(result)
|
||||||
|
|
||||||
results[engine_name] = cb_res
|
results[engine_name] = cb_res
|
||||||
|
|
||||||
|
# update stats with current page-load-time
|
||||||
|
engines[engine_name].stats['page_load_time'] += \
|
||||||
|
(datetime.now() - params['started']).total_seconds()
|
||||||
|
|
||||||
return process_callback
|
return process_callback
|
||||||
|
|
||||||
|
|
||||||
|
# return the meaningful length of the content for a result
|
||||||
|
def content_result_len(content):
    """Return the number of meaningful characters in ``content``.

    Spaces and the punctuation characters ``, ; : ! ? . / \\ ( ) - _`` are
    not counted. Anything that is not a string has length 0.
    """
    try:
        text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        text_types = str
    if not isinstance(content, text_types):
        return 0
    # '-' is placed last so it is a literal; written as ")-_" it formed a
    # range that also removed digits and uppercase letters.
    return len(re.sub(r'[,;:!?./\\ ()_-]', '', content))
|
||||||
|
|
||||||
|
|
||||||
# score results and remove duplications
|
# score results and remove duplications
|
||||||
def score_results(results):
|
def score_results(results):
|
||||||
# calculate scoring parameters
|
# calculate scoring parameters
|
||||||
|
@ -99,8 +121,13 @@ def score_results(results):
|
||||||
res['host'] = res['host'].replace('www.', '', 1)
|
res['host'] = res['host'].replace('www.', '', 1)
|
||||||
|
|
||||||
res['engines'] = [res['engine']]
|
res['engines'] = [res['engine']]
|
||||||
|
|
||||||
weight = 1.0
|
weight = 1.0
|
||||||
|
|
||||||
|
# strip multiple spaces and cariage returns from content
|
||||||
|
if 'content' in res:
|
||||||
|
res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
|
||||||
|
|
||||||
# get weight of this engine if possible
|
# get weight of this engine if possible
|
||||||
if hasattr(engines[res['engine']], 'weight'):
|
if hasattr(engines[res['engine']], 'weight'):
|
||||||
weight = float(engines[res['engine']].weight)
|
weight = float(engines[res['engine']].weight)
|
||||||
|
@ -108,9 +135,8 @@ def score_results(results):
|
||||||
# calculate score for that engine
|
# calculate score for that engine
|
||||||
score = int((flat_len - i) / engines_len) * weight + 1
|
score = int((flat_len - i) / engines_len) * weight + 1
|
||||||
|
|
||||||
duplicated = False
|
|
||||||
|
|
||||||
# check for duplicates
|
# check for duplicates
|
||||||
|
duplicated = False
|
||||||
for new_res in results:
|
for new_res in results:
|
||||||
# remove / from the end of the url if required
|
# remove / from the end of the url if required
|
||||||
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
||||||
|
@ -127,7 +153,7 @@ def score_results(results):
|
||||||
# merge duplicates together
|
# merge duplicates together
|
||||||
if duplicated:
|
if duplicated:
|
||||||
# using content with more text
|
# using content with more text
|
||||||
if res.get('content') > duplicated.get('content'):
|
if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
|
||||||
duplicated['content'] = res['content']
|
duplicated['content'] = res['content']
|
||||||
|
|
||||||
# increase result-score
|
# increase result-score
|
||||||
|
@ -186,6 +212,64 @@ def score_results(results):
|
||||||
return gresults
|
return gresults
|
||||||
|
|
||||||
|
|
||||||
|
def merge_two_infoboxes(infobox1, infobox2):
    """Merge ``infobox2`` into ``infobox1`` in place.

    * ``urls``: entries of ``infobox2`` whose ``url`` is not yet present in
      ``infobox1`` are appended.
    * ``attributes``: entries of ``infobox2`` whose ``label`` is not yet
      present in ``infobox1`` are appended.
    * ``content``: the content with the greater ``content_result_len`` wins;
      a missing content in ``infobox1`` is replaced.

    :param infobox1: infobox dict that receives the merged data.
    :param infobox2: infobox dict to merge from (not modified).
    """
    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = []
            # dicts have no ``set`` method; assign the key directly
            infobox1['urls'] = urls1

        urlSet = set()
        for url in urls1:
            urlSet.add(url.get('url', None))

        for url in infobox2.get('urls', []):
            if url.get('url', None) not in urlSet:
                urls1.append(url)

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes', None)
        if attributes1 is None:
            attributes1 = []
            infobox1['attributes'] = attributes1

        attributeSet = set()
        for attribute in attributes1:
            attributeSet.add(attribute.get('label', None))

        for attribute in infobox2.get('attributes', []):
            # the label set was built but never consulted before, so every
            # attribute of infobox2 was appended, duplicates included
            if attribute.get('label', None) not in attributeSet:
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        if content1 is not None:
            if content_result_len(content2) > content_result_len(content1):
                infobox1['content'] = content2
        else:
            infobox1['content'] = content2
|
||||||
|
|
||||||
|
|
||||||
|
def merge_infoboxes(infoboxes):
    """Return ``infoboxes`` with entries sharing an ``id`` merged together.

    The first infobox seen for an id is kept (in its original position) and
    later ones with the same id are merged into it with
    ``merge_two_infoboxes``. Infoboxes without an id are never merged.

    :param infoboxes: iterable of infobox dicts.
    :returns: new list of infobox dicts.
    """
    results = []
    # infobox id -> index of the kept infobox in ``results``
    infoboxes_id = {}
    for infobox in infoboxes:
        infobox_id = infobox.get('id', None)
        if infobox_id is not None:
            existingIndex = infoboxes_id.get(infobox_id, None)
            if existingIndex is not None:
                merge_two_infoboxes(results[existingIndex], infobox)
                continue
            infoboxes_id[infobox_id] = len(results)
        results.append(infobox)

    return results
|
||||||
|
|
||||||
|
|
||||||
class Search(object):
|
class Search(object):
|
||||||
|
|
||||||
"""Search information container"""
|
"""Search information container"""
|
||||||
|
@ -208,6 +292,8 @@ class Search(object):
|
||||||
|
|
||||||
self.results = []
|
self.results = []
|
||||||
self.suggestions = []
|
self.suggestions = []
|
||||||
|
self.answers = []
|
||||||
|
self.infoboxes = []
|
||||||
self.request_data = {}
|
self.request_data = {}
|
||||||
|
|
||||||
# set specific language if set
|
# set specific language if set
|
||||||
|
@ -293,6 +379,8 @@ class Search(object):
|
||||||
requests = []
|
requests = []
|
||||||
results = {}
|
results = {}
|
||||||
suggestions = set()
|
suggestions = set()
|
||||||
|
answers = set()
|
||||||
|
infoboxes = []
|
||||||
|
|
||||||
# increase number of searches
|
# increase number of searches
|
||||||
number_of_searches += 1
|
number_of_searches += 1
|
||||||
|
@ -337,6 +425,8 @@ class Search(object):
|
||||||
selected_engine['name'],
|
selected_engine['name'],
|
||||||
results,
|
results,
|
||||||
suggestions,
|
suggestions,
|
||||||
|
answers,
|
||||||
|
infoboxes,
|
||||||
engine.response,
|
engine.response,
|
||||||
request_params
|
request_params
|
||||||
)
|
)
|
||||||
|
@ -374,11 +464,14 @@ class Search(object):
|
||||||
# score results and remove duplications
|
# score results and remove duplications
|
||||||
results = score_results(results)
|
results = score_results(results)
|
||||||
|
|
||||||
|
# merge infoboxes according to their ids
|
||||||
|
infoboxes = merge_infoboxes(infoboxes)
|
||||||
|
|
||||||
# update engine stats, using calculated score
|
# update engine stats, using calculated score
|
||||||
for result in results:
|
for result in results:
|
||||||
for res_engine in result['engines']:
|
for res_engine in result['engines']:
|
||||||
engines[result['engine']]\
|
engines[result['engine']]\
|
||||||
.stats['score_count'] += result['score']
|
.stats['score_count'] += result['score']
|
||||||
|
|
||||||
# return results and suggestions
|
# return results, suggestions, answers and infoboxes
|
||||||
return results, suggestions
|
return results, suggestions, answers, infoboxes
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
server:
|
server:
|
||||||
port : 8888
|
port : 8888
|
||||||
secret_key : "ultrasecretkey" # change this!
|
secret_key : "ultrasecretkey" # change this!
|
||||||
debug : False # Debug mode, only for development
|
debug : True # Debug mode, only for development
|
||||||
request_timeout : 2.0 # seconds
|
request_timeout : 2.0 # seconds
|
||||||
base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
|
base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
|
||||||
themes_path : "" # Custom ui themes path
|
themes_path : "" # Custom ui themes path
|
||||||
|
@ -44,6 +44,10 @@ engines:
|
||||||
engine : duckduckgo_definitions
|
engine : duckduckgo_definitions
|
||||||
shortcut : ddd
|
shortcut : ddd
|
||||||
|
|
||||||
|
- name : wikidata
|
||||||
|
engine : wikidata
|
||||||
|
shortcut : wd
|
||||||
|
|
||||||
- name : duckduckgo
|
- name : duckduckgo
|
||||||
engine : duckduckgo
|
engine : duckduckgo
|
||||||
shortcut : ddg
|
shortcut : ddg
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -235,6 +235,17 @@ a {
|
||||||
max-width: 54em;
|
max-width: 54em;
|
||||||
word-wrap:break-word;
|
word-wrap:break-word;
|
||||||
line-height: 1.24;
|
line-height: 1.24;
|
||||||
|
|
||||||
|
img {
|
||||||
|
float: left;
|
||||||
|
margin-right: 5px;
|
||||||
|
max-width: 200px;
|
||||||
|
max-height: 100px;
|
||||||
|
}
|
||||||
|
|
||||||
|
br.last {
|
||||||
|
clear: both;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
.url {
|
.url {
|
||||||
|
@ -384,33 +395,80 @@ tr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#suggestions {
|
#suggestions, #answers {
|
||||||
|
|
||||||
margin-top: 20px;
|
margin-top: 20px;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#suggestions, #answers, #infoboxes {
|
||||||
|
|
||||||
span {
|
|
||||||
display: inline;
|
|
||||||
margin: 0 2px 2px 2px;
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
input {
|
input {
|
||||||
padding: 0;
|
padding: 0;
|
||||||
margin: 3px;
|
margin: 3px;
|
||||||
font-size: 0.8em;
|
font-size: 0.8em;
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
background: transparent;
|
background: transparent;
|
||||||
color: @color-result-search-url-font;
|
color: @color-result-search-url-font;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
input[type="submit"] {
|
|
||||||
|
input[type="submit"] {
|
||||||
text-decoration: underline;
|
text-decoration: underline;
|
||||||
}
|
}
|
||||||
|
|
||||||
form {
|
form {
|
||||||
display: inline;
|
display: inline;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Infobox column, pinned to the right of the result list.
#infoboxes {
    position: absolute;
    top: 220px;
    right: 20px;
    margin: 0px 2px 5px 5px;
    padding: 0px 2px 2px;
    max-width: 21em;

    .infobox {
        margin: 10px 0 10px;
        border: 1px solid #ddd;
        padding: 5px;
        font-size: 0.8em;

        img {
            max-width: 20em;
            // was misspelled "max-heigt", so the limit was never applied
            max-height: 12em;
            display: block;
            margin: 5px;
            padding: 5px;
        }

        h2 {
            margin: 0;
        }

        table {
            width: auto;

            td {
                vertical-align: top;
            }
        }

        input {
            font-size: 1em;
        }

        br {
            clear: both;
        }
    }
}
|
||||||
|
|
||||||
#search_url {
|
#search_url {
|
||||||
margin-top: 8px;
|
margin-top: 8px;
|
||||||
|
|
||||||
|
@ -453,16 +511,6 @@ tr {
|
||||||
|
|
||||||
@media screen and (max-width: @results-width) {
|
@media screen and (max-width: @results-width) {
|
||||||
|
|
||||||
#categories {
|
|
||||||
font-size: 90%;
|
|
||||||
clear: both;
|
|
||||||
|
|
||||||
.checkbox_container {
|
|
||||||
margin-top: 2px;
|
|
||||||
margin: auto;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#results {
|
#results {
|
||||||
margin: auto;
|
margin: auto;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
|
@ -483,7 +531,33 @@ tr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@media screen and (max-width: 70em) {
|
@media screen and (max-width: 75em) {
|
||||||
|
|
||||||
|
#infoboxes {
|
||||||
|
position: inherit;
|
||||||
|
max-width: inherit;
|
||||||
|
|
||||||
|
.infobox {
|
||||||
|
clear:both;
|
||||||
|
|
||||||
|
img {
|
||||||
|
float: left;
|
||||||
|
max-width: 10em;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#categories {
|
||||||
|
font-size: 90%;
|
||||||
|
clear: both;
|
||||||
|
|
||||||
|
.checkbox_container {
|
||||||
|
margin-top: 2px;
|
||||||
|
margin: auto;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
.right {
|
.right {
|
||||||
display: none;
|
display: none;
|
||||||
postion: fixed !important;
|
postion: fixed !important;
|
||||||
|
@ -515,12 +589,6 @@ tr {
|
||||||
.result {
|
.result {
|
||||||
border-top: 1px solid @color-result-top-border;
|
border-top: 1px solid @color-result-top-border;
|
||||||
margin: 7px 0 6px 0;
|
margin: 7px 0 6px 0;
|
||||||
|
|
||||||
img {
|
|
||||||
max-width: 90%;
|
|
||||||
width: auto;
|
|
||||||
height: auto
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
44
searx/templates/default/infobox.html
Normal file
44
searx/templates/default/infobox.html
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
<div class="infobox">
|
||||||
|
<h2>{{ infobox.infobox }}</h2>
|
||||||
|
{% if infobox.img_src %}<img src="{{ infobox.img_src }}" />{% endif %}
|
||||||
|
<p>{{ infobox.entity }}</p>
|
||||||
|
<p>{{ infobox.content }}</p>
|
||||||
|
{% if infobox.attributes %}
|
||||||
|
<div class="attributes">
|
||||||
|
<table>
|
||||||
|
{% for attribute in infobox.attributes %}
|
||||||
|
<tr><td>{{ attribute.label }}</td><td>{{ attribute.value }}</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if infobox.urls %}
|
||||||
|
<div class="urls">
|
||||||
|
<ul>
|
||||||
|
{% for url in infobox.urls %}
|
||||||
|
<li class="url"><a href="{{ url.url }}">{{ url.title }}</a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if infobox.relatedTopics %}
|
||||||
|
<div class="relatedTopics">
|
||||||
|
{% for topic in infobox.relatedTopics %}
|
||||||
|
<div>
|
||||||
|
<h3>{{ topic.name }}</h3>
|
||||||
|
{% for suggestion in topic.suggestions %}
|
||||||
|
<form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
|
||||||
|
<input type="hidden" name="q" value="{{ suggestion }}">
|
||||||
|
<input type="submit" value="{{ suggestion }}" />
|
||||||
|
</form>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<br />
|
||||||
|
|
||||||
|
</div>
|
|
@ -8,6 +8,6 @@
|
||||||
<h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
|
<h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
|
||||||
<p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p>
|
<p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p>
|
||||||
{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %}
|
{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %}
|
||||||
<p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p>
|
<p class="content">{% if result.img_src %}<img src="{{ result.img_src|safe }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -30,6 +30,14 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if answers %}
|
||||||
|
<div id="answers"><span>{{ _('Answers') }}</span>
|
||||||
|
{% for answer in answers %}
|
||||||
|
<span>{{ answer }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if suggestions %}
|
{% if suggestions %}
|
||||||
<div id="suggestions"><span>{{ _('Suggestions') }}</span>
|
<div id="suggestions"><span>{{ _('Suggestions') }}</span>
|
||||||
{% for suggestion in suggestions %}
|
{% for suggestion in suggestions %}
|
||||||
|
@ -41,6 +49,14 @@
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if infoboxes %}
|
||||||
|
<div id="infoboxes">
|
||||||
|
{% for infobox in infoboxes %}
|
||||||
|
{% include 'default/infobox.html' %}
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% for result in results %}
|
{% for result in results %}
|
||||||
{% if result['template'] %}
|
{% if result['template'] %}
|
||||||
{% include 'default/result_templates/'+result['template'] %}
|
{% include 'default/result_templates/'+result['template'] %}
|
||||||
|
|
|
@ -43,6 +43,8 @@ class ViewsTestCase(SearxTestCase):
|
||||||
def test_index_html(self, search):
|
def test_index_html(self, search):
|
||||||
search.return_value = (
|
search.return_value = (
|
||||||
self.test_results,
|
self.test_results,
|
||||||
|
set(),
|
||||||
|
set(),
|
||||||
set()
|
set()
|
||||||
)
|
)
|
||||||
result = self.app.post('/', data={'q': 'test'})
|
result = self.app.post('/', data={'q': 'test'})
|
||||||
|
@ -51,7 +53,7 @@ class ViewsTestCase(SearxTestCase):
|
||||||
result.data
|
result.data
|
||||||
)
|
)
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
'<p class="content">first <span class="highlight">test</span> content<br /></p>', # noqa
|
'<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>', # noqa
|
||||||
result.data
|
result.data
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -59,6 +61,8 @@ class ViewsTestCase(SearxTestCase):
|
||||||
def test_index_json(self, search):
|
def test_index_json(self, search):
|
||||||
search.return_value = (
|
search.return_value = (
|
||||||
self.test_results,
|
self.test_results,
|
||||||
|
set(),
|
||||||
|
set(),
|
||||||
set()
|
set()
|
||||||
)
|
)
|
||||||
result = self.app.post('/', data={'q': 'test', 'format': 'json'})
|
result = self.app.post('/', data={'q': 'test', 'format': 'json'})
|
||||||
|
@ -75,6 +79,8 @@ class ViewsTestCase(SearxTestCase):
|
||||||
def test_index_csv(self, search):
|
def test_index_csv(self, search):
|
||||||
search.return_value = (
|
search.return_value = (
|
||||||
self.test_results,
|
self.test_results,
|
||||||
|
set(),
|
||||||
|
set(),
|
||||||
set()
|
set()
|
||||||
)
|
)
|
||||||
result = self.app.post('/', data={'q': 'test', 'format': 'csv'})
|
result = self.app.post('/', data={'q': 'test', 'format': 'csv'})
|
||||||
|
@ -90,6 +96,8 @@ class ViewsTestCase(SearxTestCase):
|
||||||
def test_index_rss(self, search):
|
def test_index_rss(self, search):
|
||||||
search.return_value = (
|
search.return_value = (
|
||||||
self.test_results,
|
self.test_results,
|
||||||
|
set(),
|
||||||
|
set(),
|
||||||
set()
|
set()
|
||||||
)
|
)
|
||||||
result = self.app.post('/', data={'q': 'test', 'format': 'rss'})
|
result = self.app.post('/', data={'q': 'test', 'format': 'rss'})
|
||||||
|
|
|
@ -199,7 +199,7 @@ def index():
|
||||||
'index.html',
|
'index.html',
|
||||||
)
|
)
|
||||||
|
|
||||||
search.results, search.suggestions = search.search(request)
|
search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
|
||||||
|
|
||||||
for result in search.results:
|
for result in search.results:
|
||||||
|
|
||||||
|
@ -292,6 +292,8 @@ def index():
|
||||||
pageno=search.pageno,
|
pageno=search.pageno,
|
||||||
base_url=get_base_url(),
|
base_url=get_base_url(),
|
||||||
suggestions=search.suggestions,
|
suggestions=search.suggestions,
|
||||||
|
answers=search.answers,
|
||||||
|
infoboxes=search.infoboxes,
|
||||||
theme=get_current_theme_name()
|
theme=get_current_theme_name()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue