[fix][mod] wikidata date handling refactor - fixes #387

This commit is contained in:
Adam Tauber 2015-09-07 22:39:33 +02:00
parent 7580852bda
commit 362c849797
2 changed files with 35 additions and 25 deletions

View file

@ -1,8 +1,15 @@
import json import json
from urllib import urlencode
from searx import logger
from searx.poolrequests import get from searx.poolrequests import get
from searx.utils import format_date_by_locale from searx.utils import format_date_by_locale
from datetime import datetime
from dateutil.parser import parse as dateutil_parse
from urllib import urlencode
logger = logger.getChild('wikidata')
result_count = 1 result_count = 1
wikidata_host = 'https://www.wikidata.org' wikidata_host = 'https://www.wikidata.org'
wikidata_api = wikidata_host + '/w/api.php' wikidata_api = wikidata_host + '/w/api.php'
@ -164,14 +171,12 @@ def getDetail(jsonresponse, wikidata_id, language, locale):
if postal_code is not None: if postal_code is not None:
attributes.append({'label': 'Postal code(s)', 'value': postal_code}) attributes.append({'label': 'Postal code(s)', 'value': postal_code})
date_of_birth = get_time(claims, 'P569', None) date_of_birth = get_time(claims, 'P569', locale, None)
if date_of_birth is not None: if date_of_birth is not None:
date_of_birth = format_date_by_locale(date_of_birth[8:], locale)
attributes.append({'label': 'Date of birth', 'value': date_of_birth}) attributes.append({'label': 'Date of birth', 'value': date_of_birth})
date_of_death = get_time(claims, 'P570', None) date_of_death = get_time(claims, 'P570', locale, None)
if date_of_death is not None: if date_of_death is not None:
date_of_death = format_date_by_locale(date_of_death[8:], locale)
attributes.append({'label': 'Date of death', 'value': date_of_death}) attributes.append({'label': 'Date of death', 'value': date_of_death})
if len(attributes) == 0 and len(urls) == 2 and len(description) == 0: if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
@ -229,7 +234,7 @@ def get_string(claims, propertyName, defaultValue=None):
return result[0] return result[0]
def get_time(claims, propertyName, defaultValue=None): def get_time(claims, propertyName, locale, defaultValue=None):
propValue = claims.get(propertyName, {}) propValue = claims.get(propertyName, {})
if len(propValue) == 0: if len(propValue) == 0:
return defaultValue return defaultValue
@ -244,9 +249,22 @@ def get_time(claims, propertyName, defaultValue=None):
result.append(value.get('time', '')) result.append(value.get('time', ''))
if len(result) == 0: if len(result) == 0:
return defaultValue date_string = defaultValue
else: else:
return ', '.join(result) date_string = ', '.join(result)
try:
parsed_date = datetime.strptime(date_string, "+%Y-%m-%dT%H:%M:%SZ")
except:
if date_string.startswith('-'):
return date_string.split('T')[0]
try:
parsed_date = dateutil_parse(date_string, fuzzy=False, default=False)
except:
logger.debug('could not parse date %s', date_string)
return date_string.split('T')[0]
return format_date_by_locale(parsed_date, locale)
def get_geolink(claims, propertyName, defaultValue=''): def get_geolink(claims, propertyName, defaultValue=''):

View file

@ -1,11 +1,10 @@
# import htmlentitydefs # import htmlentitydefs
import locale
import dateutil.parser
import cStringIO import cStringIO
import csv import csv
import os import os
import re import re
from babel.dates import format_date
from codecs import getincrementalencoder from codecs import getincrementalencoder
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from random import choice from random import choice
@ -195,23 +194,16 @@ def get_result_templates(base_path):
return result_templates return result_templates
def format_date_by_locale(date_string, locale_string): def format_date_by_locale(date, locale_string):
# strftime works only on dates after 1900 # strftime works only on dates after 1900
parsed_date = dateutil.parser.parse(date_string)
if parsed_date.year <= 1900:
return parsed_date.isoformat().split('T')[0]
orig_locale = locale.getlocale()[0] if date.year <= 1900:
try: return date.isoformat().split('T')[0]
locale.setlocale(locale.LC_ALL, locale_string)
except: if locale_string == 'all':
logger.warning('cannot set locale: {0}'.format(locale_string)) locale_string = settings['ui']['default_locale'] or 'en_US'
formatted_date = parsed_date.strftime(locale.nl_langinfo(locale.D_FMT))
try: return format_date(date, locale=locale_string)
locale.setlocale(locale.LC_ALL, orig_locale)
except:
logger.warning('cannot set original locale: {0}'.format(orig_locale))
return formatted_date
def dict_subset(d, properties): def dict_subset(d, properties):