bookwyrm/bookwyrm/utils/isni.py
Hugh Rundle 45158a1c03 improve isni search logic
- skip ISNIs that aren't persons
- don't look for text values in non-existent nameTitle elements
2021-10-30 09:24:42 +11:00

47 lines
1.6 KiB
Python

"""ISNI author checking utilities"""
import xml.etree.ElementTree as ET
from urllib.parse import quote_plus

import requests
# The ISNI SRU search URL is assembled as BASE_STRING + <encoded name> + SUFFIX_STRING.
# BASE_STRING ends with an encoded opening double quote (%22) and
# SUFFIX_STRING starts with the matching closing one, so the name is
# sent as a quoted term for the "pica.na" index.
BASE_STRING = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
# Remaining request parameters, one per line for readability; adjacent
# string literals are concatenated into a single value.
SUFFIX_STRING = (
    "%22&version=1.1"
    "&operation=searchRetrieve"
    "&recordSchema=isni-b"
    "&maximumRecords=10"
    "&startRecord=1"
    "&recordPacking=xml"
    "&sortKeys=RLV%2Cpica%2C0%2C%2C"
)
def url_stringify(string):
    """Encode a string so it can be embedded in a URL query.

    Spaces become ``+``. Any other character that is not safe inside a
    query string (for example ``&``, ``#``, ``%`` or ``"``) is
    percent-encoded, so it cannot split or truncate the request.

    Args:
        string: the raw text to encode, e.g. an author name.

    Returns:
        The encoded text.
    """
    return quote_plus(string)
def find_authors_by_name(names):
    """Query the ISNI database for possible authors matching a name.

    Args:
        names: the author name to search for, as plain text.

    Returns:
        A list with one dict per matching person record. Each dict has
        a "uri" key (text of the record's ``isniURI`` element), a "name"
        key and, when the record contains a ``nameTitle`` element, a
        "description" key. Records that are not persons (no ``forename``
        element) or that have no ``isniURI`` element are skipped.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails
            or does not complete within the timeout.
        xml.etree.ElementTree.ParseError: if the response is not
            well-formed XML.
    """
    query = BASE_STRING + url_stringify(names) + SUFFIX_STRING
    # never block forever on an unresponsive remote server
    result = requests.get(query, timeout=15)
    # the OCLC ISNI server asserts the payload is encoded
    # in latin1, but we know better
    result.encoding = "utf-8"
    root = ET.fromstring(result.text)

    # build list of possible authors
    possible_authors = []
    for element in root.iter("responseRecord"):
        uri = element.find(".//isniURI")
        # the parent of the first <forename> holds both parts of the name
        personal_name = element.find(".//forename/..")
        # Compare with None explicitly: the truth value of an Element
        # reflects whether it has children, not whether it was found.
        if uri is None or personal_name is None:
            continue

        # NOTE: this will often be incorrect, many naming systems
        # list "surname" before personal name
        name_parts = (
            personal_name.findtext(".//forename", default=""),
            personal_name.findtext(".//surname", default=""),
        )
        author = {
            "uri": uri.text,
            # a missing or empty part is dropped instead of raising
            "name": " ".join(part for part in name_parts if part),
        }

        description = element.find(".//nameTitle")
        if description is not None:
            author["description"] = description.text

        possible_authors.append(author)

    return possible_authors