bookwyrm/bookwyrm/utils/isni.py

"""ISNI author checking utilities"""
import xml.etree.ElementTree as ET
import requests

# get data
BASE_STRING = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
#pylint: disable=line-too-long
SUFFIX_STRING = "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b&maximumRecords=10&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"


def url_stringify(string):
    """replace spaces for url encoding"""
    return string.replace(" ", "+")


def find_authors_by_name(names):
    """Query the ISNI database for an author"""
    names = url_stringify(names)
    query = BASE_STRING + names + SUFFIX_STRING
    result = requests.get(query)
    # the OCLC ISNI server asserts the payload is encoded
    # in latin1, but we know better
    result.encoding = "utf-8"
    payload = result.text
    # parse xml
    root = ET.fromstring(payload)

    # build list of possible authors
    possible_authors = []
    for element in root.iter("responseRecord"):

        author = {}
        author["uri"] = element.find(".//isniURI").text
        # NOTE: this will often be incorrect, many naming systems
        # list "surname" before personal name
        personal_name = element.find(".//forename/..")
        description = element.find(".//nameTitle")
        if personal_name:
            forename = personal_name.find(".//forename")
            surname = personal_name.find(".//surname")
            author["name"] = forename.text + " " + surname.text
            if description is not None:
                author["description"] = description.text

            possible_authors.append(author)

    return possible_authors