moviewyrm/bookwyrm/utils/isni.py

import requests
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import XMLParser

# Pieces of the ISNI SRU search URL. A request URL is assembled as
#     base_string + <url-encoded name> + suffix_string
# base_string ends with a percent-encoded opening double quote (%22) and
# suffix_string starts with the matching closing one, so the name is sent
# as a quoted phrase against the "pica.na" index.
base_string = "http://isni.oclc.org/sru/" "?query=pica.na+%3D+%22"
# Fixed request parameters: SRU version 1.1 searchRetrieve, isni-b record
# schema, at most 10 records starting at record 1, XML record packing and
# the sort key "RLV,pica,0,," (percent-encoded).
suffix_string = (
    "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b"
    "&maximumRecords=10&startRecord=1&recordPacking=xml"
    "&sortKeys=RLV%2Cpica%2C0%2C%2C"
)


def url_stringify(string):
    """Encode text so it can be embedded in the query string of a URL.

    Spaces become "+". Every other character that is not safe inside a
    query string (for example "&", "#", "=", "%" or a double quote) is
    percent-encoded as UTF-8, so it cannot cut the query short or be read
    as an extra request parameter. Letters, digits and "_.-~" are returned
    unchanged.

    Args:
        string: the text to encode.

    Returns:
        The encoded text.
    """
    # Imported here so the function does not rely on the module's import block.
    from urllib.parse import quote_plus

    return quote_plus(string)


def find_authors_by_name(names, timeout=15):
    """Query the ISNI SRU service for people matching a name.

    Args:
        names: the name to search for, as plain text.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per ``responseRecord`` in the response that
        contains a personal name (a ``forename`` element). Each dict has:

        * ``"uri"``: text of the record's ``isniURI`` element
        * ``"name"``: forename and surname joined by a space; just one of
          them when the other is absent or empty
        * ``"description"``: text of the record's ``nameTitle`` element

        ``"uri"`` and ``"description"`` are ``None`` when the element is
        absent or empty. Records without a usable personal name are
        skipped.

    Raises:
        requests.RequestException: the request failed or timed out.
        xml.etree.ElementTree.ParseError: the response body is not
            well-formed XML.
    """
    query = base_string + url_stringify(names) + suffix_string
    # requests waits indefinitely unless a timeout is given; bound the wait
    # so an unresponsive server cannot hang the caller.
    response = requests.get(query, timeout=timeout)
    # the OCLC ISNI server asserts the payload is encoded
    # in latin1, but we know better
    response.encoding = "utf-8"
    root = ET.fromstring(response.text)

    possible_authors = []
    for record in root.iter("responseRecord"):
        # The parent of the first forename element is the personal-name node.
        personal_name = record.find(".//forename/..")
        # Compare against None explicitly: truth-testing an Element checks
        # whether it has children, not whether it was found.
        if personal_name is None:
            continue

        # NOTE: this will often be incorrect, some naming systems list
        # "surname" before personal name
        forename = personal_name.findtext(".//forename")
        surname = personal_name.findtext(".//surname")
        # Join only the parts that are present so a missing or empty
        # surname (or forename) no longer raises.
        name = " ".join(part for part in (forename, surname) if part)
        if not name:
            continue

        possible_authors.append(
            {
                # findtext() yields "" for an empty element and None for a
                # missing one; normalise both to None.
                "uri": record.findtext(".//isniURI") or None,
                "name": name,
                "description": record.findtext(".//nameTitle") or None,
            }
        )

    return possible_authors
isni author lookup utility 2021-10-29 05:12:31 +00:00			`import requests`
			`import xml.etree.ElementTree as ET`
fix encoding The OCLC server claims that the xml payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly encode things as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8 With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html 2021-10-29 10:00:35 +00:00			`from xml.etree.ElementTree import XMLParser`
isni author lookup utility 2021-10-29 05:12:31 +00:00
			`# get data`
			`base_string = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"`
			`suffix_string = "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b&maximumRecords=10&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"`


			`def url_stringify(string):`
			`return string.replace(" ", "+")`


			`def find_authors_by_name(names):`

			`names = url_stringify(names)`
			`query = base_string + names + suffix_string`
			`r = requests.get(query)`
fix encoding The OCLC server claims that the xml payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly encode things as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8 With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html 2021-10-29 10:00:35 +00:00			`# the OCLC ISNI server asserts the payload is encoded`
			`# in latin1, but we know better`
			`r.encoding = "utf-8"`
isni author lookup utility 2021-10-29 05:12:31 +00:00			`payload = r.text`
fix encoding The OCLC server claims that the xml payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly encode things as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8 With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html 2021-10-29 10:00:35 +00:00			`# parse xml`
isni author lookup utility 2021-10-29 05:12:31 +00:00			`root = ET.fromstring(payload)`

			`# build list of possible authors`
			`possible_authors = []`
			`for el in root.iter("responseRecord"):`

			`author = dict()`
			`author["uri"] = el.find(".//isniURI").text`
			`# NOTE: this will often be incorrect, some naming systems list "surname" before personal name`
			`personal_name = el.find(".//forename/..")`
			`forename = personal_name.find(".//forename")`
			`surname = personal_name.find(".//surname")`
			`author["name"] = surname.text`
			`if personal_name:`
			`author["name"] = forename.text + " " + surname.text`
			`author["description"] = el.find(".//nameTitle").text`

			`possible_authors.append(author)`

			`return possible_authors`