moviewyrm/bookwyrm/utils/isni.py

"""ISNI author checking utilities"""
import xml.etree.ElementTree as ET
from urllib.parse import quote_plus

import requests


def url_stringify(string):
    """Encode a search term so it can be embedded in a URL query string

    Spaces become ``+`` and every reserved or non-ASCII character is
    percent-encoded (as UTF-8), so names containing characters such as
    ``&``, ``#`` or ``"`` can no longer break the surrounding query.
    """

    return quote_plus(string)


def request_isni_data(search_index, search_term, max_records=5, timeout=15):
    """Request data from the ISNI API

    search_index: name of the SRU index to query (callers in this module
        use "pica.na" for names and "pica.isn" for ISNI numbers)
    search_term: the value to look up in that index
    max_records: upper bound on the number of records requested
    timeout: seconds to wait for the server before ``requests`` raises a
        timeout error, instead of blocking the caller indefinitely

    Returns the response body as text, decoded as UTF-8.
    """

    search_string = url_stringify(search_term)
    query_parts = [
        "http://isni.oclc.org/sru/?query=",
        search_index,
        "+%3D+%22",
        search_string,
        "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
        "&maximumRecords=",
        str(max_records),
        "&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",
    ]
    query_url = "".join(query_parts)
    # without an explicit timeout requests will wait forever on a
    # stalled connection
    result = requests.get(query_url, timeout=timeout)
    # the OCLC ISNI server asserts the payload is encoded
    # in latin1, but we know better
    result.encoding = "utf-8"
    return result.text


def make_name_string(element):
    """create a string of form 'personal_name surname'

    Looks up the first ``forename`` and ``surname`` descendants of
    ``element`` and joins whichever of them are present and non-empty
    with a single space. Returns an empty string when neither is usable,
    rather than failing on a missing element or empty text.
    """

    # NOTE: this will often be incorrect, many naming systems
    # list "surname" before personal name
    name_parts = []
    for tag in ("forename", "surname"):
        node = element.find(".//" + tag)
        # skip absent elements and elements without any text
        if node is not None and node.text:
            name_parts.append(node.text)
    return " ".join(name_parts)


def get_other_identifier(element, code):
    """Look up one of the author's non-ISNI identifiers in their ISNI record

    Scans every ``otherIdentifierOfIdentity`` section below ``element``
    and returns the ``identifier`` text of the first section whose
    ``type`` text equals ``code``. Returns an empty string if no section
    qualifies.
    """

    for entry in element.findall(".//otherIdentifierOfIdentity"):
        kind = entry.find(".//type")
        if kind is None or kind.text != code:
            continue
        value = entry.find(".//identifier")
        # a matching section without an identifier is skipped
        if value is not None:
            return value.text
    return ""


def get_external_information_uri(element, match_string):
    """Get URLs associated with an author from their ISNI record

    Returns the text of the first ``URI`` found under an
    ``externalInformation`` section of ``element`` that contains
    ``match_string`` (compared case-insensitively), or an empty string
    when no URI matches.
    """

    # str.find() returns -1 (never None) on a miss, so test membership
    # instead; compare lowercased so a match_string such as "Wikipedia"
    # also matches a lowercase host name in the URI
    needle = match_string.lower()
    for source in element.findall(".//externalInformation"):
        uri = source.find(".//URI")
        if uri is None or not uri.text:
            continue
        if needle in uri.text.lower():
            return uri.text
    return ""


def find_authors_by_name(name_string):
    """Query the ISNI database for possible author matches by name

    Returns a list of dicts, one per ``responseRecord`` that contains a
    ``forename`` element, with the keys "isni", "uri" and "name", plus
    "bio" when the record has a ``nameTitle`` element. Records without
    a ``forename`` are skipped.
    """

    payload = request_isni_data("pica.na", name_string)
    # parse xml
    root = ET.fromstring(payload)
    # build list of possible authors
    possible_authors = []
    for element in root.iter("responseRecord"):

        # the parent of the first forename, i.e. the element holding
        # the name parts
        personal_name = element.find(".//forename/..")
        bio = element.find(".//nameTitle")

        # compare with None explicitly: truth-testing an Element is
        # deprecated and reflects its child count, not whether it was found
        if personal_name is None:
            continue

        author = {}
        author["isni"] = element.find(".//isniUnformatted").text
        author["uri"] = element.find(".//isniURI").text
        author["name"] = make_name_string(personal_name)
        if bio is not None:
            author["bio"] = bio.text
        possible_authors.append(author)

    return possible_authors


def get_author_isni_data(isni):
    """Collect the fields for a new author record from an ISNI lookup

    Queries the ISNI service for ``isni`` and returns a dict with the
    keys "isni", "name", "viaf_id", "wikipedia_link", "bio" and
    "aliases". Raises AttributeError if the response holds no
    ``responseRecord``.
    """

    root = ET.fromstring(request_isni_data("pica.isn", isni))
    # only one responseRecord is expected; take the first regardless
    record = root.find(".//responseRecord")
    name_element = record.find(".//forename/..")
    title = record.find(".//nameTitle")

    return {
        "isni": isni,
        "name": make_name_string(name_element),
        "viaf_id": get_other_identifier(record, "viaf"),
        "wikipedia_link": get_external_information_uri(record, "Wikipedia"),
        "bio": "" if title is None else title.text,
        # one name string per personalNameVariant in the record
        "aliases": [
            make_name_string(variant)
            for variant in record.findall(".//personalNameVariant")
        ],
    }
code formatting 2021-10-29 10:14:32 +00:00			`"""ISNI author checking utilities"""`
isni author lookup utility 2021-10-29 05:12:31 +00:00			`import xml.etree.ElementTree as ET`
code formatting 2021-10-29 10:14:32 +00:00			`import requests`
isni author lookup utility 2021-10-29 05:12:31 +00:00
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
isni author lookup utility 2021-10-29 05:12:31 +00:00			`def url_stringify(string):`
code formatting 2021-10-29 10:14:32 +00:00			`"""replace spaces for url encoding"""`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
			`# TODO: this is very lazy and incomplete`
isni author lookup utility 2021-10-29 05:12:31 +00:00			`return string.replace(" ", "+")`

hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
make pylint happy 2021-10-31 23:20:19 +00:00			`def request_isni_data(search_index, search_term, max_records=5):`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`"""Request data from the ISNI API"""`
isni author lookup utility 2021-10-29 05:12:31 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`search_string = url_stringify(search_term)`
			`query_parts = [`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`"http://isni.oclc.org/sru/?query=",`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`search_index,`
			`"+%3D+%22",`
			`search_string,`
			`"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",`
			`"&maximumRecords=",`
make pylint happy 2021-10-31 23:20:19 +00:00			`str(max_records),`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",`
			`]`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`query_url = "".join(query_parts)`
			`result = requests.get(query_url)`
fix encoding The OCLC server claims that the xml payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly encode things as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8 With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html 2021-10-29 10:00:35 +00:00			`# the OCLC ISNI server asserts the payload is encoded`
			`# in latin1, but we know better`
code formatting 2021-10-29 10:14:32 +00:00			`result.encoding = "utf-8"`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`return result.text`

hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`def make_name_string(element):`
			`"""create a string of form 'personal_name surname'"""`

hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`# NOTE: this will often be incorrect, many naming systems`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`# list "surname" before personal name`
			`forename = element.find(".//forename")`
			`surname = element.find(".//surname")`
			`if forename is not None:`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`return "".join([forename.text, " ", surname.text])`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`return surname.text`

hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`def get_other_identifier(element, code):`
			`"""Get other identifiers associated with an author from their ISNI record"""`

			`identifiers = element.findall(".//otherIdentifierOfIdentity")`
			`for section_head in identifiers:`
			`if (`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`section_head.find(".//type") is not None`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`and section_head.find(".//type").text == code`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`and section_head.find(".//identifier") is not None`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`):`
			`return section_head.find(".//identifier").text`
make pylint happy 2021-10-31 23:20:19 +00:00			`return ""`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
			`def get_external_information_uri(element, match_string):`
			`"""Get URLs associated with an author from their ISNI record"""`

			`sources = element.findall(".//externalInformation")`
			`for source in sources:`
			`uri = source.find(".//URI")`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`if uri is not None and uri.text.find(match_string) is not None:`
			`return uri.text`
make pylint happy 2021-10-31 23:20:19 +00:00			`return ""`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`def find_authors_by_name(name_string):`
			`"""Query the ISNI database for possible author matches by name"""`

			`payload = request_isni_data("pica.na", name_string)`
fix encoding The OCLC server claims that the xml payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly encode things as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8 With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html 2021-10-29 10:00:35 +00:00			`# parse xml`
isni author lookup utility 2021-10-29 05:12:31 +00:00			`root = ET.fromstring(payload)`
			`# build list of possible authors`
			`possible_authors = []`
code formatting 2021-10-29 10:14:32 +00:00			`for element in root.iter("responseRecord"):`
isni author lookup utility 2021-10-29 05:12:31 +00:00
code formatting 2021-10-29 10:14:32 +00:00			`personal_name = element.find(".//forename/..")`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`bio = element.find(".//nameTitle")`

			`if not personal_name:`
			`continue`
isni author lookup utility 2021-10-29 05:12:31 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`author = {}`
			`author["isni"] = element.find(".//isniUnformatted").text`
			`author["uri"] = element.find(".//isniURI").text`
			`author["name"] = make_name_string(personal_name)`
			`if bio is not None:`
			`author["bio"] = bio.text`
			`possible_authors.append(author)`
isni author lookup utility 2021-10-29 05:12:31 +00:00
			`return possible_authors`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`def get_author_isni_data(isni):`
make pylint happy 2021-10-31 23:20:19 +00:00			`"""Find data to populate a new author record from their ISNI"""`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
			`payload = request_isni_data("pica.isn", isni)`
			`# parse xml`
			`root = ET.fromstring(payload)`
			`# there should only be a single responseRecord`
			`# but let's use the first one just in case`
			`element = root.find(".//responseRecord")`
			`personal_name = element.find(".//forename/..")`
			`bio = element.find(".//nameTitle")`
			`author = {}`
			`author["isni"] = isni`
			`author["name"] = make_name_string(personal_name)`
			`author["viaf_id"] = get_other_identifier(element, "viaf")`
			`author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")`
			`author["bio"] = bio.text if bio is not None else ""`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`author["aliases"] = []`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00			`aliases = element.findall(".//personalNameVariant")`
			`for entry in aliases:`
hide isni authors if isni in local db Check the authors suggested from the local DB for a matching ISNI when pulling authors from ISNI. i.e. we do not want to suggest the same author twice when we know it is the same author. 2021-10-31 09:48:47 +00:00			`author["aliases"].append(make_name_string(entry))`
populate new authors with isni data If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes - bio - aliases - isni - wikipedia url - viaf 2021-10-31 06:58:15 +00:00
			`return author`