populate new authors with isni data

If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes: bio, aliases, ISNI, Wikipedia URL, and VIAF ID.
2025-03-28 22:25:30 +00:00 · 2021-10-31 17:58:15 +11:00 · 2021-10-31 17:58:15 +11:00 · bce5f3f3b5
commit bce5f3f3b5
parent 45158a1c03
3 changed files with 119 additions and 47 deletions
--- a/bookwyrm/templates/book/edit/edit_book.html
+++ b/bookwyrm/templates/book/edit/edit_book.html
@ -50,39 +50,33 @@
                <input type="hidden" name="author-match-count" value="{{ author_matches|length }}">
                <div class="column is-half">
                    {% for author in author_matches %}
-                    {% if author.isni_matches %}
                    <fieldset>
                        <legend class="title is-5 mb-1">
-                            {% blocktrans with name=author.name %}Is "{{ name }}" one of these?{% endblocktrans %}
+                            {% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
                        </legend>
                        {% with forloop.counter0 as counter %}
-                        {% for match in author.isni_matches %}
-                        <label class="label mb-2">
-                            <input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
-                            {{ match.name }}
-                            <span class="help">
-                                <a href="{{ match.uri }}" target="_blank"><em>{{ match.description }}</em></a>
-                            </span>
+                        {% if author.isni_matches %}
+                        {% for isni_match in author.isni_matches %}
+                        <label class="label mt-2">
+                            <input type="radio" name="author_match-{{ counter }}" value="{{ isni_match.name }}" required>
+                            {{ isni_match.name }}
                         </label>
+                        <p class="help ml-5 mb-2">
+                            <a href="{{ isni_match.uri }}" target="_blank">{{ isni_match.bio }}</a>
+                        </p>
+                        <input type="text" name="isni_match-{{ counter }}" value="{{ isni_match.isni }}" hidden>
                         {% endfor %}
-                        {% endwith %}
-                    </fieldset>
-                    {% endif %}
-                    <fieldset>
-                        <legend class="title is-5 mb-1">
-                            {% blocktrans with name=author.name %}Is "{{ name }}" an existing author?{% endblocktrans %}
-                        </legend>
-                        {% with forloop.counter0 as counter %}
+                        {% endif %}
                        {% for match in author.matches %}
-                        <label class="label mb-2">
+                        <label class="label">
                            <input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
                            {{ match.name }}
                        </label>
-                        <p class="help">
+                        <p class="help ml-5 mb-2">
                            <a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a>
                        </p>
                        {% endfor %}
-                        <label class="label">
+                        <label class="label mt-2">
                            <input type="radio" name="author_match-{{ counter }}" value="{{ author.name }}" required> {% trans "This is a new author" %}
                        </label>
                        {% endwith %}
--- a/bookwyrm/utils/isni.py
+++ b/bookwyrm/utils/isni.py
@ -2,46 +2,117 @@
 import xml.etree.ElementTree as ET
 import requests

-# get data
-BASE_STRING = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
-#pylint: disable=line-too-long
-SUFFIX_STRING = "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b&maximumRecords=10&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"
-
+from bookwyrm.settings import BASE_DIR

 def url_stringify(string):
-    """replace spaces for url encoding"""
+    """percent-encode a search term for use inside a url query string
+
+    Spaces become "+" (as before); every other character that is not safe
+    in a query string (e.g. "&", "#", '"') is percent-encoded so that it
+    cannot break or alter the request url.
+    """
+
+    # local import so this helper does not depend on the module import block
+    from urllib.parse import quote_plus
+
-    return string.replace(" ", "+")
+    return quote_plus(string)

+def request_isni_data(search_index, search_term, maxRecords=10, timeout=15):
+    """Request data from the ISNI API
+
+    search_index: name of the SRU index to query (e.g. "pica.na")
+    search_term: value to look up; it is url-encoded before use
+    maxRecords: upper bound on the number of records requested
+    timeout: seconds to wait for the ISNI server before requests raises
+
+    Returns the response body as text (XML, per recordPacking=xml).
+    """
 
-def find_authors_by_name(names):
-    """Query the ISNI database for an author"""
-    names = url_stringify(names)
-    query = BASE_STRING + names + SUFFIX_STRING
-    result = requests.get(query)
+    search_string = url_stringify(search_term)
+    query_parts = [
+        "http://isni.oclc.org/sru/?query=",
+        search_index,
+        "+%3D+%22",
+        search_string,
+        "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
+        "&maximumRecords=",
+        str(maxRecords),
+        "&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",
+    ]
+    query_url = "".join(query_parts)
+    # without a timeout a stalled ISNI server would block the caller forever
+    result = requests.get(query_url, timeout=timeout)
     # the OCLC ISNI server asserts the payload is encoded
     # in latin1, but we know better
     result.encoding = "utf-8"
-    payload = result.text
+    return result.text
+
+def make_name_string(element):
+    """create a string of form 'personal_name surname'
+
+    element: an XML element containing optional forename/surname children
+
+    Returns the available name parts joined by a single space. A missing
+    element, or missing/empty name parts, are skipped instead of raising,
+    so the result may be a lone forename, a lone surname or "".
+    """
+
+    if element is None:
+        return ""
+    # NOTE: this will often be incorrect, many naming systems
+    # list "surname" before personal name
+    forename = element.find(".//forename")
+    surname = element.find(".//surname")
+    name_parts = [
+        part.text for part in (forename, surname) if part is not None and part.text
+    ]
+    return " ".join(name_parts)
+
+def get_other_identifier(element, code):
+    """Look up one of an author's other identifiers in their ISNI record
+
+    Scans every otherIdentifierOfIdentity section and returns the
+    identifier text of the first one whose type equals code, or None
+    when no section qualifies.
+    """
+
+    for candidate in element.findall(".//otherIdentifierOfIdentity"):
+        id_type = candidate.find(".//type")
+        if id_type is None or id_type.text != code:
+            continue
+        identifier = candidate.find(".//identifier")
+        if identifier is not None:
+            return identifier.text
+    return None
+
+def get_external_information_uri(element, match_string):
+    """Get URLs associated with an author from their ISNI record
+
+    Returns the text of the first externalInformation URI that contains
+    match_string (compared case-insensitively, so "Wikipedia" matches a
+    lowercase hostname), or None when no URI matches.
+    """
+
+    needle = match_string.lower()
+    for source in element.findall(".//externalInformation"):
+        uri = source.find(".//URI")
+        # str.find() returns -1 (never None) when there is no match, so use
+        # a membership test to actually filter on match_string
+        if uri is not None and uri.text and needle in uri.text.lower():
+            return uri.text
+    return None
+
+def find_authors_by_name(name_string):
+    """Query the ISNI database for possible author matches by name
+
+    Returns a list of dicts, one per responseRecord that has a personal
+    name, with keys "isni", "uri", "name" and, when the record has a
+    nameTitle element, "bio".
+    """
+
+    payload = request_isni_data("pica.na", name_string)
     # parse xml
     root = ET.fromstring(payload)
-
     # build list of possible authors
     possible_authors = []
     for element in root.iter("responseRecord"):

-        author = {}
-        author["uri"] = element.find(".//isniURI").text
-        # NOTE: this will often be incorrect, many naming systems 
-        # list "surname" before personal name
         personal_name = element.find(".//forename/..")
-        description = element.find(".//nameTitle")
-        if personal_name:
-            forename = personal_name.find(".//forename")
-            surname = personal_name.find(".//surname")
-            author["name"] = forename.text + " " + surname.text
-            if description is not None:
-                author["description"] = description.text
+        bio = element.find(".//nameTitle")

-            possible_authors.append(author)
+        # compare with None explicitly: the truth value of an Element
+        # reflects its child count and testing it is deprecated
+        if personal_name is None:
+            continue
+
+        author = {}
+        author["isni"] = element.find(".//isniUnformatted").text
+        author["uri"] = element.find(".//isniURI").text
+        author["name"] = make_name_string(personal_name)
+        if bio is not None:
+            author["bio"] = bio.text
+        possible_authors.append(author)

     return possible_authors
+
+def get_author_isni_data(isni):
+    """Fetch one ISNI record and map it onto author fields
+
+    Queries the "pica.isn" index for the given isni and returns a dict
+    with keys "isni", "name", "viaf_id", "wikipedia_link", "bio" and
+    "aliases" built from the first responseRecord in the reply.
+    """
+
+    payload = request_isni_data("pica.isn", isni)
+    # parse xml
+    root = ET.fromstring(payload)
+    # there should only be a single responseRecord
+    # but let's use the first one just in case
+    record = root.find(".//responseRecord")
+    name_node = record.find(".//forename/..")
+    title_node = record.find(".//nameTitle")
+    return {
+        "isni": isni,
+        "name": make_name_string(name_node),
+        "viaf_id": get_other_identifier(record, "viaf"),
+        "wikipedia_link": get_external_information_uri(record, "Wikipedia"),
+        "bio": "" if title_node is None else title_node.text,
+        # CHECK can we send a list for this?
+        "aliases": [
+            make_name_string(variant)
+            for variant in record.findall(".//personalNameVariant")
+        ],
+    }
--- a/bookwyrm/views/books/edit_book.py
+++ b/bookwyrm/views/books/edit_book.py
@ -11,7 +11,7 @@ from django.utils.decorators import method_decorator
 from django.views import View

 from bookwyrm import book_search, forms, models
-from bookwyrm.utils.isni import find_authors_by_name
+from bookwyrm.utils.isni import find_authors_by_name, get_author_isni_data
 from bookwyrm.views.helpers import get_edition
 from .books import set_cover_from_url

@ -71,6 +71,8 @@ class EditBook(View):
                        ),  # find matches from ISNI API
                    }
                )
+                # TODO: check if an isni record matches an existing record
+                # to bring these two records together

        # we're creating a new book
        if not book:
@ -152,8 +154,13 @@ class ConfirmEditBook(View):
                        models.Author, id=request.POST[f"author_match-{i}"]
                    )
                except ValueError:
-                    # otherwise it's a name
-                    author = models.Author.objects.create(name=match)
+                    # otherwise it's a name with or without isni id
+                    isni = request.POST.get(f"isni_match-{i}")
+                    author_data = (
+                        get_author_isni_data(isni) if isni is not None 
+                        else {"name": match}
+                    )
+                    author = models.Author.objects.create(**author_data)
                book.authors.add(author)

            # create work, if needed