use activitypub dataclass for isni authors

- add a 10-second timeout to the ISNI API call
- use the activitypub.Author dataclass instead of a bespoke dict
- display ISNI authors as "Author of <first title in the ISNI record>" where possible
- fall back to the record's name title, then to a generic "More information at isni.org" note, if title info is unavailable in the ISNI record
This commit is contained in:
Hugh Rundle 2021-11-21 19:55:55 +11:00
parent 8658e36ca8
commit 1e6e4b0f8d
No known key found for this signature in database
GPG key ID: CD23D6039184286B
3 changed files with 100 additions and 44 deletions

View file

@ -59,11 +59,11 @@
{% if author.isni_matches %} {% if author.isni_matches %}
{% for isni_match in author.isni_matches %} {% for isni_match in author.isni_matches %}
<label class="label mt-2"> <label class="label mt-2">
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.isni }}" required> <input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
{{ isni_match.name }} {{ isni_match.author.name }}
</label> </label>
<p class="help ml-5 mb-2"> <p class="help ml-5 mb-2">
<a href="{{ isni_match.uri }}" target="_blank" rel="noopener noreferrer">{{ isni_match.bio }}</a> <a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
</p> </p>
{% endfor %} {% endfor %}
{% endif %} {% endif %}

View file

@ -1,7 +1,11 @@
"""ISNI author checking utilities""" """ISNI author checking utilities"""
from typing import Set
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from django.utils.safestring import mark_safe
from bookwyrm import activitypub, models
def request_isni_data(search_index, search_term, max_records=5): def request_isni_data(search_index, search_term, max_records=5):
"""Request data from the ISNI API""" """Request data from the ISNI API"""
@ -17,7 +21,11 @@ def request_isni_data(search_index, search_term, max_records=5):
"recordPacking": "xml", "recordPacking": "xml",
"sortKeys": "RLV,pica,0,,", "sortKeys": "RLV,pica,0,,",
} }
result = requests.get("http://isni.oclc.org/sru/", params=query_params) result = requests.get(
"http://isni.oclc.org/sru/",
params=query_params,
timeout=10
)
# the OCLC ISNI server asserts the payload is encoded # the OCLC ISNI server asserts the payload is encoded
# in latin1, but we know better # in latin1, but we know better
result.encoding = "utf-8" result.encoding = "utf-8"
@ -47,6 +55,18 @@ def get_other_identifier(element, code):
and section_head.find(".//identifier") is not None and section_head.find(".//identifier") is not None
): ):
return section_head.find(".//identifier").text return section_head.find(".//identifier").text
# if we can't find it in otherIdentifierOfIdentity,
# try sources
for source in element.findall(".//sources"):
code_of_source = source.find(".//codeOfSource")
if (
code_of_source is not None
and code_of_source.text == code.upper()
or code_of_source.text == code.lower()
):
return source.find(".//sourceIdentifier").text
return "" return ""
@ -55,8 +75,13 @@ def get_external_information_uri(element, match_string):
sources = element.findall(".//externalInformation") sources = element.findall(".//externalInformation")
for source in sources: for source in sources:
information = source.find(".//information")
uri = source.find(".//URI") uri = source.find(".//URI")
if uri is not None and uri.text.find(match_string) is not None: if (
uri is not None
and information is not None
and information.text.lower() == match_string.lower()
):
return uri.text return uri.text
return "" return ""
@ -78,17 +103,29 @@ def find_authors_by_name(name_string):
continue continue
author = {} author = {}
author["isni"] = element.find(".//isniUnformatted").text author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
author["uri"] = element.find(".//isniURI").text titles = element.findall(".//title")
author["name"] = make_name_string(personal_name) if titles:
if bio is not None: title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
author["bio"] = bio.text title = (
title_element.text.replace('@', '')
if titles is not None
and title_element is not None
and len(title_element.text) > 4
else None
)
author["description"] = (
mark_safe(f"Author of <em>{title}</em>") if title is not None
else bio.text if bio is not None
else "More information at isni.org"
)
possible_authors.append(author) possible_authors.append(author)
return possible_authors return possible_authors
def get_author_isni_data(isni): def get_author_from_isni(isni):
"""Find data to populate a new author record from their ISNI""" """Find data to populate a new author record from their ISNI"""
payload = request_isni_data("pica.isn", isni) payload = request_isni_data("pica.isn", isni)
@ -97,48 +134,57 @@ def get_author_isni_data(isni):
# there should only be a single responseRecord # there should only be a single responseRecord
# but let's use the first one just in case # but let's use the first one just in case
element = root.find(".//responseRecord") element = root.find(".//responseRecord")
personal_name = element.find(".//forename/..") name = make_name_string(element.find(".//forename/.."))
viaf = get_other_identifier(element, "viaf")
# use a set to dedupe aliases in ISNI
aliases = set()
aliases_element = element.findall(".//personalNameVariant")
for entry in aliases_element:
aliases.add(make_name_string(entry))
# aliases needs to be list not set
aliases = list(aliases)
bio = element.find(".//nameTitle") bio = element.find(".//nameTitle")
author = {} bio = bio.text if bio is not None else ""
author["isni"] = isni wikipedia = get_external_information_uri(element, "Wikipedia")
author["name"] = make_name_string(personal_name)
author["viaf_id"] = get_other_identifier(element, "viaf") author = activitypub.Author(
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia") id=element.find(".//isniURI").text,
author["bio"] = bio.text if bio is not None else "" name=name,
author["aliases"] = [] isni=isni,
aliases = element.findall(".//personalNameVariant") viaf_id=viaf,
for entry in aliases: aliases=aliases,
author["aliases"].append(make_name_string(entry)) bio=bio,
# dedupe aliases wikipedia_link=wikipedia
author["aliases"] = list(set(author["aliases"])) )
return author return author
def build_author_from_isni(match_value):
    """Build a dict holding the ISNI author for a submitted match value.

    Args:
        match_value: the value submitted for an author match. Values that
            refer to an ISNI record are prefixed with ``isni_match_``;
            anything else is treated as a plain author name.

    Returns:
        ``{"author": <result of get_author_from_isni>}`` when ``match_value``
        carries the ``isni_match_`` prefix, otherwise an empty dict so the
        caller can fall back to creating the author from the name alone.
    """
    prefix = "isni_match_"
    if match_value.startswith(prefix):
        # strip only the leading marker; what remains is the ISNI itself
        isni = match_value[len(prefix):]
        return {"author": get_author_from_isni(isni)}
    # otherwise it's a name string: there is no ISNI data to attach
    return {}
def augment_author_metadata(author, isni):
    """Update any missing author fields from ISNI data.

    Args:
        author: the existing author instance to update; it is modified in
            place and saved.
        isni: the ISNI identifier used to look up the author's record.
    """
    isni_author = get_author_from_isni(isni)
    # NOTE(review): overwrite=False presumably leaves fields that already
    # hold a value untouched - confirm against to_model's implementation
    isni_author.to_model(model=models.Author, instance=author, overwrite=False)

    # we DO want to overwrite aliases because we're adding them to the
    # existing aliases and ISNI will usually have more.
    # We need to dedupe because ISNI has lots of dupe aliases.
    # isni_author is a dataclass instance, so aliases is read as an
    # attribute rather than by subscript.
    merged_aliases = set(isni_author.aliases) | set(author.aliases)
    author.aliases = list(merged_aliases)
    author.save()

View file

@ -12,9 +12,10 @@ from django.utils.decorators import method_decorator
from django.views import View from django.views import View
from bookwyrm import book_search, forms, models from bookwyrm import book_search, forms, models
# from bookwyrm.activitypub.base_activity import ActivityObject
from bookwyrm.utils.isni import ( from bookwyrm.utils.isni import (
find_authors_by_name, find_authors_by_name,
build_author_dict, build_author_from_isni,
augment_author_metadata, augment_author_metadata,
) )
from bookwyrm.views.helpers import get_edition from bookwyrm.views.helpers import get_edition
@ -79,7 +80,7 @@ class EditBook(View):
i i
for i in isni_authors for i in isni_authors
for a in author_matches for a in author_matches
if sub(r"\D", "", str(i["isni"])) == sub(r"\D", "", str(a.isni)) if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni))
] ]
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop
@ -179,9 +180,18 @@ class ConfirmEditBook(View):
if isni is not None: if isni is not None:
augment_author_metadata(author, isni) augment_author_metadata(author, isni)
except ValueError: except ValueError:
# otherwise it's a name with or without isni id # otherwise it's a new author
author_data = build_author_dict(match) # with isni id
author = models.Author.objects.create(**author_data) isni_match = request.POST.get(f"author_match-{i}")
author_object = build_author_from_isni(isni_match)
if "author" in author_object:
author = author_object["author"].to_model(
model=models.Author,
overwrite=False
)
else:
# or it's a name
author = models.Author.objects.create(name=match)
book.authors.add(author) book.authors.add(author)
# create work, if needed # create work, if needed