mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-16 13:16:33 +00:00
populate new authors with isni data
If a user selects an author pulled from the ISNI service when editing a book, use any relevant data from ISNI to populate the new author record. This includes: bio, aliases, ISNI, Wikipedia URL, and VIAF ID.
This commit is contained in:
parent
45158a1c03
commit
bce5f3f3b5
3 changed files with 119 additions and 47 deletions
|
@ -50,39 +50,33 @@
|
||||||
<input type="hidden" name="author-match-count" value="{{ author_matches|length }}">
|
<input type="hidden" name="author-match-count" value="{{ author_matches|length }}">
|
||||||
<div class="column is-half">
|
<div class="column is-half">
|
||||||
{% for author in author_matches %}
|
{% for author in author_matches %}
|
||||||
{% if author.isni_matches %}
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend class="title is-5 mb-1">
|
<legend class="title is-5 mb-1">
|
||||||
{% blocktrans with name=author.name %}Is "{{ name }}" one of these?{% endblocktrans %}
|
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
|
||||||
</legend>
|
</legend>
|
||||||
{% with forloop.counter0 as counter %}
|
{% with forloop.counter0 as counter %}
|
||||||
{% for match in author.isni_matches %}
|
{% if author.isni_matches %}
|
||||||
<label class="label mb-2">
|
{% for isni_match in author.isni_matches %}
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
|
<label class="label mt-2">
|
||||||
{{ match.name }}
|
<input type="radio" name="author_match-{{ counter }}" value="{{ isni_match.name }}" required>
|
||||||
<span class="help">
|
{{ isni_match.name }}
|
||||||
<a href="{{ match.uri }}" target="_blank"><em>{{ match.description }}</em></a>
|
|
||||||
</span>
|
|
||||||
</label>
|
</label>
|
||||||
|
<p class="help ml-5 mb-2">
|
||||||
|
<a href="{{ match.uri }}" target="_blank">{{ isni_match.bio }}</a>
|
||||||
|
</p>
|
||||||
|
<input type="text" name="isni_match-{{ counter }}" value="{{ isni_match.isni }}" hidden>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endwith %}
|
{% endif %}
|
||||||
</fieldset>
|
|
||||||
{% endif %}
|
|
||||||
<fieldset>
|
|
||||||
<legend class="title is-5 mb-1">
|
|
||||||
{% blocktrans with name=author.name %}Is "{{ name }}" an existing author?{% endblocktrans %}
|
|
||||||
</legend>
|
|
||||||
{% with forloop.counter0 as counter %}
|
|
||||||
{% for match in author.matches %}
|
{% for match in author.matches %}
|
||||||
<label class="label mb-2">
|
<label class="label">
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
|
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
|
||||||
{{ match.name }}
|
{{ match.name }}
|
||||||
</label>
|
</label>
|
||||||
<p class="help">
|
<p class="help ml-5 mb-2">
|
||||||
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a>
|
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a>
|
||||||
</p>
|
</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<label class="label">
|
<label class="label mt-2">
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="{{ author.name }}" required> {% trans "This is a new author" %}
|
<input type="radio" name="author_match-{{ counter }}" value="{{ author.name }}" required> {% trans "This is a new author" %}
|
||||||
</label>
|
</label>
|
||||||
{% endwith %}
|
{% endwith %}
|
||||||
|
|
|
@ -2,46 +2,117 @@
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
# get data
|
from bookwyrm.settings import BASE_DIR
|
||||||
BASE_STRING = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
|
|
||||||
#pylint: disable=line-too-long
|
|
||||||
SUFFIX_STRING = "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b&maximumRecords=10&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"
|
|
||||||
|
|
||||||
|
|
||||||
def url_stringify(string):
|
def url_stringify(string):
|
||||||
"""replace spaces for url encoding"""
|
"""replace spaces for url encoding"""
|
||||||
|
|
||||||
|
# TODO: this is very lazy and incomplete
|
||||||
return string.replace(" ", "+")
|
return string.replace(" ", "+")
|
||||||
|
|
||||||
|
def request_isni_data(search_index, search_term, maxRecords=10):
|
||||||
|
"""Request data from the ISNI API"""
|
||||||
|
|
||||||
def find_authors_by_name(names):
|
search_string = url_stringify(search_term)
|
||||||
"""Query the ISNI database for an author"""
|
query_parts = [
|
||||||
names = url_stringify(names)
|
"http://isni.oclc.org/sru/?query=",
|
||||||
query = BASE_STRING + names + SUFFIX_STRING
|
search_index,
|
||||||
result = requests.get(query)
|
"+%3D+%22",
|
||||||
|
search_string,
|
||||||
|
"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
|
||||||
|
"&maximumRecords=",
|
||||||
|
str(maxRecords),
|
||||||
|
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"
|
||||||
|
]
|
||||||
|
query_url = "".join(query_parts)
|
||||||
|
result = requests.get(query_url)
|
||||||
# the OCLC ISNI server asserts the payload is encoded
|
# the OCLC ISNI server asserts the payload is encoded
|
||||||
# in latin1, but we know better
|
# in latin1, but we know better
|
||||||
result.encoding = "utf-8"
|
result.encoding = "utf-8"
|
||||||
payload = result.text
|
return result.text
|
||||||
|
|
||||||
|
def make_name_string(element):
    """Create a string of the form 'personal_name surname'.

    Searches ``element`` for a ``forename`` and a ``surname`` descendant and
    joins whichever of them exist and carry text with a single space,
    forename first.

    Returns the joined name, or an empty string when neither part is
    available (previously a missing ``surname`` raised ``AttributeError``).
    """

    # NOTE: this will often be incorrect, many naming systems
    # list "surname" before personal name
    name_parts = []
    for tag in ("forename", "surname"):
        node = element.find(f".//{tag}")
        # skip absent elements as well as empty ones such as <forename/>
        if node is not None and node.text:
            name_parts.append(node.text)
    return " ".join(name_parts)
|
||||||
|
|
||||||
|
def get_other_identifier(element, code):
    """Get other identifiers associated with an author from their ISNI record.

    Walks every ``otherIdentifierOfIdentity`` section below ``element`` and
    returns the text of the ``identifier`` in the first section whose
    ``type`` text equals ``code`` exactly.

    Returns ``None`` when no section matches.
    """

    for section_head in element.findall(".//otherIdentifierOfIdentity"):
        # look each child up once rather than re-running the search per check
        id_type = section_head.find(".//type")
        identifier = section_head.find(".//identifier")
        if (
            id_type is not None
            and id_type.text == code
            and identifier is not None
        ):
            return identifier.text
    return None
|
||||||
|
|
||||||
|
def get_external_information_uri(element, match_string):
    """Get URLs associated with an author from their ISNI record.

    Walks every ``externalInformation`` section below ``element`` and returns
    the text of the first ``URI`` that contains ``match_string``. The
    comparison ignores case, so "Wikipedia" matches a lowercase
    ``wikipedia.org`` address.

    Returns ``None`` when no URI matches.
    """

    needle = match_string.lower()
    for source in element.findall(".//externalInformation"):
        uri = source.find(".//URI")
        # str.find() signals a miss with -1, never None, so comparing its
        # result against None accepted every URI; test containment instead.
        # An empty <URI/> has text None and is skipped.
        if uri is not None and uri.text and needle in uri.text.lower():
            return uri.text
    return None
|
||||||
|
|
||||||
|
def find_authors_by_name(name_string):
|
||||||
|
"""Query the ISNI database for possible author matches by name"""
|
||||||
|
|
||||||
|
payload = request_isni_data("pica.na", name_string)
|
||||||
# parse xml
|
# parse xml
|
||||||
root = ET.fromstring(payload)
|
root = ET.fromstring(payload)
|
||||||
|
|
||||||
# build list of possible authors
|
# build list of possible authors
|
||||||
possible_authors = []
|
possible_authors = []
|
||||||
for element in root.iter("responseRecord"):
|
for element in root.iter("responseRecord"):
|
||||||
|
|
||||||
author = {}
|
|
||||||
author["uri"] = element.find(".//isniURI").text
|
|
||||||
# NOTE: this will often be incorrect, many naming systems
|
|
||||||
# list "surname" before personal name
|
|
||||||
personal_name = element.find(".//forename/..")
|
personal_name = element.find(".//forename/..")
|
||||||
description = element.find(".//nameTitle")
|
bio = element.find(".//nameTitle")
|
||||||
if personal_name:
|
|
||||||
forename = personal_name.find(".//forename")
|
|
||||||
surname = personal_name.find(".//surname")
|
|
||||||
author["name"] = forename.text + " " + surname.text
|
|
||||||
if description is not None:
|
|
||||||
author["description"] = description.text
|
|
||||||
|
|
||||||
possible_authors.append(author)
|
if not personal_name:
|
||||||
|
continue
|
||||||
|
|
||||||
|
author = {}
|
||||||
|
author["isni"] = element.find(".//isniUnformatted").text
|
||||||
|
author["uri"] = element.find(".//isniURI").text
|
||||||
|
author["name"] = make_name_string(personal_name)
|
||||||
|
if bio is not None:
|
||||||
|
author["bio"] = bio.text
|
||||||
|
possible_authors.append(author)
|
||||||
|
|
||||||
return possible_authors
|
return possible_authors
|
||||||
|
|
||||||
|
def get_author_isni_data(isni):
    """Collect data for a new author record from their ISNI entry.

    Requests the record for ``isni`` through the "pica.isn" search index,
    parses the XML payload and reads the first ``responseRecord`` in it.

    Returns a dict with the keys ``isni``, ``name``, ``viaf_id``,
    ``wikipedia_link``, ``bio`` (empty string when the record has no
    ``nameTitle``) and ``aliases`` (one name string per
    ``personalNameVariant``).
    """

    payload = request_isni_data("pica.isn", isni)
    # parse xml
    root = ET.fromstring(payload)
    # there should only be a single responseRecord
    # but let's use the first one just in case
    element = root.find(".//responseRecord")
    personal_name = element.find(".//forename/..")
    bio = element.find(".//nameTitle")

    return {
        "isni": isni,
        "name": make_name_string(personal_name),
        "viaf_id": get_other_identifier(element, "viaf"),
        "wikipedia_link": get_external_information_uri(element, "Wikipedia"),
        "bio": bio.text if bio is not None else "",
        # CHECK can we send a list for this?
        "aliases": [
            make_name_string(entry)
            for entry in element.findall(".//personalNameVariant")
        ],
    }
|
||||||
|
|
|
@ -11,7 +11,7 @@ from django.utils.decorators import method_decorator
|
||||||
from django.views import View
|
from django.views import View
|
||||||
|
|
||||||
from bookwyrm import book_search, forms, models
|
from bookwyrm import book_search, forms, models
|
||||||
from bookwyrm.utils.isni import find_authors_by_name
|
from bookwyrm.utils.isni import find_authors_by_name, get_author_isni_data
|
||||||
from bookwyrm.views.helpers import get_edition
|
from bookwyrm.views.helpers import get_edition
|
||||||
from .books import set_cover_from_url
|
from .books import set_cover_from_url
|
||||||
|
|
||||||
|
@ -71,6 +71,8 @@ class EditBook(View):
|
||||||
), # find matches from ISNI API
|
), # find matches from ISNI API
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# TODO: check if an isni record matches an existing record
|
||||||
|
# to bring these two records together
|
||||||
|
|
||||||
# we're creating a new book
|
# we're creating a new book
|
||||||
if not book:
|
if not book:
|
||||||
|
@ -152,8 +154,13 @@ class ConfirmEditBook(View):
|
||||||
models.Author, id=request.POST[f"author_match-{i}"]
|
models.Author, id=request.POST[f"author_match-{i}"]
|
||||||
)
|
)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# otherwise it's a name
|
# otherwise it's a name with or without isni id
|
||||||
author = models.Author.objects.create(name=match)
|
isni = request.POST.get(f"isni_match-{i}")
|
||||||
|
author_data = (
|
||||||
|
get_author_isni_data(isni) if isni is not None
|
||||||
|
else {"name": match}
|
||||||
|
)
|
||||||
|
author = models.Author.objects.create(**author_data)
|
||||||
book.authors.add(author)
|
book.authors.add(author)
|
||||||
|
|
||||||
# create work, if needed
|
# create work, if needed
|
||||||
|
|
Loading…
Reference in a new issue