Refactor ISNI author enrichment

- use activitypub.Author instead of a custom dict
- prefer to display "Author of [title]" from ISNI data instead of the short description
- merge ISNI and DB authors for edit_book display
- fix edit_book template to use the changed data
- rename some functions in utils/isni.py
This commit is contained in:
Hugh Rundle 2021-11-22 08:49:22 +11:00
parent e2836d468d
commit fd2ade2313
No known key found for this signature in database
GPG key ID: CD23D6039184286B
4 changed files with 50 additions and 53 deletions

View file

@ -56,24 +56,19 @@
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %} {% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
</legend> </legend>
{% with forloop.counter0 as counter %} {% with forloop.counter0 as counter %}
{% if author.isni_matches %}
{% for isni_match in author.isni_matches %}
<label class="label mt-2">
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
{{ isni_match.author.name }}
</label>
<p class="help ml-5 mb-2">
<a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
</p>
{% endfor %}
{% endif %}
{% for match in author.matches %} {% for match in author.matches %}
<label class="label"> <label class="label">
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required> <input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
{{ match.name }} {{ match.name }}
</label> </label>
<p class="help ml-5 mb-2"> <p class="help ml-5 mb-2">
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a> {% with book_title=match.book_set.first.title alt_title=match.bio %}
{% if book_title %}
<a href="{{ match.local_path }}" target="_blank">{% trans "Author of " %}<em>{{ book_title }}</em></a>
{% else %}
<a href="{{ match.id }}" target="_blank">{% trans "Author of " %}<em>{{ alt_title }}</em></a>
{% endif %}
{% endwith %}
</p> </p>
<p class="help ml-5"> <p class="help ml-5">
{{ author.existing_isnis|get_isni_bio:match }} {{ author.existing_isnis|get_isni_bio:match }}

View file

@ -78,8 +78,8 @@ def get_isni_bio(existing, author):
if len(existing) == 0: if len(existing) == 0:
return "" return ""
for value in existing: for value in existing:
if "bio" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])): if hasattr(value, "bio") and auth_isni == re.sub(r"\D", "", str(value.isni)):
return value["bio"] return mark_safe(f"Author of <em>{value.bio}</em>")
return "" return ""
@ -92,8 +92,8 @@ def get_isni(existing, author, autoescape=True):
if len(existing) == 0: if len(existing) == 0:
return "" return ""
for value in existing: for value in existing:
if "isni" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])): if hasattr(value, "isni")and auth_isni == re.sub(r"\D", "", str(value.isni)):
isni = value["isni"] isni = value.isni
return mark_safe( return mark_safe(
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>' f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'
) )

View file

@ -1,10 +1,7 @@
"""ISNI author checking utilities""" """ISNI author checking utilities"""
from typing import Set
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from django.utils.safestring import mark_safe
from bookwyrm import activitypub, models from bookwyrm import activitypub, models
def request_isni_data(search_index, search_term, max_records=5): def request_isni_data(search_index, search_term, max_records=5):
@ -86,7 +83,7 @@ def get_external_information_uri(element, match_string):
return "" return ""
def find_authors_by_name(name_string): def find_authors_by_name(name_string, description=False):
"""Query the ISNI database for possible author matches by name""" """Query the ISNI database for possible author matches by name"""
payload = request_isni_data("pica.na", name_string) payload = request_isni_data("pica.na", name_string)
@ -102,10 +99,12 @@ def find_authors_by_name(name_string):
if not personal_name: if not personal_name:
continue continue
author = {} author = get_author_from_isni(element.find(".//isniUnformatted").text)
author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
if bool(description):
titles = element.findall(".//title") titles = element.findall(".//title")
if titles: if titles:
# some of the "titles" in ISNI are a little ...iffy
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0] title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
title = ( title = (
title_element.text.replace('@', '') title_element.text.replace('@', '')
@ -114,8 +113,8 @@ def find_authors_by_name(name_string):
and len(title_element.text) > 4 and len(title_element.text) > 4
else None else None
) )
author["description"] = ( author.bio = (
mark_safe(f"Author of <em>{title}</em>") if title is not None title if title is not None
else bio.text if bio is not None else bio.text if bio is not None
else "More information at isni.org" else "More information at isni.org"
) )
@ -151,10 +150,10 @@ def get_author_from_isni(isni):
id=element.find(".//isniURI").text, id=element.find(".//isniURI").text,
name=name, name=name,
isni=isni, isni=isni,
viaf_id=viaf, viafId=viaf,
aliases=aliases, aliases=aliases,
bio=bio, bio=bio,
wikipedia_link=wikipedia wikipediaLink=wikipedia
) )
return author return author
@ -163,12 +162,10 @@ def build_author_from_isni(match_value):
"""Build dict with basic author details from ISNI or author name""" """Build dict with basic author details from ISNI or author name"""
# if it is an isni value get the data # if it is an isni value get the data
if match_value.startswith("isni_match_"): if match_value.startswith("https://isni.org/isni/"):
isni = match_value.replace("isni_match_", "") isni = match_value.replace("https://isni.org/isni/", "")
print("returning author dict")
return { "author": get_author_from_isni(isni) } return { "author": get_author_from_isni(isni) }
# otherwise it's a name string # otherwise it's a name string
print("returning empty dict")
return {} return {}
@ -180,7 +177,7 @@ def augment_author_metadata(author, isni):
# we DO want to overwrite aliases because we're adding them to the # we DO want to overwrite aliases because we're adding them to the
# existing aliases and ISNI will usually have more. # existing aliases and ISNI will usually have more.
# We need to dedupe because ISNI has lots of dupe aliases # We need to dedupe because ISNI records often have lots of dupe aliases
aliases = set(isni_author["aliases"]) aliases = set(isni_author["aliases"])
for alias in author.aliases: for alias in author.aliases:
aliases.add(alias) aliases.add(alias)

View file

@ -72,26 +72,28 @@ class EditBook(View):
) )
isni_authors = find_authors_by_name( isni_authors = find_authors_by_name(
author author,
description=True
) # find matches from ISNI API ) # find matches from ISNI API
# do not show isni results for authors we already have in the DB # dedupe isni authors we already have in the DB
exists = [ exists = [
i i
for i in isni_authors for i in isni_authors
for a in author_matches for a in author_matches
if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni)) if sub(r"\D", "", str(i.isni)) == sub(r"\D", "", str(a.isni))
] ]
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop
isni_matches = list(filter(lambda x: x not in exists, isni_authors)) matches = list(filter(lambda x: x not in exists, isni_authors))
# combine existing and isni authors
matches.extend(author_matches)
data["author_matches"].append( data["author_matches"].append(
{ {
"name": author.strip(), "name": author.strip(),
"matches": author_matches, "matches": matches,
"existing_isnis": exists, "existing_isnis": exists,
"isni_matches": isni_matches,
} }
) )
@ -181,16 +183,19 @@ class ConfirmEditBook(View):
augment_author_metadata(author, isni) augment_author_metadata(author, isni)
except ValueError: except ValueError:
# otherwise it's a new author # otherwise it's a new author
# with isni id
isni_match = request.POST.get(f"author_match-{i}") isni_match = request.POST.get(f"author_match-{i}")
author_object = build_author_from_isni(isni_match) author_object = build_author_from_isni(isni_match)
# with author data class from isni id
if "author" in author_object: if "author" in author_object:
# TESTING
skeleton = models.Author.objects.create(name=author_object["author"].name)
author = author_object["author"].to_model( author = author_object["author"].to_model(
model=models.Author, model=models.Author,
overwrite=False overwrite=True,
instance=skeleton
) )
else: else:
# or it's a name # or it's just a name
author = models.Author.objects.create(name=match) author = models.Author.objects.create(name=match)
book.authors.add(author) book.authors.add(author)