Refactor ISNI author enrichment

- use activitypub.Author instead of custom dict
- prefer to display "Author of [title]" from ISNI data instead of short description
- merge ISNI and DB authors for edit_book display
- fix edit_book template to use changed data
- rename some functions in utils/isni.py
This commit is contained in:
Hugh Rundle 2021-11-22 08:49:22 +11:00
parent e2836d468d
commit fd2ade2313
No known key found for this signature in database
GPG key ID: CD23D6039184286B
4 changed files with 50 additions and 53 deletions

View file

@ -56,24 +56,19 @@
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %} {% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
</legend> </legend>
{% with forloop.counter0 as counter %} {% with forloop.counter0 as counter %}
{% if author.isni_matches %}
{% for isni_match in author.isni_matches %}
<label class="label mt-2">
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
{{ isni_match.author.name }}
</label>
<p class="help ml-5 mb-2">
<a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
</p>
{% endfor %}
{% endif %}
{% for match in author.matches %} {% for match in author.matches %}
<label class="label"> <label class="label">
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required> <input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
{{ match.name }} {{ match.name }}
</label> </label>
<p class="help ml-5 mb-2"> <p class="help ml-5 mb-2">
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a> {% with book_title=match.book_set.first.title alt_title=match.bio %}
{% if book_title %}
<a href="{{ match.local_path }}" target="_blank">{% trans "Author of " %}<em>{{ book_title }}</em></a>
{% else %}
<a href="{{ match.id }}" target="_blank">{% trans "Author of " %}<em>{{ alt_title }}</em></a>
{% endif %}
{% endwith %}
</p> </p>
<p class="help ml-5"> <p class="help ml-5">
{{ author.existing_isnis|get_isni_bio:match }} {{ author.existing_isnis|get_isni_bio:match }}

View file

@ -78,8 +78,8 @@ def get_isni_bio(existing, author):
if len(existing) == 0: if len(existing) == 0:
return "" return ""
for value in existing: for value in existing:
if "bio" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])): if hasattr(value, "bio") and auth_isni == re.sub(r"\D", "", str(value.isni)):
return value["bio"] return mark_safe(f"Author of <em>{value.bio}</em>")
return "" return ""
@ -92,8 +92,8 @@ def get_isni(existing, author, autoescape=True):
if len(existing) == 0: if len(existing) == 0:
return "" return ""
for value in existing: for value in existing:
if "isni" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])): if hasattr(value, "isni")and auth_isni == re.sub(r"\D", "", str(value.isni)):
isni = value["isni"] isni = value.isni
return mark_safe( return mark_safe(
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>' f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'
) )

View file

@ -1,10 +1,7 @@
"""ISNI author checking utilities""" """ISNI author checking utilities"""
from typing import Set
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from django.utils.safestring import mark_safe
from bookwyrm import activitypub, models from bookwyrm import activitypub, models
def request_isni_data(search_index, search_term, max_records=5): def request_isni_data(search_index, search_term, max_records=5):
@ -86,7 +83,7 @@ def get_external_information_uri(element, match_string):
return "" return ""
def find_authors_by_name(name_string): def find_authors_by_name(name_string, description=False):
"""Query the ISNI database for possible author matches by name""" """Query the ISNI database for possible author matches by name"""
payload = request_isni_data("pica.na", name_string) payload = request_isni_data("pica.na", name_string)
@ -102,23 +99,25 @@ def find_authors_by_name(name_string):
if not personal_name: if not personal_name:
continue continue
author = {} author = get_author_from_isni(element.find(".//isniUnformatted").text)
author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
titles = element.findall(".//title") if bool(description):
if titles: titles = element.findall(".//title")
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0] if titles:
title = ( # some of the "titles" in ISNI are a little ...iffy
title_element.text.replace('@', '') title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
if titles is not None title = (
and title_element is not None title_element.text.replace('@', '')
and len(title_element.text) > 4 if titles is not None
else None and title_element is not None
) and len(title_element.text) > 4
author["description"] = ( else None
mark_safe(f"Author of <em>{title}</em>") if title is not None )
else bio.text if bio is not None author.bio = (
else "More information at isni.org" title if title is not None
) else bio.text if bio is not None
else "More information at isni.org"
)
possible_authors.append(author) possible_authors.append(author)
@ -151,10 +150,10 @@ def get_author_from_isni(isni):
id=element.find(".//isniURI").text, id=element.find(".//isniURI").text,
name=name, name=name,
isni=isni, isni=isni,
viaf_id=viaf, viafId=viaf,
aliases=aliases, aliases=aliases,
bio=bio, bio=bio,
wikipedia_link=wikipedia wikipediaLink=wikipedia
) )
return author return author
@ -163,12 +162,10 @@ def build_author_from_isni(match_value):
"""Build dict with basic author details from ISNI or author name""" """Build dict with basic author details from ISNI or author name"""
# if it is an isni value get the data # if it is an isni value get the data
if match_value.startswith("isni_match_"): if match_value.startswith("https://isni.org/isni/"):
isni = match_value.replace("isni_match_", "") isni = match_value.replace("https://isni.org/isni/", "")
print("returning author dict")
return { "author": get_author_from_isni(isni) } return { "author": get_author_from_isni(isni) }
# otherwise it's a name string # otherwise it's a name string
print("returning empty dict")
return {} return {}
@ -180,7 +177,7 @@ def augment_author_metadata(author, isni):
# we DO want to overwrite aliases because we're adding them to the # we DO want to overwrite aliases because we're adding them to the
# existing aliases and ISNI will usually have more. # existing aliases and ISNI will usually have more.
# We need to dedupe because ISNI has lots of dupe aliases # We need to dedupe because ISNI records often have lots of dupe aliases
aliases = set(isni_author["aliases"]) aliases = set(isni_author["aliases"])
for alias in author.aliases: for alias in author.aliases:
aliases.add(alias) aliases.add(alias)

View file

@ -72,26 +72,28 @@ class EditBook(View):
) )
isni_authors = find_authors_by_name( isni_authors = find_authors_by_name(
author author,
description=True
) # find matches from ISNI API ) # find matches from ISNI API
# do not show isni results for authors we already have in the DB # dedupe isni authors we already have in the DB
exists = [ exists = [
i i
for i in isni_authors for i in isni_authors
for a in author_matches for a in author_matches
if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni)) if sub(r"\D", "", str(i.isni)) == sub(r"\D", "", str(a.isni))
] ]
# pylint: disable=cell-var-from-loop # pylint: disable=cell-var-from-loop
isni_matches = list(filter(lambda x: x not in exists, isni_authors)) matches = list(filter(lambda x: x not in exists, isni_authors))
# combine existing and isni authors
matches.extend(author_matches)
data["author_matches"].append( data["author_matches"].append(
{ {
"name": author.strip(), "name": author.strip(),
"matches": author_matches, "matches": matches,
"existing_isnis": exists, "existing_isnis": exists,
"isni_matches": isni_matches,
} }
) )
@ -181,16 +183,19 @@ class ConfirmEditBook(View):
augment_author_metadata(author, isni) augment_author_metadata(author, isni)
except ValueError: except ValueError:
# otherwise it's a new author # otherwise it's a new author
# with isni id
isni_match = request.POST.get(f"author_match-{i}") isni_match = request.POST.get(f"author_match-{i}")
author_object = build_author_from_isni(isni_match) author_object = build_author_from_isni(isni_match)
# with author data class from isni id
if "author" in author_object: if "author" in author_object:
# TESTING
skeleton = models.Author.objects.create(name=author_object["author"].name)
author = author_object["author"].to_model( author = author_object["author"].to_model(
model=models.Author, model=models.Author,
overwrite=False overwrite=True,
instance=skeleton
) )
else: else:
# or it's a name # or it's just a name
author = models.Author.objects.create(name=match) author = models.Author.objects.create(name=match)
book.authors.add(author) book.authors.add(author)