Refactor ISNI author enrichment

- use activitypub.Author instead of custom dict
- prefer to display "Author of [title]" from ISNI data instead of short description
- merge ISNI and DB authors for edit_book display
- fix edit_book template to use changed data
- rename some functions in utils/isni.py
This commit is contained in:
Hugh Rundle 2021-11-22 08:49:22 +11:00
parent e2836d468d
commit fd2ade2313
No known key found for this signature in database
GPG key ID: CD23D6039184286B
4 changed files with 50 additions and 53 deletions

View file

@ -56,24 +56,19 @@
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
</legend>
{% with forloop.counter0 as counter %}
{% if author.isni_matches %}
{% for isni_match in author.isni_matches %}
<label class="label mt-2">
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
{{ isni_match.author.name }}
</label>
<p class="help ml-5 mb-2">
<a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
</p>
{% endfor %}
{% endif %}
{% for match in author.matches %}
<label class="label">
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
{{ match.name }}
</label>
<p class="help ml-5 mb-2">
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a>
{% with book_title=match.book_set.first.title alt_title=match.bio %}
{% if book_title %}
<a href="{{ match.local_path }}" target="_blank">{% trans "Author of " %}<em>{{ book_title }}</em></a>
{% else %}
<a href="{{ match.id }}" target="_blank">{% trans "Author of " %}<em>{{ alt_title }}</em></a>
{% endif %}
{% endwith %}
</p>
<p class="help ml-5">
{{ author.existing_isnis|get_isni_bio:match }}

View file

@ -78,8 +78,8 @@ def get_isni_bio(existing, author):
if len(existing) == 0:
return ""
for value in existing:
if "bio" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])):
return value["bio"]
if hasattr(value, "bio") and auth_isni == re.sub(r"\D", "", str(value.isni)):
return mark_safe(f"Author of <em>{value.bio}</em>")
return ""
@ -92,8 +92,8 @@ def get_isni(existing, author, autoescape=True):
if len(existing) == 0:
return ""
for value in existing:
if "isni" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])):
isni = value["isni"]
if hasattr(value, "isni")and auth_isni == re.sub(r"\D", "", str(value.isni)):
isni = value.isni
return mark_safe(
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'
)

View file

@ -1,10 +1,7 @@
"""ISNI author checking utilities"""
from typing import Set
import xml.etree.ElementTree as ET
import requests
from django.utils.safestring import mark_safe
from bookwyrm import activitypub, models
def request_isni_data(search_index, search_term, max_records=5):
@ -86,7 +83,7 @@ def get_external_information_uri(element, match_string):
return ""
def find_authors_by_name(name_string):
def find_authors_by_name(name_string, description=False):
"""Query the ISNI database for possible author matches by name"""
payload = request_isni_data("pica.na", name_string)
@ -102,23 +99,25 @@ def find_authors_by_name(name_string):
if not personal_name:
continue
author = {}
author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
titles = element.findall(".//title")
if titles:
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
title = (
title_element.text.replace('@', '')
if titles is not None
and title_element is not None
and len(title_element.text) > 4
else None
)
author["description"] = (
mark_safe(f"Author of <em>{title}</em>") if title is not None
else bio.text if bio is not None
else "More information at isni.org"
)
author = get_author_from_isni(element.find(".//isniUnformatted").text)
if bool(description):
titles = element.findall(".//title")
if titles:
# some of the "titles" in ISNI are a little ...iffy
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
title = (
title_element.text.replace('@', '')
if titles is not None
and title_element is not None
and len(title_element.text) > 4
else None
)
author.bio = (
title if title is not None
else bio.text if bio is not None
else "More information at isni.org"
)
possible_authors.append(author)
@ -151,10 +150,10 @@ def get_author_from_isni(isni):
id=element.find(".//isniURI").text,
name=name,
isni=isni,
viaf_id=viaf,
viafId=viaf,
aliases=aliases,
bio=bio,
wikipedia_link=wikipedia
wikipediaLink=wikipedia
)
return author
@ -163,12 +162,10 @@ def build_author_from_isni(match_value):
"""Build dict with basic author details from ISNI or author name"""
# if it is an isni value get the data
if match_value.startswith("isni_match_"):
isni = match_value.replace("isni_match_", "")
print("returning author dict")
if match_value.startswith("https://isni.org/isni/"):
isni = match_value.replace("https://isni.org/isni/", "")
return { "author": get_author_from_isni(isni) }
# otherwise it's a name string
print("returning empty dict")
return {}
@ -180,7 +177,7 @@ def augment_author_metadata(author, isni):
# we DO want to overwrite aliases because we're adding them to the
# existing aliases and ISNI will usually have more.
# We need to dedupe because ISNI has lots of dupe aliases
# We need to dedupe because ISNI records often have lots of dupe aliases
aliases = set(isni_author["aliases"])
for alias in author.aliases:
aliases.add(alias)

View file

@ -72,26 +72,28 @@ class EditBook(View):
)
isni_authors = find_authors_by_name(
author
author,
description=True
) # find matches from ISNI API
# do not show isni results for authors we already have in the DB
# dedupe isni authors we already have in the DB
exists = [
i
for i in isni_authors
for a in author_matches
if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni))
if sub(r"\D", "", str(i.isni)) == sub(r"\D", "", str(a.isni))
]
# pylint: disable=cell-var-from-loop
isni_matches = list(filter(lambda x: x not in exists, isni_authors))
matches = list(filter(lambda x: x not in exists, isni_authors))
# combine existing and isni authors
matches.extend(author_matches)
data["author_matches"].append(
{
"name": author.strip(),
"matches": author_matches,
"matches": matches,
"existing_isnis": exists,
"isni_matches": isni_matches,
}
)
@ -181,16 +183,19 @@ class ConfirmEditBook(View):
augment_author_metadata(author, isni)
except ValueError:
# otherwise it's a new author
# with isni id
isni_match = request.POST.get(f"author_match-{i}")
author_object = build_author_from_isni(isni_match)
# with author data class from isni id
if "author" in author_object:
# TESTING
skeleton = models.Author.objects.create(name=author_object["author"].name)
author = author_object["author"].to_model(
model=models.Author,
overwrite=False
overwrite=True,
instance=skeleton
)
else:
# or it's a name
# or it's just a name
author = models.Author.objects.create(name=match)
book.authors.add(author)