mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-28 04:21:07 +00:00
refactor isni author enrichment
- use activitypub.Author instead of a custom dict
- prefer to display "Author of [title]" from ISNI data instead of the short description
- merge ISNI and database authors for the edit_book display
- fix the edit_book template to use the changed data
- rename some functions in utils/isni.py
This commit is contained in:
parent
e2836d468d
commit
fd2ade2313
4 changed files with 50 additions and 53 deletions
|
@ -56,24 +56,19 @@
|
||||||
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
|
{% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
|
||||||
</legend>
|
</legend>
|
||||||
{% with forloop.counter0 as counter %}
|
{% with forloop.counter0 as counter %}
|
||||||
{% if author.isni_matches %}
|
|
||||||
{% for isni_match in author.isni_matches %}
|
|
||||||
<label class="label mt-2">
|
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
|
|
||||||
{{ isni_match.author.name }}
|
|
||||||
</label>
|
|
||||||
<p class="help ml-5 mb-2">
|
|
||||||
<a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
|
|
||||||
</p>
|
|
||||||
{% endfor %}
|
|
||||||
{% endif %}
|
|
||||||
{% for match in author.matches %}
|
{% for match in author.matches %}
|
||||||
<label class="label">
|
<label class="label">
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
|
<input type="radio" name="author_match-{{ counter }}" value="{{ match.id }}" required>
|
||||||
{{ match.name }}
|
{{ match.name }}
|
||||||
</label>
|
</label>
|
||||||
<p class="help ml-5 mb-2">
|
<p class="help ml-5 mb-2">
|
||||||
<a href="{{ match.local_path }}" target="_blank">{% blocktrans with book_title=match.book_set.first.title %}Author of <em>{{ book_title }}</em>{% endblocktrans %}</a>
|
{% with book_title=match.book_set.first.title alt_title=match.bio %}
|
||||||
|
{% if book_title %}
|
||||||
|
<a href="{{ match.local_path }}" target="_blank">{% trans "Author of " %}<em>{{ book_title }}</em></a>
|
||||||
|
{% else %}
|
||||||
|
<a href="{{ match.id }}" target="_blank">{% trans "Author of " %}<em>{{ alt_title }}</em></a>
|
||||||
|
{% endif %}
|
||||||
|
{% endwith %}
|
||||||
</p>
|
</p>
|
||||||
<p class="help ml-5">
|
<p class="help ml-5">
|
||||||
{{ author.existing_isnis|get_isni_bio:match }}
|
{{ author.existing_isnis|get_isni_bio:match }}
|
||||||
|
|
|
@ -78,8 +78,8 @@ def get_isni_bio(existing, author):
|
||||||
if len(existing) == 0:
|
if len(existing) == 0:
|
||||||
return ""
|
return ""
|
||||||
for value in existing:
|
for value in existing:
|
||||||
if "bio" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])):
|
if hasattr(value, "bio") and auth_isni == re.sub(r"\D", "", str(value.isni)):
|
||||||
return value["bio"]
|
return mark_safe(f"Author of <em>{value.bio}</em>")
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
@ -92,8 +92,8 @@ def get_isni(existing, author, autoescape=True):
|
||||||
if len(existing) == 0:
|
if len(existing) == 0:
|
||||||
return ""
|
return ""
|
||||||
for value in existing:
|
for value in existing:
|
||||||
if "isni" in value and auth_isni == re.sub(r"\D", "", str(value["isni"])):
|
if hasattr(value, "isni")and auth_isni == re.sub(r"\D", "", str(value.isni)):
|
||||||
isni = value["isni"]
|
isni = value.isni
|
||||||
return mark_safe(
|
return mark_safe(
|
||||||
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'
|
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
"""ISNI author checking utilities"""
|
"""ISNI author checking utilities"""
|
||||||
from typing import Set
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from django.utils.safestring import mark_safe
|
|
||||||
|
|
||||||
from bookwyrm import activitypub, models
|
from bookwyrm import activitypub, models
|
||||||
|
|
||||||
def request_isni_data(search_index, search_term, max_records=5):
|
def request_isni_data(search_index, search_term, max_records=5):
|
||||||
|
@ -86,7 +83,7 @@ def get_external_information_uri(element, match_string):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def find_authors_by_name(name_string):
|
def find_authors_by_name(name_string, description=False):
|
||||||
"""Query the ISNI database for possible author matches by name"""
|
"""Query the ISNI database for possible author matches by name"""
|
||||||
|
|
||||||
payload = request_isni_data("pica.na", name_string)
|
payload = request_isni_data("pica.na", name_string)
|
||||||
|
@ -102,23 +99,25 @@ def find_authors_by_name(name_string):
|
||||||
if not personal_name:
|
if not personal_name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
author = {}
|
author = get_author_from_isni(element.find(".//isniUnformatted").text)
|
||||||
author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
|
|
||||||
titles = element.findall(".//title")
|
if bool(description):
|
||||||
if titles:
|
titles = element.findall(".//title")
|
||||||
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
|
if titles:
|
||||||
title = (
|
# some of the "titles" in ISNI are a little ...iffy
|
||||||
title_element.text.replace('@', '')
|
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
|
||||||
if titles is not None
|
title = (
|
||||||
and title_element is not None
|
title_element.text.replace('@', '')
|
||||||
and len(title_element.text) > 4
|
if titles is not None
|
||||||
else None
|
and title_element is not None
|
||||||
)
|
and len(title_element.text) > 4
|
||||||
author["description"] = (
|
else None
|
||||||
mark_safe(f"Author of <em>{title}</em>") if title is not None
|
)
|
||||||
else bio.text if bio is not None
|
author.bio = (
|
||||||
else "More information at isni.org"
|
title if title is not None
|
||||||
)
|
else bio.text if bio is not None
|
||||||
|
else "More information at isni.org"
|
||||||
|
)
|
||||||
|
|
||||||
possible_authors.append(author)
|
possible_authors.append(author)
|
||||||
|
|
||||||
|
@ -151,10 +150,10 @@ def get_author_from_isni(isni):
|
||||||
id=element.find(".//isniURI").text,
|
id=element.find(".//isniURI").text,
|
||||||
name=name,
|
name=name,
|
||||||
isni=isni,
|
isni=isni,
|
||||||
viaf_id=viaf,
|
viafId=viaf,
|
||||||
aliases=aliases,
|
aliases=aliases,
|
||||||
bio=bio,
|
bio=bio,
|
||||||
wikipedia_link=wikipedia
|
wikipediaLink=wikipedia
|
||||||
)
|
)
|
||||||
|
|
||||||
return author
|
return author
|
||||||
|
@ -163,12 +162,10 @@ def build_author_from_isni(match_value):
|
||||||
"""Build dict with basic author details from ISNI or author name"""
|
"""Build dict with basic author details from ISNI or author name"""
|
||||||
|
|
||||||
# if it is an isni value get the data
|
# if it is an isni value get the data
|
||||||
if match_value.startswith("isni_match_"):
|
if match_value.startswith("https://isni.org/isni/"):
|
||||||
isni = match_value.replace("isni_match_", "")
|
isni = match_value.replace("https://isni.org/isni/", "")
|
||||||
print("returning author dict")
|
|
||||||
return { "author": get_author_from_isni(isni) }
|
return { "author": get_author_from_isni(isni) }
|
||||||
# otherwise it's a name string
|
# otherwise it's a name string
|
||||||
print("returning empty dict")
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
@ -180,7 +177,7 @@ def augment_author_metadata(author, isni):
|
||||||
|
|
||||||
# we DO want to overwrite aliases because we're adding them to the
|
# we DO want to overwrite aliases because we're adding them to the
|
||||||
# existing aliases and ISNI will usually have more.
|
# existing aliases and ISNI will usually have more.
|
||||||
# We need to dedupe because ISNI has lots of dupe aliases
|
# We need to dedupe because ISNI records often have lots of dupe aliases
|
||||||
aliases = set(isni_author["aliases"])
|
aliases = set(isni_author["aliases"])
|
||||||
for alias in author.aliases:
|
for alias in author.aliases:
|
||||||
aliases.add(alias)
|
aliases.add(alias)
|
||||||
|
|
|
@ -72,26 +72,28 @@ class EditBook(View):
|
||||||
)
|
)
|
||||||
|
|
||||||
isni_authors = find_authors_by_name(
|
isni_authors = find_authors_by_name(
|
||||||
author
|
author,
|
||||||
|
description=True
|
||||||
) # find matches from ISNI API
|
) # find matches from ISNI API
|
||||||
|
|
||||||
# do not show isni results for authors we already have in the DB
|
# dedupe isni authors we already have in the DB
|
||||||
exists = [
|
exists = [
|
||||||
i
|
i
|
||||||
for i in isni_authors
|
for i in isni_authors
|
||||||
for a in author_matches
|
for a in author_matches
|
||||||
if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni))
|
if sub(r"\D", "", str(i.isni)) == sub(r"\D", "", str(a.isni))
|
||||||
]
|
]
|
||||||
|
|
||||||
# pylint: disable=cell-var-from-loop
|
# pylint: disable=cell-var-from-loop
|
||||||
isni_matches = list(filter(lambda x: x not in exists, isni_authors))
|
matches = list(filter(lambda x: x not in exists, isni_authors))
|
||||||
|
# combine existing and isni authors
|
||||||
|
matches.extend(author_matches)
|
||||||
|
|
||||||
data["author_matches"].append(
|
data["author_matches"].append(
|
||||||
{
|
{
|
||||||
"name": author.strip(),
|
"name": author.strip(),
|
||||||
"matches": author_matches,
|
"matches": matches,
|
||||||
"existing_isnis": exists,
|
"existing_isnis": exists,
|
||||||
"isni_matches": isni_matches,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -181,16 +183,19 @@ class ConfirmEditBook(View):
|
||||||
augment_author_metadata(author, isni)
|
augment_author_metadata(author, isni)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# otherwise it's a new author
|
# otherwise it's a new author
|
||||||
# with isni id
|
|
||||||
isni_match = request.POST.get(f"author_match-{i}")
|
isni_match = request.POST.get(f"author_match-{i}")
|
||||||
author_object = build_author_from_isni(isni_match)
|
author_object = build_author_from_isni(isni_match)
|
||||||
|
# with author data class from isni id
|
||||||
if "author" in author_object:
|
if "author" in author_object:
|
||||||
|
# TESTING
|
||||||
|
skeleton = models.Author.objects.create(name=author_object["author"].name)
|
||||||
author = author_object["author"].to_model(
|
author = author_object["author"].to_model(
|
||||||
model=models.Author,
|
model=models.Author,
|
||||||
overwrite=False
|
overwrite=True,
|
||||||
|
instance=skeleton
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# or it's a name
|
# or it's just a name
|
||||||
author = models.Author.objects.create(name=match)
|
author = models.Author.objects.create(name=match)
|
||||||
book.authors.add(author)
|
book.authors.add(author)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue