mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-26 10:00:33 +00:00
use activitypub dataclass for isni authors
- add timeout to ISNI API call
- use activitypub.Author dataclass instead of bespoke dict
- display ISNI authors as "Author of" the first title in the ISNI record, if possible
- sensible fallbacks if title info is unavailable in the ISNI record
This commit is contained in:
parent
8658e36ca8
commit
1e6e4b0f8d
3 changed files with 100 additions and 44 deletions
|
@ -59,11 +59,11 @@
|
||||||
{% if author.isni_matches %}
|
{% if author.isni_matches %}
|
||||||
{% for isni_match in author.isni_matches %}
|
{% for isni_match in author.isni_matches %}
|
||||||
<label class="label mt-2">
|
<label class="label mt-2">
|
||||||
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.isni }}" required>
|
<input type="radio" name="author_match-{{ counter }}" value="isni_match_{{ isni_match.author.isni }}" required>
|
||||||
{{ isni_match.name }}
|
{{ isni_match.author.name }}
|
||||||
</label>
|
</label>
|
||||||
<p class="help ml-5 mb-2">
|
<p class="help ml-5 mb-2">
|
||||||
<a href="{{ isni_match.uri }}" target="_blank" rel="noopener noreferrer">{{ isni_match.bio }}</a>
|
<a href="{{ isni_match.author.id }}" target="_blank" rel="noopener noreferrer">{{ isni_match.description }}</a>
|
||||||
</p>
|
</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
|
@ -1,7 +1,11 @@
|
||||||
"""ISNI author checking utilities"""
|
"""ISNI author checking utilities"""
|
||||||
|
from typing import Set
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from django.utils.safestring import mark_safe
|
||||||
|
|
||||||
|
from bookwyrm import activitypub, models
|
||||||
|
|
||||||
def request_isni_data(search_index, search_term, max_records=5):
|
def request_isni_data(search_index, search_term, max_records=5):
|
||||||
"""Request data from the ISNI API"""
|
"""Request data from the ISNI API"""
|
||||||
|
@ -17,7 +21,11 @@ def request_isni_data(search_index, search_term, max_records=5):
|
||||||
"recordPacking": "xml",
|
"recordPacking": "xml",
|
||||||
"sortKeys": "RLV,pica,0,,",
|
"sortKeys": "RLV,pica,0,,",
|
||||||
}
|
}
|
||||||
result = requests.get("http://isni.oclc.org/sru/", params=query_params)
|
result = requests.get(
|
||||||
|
"http://isni.oclc.org/sru/",
|
||||||
|
params=query_params,
|
||||||
|
timeout=10
|
||||||
|
)
|
||||||
# the OCLC ISNI server asserts the payload is encoded
|
# the OCLC ISNI server asserts the payload is encoded
|
||||||
# in latin1, but we know better
|
# in latin1, but we know better
|
||||||
result.encoding = "utf-8"
|
result.encoding = "utf-8"
|
||||||
|
@ -47,6 +55,18 @@ def get_other_identifier(element, code):
|
||||||
and section_head.find(".//identifier") is not None
|
and section_head.find(".//identifier") is not None
|
||||||
):
|
):
|
||||||
return section_head.find(".//identifier").text
|
return section_head.find(".//identifier").text
|
||||||
|
|
||||||
|
# if we can't find it in otherIdentifierOfIdentity,
|
||||||
|
# try sources
|
||||||
|
for source in element.findall(".//sources"):
|
||||||
|
code_of_source = source.find(".//codeOfSource")
|
||||||
|
if (
|
||||||
|
code_of_source is not None
|
||||||
|
and code_of_source.text == code.upper()
|
||||||
|
or code_of_source.text == code.lower()
|
||||||
|
):
|
||||||
|
return source.find(".//sourceIdentifier").text
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
@ -55,8 +75,13 @@ def get_external_information_uri(element, match_string):
|
||||||
|
|
||||||
sources = element.findall(".//externalInformation")
|
sources = element.findall(".//externalInformation")
|
||||||
for source in sources:
|
for source in sources:
|
||||||
|
information = source.find(".//information")
|
||||||
uri = source.find(".//URI")
|
uri = source.find(".//URI")
|
||||||
if uri is not None and uri.text.find(match_string) is not None:
|
if (
|
||||||
|
uri is not None
|
||||||
|
and information is not None
|
||||||
|
and information.text.lower() == match_string.lower()
|
||||||
|
):
|
||||||
return uri.text
|
return uri.text
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
@ -78,17 +103,29 @@ def find_authors_by_name(name_string):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
author = {}
|
author = {}
|
||||||
author["isni"] = element.find(".//isniUnformatted").text
|
author["author"] = get_author_from_isni(element.find(".//isniUnformatted").text)
|
||||||
author["uri"] = element.find(".//isniURI").text
|
titles = element.findall(".//title")
|
||||||
author["name"] = make_name_string(personal_name)
|
if titles:
|
||||||
if bio is not None:
|
title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0]
|
||||||
author["bio"] = bio.text
|
title = (
|
||||||
|
title_element.text.replace('@', '')
|
||||||
|
if titles is not None
|
||||||
|
and title_element is not None
|
||||||
|
and len(title_element.text) > 4
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
author["description"] = (
|
||||||
|
mark_safe(f"Author of <em>{title}</em>") if title is not None
|
||||||
|
else bio.text if bio is not None
|
||||||
|
else "More information at isni.org"
|
||||||
|
)
|
||||||
|
|
||||||
possible_authors.append(author)
|
possible_authors.append(author)
|
||||||
|
|
||||||
return possible_authors
|
return possible_authors
|
||||||
|
|
||||||
|
|
||||||
def get_author_isni_data(isni):
|
def get_author_from_isni(isni):
|
||||||
"""Find data to populate a new author record from their ISNI"""
|
"""Find data to populate a new author record from their ISNI"""
|
||||||
|
|
||||||
payload = request_isni_data("pica.isn", isni)
|
payload = request_isni_data("pica.isn", isni)
|
||||||
|
@ -97,48 +134,57 @@ def get_author_isni_data(isni):
|
||||||
# there should only be a single responseRecord
|
# there should only be a single responseRecord
|
||||||
# but let's use the first one just in case
|
# but let's use the first one just in case
|
||||||
element = root.find(".//responseRecord")
|
element = root.find(".//responseRecord")
|
||||||
personal_name = element.find(".//forename/..")
|
name = make_name_string(element.find(".//forename/.."))
|
||||||
|
viaf = get_other_identifier(element, "viaf")
|
||||||
|
# use a set to dedupe aliases in ISNI
|
||||||
|
aliases = set()
|
||||||
|
aliases_element = element.findall(".//personalNameVariant")
|
||||||
|
for entry in aliases_element:
|
||||||
|
aliases.add(make_name_string(entry))
|
||||||
|
# aliases needs to be list not set
|
||||||
|
aliases = list(aliases)
|
||||||
bio = element.find(".//nameTitle")
|
bio = element.find(".//nameTitle")
|
||||||
author = {}
|
bio = bio.text if bio is not None else ""
|
||||||
author["isni"] = isni
|
wikipedia = get_external_information_uri(element, "Wikipedia")
|
||||||
author["name"] = make_name_string(personal_name)
|
|
||||||
author["viaf_id"] = get_other_identifier(element, "viaf")
|
author = activitypub.Author(
|
||||||
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")
|
id=element.find(".//isniURI").text,
|
||||||
author["bio"] = bio.text if bio is not None else ""
|
name=name,
|
||||||
author["aliases"] = []
|
isni=isni,
|
||||||
aliases = element.findall(".//personalNameVariant")
|
viaf_id=viaf,
|
||||||
for entry in aliases:
|
aliases=aliases,
|
||||||
author["aliases"].append(make_name_string(entry))
|
bio=bio,
|
||||||
# dedupe aliases
|
wikipedia_link=wikipedia
|
||||||
author["aliases"] = list(set(author["aliases"]))
|
)
|
||||||
|
|
||||||
return author
|
return author
|
||||||
|
|
||||||
|
def build_author_from_isni(match_value):
|
||||||
def build_author_dict(match_value):
|
|
||||||
"""Build dict with basic author details from ISNI or author name"""
|
"""Build dict with basic author details from ISNI or author name"""
|
||||||
|
|
||||||
# if it is an isni value get the data
|
# if it is an isni value get the data
|
||||||
if match_value.startswith("isni_match_"):
|
if match_value.startswith("isni_match_"):
|
||||||
isni = match_value.replace("isni_match_", "")
|
isni = match_value.replace("isni_match_", "")
|
||||||
return get_author_isni_data(isni)
|
print("returning author dict")
|
||||||
|
return { "author": get_author_from_isni(isni) }
|
||||||
# otherwise it's a name string
|
# otherwise it's a name string
|
||||||
return {"name": match_value}
|
print("returning empty dict")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def augment_author_metadata(author, isni):
|
def augment_author_metadata(author, isni):
|
||||||
"""Update any missing author fields from ISNI data"""
|
"""Update any missing author fields from ISNI data"""
|
||||||
isni_data = get_author_isni_data(isni)
|
|
||||||
author.viaf_id = (
|
isni_author = get_author_from_isni(isni)
|
||||||
isni_data["viaf_id"] if len(author.viaf_id) == 0 else author.viaf_id
|
isni_author.to_model(model=models.Author, instance=author, overwrite=False)
|
||||||
)
|
|
||||||
author.wikipedia_link = (
|
# we DO want to overwrite aliases because we're adding them to the
|
||||||
isni_data["wikipedia_link"]
|
# existing aliases and ISNI will usually have more.
|
||||||
if len(author.wikipedia_link) == 0
|
# We need to dedupe because ISNI has lots of dupe aliases
|
||||||
else author.wikipedia_link
|
aliases = set(isni_author["aliases"])
|
||||||
)
|
|
||||||
author.bio = isni_data["bio"] if len(author.bio) == 0 else author.bio
|
|
||||||
aliases = set(isni_data["aliases"])
|
|
||||||
for alias in author.aliases:
|
for alias in author.aliases:
|
||||||
aliases.add(alias)
|
aliases.add(alias)
|
||||||
author.aliases = list(aliases)
|
author.aliases = list(aliases)
|
||||||
author.save()
|
author.save()
|
||||||
|
|
||||||
|
return
|
||||||
|
|
|
@ -12,9 +12,10 @@ from django.utils.decorators import method_decorator
|
||||||
from django.views import View
|
from django.views import View
|
||||||
|
|
||||||
from bookwyrm import book_search, forms, models
|
from bookwyrm import book_search, forms, models
|
||||||
|
# from bookwyrm.activitypub.base_activity import ActivityObject
|
||||||
from bookwyrm.utils.isni import (
|
from bookwyrm.utils.isni import (
|
||||||
find_authors_by_name,
|
find_authors_by_name,
|
||||||
build_author_dict,
|
build_author_from_isni,
|
||||||
augment_author_metadata,
|
augment_author_metadata,
|
||||||
)
|
)
|
||||||
from bookwyrm.views.helpers import get_edition
|
from bookwyrm.views.helpers import get_edition
|
||||||
|
@ -79,7 +80,7 @@ class EditBook(View):
|
||||||
i
|
i
|
||||||
for i in isni_authors
|
for i in isni_authors
|
||||||
for a in author_matches
|
for a in author_matches
|
||||||
if sub(r"\D", "", str(i["isni"])) == sub(r"\D", "", str(a.isni))
|
if sub(r"\D", "", str(i["author"].isni)) == sub(r"\D", "", str(a.isni))
|
||||||
]
|
]
|
||||||
|
|
||||||
# pylint: disable=cell-var-from-loop
|
# pylint: disable=cell-var-from-loop
|
||||||
|
@ -179,9 +180,18 @@ class ConfirmEditBook(View):
|
||||||
if isni is not None:
|
if isni is not None:
|
||||||
augment_author_metadata(author, isni)
|
augment_author_metadata(author, isni)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# otherwise it's a name with or without isni id
|
# otherwise it's a new author
|
||||||
author_data = build_author_dict(match)
|
# with isni id
|
||||||
author = models.Author.objects.create(**author_data)
|
isni_match = request.POST.get(f"author_match-{i}")
|
||||||
|
author_object = build_author_from_isni(isni_match)
|
||||||
|
if "author" in author_object:
|
||||||
|
author = author_object["author"].to_model(
|
||||||
|
model=models.Author,
|
||||||
|
overwrite=False
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# or it's a name
|
||||||
|
author = models.Author.objects.create(name=match)
|
||||||
book.authors.add(author)
|
book.authors.add(author)
|
||||||
|
|
||||||
# create work, if needed
|
# create work, if needed
|
||||||
|
|
Loading…
Reference in a new issue