diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py
index 53cf94ff..6c29ac05 100644
--- a/bookwyrm/models/author.py
+++ b/bookwyrm/models/author.py
@@ -27,7 +27,7 @@ class Author(BookDataModel):
# idk probably other keys would be useful here?
born = fields.DateTimeField(blank=True, null=True)
died = fields.DateTimeField(blank=True, null=True)
- name = fields.CharField(max_length=255, deduplication_field=True)
+ name = fields.CharField(max_length=255)
aliases = fields.ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
diff --git a/bookwyrm/templates/author/author.html b/bookwyrm/templates/author/author.html
index 6a67b50b..b066c6ca 100644
--- a/bookwyrm/templates/author/author.html
+++ b/bookwyrm/templates/author/author.html
@@ -2,6 +2,7 @@
{% load i18n %}
{% load markdown %}
{% load humanize %}
+{% load utilities %}
{% block title %}{{ author.name }}{% endblock %}
@@ -25,7 +26,7 @@
- {% if author.aliases or author.born or author.died or author.wikipedia_link or author.openlibrary_key or author.inventaire_id %}
+ {% if author.aliases or author.born or author.died or author.wikipedia_link or author.openlibrary_key or author.inventaire_id or author.isni %}
@@ -63,6 +64,14 @@
{% endif %}
+ {% if author.isni %}
+
+
+ {% trans "View ISNI record" %}
+
+
+ {% endif %}
+
{% if author.openlibrary_key %}
diff --git a/bookwyrm/templates/book/edit/edit_book.html b/bookwyrm/templates/book/edit/edit_book.html
index fc11208f..3d41058e 100644
--- a/bookwyrm/templates/book/edit/edit_book.html
+++ b/bookwyrm/templates/book/edit/edit_book.html
@@ -1,6 +1,7 @@
{% extends 'layout.html' %}
{% load i18n %}
{% load humanize %}
+{% load utilities %}
{% block title %}{% if book %}{% blocktrans with book_title=book.title %}Edit "{{ book_title }}"{% endblocktrans %}{% else %}{% trans "Add Book" %}{% endif %}{% endblock %}
@@ -52,19 +53,29 @@
{% for author in author_matches %}
- {% blocktrans with name=author.name %}Is "{{ name }}" an existing author?{% endblocktrans %}
+ {% blocktrans with name=author.name %}Is "{{ name }}" one of these authors?{% endblocktrans %}
{% with forloop.counter0 as counter %}
{% for match in author.matches %}
-
+
{{ match.name }}
-
- {% blocktrans with book_title=match.book_set.first.title %}Author of {{ book_title }} {% endblocktrans %}
+
+ {% with book_title=match.book_set.first.title alt_title=match.bio %}
+ {% if book_title %}
+ {% trans "Author of " %}{{ book_title }}
+ {% else %}
+ {% if alt_title %}{% trans "Author of " %}{{ alt_title }} {% else %} {% trans "Find more information at isni.org" %}{% endif %}
+ {% endif %}
+ {% endwith %}
+
+ {{ author.existing_isnis|get_isni_bio:match }}
+
+ {{ author.existing_isnis|get_isni:match }}
{% endfor %}
-
+
{% trans "This is a new author" %}
{% endwith %}
diff --git a/bookwyrm/templatetags/utilities.py b/bookwyrm/templatetags/utilities.py
index d31f0e4d..5cc25fed 100644
--- a/bookwyrm/templatetags/utilities.py
+++ b/bookwyrm/templatetags/utilities.py
@@ -1,8 +1,11 @@
""" template filters for really common utilities """
import os
+import re
from uuid import uuid4
from django import template
+from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy as _
+from django.template.defaultfilters import stringfilter
from django.templatetags.static import static
@@ -66,3 +69,39 @@ def get_book_cover_thumbnail(book, size="medium", ext="jpg"):
return cover_thumbnail.url
except OSError:
return static("images/no_cover.jpg")
+
+
@register.filter(name="get_isni_bio")
def get_isni_bio(existing, author):
    """Returns the isni bio string if an existing author has an isni listed

    ``existing`` is iterated for records exposing ``bio`` and ``isni``;
    ``author`` is the candidate whose ``isni`` is looked up among them. ISNIs
    are compared on their digits only, so formatting differences are ignored.
    Returns "" when nothing matches.
    """
    # pylint: disable=import-outside-toplevel
    from django.utils.html import escape

    auth_isni = re.sub(r"\D", "", str(author.isni))
    # without any ISNI digits (e.g. isni is None) there is nothing to look up;
    # comparing two empty strings would otherwise count as a match
    if not existing or not auth_isni:
        return ""
    for value in existing:
        if hasattr(value, "bio") and auth_isni == re.sub(r"\D", "", str(value.isni)):
            # the bio text originates outside this application, so escape it
            # before the whole string is marked safe for rendering
            return mark_safe(f"Author of {escape(value.bio)} ")

    return ""
+
+
# pylint: disable=unused-argument
@register.filter(name="get_isni", needs_autoescape=True)
def get_isni(existing, author, autoescape=True):
    """Returns the isni ID if an existing author has an ISNI listing

    ``existing`` is iterated for entries that have an ``isni`` attribute; the
    first one whose ISNI equals ``author.isni`` (digits only) decides the
    return value. Returns "" when ``existing`` is empty or nothing matches.
    """
    # strip every non-digit so differently formatted ISNIs compare equal
    auth_isni = re.sub(r"\D", "", str(author.isni))
    if len(existing) == 0:
        return ""
    for value in existing:
        if hasattr(value, "isni") and auth_isni == re.sub(r"\D", "", str(value.isni)):
            isni = value.isni
            # NOTE(review): the literal below has no placeholders, so `isni`
            # is unused and a single safe space is returned -- confirm the
            # intended markup has not been lost
            return mark_safe(
                f' '
            )
    return ""
+
+
@register.filter(name="remove_spaces")
@stringfilter
def remove_spaces(arg):
    """Strip every whitespace character out of the value passed in"""
    text = str(arg)
    whitespace = re.compile(r"\s")
    return whitespace.sub("", text)
diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py
new file mode 100644
index 00000000..a35c3f24
--- /dev/null
+++ b/bookwyrm/utils/isni.py
@@ -0,0 +1,183 @@
+"""ISNI author checking utilities"""
+import xml.etree.ElementTree as ET
+import requests
+
+from bookwyrm import activitypub, models
+
+
def request_isni_data(search_index, search_term, max_records=5):
    """Send a search to the ISNI API and return the response body as text

    search_index -- index to search, e.g. "pica.na" or "pica.isn"
    search_term -- value to look for in that index
    max_records -- upper bound on the number of records requested
    """
    response = requests.get(
        "http://isni.oclc.org/sru/",
        params={
            "query": f'{search_index}="{search_term}"',
            "version": "1.1",
            "operation": "searchRetrieve",
            "recordSchema": "isni-b",
            "maximumRecords": max_records,
            "startRecord": "1",
            "recordPacking": "xml",
            "sortKeys": "RLV,pica,0,,",
        },
        timeout=10,
    )
    # the OCLC ISNI server declares latin1 for the payload, but the content
    # is really utf-8, so override the encoding before decoding the body
    response.encoding = "utf-8"
    return response.text
+
+
def make_name_string(element):
    """create a string of form 'personal_name surname'

    A missing or text-less ``forename``/``surname`` is skipped instead of
    raising: with only one of the two present the result is just that part,
    and with neither it is an empty string.
    """
    # NOTE: this will often be incorrect, many naming systems
    # list "surname" before personal name
    parts = []
    for tag in ("forename", "surname"):
        node = element.find(f".//{tag}")
        # an element may exist without any text, in which case .text is None
        if node is not None and node.text:
            parts.append(node.text)
    return " ".join(parts)
+
+
def get_other_identifier(element, code):
    """Get other identifiers associated with an author from their ISNI record

    Looks first for an ``otherIdentifierOfIdentity`` entry whose ``type`` text
    equals ``code`` exactly, then falls back to a ``sources`` entry whose
    ``codeOfSource`` text equals ``code`` ignoring case. Incomplete entries
    are skipped. Returns "" when no entry yields an identifier.
    """
    for section_head in element.findall(".//otherIdentifierOfIdentity"):
        id_type = section_head.find(".//type")
        identifier = section_head.find(".//identifier")
        if id_type is not None and identifier is not None and id_type.text == code:
            return identifier.text

    # if we can't find it in otherIdentifierOfIdentity,
    # try sources
    wanted = code.lower()
    for source in element.findall(".//sources"):
        code_of_source = source.find(".//codeOfSource")
        source_identifier = source.find(".//sourceIdentifier")
        # a source can lack either child, and <codeOfSource/> has no text;
        # move on to the next source rather than failing on such entries
        if (
            code_of_source is None
            or code_of_source.text is None
            or source_identifier is None
        ):
            continue
        if code_of_source.text.lower() == wanted:
            return source_identifier.text

    return ""
+
+
def get_external_information_uri(element, match_string):
    """Get URLs associated with an author from their ISNI record

    Returns the ``URI`` text of the first ``externalInformation`` entry whose
    ``information`` text equals ``match_string`` ignoring case, or "" when no
    entry matches.
    """
    wanted = match_string.lower()
    for source in element.findall(".//externalInformation"):
        information = source.find(".//information")
        uri = source.find(".//URI")
        # <information/> without text has .text == None and cannot match
        if uri is None or information is None or information.text is None:
            continue
        if information.text.lower() == wanted:
            return uri.text
    return ""
+
+
def find_authors_by_name(name_string, description=False):
    """Query the ISNI database for possible author matches by name

    name_string -- the name searched for in the "pica.na" index
    description -- when truthy, each author's ``bio`` is replaced with the
        title of one of their works, or ``None`` if the record has no usable
        title

    Returns a list with one entry, as produced by ``get_author_from_isni``,
    for every response record that has a forename and an unformatted ISNI.
    """
    payload = request_isni_data("pica.na", name_string)
    # parse xml
    root = ET.fromstring(payload)
    # build list of possible authors
    possible_authors = []
    for element in root.iter("responseRecord"):
        # compare against None: the truth value of an Element reflects its
        # number of children, not whether find() located it
        if element.find(".//forename/..") is None:
            continue

        isni = element.find(".//isniUnformatted")
        # without an ISNI there is nothing to look the author up by
        if isni is None or not isni.text:
            continue

        author = get_author_from_isni(isni.text)

        if description:
            author.bio = _first_work_title(element)

        possible_authors.append(author)

    return possible_authors


def _first_work_title(element):
    """Return the first usable work title in an ISNI record, or None"""
    titles = []
    # prefer title records from LoC+ coop, Australia, Ireland, or Singapore
    # in that order
    for source in ["LCNACO", "NLA", "N6I", "NLB"]:
        for parent in element.findall(f'.//titleOfWork/[@source="{source}"]'):
            titles.append(parent.find(".//title"))
        for parent in element.findall(f'.//titleOfWork[@subsource="{source}"]'):
            titles.append(parent.find(".//title"))
    # otherwise just grab the first title listing
    titles.append(element.find(".//title"))

    for title in titles:
        # find() gives None when there is no <title>, and an empty element
        # has no text; neither can serve as a description
        if title is None or not title.text:
            continue
        # some of the "titles" in ISNI are a little ...iffy
        # '@' is used by ISNI/OCLC to index the starting point ignoring stop words
        # (e.g. "The @Government of no one")
        text = title.text.replace("@", "")
        if not text.isnumeric():
            return text
    return None
+
+
def get_author_from_isni(isni):
    """Look up an ISNI and turn its record into an activitypub Author"""

    root = ET.fromstring(request_isni_data("pica.isn", isni))
    # only one responseRecord is expected, but take the first one in case
    # there are several
    record = root.find(".//responseRecord")

    display_name = make_name_string(record.find(".//forename/.."))
    viaf_id = get_other_identifier(record, "viaf")
    # ISNI repeats name variants, so gather them in a set to dedupe;
    # the set is turned into a list when handed to the author below
    variants = {
        make_name_string(variant)
        for variant in record.findall(".//personalNameVariant")
    }
    name_title = record.find(".//nameTitle")
    description = "" if name_title is None else name_title.text
    wikipedia_url = get_external_information_uri(record, "Wikipedia")

    return activitypub.Author(
        id=record.find(".//isniURI").text,
        name=display_name,
        isni=isni,
        viafId=viaf_id,
        aliases=list(variants),
        bio=description,
        wikipediaLink=wikipedia_url,
    )
+
+
def build_author_from_isni(match_value):
    """Build basic author class object from ISNI URL

    Returns ``{"author": <author>}`` when ``match_value`` starts with
    ``https://isni.org/isni/``, and an empty dict for anything else (a plain
    name string, an empty string or ``None``).
    """
    prefix = "https://isni.org/isni/"
    # a form lookup can yield None or ""; neither is an ISNI URL
    if not match_value or not match_value.startswith(prefix):
        # otherwise it's a name string
        return {}
    # if it is an isni value get the data; cut off only the leading prefix
    return {"author": get_author_from_isni(match_value[len(prefix) :])}
+
+
def augment_author_metadata(author, isni):
    """Fill in an author's missing fields from ISNI data and merge aliases"""

    isni_author = get_author_from_isni(isni)
    isni_author.to_model(model=models.Author, instance=author, overwrite=False)

    # aliases are the one field we DO replace: the ISNI aliases are combined
    # with the ones the author already has, since ISNI usually knows more.
    # A set is used because ISNI records often repeat the same alias
    merged = set(isni_author.aliases)
    merged.update(author.aliases)
    author.aliases = list(merged)
    author.save()
diff --git a/bookwyrm/views/books/edit_book.py b/bookwyrm/views/books/edit_book.py
index 1445dc01..fc13aa6c 100644
--- a/bookwyrm/views/books/edit_book.py
+++ b/bookwyrm/views/books/edit_book.py
@@ -1,4 +1,5 @@
""" the good stuff! the books! """
+from re import sub
from dateutil.parser import parse as dateparse
from django.contrib.auth.decorators import login_required, permission_required
from django.contrib.postgres.search import SearchRank, SearchVector
@@ -11,10 +12,16 @@ from django.utils.decorators import method_decorator
from django.views import View
from bookwyrm import book_search, forms, models
+
+# from bookwyrm.activitypub.base_activity import ActivityObject
+from bookwyrm.utils.isni import (
+ find_authors_by_name,
+ build_author_from_isni,
+ augment_author_metadata,
+)
from bookwyrm.views.helpers import get_edition
from .books import set_cover_from_url
-
# pylint: disable=no-self-use
@method_decorator(login_required, name="dispatch")
@method_decorator(
@@ -33,6 +40,7 @@ class EditBook(View):
data = {"book": book, "form": forms.EditionForm(instance=book)}
return TemplateResponse(request, "book/edit/edit_book.html", data)
+ # pylint: disable=too-many-locals
def post(self, request, book_id=None):
"""edit a book cool"""
# returns None if no match is found
@@ -48,6 +56,7 @@ class EditBook(View):
if add_author:
data["add_author"] = add_author
data["author_matches"] = []
+ data["isni_matches"] = []
for author in add_author.split(","):
if not author:
continue
@@ -56,15 +65,35 @@ class EditBook(View):
"aliases", weight="B"
)
+ author_matches = (
+ models.Author.objects.annotate(search=vector)
+ .annotate(rank=SearchRank(vector, author))
+ .filter(rank__gt=0.4)
+ .order_by("-rank")[:5]
+ )
+
+ isni_authors = find_authors_by_name(
+ author, description=True
+ ) # find matches from ISNI API
+
+ # dedupe isni authors we already have in the DB
+ exists = [
+ i
+ for i in isni_authors
+ for a in author_matches
+ if sub(r"\D", "", str(i.isni)) == sub(r"\D", "", str(a.isni))
+ ]
+
+ # pylint: disable=cell-var-from-loop
+ matches = list(filter(lambda x: x not in exists, isni_authors))
+ # combine existing and isni authors
+ matches.extend(author_matches)
+
data["author_matches"].append(
{
"name": author.strip(),
- "matches": (
- models.Author.objects.annotate(search=vector)
- .annotate(rank=SearchRank(vector, author))
- .filter(rank__gt=0.4)
- .order_by("-rank")[:5]
- ),
+ "matches": matches,
+ "existing_isnis": exists,
}
)
@@ -122,6 +151,8 @@ class EditBook(View):
class ConfirmEditBook(View):
"""confirm edits to a book"""
+ # pylint: disable=too-many-locals
+ # pylint: disable=too-many-branches
def post(self, request, book_id=None):
"""edit a book cool"""
# returns None if no match is found
@@ -147,9 +178,25 @@ class ConfirmEditBook(View):
author = get_object_or_404(
models.Author, id=request.POST[f"author_match-{i}"]
)
+ # update author metadata if the ISNI record is more complete
+ isni = request.POST.get(f"isni-for-{match}", None)
+ if isni is not None:
+ augment_author_metadata(author, isni)
except ValueError:
- # otherwise it's a name
- author = models.Author.objects.create(name=match)
+ # otherwise it's a new author
+ isni_match = request.POST.get(f"author_match-{i}")
+ author_object = build_author_from_isni(isni_match)
+ # with author data class from isni id
+ if "author" in author_object:
+ skeleton = models.Author.objects.create(
+ name=author_object["author"].name
+ )
+ author = author_object["author"].to_model(
+ model=models.Author, overwrite=True, instance=skeleton
+ )
+ else:
+ # or it's just a name
+ author = models.Author.objects.create(name=match)
book.authors.add(author)
# create work, if needed