From 09c5a3861bf65a558013d0c8892208b17ed9de52 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 22 Nov 2021 12:15:06 +1100 Subject: [PATCH] prefer English language sources for ISNI titles --- bookwyrm/templates/book/edit/edit_book.html | 2 +- bookwyrm/utils/isni.py | 40 +++++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/bookwyrm/templates/book/edit/edit_book.html b/bookwyrm/templates/book/edit/edit_book.html index e7912c30f..3d41058e3 100644 --- a/bookwyrm/templates/book/edit/edit_book.html +++ b/bookwyrm/templates/book/edit/edit_book.html @@ -66,7 +66,7 @@ {% if book_title %} {% trans "Author of " %}{{ book_title }} {% else %} - {% trans "Author of " %}{{ alt_title }} + {% if alt_title %}{% trans "Author of " %}{{ alt_title }}{% else %} {% trans "Find more information at isni.org" %}{% endif %} {% endif %} {% endwith %}

diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py index fead999a9..fd0dc0835 100644 --- a/bookwyrm/utils/isni.py +++ b/bookwyrm/utils/isni.py @@ -92,32 +92,34 @@ def find_authors_by_name(name_string, description=False): # build list of possible authors possible_authors = [] for element in root.iter("responseRecord"): - personal_name = element.find(".//forename/..") - bio = element.find(".//nameTitle") - if not personal_name: continue author = get_author_from_isni(element.find(".//isniUnformatted").text) if bool(description): - titles = element.findall(".//title") - if titles: - # some of the "titles" in ISNI are a little ...iffy - title_element = [e for e in titles if not e.text.replace('@', '').isnumeric()][0] - title = ( - title_element.text.replace('@', '') - if titles is not None - and title_element is not None - and len(title_element.text) > 4 - else None - ) - author.bio = ( - title if title is not None - else bio.text if bio is not None - else "More information at isni.org" - ) + + titles = [] + # prefer title records from LoC+ coop, Australia, Ireland, or Singapore + # in that order + for source in ["LCNACO", "NLA", "N6I", "NLB"]: + for parent in element.findall(f'.//titleOfWork/[@source="{source}"]'): + titles.append(parent.find(".//title")) + for parent in element.findall(f'.//titleOfWork[@subsource="{source}"]'): + titles.append(parent.find(".//title")) + # otherwise just grab the first title listing + titles.append(element.find(".//title")) + + if titles is not None: + # some of the "titles" in ISNI are a little ...iffy + # '@' is used by ISNI/OCLC to index the starting point ignoring stop words + # (e.g. "The @Government of no one") + title_elements = [e for e in titles if not e.text.replace('@', '').isnumeric()] + if len(title_elements): + author.bio = title_elements[0].text.replace('@', '') + else: + author.bio = None possible_authors.append(author)