hide isni authors if isni in local db

When pulling author suggestions from ISNI, check the authors already suggested from the local DB for a matching ISNI, and hide any ISNI result that matches.
That is, we do not want to suggest the same author twice when we *know* it is the same author.
This commit is contained in:
Hugh Rundle 2021-10-31 20:48:47 +11:00
parent bce5f3f3b5
commit 3cfd31f1fe
2 changed files with 43 additions and 29 deletions

View file

@ -1,16 +1,17 @@
"""ISNI author checking utilities""" """ISNI author checking utilities"""
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from bookwyrm.settings import BASE_DIR from bookwyrm.settings import BASE_DIR
def url_stringify(string):
    """Encode a search term for safe inclusion in a URL query string.

    Spaces become ``+`` (as before); every other character that is not
    safe in a query component (``&``, ``#``, ``"``, ``%``, non-ASCII, ...)
    is percent-encoded so it cannot truncate or inject query parameters.

    Args:
        string: the raw search term.

    Returns:
        The form-encoded search term.
    """
    # Local import keeps this block self-contained; quote_plus is stdlib.
    from urllib.parse import quote_plus

    return quote_plus(string)
def request_isni_data(search_index, search_term, maxRecords=10):
def request_isni_data(search_index, search_term, maxRecords=5):
"""Request data from the ISNI API""" """Request data from the ISNI API"""
search_string = url_stringify(search_term) search_string = url_stringify(search_term)
@ -22,7 +23,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b", "%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
"&maximumRecords=", "&maximumRecords=",
str(maxRecords), str(maxRecords),
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C" "&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",
] ]
query_url = "".join(query_parts) query_url = "".join(query_parts)
result = requests.get(query_url) result = requests.get(query_url)
@ -31,6 +32,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
result.encoding = "utf-8" result.encoding = "utf-8"
return result.text return result.text
def make_name_string(element): def make_name_string(element):
"""create a string of form 'personal_name surname'""" """create a string of form 'personal_name surname'"""
@ -42,6 +44,7 @@ def make_name_string(element):
return "".join([forename.text, " ", surname.text]) return "".join([forename.text, " ", surname.text])
return surname.text return surname.text
def get_other_identifier(element, code): def get_other_identifier(element, code):
"""Get other identifiers associated with an author from their ISNI record""" """Get other identifiers associated with an author from their ISNI record"""
@ -55,19 +58,18 @@ def get_other_identifier(element, code):
return section_head.find(".//identifier").text return section_head.find(".//identifier").text
return return
def get_external_information_uri(element, match_string):
    """Get a URL associated with an author from their ISNI record.

    Scans every ``externalInformation`` descendant of ``element`` and
    returns the text of the first ``URI`` that contains ``match_string``.
    The comparison is case-insensitive so that e.g. ``"Wikipedia"``
    matches ``https://en.wikipedia.org/...``.

    Args:
        element: an ``xml.etree.ElementTree`` element for an ISNI record.
        match_string: substring the wanted URI must contain.

    Returns:
        The matching URI text, or ``None`` when no source matches.
    """
    needle = match_string.lower()
    for source in element.findall(".//externalInformation"):
        uri = source.find(".//URI")
        # Skip sources without a URI element or with an empty one.
        if uri is None or not uri.text:
            continue
        # str.find() returns -1 (never None) on a miss, so the previous
        # ``find(...) is not None`` test accepted every URI; use ``in``.
        if needle in uri.text.lower():
            return uri.text
    return None
def find_authors_by_name(name_string): def find_authors_by_name(name_string):
"""Query the ISNI database for possible author matches by name""" """Query the ISNI database for possible author matches by name"""
@ -94,6 +96,7 @@ def find_authors_by_name(name_string):
return possible_authors return possible_authors
def get_author_isni_data(isni): def get_author_isni_data(isni):
payload = request_isni_data("pica.isn", isni) payload = request_isni_data("pica.isn", isni)
@ -110,7 +113,7 @@ def get_author_isni_data(isni):
author["viaf_id"] = get_other_identifier(element, "viaf") author["viaf_id"] = get_other_identifier(element, "viaf")
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia") author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")
author["bio"] = bio.text if bio is not None else "" author["bio"] = bio.text if bio is not None else ""
author["aliases"] = [] # CHECK can we send a list for this? author["aliases"] = []
aliases = element.findall(".//personalNameVariant") aliases = element.findall(".//personalNameVariant")
for entry in aliases: for entry in aliases:
author["aliases"].append(make_name_string(entry)) author["aliases"].append(make_name_string(entry))

View file

@ -57,22 +57,32 @@ class EditBook(View):
"aliases", weight="B" "aliases", weight="B"
) )
data["author_matches"].append( author_matches = (
{
"name": author.strip(),
"matches": (
models.Author.objects.annotate(search=vector) models.Author.objects.annotate(search=vector)
.annotate(rank=SearchRank(vector, author)) .annotate(rank=SearchRank(vector, author))
.filter(rank__gt=0.4) .filter(rank__gt=0.4)
.order_by("-rank")[:5] .order_by("-rank")[:5]
), )
"isni_matches": find_authors_by_name(
isni_authors = find_authors_by_name(
author author
), # find matches from ISNI API ) # find matches from ISNI API
# do not show isni results for authors we already have in the DB
exists = [
i
for i in isni_authors
for a in author_matches
if i["isni"] == a.isni
]
isni_matches = list(filter(lambda x: x not in exists, isni_authors))
data["author_matches"].append(
{
"name": author.strip(),
"matches": author_matches,
"isni_matches": isni_matches,
} }
) )
# TODO: check if an isni record matches an existing record
# to bring these two records together
# we're creating a new book # we're creating a new book
if not book: if not book:
@ -157,7 +167,8 @@ class ConfirmEditBook(View):
# otherwise it's a name with or without isni id # otherwise it's a name with or without isni id
isni = request.POST.get(f"isni_match-{i}") isni = request.POST.get(f"isni_match-{i}")
author_data = ( author_data = (
get_author_isni_data(isni) if isni is not None get_author_isni_data(isni)
if isni is not None
else {"name": match} else {"name": match}
) )
author = models.Author.objects.create(**author_data) author = models.Author.objects.create(**author_data)