mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-16 13:16:33 +00:00
hide isni authors if isni in local db
When pulling authors from ISNI, check the authors suggested from the local DB for a matching ISNI; i.e. we do not want to suggest the same author twice when we *know* it is the same author.
This commit is contained in:
parent
bce5f3f3b5
commit
3cfd31f1fe
2 changed files with 43 additions and 29 deletions
|
@ -1,16 +1,17 @@
|
|||
"""ISNI author checking utilities"""
|
||||
import xml.etree.ElementTree as ET
|
||||
import requests
|
||||
|
||||
from bookwyrm.settings import BASE_DIR
|
||||
|
||||
|
||||
def url_stringify(string):
    """Encode a search term so it can be embedded in a URL query string.

    Spaces are turned into ``+`` exactly as before. In addition, every
    character that is not safe inside a query component (``&``, ``#``,
    ``"``, ``+``, non-ASCII letters, ...) is percent-encoded as UTF-8, so a
    search term can no longer break or alter the request URL.

    :param string: the raw search term
    :return: the encoded search term
    """
    # local import: keeps this function self-contained without touching the
    # module-level import block
    from urllib.parse import quote_plus

    return quote_plus(string)
|
||||
|
||||
def request_isni_data(search_index, search_term, maxRecords=10):
|
||||
|
||||
def request_isni_data(search_index, search_term, maxRecords=5):
|
||||
"""Request data from the ISNI API"""
|
||||
|
||||
search_string = url_stringify(search_term)
|
||||
|
@ -22,7 +23,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
|
|||
"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
|
||||
"&maximumRecords=",
|
||||
str(maxRecords),
|
||||
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"
|
||||
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",
|
||||
]
|
||||
query_url = "".join(query_parts)
|
||||
result = requests.get(query_url)
|
||||
|
@ -31,6 +32,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
|
|||
result.encoding = "utf-8"
|
||||
return result.text
|
||||
|
||||
|
||||
def make_name_string(element):
|
||||
"""create a string of form 'personal_name surname'"""
|
||||
|
||||
|
@ -39,9 +41,10 @@ def make_name_string(element):
|
|||
forename = element.find(".//forename")
|
||||
surname = element.find(".//surname")
|
||||
if forename is not None:
|
||||
return "".join([forename.text," ",surname.text])
|
||||
return "".join([forename.text, " ", surname.text])
|
||||
return surname.text
|
||||
|
||||
|
||||
def get_other_identifier(element, code):
|
||||
"""Get other identifiers associated with an author from their ISNI record"""
|
||||
|
||||
|
@ -55,19 +58,18 @@ def get_other_identifier(element, code):
|
|||
return section_head.find(".//identifier").text
|
||||
return
|
||||
|
||||
|
||||
def get_external_information_uri(element, match_string):
    """Get URLs associated with an author from their ISNI record

    Inspects the ``URI`` child of every ``externalInformation`` descendant of
    ``element`` and returns the text of the first URI that contains
    ``match_string``. The comparison is case-insensitive.

    :param element: an ``xml.etree.ElementTree`` element to search in
    :param match_string: text the wanted URI must contain
    :return: the matching URI text, or ``None`` when nothing matches
    """

    wanted = match_string.lower()
    for source in element.findall(".//externalInformation"):
        uri = source.find(".//URI")
        # skip sources that have no URI element or an empty one
        if uri is None or not uri.text:
            continue
        # str.find() returns -1 (never None) on a miss, so the former
        # "find(...) is not None" check accepted every URI; test containment
        if wanted in uri.text.lower():
            return uri.text
    return None
|
||||
|
||||
|
||||
def find_authors_by_name(name_string):
|
||||
"""Query the ISNI database for possible author matches by name"""
|
||||
|
||||
|
@ -94,6 +96,7 @@ def find_authors_by_name(name_string):
|
|||
|
||||
return possible_authors
|
||||
|
||||
|
||||
def get_author_isni_data(isni):
|
||||
|
||||
payload = request_isni_data("pica.isn", isni)
|
||||
|
@ -110,9 +113,9 @@ def get_author_isni_data(isni):
|
|||
author["viaf_id"] = get_other_identifier(element, "viaf")
|
||||
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")
|
||||
author["bio"] = bio.text if bio is not None else ""
|
||||
author["aliases"] = [] # CHECK can we send a list for this?
|
||||
author["aliases"] = []
|
||||
aliases = element.findall(".//personalNameVariant")
|
||||
for entry in aliases:
|
||||
author["aliases"].append( make_name_string(entry) )
|
||||
author["aliases"].append(make_name_string(entry))
|
||||
|
||||
return author
|
||||
|
|
|
@ -57,22 +57,32 @@ class EditBook(View):
|
|||
"aliases", weight="B"
|
||||
)
|
||||
|
||||
data["author_matches"].append(
|
||||
{
|
||||
"name": author.strip(),
|
||||
"matches": (
|
||||
author_matches = (
|
||||
models.Author.objects.annotate(search=vector)
|
||||
.annotate(rank=SearchRank(vector, author))
|
||||
.filter(rank__gt=0.4)
|
||||
.order_by("-rank")[:5]
|
||||
),
|
||||
"isni_matches": find_authors_by_name(
|
||||
)
|
||||
|
||||
isni_authors = find_authors_by_name(
|
||||
author
|
||||
), # find matches from ISNI API
|
||||
) # find matches from ISNI API
|
||||
|
||||
# do not show isni results for authors we already have in the DB
|
||||
exists = [
|
||||
i
|
||||
for i in isni_authors
|
||||
for a in author_matches
|
||||
if i["isni"] == a.isni
|
||||
]
|
||||
isni_matches = list(filter(lambda x: x not in exists, isni_authors))
|
||||
data["author_matches"].append(
|
||||
{
|
||||
"name": author.strip(),
|
||||
"matches": author_matches,
|
||||
"isni_matches": isni_matches,
|
||||
}
|
||||
)
|
||||
# TODO: check if an isni record matches an existing record
|
||||
# to bring these two records together
|
||||
|
||||
# we're creating a new book
|
||||
if not book:
|
||||
|
@ -157,7 +167,8 @@ class ConfirmEditBook(View):
|
|||
# otherwise it's a name with or without isni id
|
||||
isni = request.POST.get(f"isni_match-{i}")
|
||||
author_data = (
|
||||
get_author_isni_data(isni) if isni is not None
|
||||
get_author_isni_data(isni)
|
||||
if isni is not None
|
||||
else {"name": match}
|
||||
)
|
||||
author = models.Author.objects.create(**author_data)
|
||||
|
|
Loading…
Reference in a new issue