mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-16 13:16:33 +00:00
Hide ISNI authors if their ISNI is already in the local DB
When pulling authors from ISNI, check the authors suggested from the local DB for a matching ISNI; that is, we do not want to suggest the same author twice when we *know* it is the same author.
This commit is contained in:
parent
bce5f3f3b5
commit
3cfd31f1fe
2 changed files with 43 additions and 29 deletions
|
@ -1,16 +1,17 @@
|
||||||
"""ISNI author checking utilities"""
|
"""ISNI author checking utilities"""
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from bookwyrm.settings import BASE_DIR
|
from bookwyrm.settings import BASE_DIR
|
||||||
|
|
||||||
|
|
||||||
def url_stringify(string):
|
def url_stringify(string):
|
||||||
"""replace spaces for url encoding"""
|
"""replace spaces for url encoding"""
|
||||||
|
|
||||||
# TODO: this is very lazy and incomplete
|
# TODO: this is very lazy and incomplete
|
||||||
return string.replace(" ", "+")
|
return string.replace(" ", "+")
|
||||||
|
|
||||||
def request_isni_data(search_index, search_term, maxRecords=10):
|
|
||||||
|
def request_isni_data(search_index, search_term, maxRecords=5):
|
||||||
"""Request data from the ISNI API"""
|
"""Request data from the ISNI API"""
|
||||||
|
|
||||||
search_string = url_stringify(search_term)
|
search_string = url_stringify(search_term)
|
||||||
|
@ -22,7 +23,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
|
||||||
"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
|
"%22&version=1.1&operation=searchRetrieve&recordSchema=isni-b",
|
||||||
"&maximumRecords=",
|
"&maximumRecords=",
|
||||||
str(maxRecords),
|
str(maxRecords),
|
||||||
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C"
|
"&startRecord=1&recordPacking=xml&sortKeys=RLV%2Cpica%2C0%2C%2C",
|
||||||
]
|
]
|
||||||
query_url = "".join(query_parts)
|
query_url = "".join(query_parts)
|
||||||
result = requests.get(query_url)
|
result = requests.get(query_url)
|
||||||
|
@ -31,6 +32,7 @@ def request_isni_data(search_index, search_term, maxRecords=10):
|
||||||
result.encoding = "utf-8"
|
result.encoding = "utf-8"
|
||||||
return result.text
|
return result.text
|
||||||
|
|
||||||
|
|
||||||
def make_name_string(element):
|
def make_name_string(element):
|
||||||
"""create a string of form 'personal_name surname'"""
|
"""create a string of form 'personal_name surname'"""
|
||||||
|
|
||||||
|
@ -39,9 +41,10 @@ def make_name_string(element):
|
||||||
forename = element.find(".//forename")
|
forename = element.find(".//forename")
|
||||||
surname = element.find(".//surname")
|
surname = element.find(".//surname")
|
||||||
if forename is not None:
|
if forename is not None:
|
||||||
return "".join([forename.text," ",surname.text])
|
return "".join([forename.text, " ", surname.text])
|
||||||
return surname.text
|
return surname.text
|
||||||
|
|
||||||
|
|
||||||
def get_other_identifier(element, code):
|
def get_other_identifier(element, code):
|
||||||
"""Get other identifiers associated with an author from their ISNI record"""
|
"""Get other identifiers associated with an author from their ISNI record"""
|
||||||
|
|
||||||
|
@ -55,19 +58,18 @@ def get_other_identifier(element, code):
|
||||||
return section_head.find(".//identifier").text
|
return section_head.find(".//identifier").text
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def get_external_information_uri(element, match_string):
|
def get_external_information_uri(element, match_string):
|
||||||
"""Get URLs associated with an author from their ISNI record"""
|
"""Get URLs associated with an author from their ISNI record"""
|
||||||
|
|
||||||
sources = element.findall(".//externalInformation")
|
sources = element.findall(".//externalInformation")
|
||||||
for source in sources:
|
for source in sources:
|
||||||
uri = source.find(".//URI")
|
uri = source.find(".//URI")
|
||||||
if (
|
if uri is not None and uri.text.find(match_string) is not None:
|
||||||
uri is not None
|
|
||||||
and uri.text.find(match_string) is not None
|
|
||||||
):
|
|
||||||
return uri.text
|
return uri.text
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def find_authors_by_name(name_string):
|
def find_authors_by_name(name_string):
|
||||||
"""Query the ISNI database for possible author matches by name"""
|
"""Query the ISNI database for possible author matches by name"""
|
||||||
|
|
||||||
|
@ -94,6 +96,7 @@ def find_authors_by_name(name_string):
|
||||||
|
|
||||||
return possible_authors
|
return possible_authors
|
||||||
|
|
||||||
|
|
||||||
def get_author_isni_data(isni):
|
def get_author_isni_data(isni):
|
||||||
|
|
||||||
payload = request_isni_data("pica.isn", isni)
|
payload = request_isni_data("pica.isn", isni)
|
||||||
|
@ -110,9 +113,9 @@ def get_author_isni_data(isni):
|
||||||
author["viaf_id"] = get_other_identifier(element, "viaf")
|
author["viaf_id"] = get_other_identifier(element, "viaf")
|
||||||
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")
|
author["wikipedia_link"] = get_external_information_uri(element, "Wikipedia")
|
||||||
author["bio"] = bio.text if bio is not None else ""
|
author["bio"] = bio.text if bio is not None else ""
|
||||||
author["aliases"] = [] # CHECK can we send a list for this?
|
author["aliases"] = []
|
||||||
aliases = element.findall(".//personalNameVariant")
|
aliases = element.findall(".//personalNameVariant")
|
||||||
for entry in aliases:
|
for entry in aliases:
|
||||||
author["aliases"].append( make_name_string(entry) )
|
author["aliases"].append(make_name_string(entry))
|
||||||
|
|
||||||
return author
|
return author
|
||||||
|
|
|
@ -57,22 +57,32 @@ class EditBook(View):
|
||||||
"aliases", weight="B"
|
"aliases", weight="B"
|
||||||
)
|
)
|
||||||
|
|
||||||
data["author_matches"].append(
|
author_matches = (
|
||||||
{
|
|
||||||
"name": author.strip(),
|
|
||||||
"matches": (
|
|
||||||
models.Author.objects.annotate(search=vector)
|
models.Author.objects.annotate(search=vector)
|
||||||
.annotate(rank=SearchRank(vector, author))
|
.annotate(rank=SearchRank(vector, author))
|
||||||
.filter(rank__gt=0.4)
|
.filter(rank__gt=0.4)
|
||||||
.order_by("-rank")[:5]
|
.order_by("-rank")[:5]
|
||||||
),
|
)
|
||||||
"isni_matches": find_authors_by_name(
|
|
||||||
|
isni_authors = find_authors_by_name(
|
||||||
author
|
author
|
||||||
), # find matches from ISNI API
|
) # find matches from ISNI API
|
||||||
|
|
||||||
|
# do not show isni results for authors we already have in the DB
|
||||||
|
exists = [
|
||||||
|
i
|
||||||
|
for i in isni_authors
|
||||||
|
for a in author_matches
|
||||||
|
if i["isni"] == a.isni
|
||||||
|
]
|
||||||
|
isni_matches = list(filter(lambda x: x not in exists, isni_authors))
|
||||||
|
data["author_matches"].append(
|
||||||
|
{
|
||||||
|
"name": author.strip(),
|
||||||
|
"matches": author_matches,
|
||||||
|
"isni_matches": isni_matches,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
# TODO: check if an isni record matches an existing record
|
|
||||||
# to bring these two records together
|
|
||||||
|
|
||||||
# we're creating a new book
|
# we're creating a new book
|
||||||
if not book:
|
if not book:
|
||||||
|
@ -157,7 +167,8 @@ class ConfirmEditBook(View):
|
||||||
# otherwise it's a name with or without isni id
|
# otherwise it's a name with or without isni id
|
||||||
isni = request.POST.get(f"isni_match-{i}")
|
isni = request.POST.get(f"isni_match-{i}")
|
||||||
author_data = (
|
author_data = (
|
||||||
get_author_isni_data(isni) if isni is not None
|
get_author_isni_data(isni)
|
||||||
|
if isni is not None
|
||||||
else {"name": match}
|
else {"name": match}
|
||||||
)
|
)
|
||||||
author = models.Author.objects.create(**author_data)
|
author = models.Author.objects.create(**author_data)
|
||||||
|
|
Loading…
Reference in a new issue