code cleanup

This commit is contained in:
Hugh Rundle 2021-11-22 12:52:59 +11:00
parent 09c5a3861b
commit a9d921cc06
No known key found for this signature in database
GPG key ID: CD23D6039184286B
3 changed files with 33 additions and 35 deletions

View file

@ -92,7 +92,7 @@ def get_isni(existing, author, autoescape=True):
if len(existing) == 0: if len(existing) == 0:
return "" return ""
for value in existing: for value in existing:
if hasattr(value, "isni")and auth_isni == re.sub(r"\D", "", str(value.isni)): if hasattr(value, "isni") and auth_isni == re.sub(r"\D", "", str(value.isni)):
isni = value.isni isni = value.isni
return mark_safe( return mark_safe(
f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>' f'<input type="text" name="isni-for-{author.id}" value="{isni}" hidden>'

View file

@ -4,6 +4,7 @@ import requests
from bookwyrm import activitypub, models from bookwyrm import activitypub, models
def request_isni_data(search_index, search_term, max_records=5): def request_isni_data(search_index, search_term, max_records=5):
"""Request data from the ISNI API""" """Request data from the ISNI API"""
@ -18,11 +19,7 @@ def request_isni_data(search_index, search_term, max_records=5):
"recordPacking": "xml", "recordPacking": "xml",
"sortKeys": "RLV,pica,0,,", "sortKeys": "RLV,pica,0,,",
} }
result = requests.get( result = requests.get("http://isni.oclc.org/sru/", params=query_params, timeout=10)
"http://isni.oclc.org/sru/",
params=query_params,
timeout=10
)
# the OCLC ISNI server asserts the payload is encoded # the OCLC ISNI server asserts the payload is encoded
# in latin1, but we know better # in latin1, but we know better
result.encoding = "utf-8" result.encoding = "utf-8"
@ -58,10 +55,9 @@ def get_other_identifier(element, code):
for source in element.findall(".//sources"): for source in element.findall(".//sources"):
code_of_source = source.find(".//codeOfSource") code_of_source = source.find(".//codeOfSource")
if ( if (
code_of_source is not None code_of_source is not None
and code_of_source.text == code.upper() and code_of_source.text.lower() == code.lower()
or code_of_source.text == code.lower() ):
):
return source.find(".//sourceIdentifier").text return source.find(".//sourceIdentifier").text
return "" return ""
@ -75,10 +71,10 @@ def get_external_information_uri(element, match_string):
information = source.find(".//information") information = source.find(".//information")
uri = source.find(".//URI") uri = source.find(".//URI")
if ( if (
uri is not None uri is not None
and information is not None and information is not None
and information.text.lower() == match_string.lower() and information.text.lower() == match_string.lower()
): ):
return uri.text return uri.text
return "" return ""
@ -112,12 +108,14 @@ def find_authors_by_name(name_string, description=False):
titles.append(element.find(".//title")) titles.append(element.find(".//title"))
if titles is not None: if titles is not None:
# some of the "titles" in ISNI are a little ...iffy # some of the "titles" in ISNI are a little ...iffy
# '@' is used by ISNI/OCLC to index the starting point ignoring stop words # '@' is used by ISNI/OCLC to index the starting point ignoring stop words
# (e.g. "The @Government of no one") # (e.g. "The @Government of no one")
title_elements = [e for e in titles if not e.text.replace('@', '').isnumeric()] title_elements = [
e for e in titles if not e.text.replace("@", "").isnumeric()
]
if len(title_elements): if len(title_elements):
author.bio = title_elements[0].text.replace('@', '') author.bio = title_elements[0].text.replace("@", "")
else: else:
author.bio = None author.bio = None
@ -149,24 +147,25 @@ def get_author_from_isni(isni):
wikipedia = get_external_information_uri(element, "Wikipedia") wikipedia = get_external_information_uri(element, "Wikipedia")
author = activitypub.Author( author = activitypub.Author(
id=element.find(".//isniURI").text, id=element.find(".//isniURI").text,
name=name, name=name,
isni=isni, isni=isni,
viafId=viaf, viafId=viaf,
aliases=aliases, aliases=aliases,
bio=bio, bio=bio,
wikipediaLink=wikipedia wikipediaLink=wikipedia,
) )
return author return author
def build_author_from_isni(match_value): def build_author_from_isni(match_value):
"""Build dict with basic author details from ISNI or author name""" """Build basic author class object from ISNI URL"""
# if it is an isni value get the data # if it is an isni value get the data
if match_value.startswith("https://isni.org/isni/"): if match_value.startswith("https://isni.org/isni/"):
isni = match_value.replace("https://isni.org/isni/", "") isni = match_value.replace("https://isni.org/isni/", "")
return { "author": get_author_from_isni(isni) } return {"author": get_author_from_isni(isni)}
# otherwise it's a name string # otherwise it's a name string
return {} return {}
@ -177,7 +176,7 @@ def augment_author_metadata(author, isni):
isni_author = get_author_from_isni(isni) isni_author = get_author_from_isni(isni)
isni_author.to_model(model=models.Author, instance=author, overwrite=False) isni_author.to_model(model=models.Author, instance=author, overwrite=False)
# we DO want to overwrite aliases because we're adding them to the # we DO want to overwrite aliases because we're adding them to the
# existing aliases and ISNI will usually have more. # existing aliases and ISNI will usually have more.
# We need to dedupe because ISNI records often have lots of dupe aliases # We need to dedupe because ISNI records often have lots of dupe aliases
aliases = set(isni_author.aliases) aliases = set(isni_author.aliases)

View file

@ -12,6 +12,7 @@ from django.utils.decorators import method_decorator
from django.views import View from django.views import View
from bookwyrm import book_search, forms, models from bookwyrm import book_search, forms, models
# from bookwyrm.activitypub.base_activity import ActivityObject # from bookwyrm.activitypub.base_activity import ActivityObject
from bookwyrm.utils.isni import ( from bookwyrm.utils.isni import (
find_authors_by_name, find_authors_by_name,
@ -72,8 +73,7 @@ class EditBook(View):
) )
isni_authors = find_authors_by_name( isni_authors = find_authors_by_name(
author, author, description=True
description=True
) # find matches from ISNI API ) # find matches from ISNI API
# dedupe isni authors we already have in the DB # dedupe isni authors we already have in the DB
@ -187,12 +187,11 @@ class ConfirmEditBook(View):
author_object = build_author_from_isni(isni_match) author_object = build_author_from_isni(isni_match)
# with author data class from isni id # with author data class from isni id
if "author" in author_object: if "author" in author_object:
# TESTING skeleton = models.Author.objects.create(
skeleton = models.Author.objects.create(name=author_object["author"].name) name=author_object["author"].name
)
author = author_object["author"].to_model( author = author_object["author"].to_model(
model=models.Author, model=models.Author, overwrite=True, instance=skeleton
overwrite=True,
instance=skeleton
) )
else: else:
# or it's just a name # or it's just a name