fix encoding

The OCLC server claims that the XML payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly decode the response text as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8. With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html
2025-02-14 01:55:17 +00:00 · 2021-10-29 21:00:35 +11:00 · 2021-10-29 21:00:35 +11:00 · d87e1b1567
commit d87e1b1567
parent f2e4865adf
1 changed files with 5 additions and 1 deletions
--- a/bookwyrm/utils/isni.py
+++ b/bookwyrm/utils/isni.py
@@ -1,5 +1,6 @@
 import requests
 import xml.etree.ElementTree as ET
+from xml.etree.ElementTree import XMLParser

 # get data
 base_string = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
@@ -15,8 +16,11 @@ def find_authors_by_name(names):
    names = url_stringify(names)
    query = base_string + names + suffix_string
    r = requests.get(query)
-    # parse xml
+    # the OCLC ISNI server asserts the payload is encoded
+    # in latin1, but we know better
+    r.encoding = "utf-8"
    payload = r.text
+    # parse xml
    root = ET.fromstring(payload)

    # build list of possible authors