From d87e1b1567da7d6990bc586289246974abdfaa06 Mon Sep 17 00:00:00 2001
From: Hugh Rundle <hugh@hughrundle.net>
Date: Fri, 29 Oct 2021 21:00:35 +1100
Subject: [PATCH] fix encoding

The OCLC server claims that the XML payload is encoded as latin1 (ISO-8859-1).
This causes Requests to incorrectly decode the response as latin1, when actually everything is (thank goodness) UTF-8.
We can fix it by just telling Requests that the response is really UTF-8.

With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html
---
 bookwyrm/utils/isni.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py
index 2fd87bebc..c905eb1c2 100644
--- a/bookwyrm/utils/isni.py
+++ b/bookwyrm/utils/isni.py
@@ -1,5 +1,6 @@
 import requests
 import xml.etree.ElementTree as ET
+from xml.etree.ElementTree import XMLParser
 
 # get data
 base_string = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
@@ -15,8 +16,11 @@ def find_authors_by_name(names):
     names = url_stringify(names)
     query = base_string + names + suffix_string
     r = requests.get(query)
-    # parse xml
+    # the OCLC ISNI server asserts the payload is encoded
+    # in latin1, but we know better
+    r.encoding = "utf-8"
     payload = r.text
+    # parse xml
     root = ET.fromstring(payload)
 
     # build list of possible authors