mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-10 17:25:35 +00:00
fix encoding
The OCLC server claims that the XML payload is encoded as latin1 (ISO-8859-1). This causes Requests to incorrectly decode the response as latin1, when actually everything is (thank goodness) UTF-8. We can fix it by just telling Requests that it is really UTF-8. With thanks to Tex Texin, creator of http://i18nqa.com/debug/utf8-debug.html
This commit is contained in:
parent
f2e4865adf
commit
d87e1b1567
1 changed file with 5 additions and 1 deletion
|
@ -1,5 +1,6 @@
|
|||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
from xml.etree.ElementTree import XMLParser
|
||||
|
||||
# get data
|
||||
base_string = "http://isni.oclc.org/sru/?query=pica.na+%3D+%22"
|
||||
|
@ -15,8 +16,11 @@ def find_authors_by_name(names):
|
|||
names = url_stringify(names)
|
||||
query = base_string + names + suffix_string
|
||||
r = requests.get(query)
|
||||
# parse xml
|
||||
# the OCLC ISNI server asserts the payload is encoded
|
||||
# in latin1, but we know better
|
||||
r.encoding = "utf-8"
|
||||
payload = r.text
|
||||
# parse xml
|
||||
root = ET.fromstring(payload)
|
||||
|
||||
# build list of possible authors
|
||||
|
|
Loading…
Reference in a new issue