moviewyrm/bookwyrm/tests/connectors/test_openlibrary_connector.py

236 lines
9.5 KiB
Python
Raw Normal View History

""" testing book data connectors """
import json
import pathlib
from unittest.mock import patch

from django.test import TestCase
import responses

from bookwyrm import models
from bookwyrm.book_search import SearchResult
from bookwyrm.connectors.openlibrary import Connector
from bookwyrm.connectors.openlibrary import ignore_edition
from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key
from bookwyrm.connectors.connector_manager import ConnectorException


class Openlibrary(TestCase):
    """test loading data from openlibrary.org"""

    @staticmethod
    def _load_json(filename):
        """parse a json fixture from the ../data directory next to this file"""
        datafile = pathlib.Path(__file__).parent.joinpath("../data", filename)
        return json.loads(datafile.read_bytes())

    def setUp(self):
        """creates the connector we'll use and loads the json fixtures"""
        models.Connector.objects.create(
            identifier="openlibrary.org",
            name="OpenLibrary",
            connector_file="openlibrary",
            base_url="https://openlibrary.org",
            books_url="https://openlibrary.org",
            covers_url="https://covers.openlibrary.org",
            search_url="https://openlibrary.org/search?q=",
            isbn_search_url="https://openlibrary.org/isbn",
        )
        self.connector = Connector("openlibrary.org")

        self.work_data = self._load_json("ol_work.json")
        self.edition_data = self._load_json("ol_edition.json")
        self.edition_list_data = self._load_json("ol_edition_list.json")

    def test_get_remote_id_from_data(self):
        """format the remote id from the data"""
        data = {"key": "/work/OL1234W"}
        result = self.connector.get_remote_id_from_data(data)
        self.assertEqual(result, "https://openlibrary.org/work/OL1234W")

        # error handling: data without a "key" must raise
        with self.assertRaises(ConnectorException):
            self.connector.get_remote_id_from_data({})

    def test_is_work_data(self):
        """detect if the loaded json is a work"""
        self.assertEqual(self.connector.is_work_data(self.work_data), True)
        self.assertEqual(self.connector.is_work_data(self.edition_data), False)

    @responses.activate
    def test_get_edition_from_work_data(self):
        """loads a list of editions and returns the picked default"""
        data = {"key": "/work/OL1234W"}
        responses.add(
            responses.GET,
            "https://openlibrary.org/work/OL1234W/editions",
            json={"entries": []},
            status=200,
        )
        # the edition picker is stubbed so only the connector plumbing is tested
        with patch(
            "bookwyrm.connectors.openlibrary.pick_default_edition"
        ) as pick_edition:
            pick_edition.return_value = "hi"
            result = self.connector.get_edition_from_work_data(data)
        self.assertEqual(result, "hi")

    @responses.activate
    def test_get_work_from_edition_data(self):
        """loads the work referenced by an edition"""
        data = {"works": [{"key": "/work/OL1234W"}]}
        responses.add(
            responses.GET,
            "https://openlibrary.org/work/OL1234W",
            json={"hi": "there"},
            status=200,
        )
        result = self.connector.get_work_from_edition_data(data)
        self.assertEqual(result, {"hi": "there"})

    @responses.activate
    def test_get_authors_from_data(self):
        """find authors in data"""
        responses.add(
            responses.GET,
            "https://openlibrary.org/authors/OL382982A",
            json={
                "name": "George Elliott",
                "personal_name": "George Elliott",
                "last_modified": {
                    "type": "/type/datetime",
                    "value": "2008-08-31 10:09:33.413686",
                },
                "remote_ids": {
                    "isni": "000111",
                },
                "key": "/authors/OL453734A",
                "type": {"key": "/type/author"},
                "id": 1259965,
                "revision": 2,
            },
            status=200,
        )
        results = self.connector.get_authors_from_data(self.work_data)
        result = list(results)[0]
        self.assertIsInstance(result, models.Author)
        self.assertEqual(result.name, "George Elliott")
        # the key asserted here is the one in the mocked response body
        self.assertEqual(result.openlibrary_key, "OL453734A")
        self.assertEqual(result.isni, "000111")

    def test_get_cover_url(self):
        """formats a url that should contain the cover image"""
        blob = ["image"]
        result = self.connector.get_cover_url(blob)
        self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg")

    def test_parse_search_result(self):
        """translate json from openlibrary into SearchResult"""
        search_data = self._load_json("ol_search.json")
        result = list(self.connector.parse_search_data(search_data, 0))[0]

        self.assertIsInstance(result, SearchResult)
        self.assertEqual(result.title, "This Is How You Lose the Time War")
        self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W")
        self.assertEqual(result.author, "Amal El-Mohtar, Max Gladstone")
        self.assertEqual(result.year, 2019)
        self.assertEqual(result.connector, self.connector)

    def test_parse_isbn_search_result(self):
        """extract the results from the isbn search json response"""
        search_data = self._load_json("ol_isbn_search.json")
        result = list(self.connector.parse_isbn_search_data(search_data))
        self.assertEqual(len(result), 1)

        result = result[0]
        self.assertIsInstance(result, SearchResult)
        self.assertEqual(result.title, "Les ombres errantes")
        self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M")
        self.assertEqual(result.author, "Pascal Quignard")
        self.assertEqual(result.year, "2002")
        self.assertEqual(result.connector, self.connector)

    @responses.activate
    def test_load_edition_data(self):
        """format url from key and make request"""
        key = "OL1234W"
        responses.add(
            responses.GET,
            "https://openlibrary.org/works/OL1234W/editions",
            json={"hi": "there"},
        )
        result = self.connector.load_edition_data(key)
        self.assertEqual(result, {"hi": "there"})

    @responses.activate
    def test_expand_book_data(self):
        """given a book, get more editions"""
        work = models.Work.objects.create(title="Test Work", openlibrary_key="OL1234W")
        edition = models.Edition.objects.create(title="Test Edition", parent_work=work)
        responses.add(
            responses.GET,
            "https://openlibrary.org/works/OL1234W/editions",
            json={"entries": []},
        )
        # no assertions here: this only checks that expanding from either an
        # edition or a work completes without raising
        with patch(
            "bookwyrm.connectors.abstract_connector.AbstractConnector."
            "create_edition_from_data"
        ):
            self.connector.expand_book_data(edition)
            self.connector.expand_book_data(work)

    def test_get_description(self):
        """should do some cleanup on the description data"""
        description = get_description(self.work_data["description"])
        expected = "First in the Old Kingdom/Abhorsen series."
        self.assertEqual(description, expected)

    def test_get_openlibrary_key(self):
        """extracts the openlibrary identifier from a key path"""
        key = get_openlibrary_key("/books/OL27320736M")
        self.assertEqual(key, "OL27320736M")

    def test_get_languages(self):
        """looks up languages from a list"""
        languages = get_languages(self.edition_data["languages"])
        self.assertEqual(languages, ["English"])

    def test_pick_default_edition(self):
        """picks the default edition out of a list of edition entries"""
        edition = pick_default_edition(self.edition_list_data["entries"])
        self.assertEqual(edition["key"], "/books/OL9788823M")

    @responses.activate
    def test_create_edition_from_data(self):
        """okay but can it actually create an edition with proper metadata"""
        work = models.Work.objects.create(title="Hello")
        responses.add(
            responses.GET,
            "https://openlibrary.org/authors/OL382982A",
            json={"hi": "there"},
            status=200,
        )
        # author lookup is stubbed out so only the edition fields are checked
        with patch(
            "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data"
        ) as get_authors:
            get_authors.return_value = []
            result = self.connector.create_edition_from_data(work, self.edition_data)

        self.assertEqual(result.parent_work, work)
        self.assertEqual(result.title, "Sabriel")
        self.assertEqual(result.isbn_10, "0060273224")
        self.assertIsNotNone(result.description)
        self.assertEqual(result.languages[0], "English")
        self.assertEqual(result.publishers[0], "Harper Trophy")
        self.assertEqual(result.pages, 491)
        self.assertEqual(result.subjects[0], "Fantasy.")
        self.assertEqual(result.physical_format, "Hardcover")

    def test_ignore_edition(self):
        """skip editions with poor metadata"""
        # these editions are kept
        self.assertFalse(ignore_edition({"isbn_13": "hi"}))
        self.assertFalse(ignore_edition({"oclc_numbers": "hi"}))
        self.assertFalse(ignore_edition({"covers": "hi"}))
        self.assertFalse(ignore_edition({"languages": "languages/fr"}))
        # these editions are ignored
        self.assertTrue(ignore_edition({"languages": "languages/eng"}))
        self.assertTrue(ignore_edition({"format": "paperback"}))