diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index c59829d6..a1155d2d 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -68,7 +68,7 @@ class Connector(AbstractConnector): key = data['key'] except KeyError: raise ConnectorException('Invalid book data') - return '%s/%s' % (self.books_url, key) + return '%s%s' % (self.books_url, key) def is_work_data(self, data): @@ -80,7 +80,7 @@ class Connector(AbstractConnector): key = data['key'] except KeyError: raise ConnectorException('Invalid book data') - url = '%s/%s/editions' % (self.books_url, key) + url = '%s%s/editions' % (self.books_url, key) data = get_data(url) return pick_default_edition(data['entries']) @@ -90,7 +90,7 @@ class Connector(AbstractConnector): key = data['works'][0]['key'] except (IndexError, KeyError): raise ConnectorException('No work found for edition') - url = '%s/%s' % (self.books_url, key) + url = '%s%s' % (self.books_url, key) return get_data(url) @@ -100,7 +100,7 @@ class Connector(AbstractConnector): author_blob = author_blob.get('author', author_blob) # this id is "/authors/OL1234567A" author_id = author_blob['key'] - url = '%s/%s.json' % (self.base_url, author_id) + url = '%s%s' % (self.base_url, author_id) yield self.get_or_create_author(url) @@ -130,7 +130,7 @@ class Connector(AbstractConnector): def load_edition_data(self, olkey): ''' query openlibrary for editions of a work ''' - url = '%s/works/%s/editions.json' % (self.books_url, olkey) + url = '%s/works/%s/editions' % (self.books_url, olkey) return get_data(url) @@ -150,7 +150,7 @@ def get_description(description_blob): ''' descriptions can be a string or a dict ''' if isinstance(description_blob, dict): return description_blob.get('value') - return description_blob + return description_blob def get_openlibrary_key(key): diff --git a/bookwyrm/models/base_model.py b/bookwyrm/models/base_model.py index d82334e1..b212d693 100644 --- a/bookwyrm/models/base_model.py +++ b/bookwyrm/models/base_model.py @@ -109,7 +109,7 @@ class ActivitypubMixin: not field.deduplication_field: continue - value = data.get(field.activitypub_field) + value = data.get(field.get_activitypub_field()) if not value: continue filters.append({field.name: value}) diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index cab2f834..6bcfc76b 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -4,16 +4,14 @@ import responses from bookwyrm import models from bookwyrm.connectors import abstract_connector -from bookwyrm.connectors.abstract_connector import Mapping, SearchResult -from bookwyrm.connectors.openlibrary import Connector +from bookwyrm.connectors.abstract_connector import Mapping +from bookwyrm.settings import DOMAIN class AbstractConnector(TestCase): ''' generic code for connecting to outside data sources ''' def setUp(self): ''' we need an example connector ''' - self.book = models.Edition.objects.create(title='Example Edition') - self.connector_info = models.Connector.objects.create( identifier='example.com', connector_file='openlibrary', @@ -22,107 +20,92 @@ class AbstractConnector(TestCase): covers_url='https://example.com/covers', search_url='https://example.com/search?q=', ) - self.connector = Connector('example.com') - - self.data = { - 'title': 'Unused title', - 'ASIN': 'A00BLAH', - 'isbn_10': '1234567890', - 'isbn_13': 'blahhh', - 'blah': 'bip', - 'format': 'hardcover', - 'series': ['one', 'two'], + work_data = { + 'id': 'abc1', + 'title': 'Test work', + 'type': 'work', + 'openlibraryKey': 'OL1234W', } - self.connector.key_mappings = [ - Mapping('isbn_10'), - Mapping('isbn_13'), - Mapping('lccn'), - Mapping('asin'), - ] + self.work_data = work_data + edition_data = { + 'id': 'abc2', + 'title': 'Test edition', + 'type': 'edition', + 'openlibraryKey': 'OL1234M', + } + self.edition_data = edition_data - - def test_abstract_minimal_connector_init(self): - ''' barebones connector for search with defaults ''' - class TestConnector(abstract_connector.AbstractMinimalConnector): + class TestConnector(abstract_connector.AbstractConnector): ''' nothing added here ''' - def format_search_result(): - pass - def get_or_create_book(): - pass - def parse_search_data(): - pass - - connector = TestConnector('example.com') - self.assertEqual(connector.connector, self.connector_info) - self.assertEqual(connector.base_url, 'https://example.com') - self.assertEqual(connector.books_url, 'https://example.com/books') - self.assertEqual(connector.covers_url, 'https://example.com/covers') - self.assertEqual(connector.search_url, 'https://example.com/search?q=') - self.assertIsNone(connector.name), - self.assertEqual(connector.identifier, 'example.com'), - self.assertIsNone(connector.max_query_count) - self.assertFalse(connector.local) - - - @responses.activate - def test_abstract_minimal_connector_search(self): - ''' makes an http request to the outside service ''' - class TestConnector(abstract_connector.AbstractMinimalConnector): - ''' nothing added here ''' - def format_search_result(self, data): - return data - def get_or_create_book(self, data): - pass + def format_search_result(self, search_result): + return search_result def parse_search_data(self, data): return data - connector = TestConnector('example.com') + def is_work_data(self, data): + return data['type'] == 'work' + def get_edition_from_work_data(self, data): + return edition_data + def get_work_from_edition_data(self, data): + return work_data + def get_authors_from_data(self, data): + return [] + def expand_book_data(self, book): + pass + self.connector = TestConnector('example.com') + self.connector.book_mappings = [ + Mapping('id'), + Mapping('title'), + Mapping('openlibraryKey'), + ] + + self.book = models.Edition.objects.create( + title='Test Book', remote_id='https://example.com/book/1234', + openlibrary_key='OL1234M') + + + def test_abstract_connector_init(self): + ''' barebones connector for search with defaults ''' + self.assertIsInstance(self.connector.book_mappings, list) + + + def test_is_available(self): + ''' this isn't used.... ''' + self.assertTrue(self.connector.is_available()) + self.connector.max_query_count = 1 + self.connector.connector.query_count = 2 + self.assertFalse(self.connector.is_available()) + + + def test_get_or_create_book_existing(self): + ''' find an existing book by remote/origin id ''' + self.assertEqual(models.Book.objects.count(), 1) + self.assertEqual( + self.book.remote_id, 'https://%s/book/%d' % (DOMAIN, self.book.id)) + self.assertEqual( + self.book.origin_id, 'https://example.com/book/1234') + + # dedupe by origin id + result = self.connector.get_or_create_book( + 'https://example.com/book/1234') + self.assertEqual(models.Book.objects.count(), 1) + self.assertEqual(result, self.book) + + # dedupe by remote id + result = self.connector.get_or_create_book( + 'https://%s/book/%d' % (DOMAIN, self.book.id)) + self.assertEqual(models.Book.objects.count(), 1) + self.assertEqual(result, self.book) + + @responses.activate + def test_get_or_create_book_deduped(self): + ''' load remote data and deduplicate ''' responses.add( responses.GET, - 'https://example.com/search?q=a%20book%20title', - json=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'], - status=200) - results = connector.search('a book title') - self.assertEqual(len(results), 10) - self.assertEqual(results[0], 'a') - self.assertEqual(results[1], 'b') - self.assertEqual(results[2], 'c') - - - def test_search_result(self): - ''' a class that stores info about a search result ''' - result = SearchResult( - title='Title', - key='https://example.com/book/1', - author='Author Name', - year='1850', - connector=self.connector, + 'https://example.com/book/abcd', + json=self.edition_data ) - # there's really not much to test here, it's just a dataclass - self.assertEqual(result.confidence, 1) - self.assertEqual(result.title, 'Title') - - - def test_create_mapping(self): - ''' maps remote fields for book data to bookwyrm activitypub fields ''' - mapping = Mapping('isbn') - self.assertEqual(mapping.local_field, 'isbn') - self.assertEqual(mapping.remote_field, 'isbn') - self.assertEqual(mapping.formatter('bb'), 'bb') - - - def test_create_mapping_with_remote(self): - ''' the remote field is different than the local field ''' - mapping = Mapping('isbn', remote_field='isbn13') - self.assertEqual(mapping.local_field, 'isbn') - self.assertEqual(mapping.remote_field, 'isbn13') - self.assertEqual(mapping.formatter('bb'), 'bb') - - - def test_create_mapping_with_formatter(self): - ''' a function is provided to modify the data ''' - formatter = lambda x: 'aa' + x - mapping = Mapping('isbn', formatter=formatter) - self.assertEqual(mapping.local_field, 'isbn') - self.assertEqual(mapping.remote_field, 'isbn') - self.assertEqual(mapping.formatter, formatter) - self.assertEqual(mapping.formatter('bb'), 'aabb') + result = self.connector.get_or_create_book( + 'https://example.com/book/abcd') + self.assertEqual(result, self.book) + self.assertEqual(models.Edition.objects.count(), 1) + self.assertEqual(models.Edition.objects.count(), 1) diff --git a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py new file mode 100644 index 00000000..0c6d2535 --- /dev/null +++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py @@ -0,0 +1,100 @@ +''' testing book data connectors ''' +from django.test import TestCase +import responses + +from bookwyrm import models +from bookwyrm.connectors import abstract_connector +from bookwyrm.connectors.abstract_connector import Mapping, SearchResult + + +class AbstractConnector(TestCase): + ''' generic code for connecting to outside data sources ''' + def setUp(self): + ''' we need an example connector ''' + self.connector_info = models.Connector.objects.create( + identifier='example.com', + connector_file='openlibrary', + base_url='https://example.com', + books_url='https://example.com/books', + covers_url='https://example.com/covers', + search_url='https://example.com/search?q=', + ) + + class TestConnector(abstract_connector.AbstractMinimalConnector): + ''' nothing added here ''' + def format_search_result(self, search_result): + return search_result + def get_or_create_book(self, remote_id): + pass + def parse_search_data(self, data): + return data + self.test_connector = TestConnector('example.com') + + + def test_abstract_minimal_connector_init(self): + ''' barebones connector for search with defaults ''' + connector = self.test_connector + self.assertEqual(connector.connector, self.connector_info) + self.assertEqual(connector.base_url, 'https://example.com') + self.assertEqual(connector.books_url, 'https://example.com/books') + self.assertEqual(connector.covers_url, 'https://example.com/covers') + self.assertEqual(connector.search_url, 'https://example.com/search?q=') + self.assertIsNone(connector.name) + self.assertEqual(connector.identifier, 'example.com') + self.assertIsNone(connector.max_query_count) + self.assertFalse(connector.local) + + + @responses.activate + def test_search(self): + ''' makes an http request to the outside service ''' + responses.add( + responses.GET, + 'https://example.com/search?q=a%20book%20title', + json=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'], + status=200) + results = self.test_connector.search('a book title') + self.assertEqual(len(results), 10) + self.assertEqual(results[0], 'a') + self.assertEqual(results[1], 'b') + self.assertEqual(results[2], 'c') + + + def test_search_result(self): + ''' a class that stores info about a search result ''' + result = SearchResult( + title='Title', + key='https://example.com/book/1', + author='Author Name', + year='1850', + connector=self.test_connector, + ) + # there's really not much to test here, it's just a dataclass + self.assertEqual(result.confidence, 1) + self.assertEqual(result.title, 'Title') + + + def test_create_mapping(self): + ''' maps remote fields for book data to bookwyrm activitypub fields ''' + mapping = Mapping('isbn') + self.assertEqual(mapping.local_field, 'isbn') + self.assertEqual(mapping.remote_field, 'isbn') + self.assertEqual(mapping.formatter('bb'), 'bb') + + + def test_create_mapping_with_remote(self): + ''' the remote field is different than the local field ''' + mapping = Mapping('isbn', remote_field='isbn13') + self.assertEqual(mapping.local_field, 'isbn') + self.assertEqual(mapping.remote_field, 'isbn13') + self.assertEqual(mapping.formatter('bb'), 'bb') + + + def test_create_mapping_with_formatter(self): + ''' a function is provided to modify the data ''' + formatter = lambda x: 'aa' + x + mapping = Mapping('isbn', formatter=formatter) + self.assertEqual(mapping.local_field, 'isbn') + self.assertEqual(mapping.remote_field, 'isbn') + self.assertEqual(mapping.formatter, formatter) + self.assertEqual(mapping.formatter('bb'), 'aabb') diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index e2d54cd3..437b23dc 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -1,9 +1,10 @@ ''' testing book data connectors ''' import json import pathlib -from dateutil import parser +from unittest.mock import patch + from django.test import TestCase -import pytz +import responses from bookwyrm import models from bookwyrm.connectors.openlibrary import Connector @@ -11,10 +12,13 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description from bookwyrm.connectors.openlibrary import pick_default_edition, \ get_openlibrary_key from bookwyrm.connectors.abstract_connector import SearchResult +from bookwyrm.connectors.abstract_connector import ConnectorException class Openlibrary(TestCase): + ''' test loading data from openlibrary.org ''' def setUp(self): + ''' creates the connector we'll use ''' models.Connector.objects.create( identifier='openlibrary.org', name='OpenLibrary', @@ -37,19 +41,85 @@ class Openlibrary(TestCase): self.edition_list_data = json.loads(edition_list_file.read_bytes()) + def test_get_remote_id_from_data(self): + ''' format the remote id from the data ''' + data = {'key': '/work/OL1234W'} + result = self.connector.get_remote_id_from_data(data) + self.assertEqual(result, 'https://openlibrary.org/work/OL1234W') + # error handlding + with self.assertRaises(ConnectorException): + self.connector.get_remote_id_from_data({}) + + def test_is_work_data(self): + ''' detect if the loaded json is a work ''' self.assertEqual(self.connector.is_work_data(self.work_data), True) self.assertEqual(self.connector.is_work_data(self.edition_data), False) - def test_pick_default_edition(self): - edition = pick_default_edition(self.edition_list_data['entries']) - self.assertEqual(edition['key'], '/books/OL9788823M') + @responses.activate + def test_get_edition_from_work_data(self): + ''' loads a list of editions ''' + data = {'key': '/work/OL1234W'} + responses.add( + responses.GET, + 'https://openlibrary.org/work/OL1234W/editions', + json={'entries': []}, + status=200) + with patch('bookwyrm.connectors.openlibrary.pick_default_edition') \ + as pick_edition: + pick_edition.return_value = 'hi' + result = self.connector.get_edition_from_work_data(data) + self.assertEqual(result, 'hi') + + + @responses.activate + def test_get_work_from_edition_data(self): + ''' loads a list of editions ''' + data = {'works': [{'key': '/work/OL1234W'}]} + responses.add( + responses.GET, + 'https://openlibrary.org/work/OL1234W', + json={'hi': 'there'}, + status=200) + result = self.connector.get_work_from_edition_data(data) + self.assertEqual(result, {'hi': 'there'}) + + + @responses.activate + def test_get_authors_from_data(self): + ''' find authors in data ''' + responses.add( + responses.GET, + 'https://openlibrary.org/authors/OL382982A', + json={'hi': 'there'}, + status=200) + results = self.connector.get_authors_from_data(self.work_data) + for result in results: + self.assertIsInstance(result, models.Author) + + + def test_get_cover_url(self): + ''' formats a url that should contain the cover image ''' + blob = ['image'] + result = self.connector.get_cover_url(blob) + self.assertEqual( + result, 'https://covers.openlibrary.org/b/id/image-M.jpg') + + def test_parse_search_result(self): + ''' extract the results from the search json response ''' + datafile = pathlib.Path(__file__).parent.joinpath( + '../data/ol_search.json') + search_data = json.loads(datafile.read_bytes()) + result = self.connector.parse_search_data(search_data) + self.assertIsInstance(result, list) + self.assertEqual(len(result), 2) def test_format_search_result(self): ''' translate json from openlibrary into SearchResult ''' - datafile = pathlib.Path(__file__).parent.joinpath('../data/ol_search.json') + datafile = pathlib.Path(__file__).parent.joinpath( + '../data/ol_search.json') search_data = json.loads(datafile.read_bytes()) results = self.connector.parse_search_data(search_data) self.assertIsInstance(results, list) @@ -57,22 +127,66 @@ class Openlibrary(TestCase): result = self.connector.format_search_result(results[0]) self.assertIsInstance(result, SearchResult) self.assertEqual(result.title, 'This Is How You Lose the Time War') - self.assertEqual(result.key, 'https://openlibrary.org/works/OL20639540W') + self.assertEqual( + result.key, 'https://openlibrary.org/works/OL20639540W') self.assertEqual(result.author, 'Amal El-Mohtar, Max Gladstone') self.assertEqual(result.year, 2019) + self.assertEqual(result.connector, self.connector) + + + @responses.activate + def test_load_edition_data(self): + ''' format url from key and make request ''' + key = 'OL1234W' + responses.add( + responses.GET, + 'https://openlibrary.org/works/OL1234W/editions', + json={'hi': 'there'} + ) + result = self.connector.load_edition_data(key) + self.assertEqual(result, {'hi': 'there'}) + + + @responses.activate + def test_expand_book_data(self): + ''' given a book, get more editions ''' + work = models.Work.objects.create( + title='Test Work', openlibrary_key='OL1234W') + edition = models.Edition.objects.create( + title='Test Edition', parent_work=work) + + responses.add( + responses.GET, + 'https://openlibrary.org/works/OL1234W/editions', + json={'entries': []}, + ) + with patch( + 'bookwyrm.connectors.abstract_connector.AbstractConnector.' \ + 'create_edition_from_data'): + self.connector.expand_book_data(edition) + self.connector.expand_book_data(work) def test_get_description(self): + ''' should do some cleanup on the description data ''' description = get_description(self.work_data['description']) expected = 'First in the Old Kingdom/Abhorsen series.' self.assertEqual(description, expected) + def test_get_openlibrary_key(self): + ''' extracts the uuid ''' + key = get_openlibrary_key('/books/OL27320736M') + self.assertEqual(key, 'OL27320736M') + + def test_get_languages(self): + ''' looks up languages from a list ''' languages = get_languages(self.edition_data['languages']) self.assertEqual(languages, ['English']) - def test_get_ol_key(self): - key = get_openlibrary_key('/books/OL27320736M') - self.assertEqual(key, 'OL27320736M') + def test_pick_default_edition(self): + ''' detect if the loaded json is an edition ''' + edition = pick_default_edition(self.edition_list_data['entries']) + self.assertEqual(edition['key'], '/books/OL9788823M') diff --git a/bookwyrm/tests/models/test_base_model.py b/bookwyrm/tests/models/test_base_model.py index 65cf892e..604ba0d6 100644 --- a/bookwyrm/tests/models/test_base_model.py +++ b/bookwyrm/tests/models/test_base_model.py @@ -200,3 +200,15 @@ class BaseModel(TestCase): # test subclass match result = models.Status.find_existing_by_remote_id( 'https://comment.net') + + + def test_find_existing(self): + ''' match a blob of data to a model ''' + book = models.Edition.objects.create( + title='Test edition', + openlibrary_key='OL1234', + ) + + result = models.Edition.find_existing( + {'openlibraryKey': 'OL1234'}) + self.assertEqual(result, book) diff --git a/bookwyrm/tests/models/test_import_model.py b/bookwyrm/tests/models/test_import_model.py index d393d968..a975f410 100644 --- a/bookwyrm/tests/models/test_import_model.py +++ b/bookwyrm/tests/models/test_import_model.py @@ -154,12 +154,12 @@ class ImportJob(TestCase): status=200) responses.add( responses.GET, - 'https://openlibrary.org//works/OL15832982W', + 'https://openlibrary.org/works/OL15832982W', json=bookdata, status=200) responses.add( responses.GET, - 'https://openlibrary.org//authors/OL382982A.json', + 'https://openlibrary.org/authors/OL382982A', json={'name': 'test author'}, status=200)