From 661d49d9cc69915003ed14d3f4f4d4b797d5d4a6 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 30 Jan 2021 17:19:01 -0800 Subject: [PATCH 1/2] Ignore openlibrary editions with little to no metadata Also fixes the isbn problem --- bookwyrm/connectors/openlibrary.py | 31 +++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 55355131..cd196d27 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -27,9 +27,9 @@ class Connector(AbstractConnector): Mapping('series', formatter=get_first), Mapping('seriesNumber', remote_field='series_number'), Mapping('subjects'), - Mapping('subjectPlaces'), - Mapping('isbn13', formatter=get_first), - Mapping('isbn10', formatter=get_first), + Mapping('subjectPlaces', remote_field='subject_places'), + Mapping('isbn13', remote_field='isbn_13', formatter=get_first), + Mapping('isbn10', remote_field='isbn_10', formatter=get_first), Mapping('lccn', formatter=get_first), Mapping( 'oclcNumber', remote_field='oclc_numbers', @@ -144,9 +144,34 @@ class Connector(AbstractConnector): # we can mass download edition data from OL to avoid repeatedly querying edition_options = self.load_edition_data(work.openlibrary_key) for edition_data in edition_options.get('entries'): + # does this edition have ANY interesting data? + if ignore_edition(edition_data): + continue self.create_edition_from_data(work, edition_data) +def ignore_edition(edition_data): + ''' don't load a million editions that have no metadata ''' + # an isbn, we love to see it + if edition_data.get('isbn_13') or edition_data.get('isbn_10'): + print(edition_data.get('isbn_10')) + return False + # grudgingly, oclc can stay + if edition_data.get('oclc_numbers'): + print(edition_data.get('oclc_numbers')) + return False + # if it has a cover it can stay + if edition_data.get('covers'): + print(edition_data.get('covers')) + return False + # keep non-english editions + if edition_data.get('languages') and \ + 'languages/eng' not in str(edition_data.get('languages')): + print(edition_data.get('languages')) + return False + return True + + def get_description(description_blob): ''' descriptions can be a string or a dict ''' if isinstance(description_blob, dict): From 9833f5a03da6ae2b4a8d298c99506316ccac0fb5 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 30 Jan 2021 17:36:24 -0800 Subject: [PATCH 2/2] Tests creating editions --- .../connectors/test_openlibrary_connector.py | 16 ++++++++++++++++ bookwyrm/tests/data/ol_edition.json | 1 + 2 files changed, 17 insertions(+) diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index dc4c5f5b..c277ba04 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -190,3 +190,19 @@ class Openlibrary(TestCase): ''' detect if the loaded json is an edition ''' edition = pick_default_edition(self.edition_list_data['entries']) self.assertEqual(edition['key'], '/books/OL9788823M') + + + def test_create_edition_from_data(self): + ''' okay but can it actually create an edition with proper metadata ''' + work = models.Work.objects.create(title='Hello') + result = self.connector.create_edition_from_data( + work, self.edition_data) + self.assertEqual(result.parent_work, work) + self.assertEqual(result.title, 'Sabriel') + self.assertEqual(result.isbn_10, '0060273224') + self.assertIsNotNone(result.description) + self.assertEqual(result.languages[0], 'English') + self.assertEqual(result.publishers[0], 'Harper Trophy') + self.assertEqual(result.pages, 491) + self.assertEqual(result.subjects[0], 'Fantasy.') + self.assertEqual(result.physical_format, 'Hardcover') diff --git a/bookwyrm/tests/data/ol_edition.json b/bookwyrm/tests/data/ol_edition.json index 459e9dff..2423364b 100644 --- a/bookwyrm/tests/data/ol_edition.json +++ b/bookwyrm/tests/data/ol_edition.json @@ -9,6 +9,7 @@ "518848" ] }, + "physical_format": "Hardcover", "lc_classifications": [ "PZ7.N647 Sab 1995" ],