Merge pull request #575 from mouse-reeve/openlibrary-editions

Ignore openlibrary editions with little to no metadata
This commit is contained in:
Mouse Reeve 2021-01-30 17:49:54 -08:00 committed by GitHub
commit a36de9026b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 3 deletions

View file

@ -27,9 +27,9 @@ class Connector(AbstractConnector):
Mapping('series', formatter=get_first), Mapping('series', formatter=get_first),
Mapping('seriesNumber', remote_field='series_number'), Mapping('seriesNumber', remote_field='series_number'),
Mapping('subjects'), Mapping('subjects'),
Mapping('subjectPlaces'), Mapping('subjectPlaces', remote_field='subject_places'),
Mapping('isbn13', formatter=get_first), Mapping('isbn13', remote_field='isbn_13', formatter=get_first),
Mapping('isbn10', formatter=get_first), Mapping('isbn10', remote_field='isbn_10', formatter=get_first),
Mapping('lccn', formatter=get_first), Mapping('lccn', formatter=get_first),
Mapping( Mapping(
'oclcNumber', remote_field='oclc_numbers', 'oclcNumber', remote_field='oclc_numbers',
@ -144,9 +144,34 @@ class Connector(AbstractConnector):
# we can mass download edition data from OL to avoid repeatedly querying # we can mass download edition data from OL to avoid repeatedly querying
edition_options = self.load_edition_data(work.openlibrary_key) edition_options = self.load_edition_data(work.openlibrary_key)
for edition_data in edition_options.get('entries'): for edition_data in edition_options.get('entries'):
# does this edition have ANY interesting data?
if ignore_edition(edition_data):
continue
self.create_edition_from_data(work, edition_data) self.create_edition_from_data(work, edition_data)
def ignore_edition(edition_data):
    ''' don't load a million editions that have no metadata

    edition_data is a dict-like edition record (anything with ``.get``).
    Returns False (keep the edition) when it has an isbn, an oclc number,
    a cover, or a non-empty ``languages`` value that does not mention
    ``languages/eng``; returns True (skip it) otherwise.
    '''
    # an isbn, we love to see it
    if edition_data.get('isbn_13') or edition_data.get('isbn_10'):
        return False
    # grudgingly, oclc can stay
    if edition_data.get('oclc_numbers'):
        return False
    # if it has a cover it can stay
    if edition_data.get('covers'):
        return False
    # keep non-english editions; the check runs against the string form of
    # the whole value, so an entry listing english alongside another
    # language still counts as english here
    languages = edition_data.get('languages')
    if languages and 'languages/eng' not in str(languages):
        return False
    return True
def get_description(description_blob): def get_description(description_blob):
''' descriptions can be a string or a dict ''' ''' descriptions can be a string or a dict '''
if isinstance(description_blob, dict): if isinstance(description_blob, dict):

View file

@ -190,3 +190,19 @@ class Openlibrary(TestCase):
''' detect if the loaded json is an edition ''' ''' detect if the loaded json is an edition '''
edition = pick_default_edition(self.edition_list_data['entries']) edition = pick_default_edition(self.edition_list_data['entries'])
self.assertEqual(edition['key'], '/books/OL9788823M') self.assertEqual(edition['key'], '/books/OL9788823M')
def test_create_edition_from_data(self):
    ''' an edition built from the fixture data carries its metadata '''
    parent = models.Work.objects.create(title='Hello')
    edition = self.connector.create_edition_from_data(
        parent, self.edition_data
    )

    # the new edition is attached to the work it was created for
    self.assertEqual(edition.parent_work, parent)

    # identifying fields
    self.assertEqual(edition.title, 'Sabriel')
    self.assertEqual(edition.isbn_10, '0060273224')
    self.assertIsNotNone(edition.description)

    # list-valued and descriptive fields
    self.assertEqual(edition.languages[0], 'English')
    self.assertEqual(edition.publishers[0], 'Harper Trophy')
    self.assertEqual(edition.pages, 491)
    self.assertEqual(edition.subjects[0], 'Fantasy.')
    self.assertEqual(edition.physical_format, 'Hardcover')

View file

@ -9,6 +9,7 @@
"518848" "518848"
] ]
}, },
"physical_format": "Hardcover",
"lc_classifications": [ "lc_classifications": [
"PZ7.N647 Sab 1995" "PZ7.N647 Sab 1995"
], ],