forked from mirrors/bookwyrm
Merge pull request #575 from mouse-reeve/openlibrary-editions
Ignore openlibrary editions with little to no metadata
This commit is contained in:
commit
a36de9026b
3 changed files with 45 additions and 3 deletions
|
@ -27,9 +27,9 @@ class Connector(AbstractConnector):
|
|||
Mapping('series', formatter=get_first),
|
||||
Mapping('seriesNumber', remote_field='series_number'),
|
||||
Mapping('subjects'),
|
||||
Mapping('subjectPlaces'),
|
||||
Mapping('isbn13', formatter=get_first),
|
||||
Mapping('isbn10', formatter=get_first),
|
||||
Mapping('subjectPlaces', remote_field='subject_places'),
|
||||
Mapping('isbn13', remote_field='isbn_13', formatter=get_first),
|
||||
Mapping('isbn10', remote_field='isbn_10', formatter=get_first),
|
||||
Mapping('lccn', formatter=get_first),
|
||||
Mapping(
|
||||
'oclcNumber', remote_field='oclc_numbers',
|
||||
|
@ -144,9 +144,34 @@ class Connector(AbstractConnector):
|
|||
# we can mass download edition data from OL to avoid repeatedly querying
|
||||
edition_options = self.load_edition_data(work.openlibrary_key)
|
||||
for edition_data in edition_options.get('entries'):
|
||||
# does this edition have ANY interesting data?
|
||||
if ignore_edition(edition_data):
|
||||
continue
|
||||
self.create_edition_from_data(work, edition_data)
|
||||
|
||||
|
||||
def ignore_edition(edition_data):
    ''' don't load a million editions that have no metadata

    Returns False (keep the edition) when ``edition_data`` has an isbn, an
    oclc number, a cover, or a non-empty ``languages`` value that does not
    mention english. Returns True (skip the edition) otherwise.
    ``edition_data`` only needs to support ``.get(key)``. '''
    # an isbn, we love to see it
    if edition_data.get('isbn_13') or edition_data.get('isbn_10'):
        return False
    # grudgingly, oclc can stay
    if edition_data.get('oclc_numbers'):
        return False
    # if it has a cover it can stay
    if edition_data.get('covers'):
        return False
    # keep non-english editions; the languages value is matched on its text
    # form, so any entry containing 'languages/eng' counts as english
    languages = edition_data.get('languages')
    if languages and 'languages/eng' not in str(languages):
        return False
    return True
|
||||
|
||||
|
||||
def get_description(description_blob):
|
||||
''' descriptions can be a string or a dict '''
|
||||
if isinstance(description_blob, dict):
|
||||
|
|
|
@ -190,3 +190,19 @@ class Openlibrary(TestCase):
|
|||
''' detect if the loaded json is an edition '''
|
||||
edition = pick_default_edition(self.edition_list_data['entries'])
|
||||
self.assertEqual(edition['key'], '/books/OL9788823M')
|
||||
|
||||
|
||||
def test_create_edition_from_data(self):
    ''' an edition built from the fixture data carries the expected fields '''
    parent = models.Work.objects.create(title='Hello')
    edition = self.connector.create_edition_from_data(
        parent, self.edition_data
    )

    # linkage and identifying fields
    self.assertEqual(edition.parent_work, parent)
    self.assertEqual(edition.title, 'Sabriel')
    self.assertEqual(edition.isbn_10, '0060273224')

    # descriptive metadata
    self.assertIsNotNone(edition.description)
    self.assertEqual(edition.languages[0], 'English')
    self.assertEqual(edition.publishers[0], 'Harper Trophy')
    self.assertEqual(edition.pages, 491)
    self.assertEqual(edition.subjects[0], 'Fantasy.')
    self.assertEqual(edition.physical_format, 'Hardcover')
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
"518848"
|
||||
]
|
||||
},
|
||||
"physical_format": "Hardcover",
|
||||
"lc_classifications": [
|
||||
"PZ7.N647 Sab 1995"
|
||||
],
|
||||
|
|
Loading…
Reference in a new issue