forked from mirrors/bookwyrm
Merge pull request #575 from mouse-reeve/openlibrary-editions
Ignore openlibrary editions with little to no metadata
This commit is contained in:
commit
a36de9026b
3 changed files with 45 additions and 3 deletions
|
@ -27,9 +27,9 @@ class Connector(AbstractConnector):
|
||||||
Mapping('series', formatter=get_first),
|
Mapping('series', formatter=get_first),
|
||||||
Mapping('seriesNumber', remote_field='series_number'),
|
Mapping('seriesNumber', remote_field='series_number'),
|
||||||
Mapping('subjects'),
|
Mapping('subjects'),
|
||||||
Mapping('subjectPlaces'),
|
Mapping('subjectPlaces', remote_field='subject_places'),
|
||||||
Mapping('isbn13', formatter=get_first),
|
Mapping('isbn13', remote_field='isbn_13', formatter=get_first),
|
||||||
Mapping('isbn10', formatter=get_first),
|
Mapping('isbn10', remote_field='isbn_10', formatter=get_first),
|
||||||
Mapping('lccn', formatter=get_first),
|
Mapping('lccn', formatter=get_first),
|
||||||
Mapping(
|
Mapping(
|
||||||
'oclcNumber', remote_field='oclc_numbers',
|
'oclcNumber', remote_field='oclc_numbers',
|
||||||
|
@ -144,9 +144,34 @@ class Connector(AbstractConnector):
|
||||||
# we can mass download edition data from OL to avoid repeatedly querying
|
# we can mass download edition data from OL to avoid repeatedly querying
|
||||||
edition_options = self.load_edition_data(work.openlibrary_key)
|
edition_options = self.load_edition_data(work.openlibrary_key)
|
||||||
for edition_data in edition_options.get('entries'):
|
for edition_data in edition_options.get('entries'):
|
||||||
|
# does this edition have ANY interesting data?
|
||||||
|
if ignore_edition(edition_data):
|
||||||
|
continue
|
||||||
self.create_edition_from_data(work, edition_data)
|
self.create_edition_from_data(work, edition_data)
|
||||||
|
|
||||||
|
|
||||||
|
def ignore_edition(edition_data):
    ''' don't load a million editions that have no metadata

    edition_data is a dict-like edition record; only its ``get`` method is
    used. Returns False (keep the edition) when any of ``isbn_13``,
    ``isbn_10``, ``oclc_numbers`` or ``covers`` holds a truthy value, or when
    ``languages`` is truthy and its string form does not contain
    ``languages/eng``. Returns True (skip the edition) otherwise.
    '''
    # an isbn, an oclc number or a cover is each enough to keep the edition
    for field in ('isbn_13', 'isbn_10', 'oclc_numbers', 'covers'):
        if edition_data.get(field):
            return False

    # keep non-english editions; the check is a substring match against the
    # stringified value, so it does not depend on how the entries are nested
    languages = edition_data.get('languages')
    if languages and 'languages/eng' not in str(languages):
        return False

    # nothing interesting was found, so the caller should skip this edition
    return True
|
||||||
|
|
||||||
|
|
||||||
def get_description(description_blob):
|
def get_description(description_blob):
|
||||||
''' descriptions can be a string or a dict '''
|
''' descriptions can be a string or a dict '''
|
||||||
if isinstance(description_blob, dict):
|
if isinstance(description_blob, dict):
|
||||||
|
|
|
@ -190,3 +190,19 @@ class Openlibrary(TestCase):
|
||||||
''' detect if the loaded json is an edition '''
|
''' detect if the loaded json is an edition '''
|
||||||
edition = pick_default_edition(self.edition_list_data['entries'])
|
edition = pick_default_edition(self.edition_list_data['entries'])
|
||||||
self.assertEqual(edition['key'], '/books/OL9788823M')
|
self.assertEqual(edition['key'], '/books/OL9788823M')
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_edition_from_data(self):
    ''' an edition built from the fixture data carries the expected fields '''
    parent = models.Work.objects.create(title='Hello')
    edition = self.connector.create_edition_from_data(
        parent,
        self.edition_data,
    )

    # the new edition hangs off the work it was created for
    self.assertEqual(edition.parent_work, parent)

    # scalar fields
    self.assertEqual(edition.title, 'Sabriel')
    self.assertEqual(edition.isbn_10, '0060273224')
    self.assertIsNotNone(edition.description)

    # list fields: only the first entry of each is checked
    self.assertEqual(edition.languages[0], 'English')
    self.assertEqual(edition.publishers[0], 'Harper Trophy')

    self.assertEqual(edition.pages, 491)
    self.assertEqual(edition.subjects[0], 'Fantasy.')
    self.assertEqual(edition.physical_format, 'Hardcover')
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
"518848"
|
"518848"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"physical_format": "Hardcover",
|
||||||
"lc_classifications": [
|
"lc_classifications": [
|
||||||
"PZ7.N647 Sab 1995"
|
"PZ7.N647 Sab 1995"
|
||||||
],
|
],
|
||||||
|
|
Loading…
Reference in a new issue