Merge pull request #1680 from bookwyrm-social/openlibrary-import

Support csv import from Openlibrary
This commit is contained in:
Mouse Reeve 2021-12-14 20:00:55 -08:00 committed by GitHub
commit f5b7fcd0c7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 153 additions and 20 deletions

View file

@ -256,9 +256,7 @@ def get_data(url, params=None, timeout=10):
params=params, params=params,
headers={ # pylint: disable=line-too-long headers={ # pylint: disable=line-too-long
"Accept": ( "Accept": (
"application/activity+json," 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
' application/ld+json; profile="https://www.w3.org/ns/activitystreams",'
" application/json; charset=utf-8"
), ),
"User-Agent": settings.USER_AGENT, "User-Agent": settings.USER_AGENT,
}, },
@ -266,7 +264,7 @@ def get_data(url, params=None, timeout=10):
) )
except RequestException as err: except RequestException as err:
logger.exception(err) logger.exception(err)
raise ConnectorException() raise ConnectorException(err)
if not resp.ok: if not resp.ok:
raise ConnectorException() raise ConnectorException()
@ -274,7 +272,7 @@ def get_data(url, params=None, timeout=10):
data = resp.json() data = resp.json()
except ValueError as err: except ValueError as err:
logger.exception(err) logger.exception(err)
raise ConnectorException() raise ConnectorException(err)
return data return data

View file

@ -3,4 +3,5 @@
from .importer import Importer from .importer import Importer
from .goodreads_import import GoodreadsImporter from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter from .librarything_import import LibrarythingImporter
from .openlibrary_import import OpenLibraryImporter
from .storygraph_import import StorygraphImporter from .storygraph_import import StorygraphImporter

View file

@ -26,7 +26,7 @@ class Importer:
("authors", ["author", "authors", "primary author"]), ("authors", ["author", "authors", "primary author"]),
("isbn_10", ["isbn10", "isbn"]), ("isbn_10", ["isbn10", "isbn"]),
("isbn_13", ["isbn13", "isbn", "isbns"]), ("isbn_13", ["isbn13", "isbn", "isbns"]),
("shelf", ["shelf", "exclusive shelf", "read status"]), ("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
("review_name", ["review name"]), ("review_name", ["review name"]),
("review_body", ["my review", "review"]), ("review_body", ["my review", "review"]),
("rating", ["my rating", "rating", "star rating"]), ("rating", ["my rating", "rating", "star rating"]),
@ -36,9 +36,9 @@ class Importer:
] ]
date_fields = ["date_added", "date_started", "date_finished"] date_fields = ["date_added", "date_started", "date_finished"]
shelf_mapping_guesses = { shelf_mapping_guesses = {
"to-read": ["to-read"], "to-read": ["to-read", "want to read"],
"read": ["read"], "read": ["read", "already read"],
"reading": ["currently-reading", "reading"], "reading": ["currently-reading", "reading", "currently reading"],
} }
def create_job(self, user, csv_file, include_reviews, privacy): def create_job(self, user, csv_file, include_reviews, privacy):
@ -90,7 +90,10 @@ class Importer:
def get_shelf(self, normalized_row): def get_shelf(self, normalized_row):
"""determine which shelf to use""" """determine which shelf to use"""
shelf_name = normalized_row["shelf"] shelf_name = normalized_row.get("shelf")
if not shelf_name:
return None
shelf_name = shelf_name.lower()
shelf = [ shelf = [
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
] ]
@ -106,6 +109,7 @@ class Importer:
user=user, user=user,
include_reviews=original_job.include_reviews, include_reviews=original_job.include_reviews,
privacy=original_job.privacy, privacy=original_job.privacy,
source=original_job.source,
# TODO: allow users to adjust mappings # TODO: allow users to adjust mappings
mappings=original_job.mappings, mappings=original_job.mappings,
retry=True, retry=True,

View file

@ -0,0 +1,13 @@
""" handle reading a csv from openlibrary"""
from . import Importer
class OpenLibraryImporter(Importer):
    """csv downloads from OpenLibrary"""

    service = "OpenLibrary"

    # Give this subclass its own list instead of appending to the inherited
    # one at construction time. Appending mutated the list object shared with
    # Importer, so every OpenLibraryImporter() call added duplicate entries
    # that were also visible through the base class and its other subclasses.
    row_mappings_guesses = [
        *Importer.row_mappings_guesses,
        # OpenLibrary exports identify the edition and the work in separate
        # columns; both are mapped so either one can be read from a row.
        ("openlibrary_key", ["edition id"]),
        ("openlibrary_work_key", ["work id"]),
    ]

View file

@ -3,6 +3,6 @@ from . import Importer
class StorygraphImporter(Importer): class StorygraphImporter(Importer):
"""csv downloads from librarything""" """csv downloads from Storygraph"""
service = "Storygraph" service = "Storygraph"

View file

@ -25,7 +25,7 @@ def construct_search_term(title, author):
# Strip brackets (usually series title from search term) # Strip brackets (usually series title from search term)
title = re.sub(r"\s*\([^)]*\)\s*", "", title) title = re.sub(r"\s*\([^)]*\)\s*", "", title)
# Open library doesn't like including author initials in search term. # Open library doesn't like including author initials in search term.
author = re.sub(r"(\w\.)+\s*", "", author) author = re.sub(r"(\w\.)+\s*", "", author) if author else ""
return " ".join([title, author]) return " ".join([title, author])
@ -88,7 +88,9 @@ class ImportItem(models.Model):
return return
if self.isbn: if self.isbn:
self.book = self.get_book_from_isbn() self.book = self.get_book_from_identifier()
elif self.openlibrary_key:
self.book = self.get_book_from_identifier(field="openlibrary_key")
else: else:
# don't fall back on title/author search if isbn is present. # don't fall back on title/author search if isbn is present.
# you're too likely to mismatch # you're too likely to mismatch
@ -98,10 +100,10 @@ class ImportItem(models.Model):
else: else:
self.book_guess = book self.book_guess = book
def get_book_from_isbn(self): def get_book_from_identifier(self, field="isbn"):
"""search by isbn""" """search by isbn or other unique identifier"""
search_result = connector_manager.first_search_result( search_result = connector_manager.first_search_result(
self.isbn, min_confidence=0.999 getattr(self, field), min_confidence=0.999
) )
if search_result: if search_result:
# it's already in the right format # it's already in the right format
@ -114,6 +116,8 @@ class ImportItem(models.Model):
def get_book_from_title_author(self): def get_book_from_title_author(self):
"""search by title and author""" """search by title and author"""
if not self.title:
return None, 0
search_term = construct_search_term(self.title, self.author) search_term = construct_search_term(self.title, self.author)
search_result = connector_manager.first_search_result( search_result = connector_manager.first_search_result(
search_term, min_confidence=0.1 search_term, min_confidence=0.1
@ -145,6 +149,13 @@ class ImportItem(models.Model):
self.normalized_data.get("isbn_10") self.normalized_data.get("isbn_10")
) )
@property
def openlibrary_key(self):
"""the edition identifier is preferable to the work key"""
return self.normalized_data.get("openlibrary_key") or self.normalized_data.get(
"openlibrary_work_key"
)
@property @property
def shelf(self): def shelf(self):
"""the goodreads shelf field""" """the goodreads shelf field"""

View file

@ -31,6 +31,9 @@
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}> <option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
LibraryThing (TSV) LibraryThing (TSV)
</option> </option>
<option value="OpenLibrary" {% if current == 'OpenLibrary' %}selected{% endif %}>
OpenLibrary (CSV)
</option>
</select> </select>
</div> </div>
<div class="field"> <div class="field">

View file

@ -105,6 +105,11 @@
<th> <th>
{% trans "ISBN" %} {% trans "ISBN" %}
</th> </th>
{% if job.source == "OpenLibrary" %}
<th>
{% trans "Openlibrary key" %}
</th>
{% endif %}
<th> <th>
{% trans "Author" %} {% trans "Author" %}
</th> </th>
@ -145,6 +150,11 @@
<td> <td>
{{ item.isbn|default:'' }} {{ item.isbn|default:'' }}
</td> </td>
{% if job.source == "OpenLibrary" %}
<td>
{{ item.openlibrary_key }}
</td>
{% endif %}
<td> <td>
{{ item.normalized_data.authors }} {{ item.normalized_data.authors }}
</td> </td>

View file

@ -0,0 +1,5 @@
Work Id,Edition Id,Bookshelf
OL102749W,,Currently Reading
OL361393W,OL7798182M,Currently Reading
OL1652392W,OL7194114M,Want to Read
OL17062644W,OL25726365M,Already Read
1 Work Id Edition Id Bookshelf
2 OL102749W Currently Reading
3 OL361393W OL7798182M Currently Reading
4 OL1652392W OL7194114M Want to Read
5 OL17062644W OL25726365M Already Read

View file

@ -128,7 +128,7 @@ class GenericImporter(TestCase):
import_item = models.ImportItem.objects.get(job=import_job, index=0) import_item = models.ImportItem.objects.get(job=import_job, index=0)
with patch( with patch(
"bookwyrm.models.import_job.ImportItem.get_book_from_isbn" "bookwyrm.models.import_job.ImportItem.get_book_from_identifier"
) as resolve: ) as resolve:
resolve.return_value = self.book resolve.return_value = self.book
@ -158,7 +158,7 @@ class GenericImporter(TestCase):
).exists() ).exists()
) )
item = items[3] item = items.last()
item.fail_reason = "hello" item.fail_reason = "hello"
item.save() item.save()
item.update_job() item.update_job()

View file

@ -0,0 +1,85 @@
""" testing import """
import pathlib
from unittest.mock import patch
import datetime
import pytz
from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import OpenLibraryImporter
from bookwyrm.importers.importer import handle_imported_book
def make_date(*args):
    """build a UTC-aware datetime from positional date/time components"""
    date_parts = tuple(args)
    return datetime.datetime(*date_parts, tzinfo=pytz.UTC)
# pylint: disable=consider-using-with
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class OpenLibraryImport(TestCase):
    """importing from openlibrary csv"""

    def setUp(self):
        """open the test csv and create a local user plus a book to match against"""
        self.importer = OpenLibraryImporter()
        datafile = pathlib.Path(__file__).parent.joinpath("../data/openlibrary.csv")
        self.csv = open(datafile, "r", encoding=self.importer.encoding)
        # The handle has to stay open for the test methods, so it cannot live
        # in a `with` block; register the close so it is not leaked.
        self.addCleanup(self.csv.close)
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ):
            self.local_user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )

        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        # one item per data row in the fixture
        self.assertEqual(len(import_items), 4)
        self.assertEqual(import_items[0].index, 0)
        # raw csv columns are kept under their original header names
        self.assertEqual(import_items[0].data["Work Id"], "OL102749W")
        self.assertEqual(import_items[1].data["Work Id"], "OL361393W")
        self.assertEqual(import_items[1].data["Edition Id"], "OL7798182M")

        # "Currently Reading" is normalized to the "reading" shelf
        self.assertEqual(import_items[0].normalized_data["shelf"], "reading")
        # the first row has no edition id, only a work id
        self.assertEqual(import_items[0].normalized_data["openlibrary_key"], "")
        self.assertEqual(
            import_items[0].normalized_data["openlibrary_work_key"], "OL102749W"
        )
        self.assertEqual(
            import_items[1].normalized_data["openlibrary_key"], "OL7798182M"
        )
        # "Want to Read" and "Already Read" map to "to-read" and "read"
        self.assertEqual(import_items[2].normalized_data["shelf"], "to-read")
        self.assertEqual(import_items[3].normalized_data["shelf"], "read")

    def test_handle_imported_book(self, *_):
        """openlibrary import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(identifier="reading").first()
        self.assertIsNone(shelf.books.first())

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.first()
        # attach the book directly instead of resolving it through a connector
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)

View file

@ -139,7 +139,7 @@ class ImportJob(TestCase):
self.assertEqual(item.reads, expected) self.assertEqual(item.reads, expected)
@responses.activate @responses.activate
def test_get_book_from_isbn(self): def test_get_book_from_identifier(self):
"""search and load books by isbn (9780356506999)""" """search and load books by isbn (9780356506999)"""
item = models.ImportItem.objects.create( item = models.ImportItem.objects.create(
index=1, index=1,
@ -197,6 +197,6 @@ class ImportJob(TestCase):
with patch( with patch(
"bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data" "bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data"
): ):
book = item.get_book_from_isbn() book = item.get_book_from_identifier()
self.assertEqual(book.title, "Sabriel") self.assertEqual(book.title, "Sabriel")

View file

@ -14,6 +14,7 @@ from bookwyrm.importers import (
LibrarythingImporter, LibrarythingImporter,
GoodreadsImporter, GoodreadsImporter,
StorygraphImporter, StorygraphImporter,
OpenLibraryImporter,
) )
# pylint: disable= no-self-use # pylint: disable= no-self-use
@ -49,6 +50,8 @@ class Import(View):
importer = LibrarythingImporter() importer = LibrarythingImporter()
elif source == "Storygraph": elif source == "Storygraph":
importer = StorygraphImporter() importer = StorygraphImporter()
elif source == "OpenLibrary":
importer = OpenLibraryImporter()
else: else:
# Default : Goodreads # Default : Goodreads
importer = GoodreadsImporter() importer = GoodreadsImporter()