mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-27 18:40:31 +00:00
Merge pull request #1680 from bookwyrm-social/openlibrary-import
Support CSV import from OpenLibrary
This commit is contained in:
commit
f5b7fcd0c7
13 changed files with 153 additions and 20 deletions
|
@ -256,9 +256,7 @@ def get_data(url, params=None, timeout=10):
|
|||
params=params,
|
||||
headers={ # pylint: disable=line-too-long
|
||||
"Accept": (
|
||||
"application/activity+json,"
|
||||
' application/ld+json; profile="https://www.w3.org/ns/activitystreams",'
|
||||
" application/json; charset=utf-8"
|
||||
'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
|
||||
),
|
||||
"User-Agent": settings.USER_AGENT,
|
||||
},
|
||||
|
@ -266,7 +264,7 @@ def get_data(url, params=None, timeout=10):
|
|||
)
|
||||
except RequestException as err:
|
||||
logger.exception(err)
|
||||
raise ConnectorException()
|
||||
raise ConnectorException(err)
|
||||
|
||||
if not resp.ok:
|
||||
raise ConnectorException()
|
||||
|
@ -274,7 +272,7 @@ def get_data(url, params=None, timeout=10):
|
|||
data = resp.json()
|
||||
except ValueError as err:
|
||||
logger.exception(err)
|
||||
raise ConnectorException()
|
||||
raise ConnectorException(err)
|
||||
|
||||
return data
|
||||
|
||||
|
|
|
@ -3,4 +3,5 @@
|
|||
from .importer import Importer
|
||||
from .goodreads_import import GoodreadsImporter
|
||||
from .librarything_import import LibrarythingImporter
|
||||
from .openlibrary_import import OpenLibraryImporter
|
||||
from .storygraph_import import StorygraphImporter
|
||||
|
|
|
@ -26,7 +26,7 @@ class Importer:
|
|||
("authors", ["author", "authors", "primary author"]),
|
||||
("isbn_10", ["isbn10", "isbn"]),
|
||||
("isbn_13", ["isbn13", "isbn", "isbns"]),
|
||||
("shelf", ["shelf", "exclusive shelf", "read status"]),
|
||||
("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
|
||||
("review_name", ["review name"]),
|
||||
("review_body", ["my review", "review"]),
|
||||
("rating", ["my rating", "rating", "star rating"]),
|
||||
|
@ -36,9 +36,9 @@ class Importer:
|
|||
]
|
||||
date_fields = ["date_added", "date_started", "date_finished"]
|
||||
shelf_mapping_guesses = {
|
||||
"to-read": ["to-read"],
|
||||
"read": ["read"],
|
||||
"reading": ["currently-reading", "reading"],
|
||||
"to-read": ["to-read", "want to read"],
|
||||
"read": ["read", "already read"],
|
||||
"reading": ["currently-reading", "reading", "currently reading"],
|
||||
}
|
||||
|
||||
def create_job(self, user, csv_file, include_reviews, privacy):
|
||||
|
@ -90,7 +90,10 @@ class Importer:
|
|||
|
||||
def get_shelf(self, normalized_row):
|
||||
"""determine which shelf to use"""
|
||||
shelf_name = normalized_row["shelf"]
|
||||
shelf_name = normalized_row.get("shelf")
|
||||
if not shelf_name:
|
||||
return None
|
||||
shelf_name = shelf_name.lower()
|
||||
shelf = [
|
||||
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
|
||||
]
|
||||
|
@ -106,6 +109,7 @@ class Importer:
|
|||
user=user,
|
||||
include_reviews=original_job.include_reviews,
|
||||
privacy=original_job.privacy,
|
||||
source=original_job.source,
|
||||
# TODO: allow users to adjust mappings
|
||||
mappings=original_job.mappings,
|
||||
retry=True,
|
||||
|
|
13
bookwyrm/importers/openlibrary_import.py
Normal file
13
bookwyrm/importers/openlibrary_import.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
""" handle reading a csv from openlibrary"""
|
||||
from . import Importer
|
||||
|
||||
|
||||
class OpenLibraryImporter(Importer):
    """csv downloads from OpenLibrary"""

    service = "OpenLibrary"

    def __init__(self, *args, **kwargs):
        """register the OpenLibrary id columns, then defer to Importer"""
        # row_mappings_guesses is read here before super().__init__() runs, so
        # it resolves to the list inherited from Importer. Appending to it in
        # place would modify that shared list: every new OpenLibraryImporter
        # would add duplicate entries, and the other importers would start
        # guessing OpenLibrary columns too. Build a per-instance copy instead.
        self.row_mappings_guesses = [
            *self.row_mappings_guesses,
            ("openlibrary_key", ["edition id"]),
            ("openlibrary_work_key", ["work id"]),
        ]
        super().__init__(*args, **kwargs)
|
|
@ -3,6 +3,6 @@ from . import Importer
|
|||
|
||||
|
||||
class StorygraphImporter(Importer):
|
||||
"""csv downloads from librarything"""
|
||||
"""csv downloads from Storygraph"""
|
||||
|
||||
service = "Storygraph"
|
||||
|
|
|
@ -25,7 +25,7 @@ def construct_search_term(title, author):
|
|||
# Strip brackets (usually series title from search term)
|
||||
title = re.sub(r"\s*\([^)]*\)\s*", "", title)
|
||||
# Open library doesn't like including author initials in search term.
|
||||
author = re.sub(r"(\w\.)+\s*", "", author)
|
||||
author = re.sub(r"(\w\.)+\s*", "", author) if author else ""
|
||||
|
||||
return " ".join([title, author])
|
||||
|
||||
|
@ -88,7 +88,9 @@ class ImportItem(models.Model):
|
|||
return
|
||||
|
||||
if self.isbn:
|
||||
self.book = self.get_book_from_isbn()
|
||||
self.book = self.get_book_from_identifier()
|
||||
elif self.openlibrary_key:
|
||||
self.book = self.get_book_from_identifier(field="openlibrary_key")
|
||||
else:
|
||||
# don't fall back on title/author search if isbn is present.
|
||||
# you're too likely to mismatch
|
||||
|
@ -98,10 +100,10 @@ class ImportItem(models.Model):
|
|||
else:
|
||||
self.book_guess = book
|
||||
|
||||
def get_book_from_isbn(self):
|
||||
"""search by isbn"""
|
||||
def get_book_from_identifier(self, field="isbn"):
|
||||
"""search by isbn or other unique identifier"""
|
||||
search_result = connector_manager.first_search_result(
|
||||
self.isbn, min_confidence=0.999
|
||||
getattr(self, field), min_confidence=0.999
|
||||
)
|
||||
if search_result:
|
||||
# it's already in the right format
|
||||
|
@ -114,6 +116,8 @@ class ImportItem(models.Model):
|
|||
|
||||
def get_book_from_title_author(self):
|
||||
"""search by title and author"""
|
||||
if not self.title:
|
||||
return None, 0
|
||||
search_term = construct_search_term(self.title, self.author)
|
||||
search_result = connector_manager.first_search_result(
|
||||
search_term, min_confidence=0.1
|
||||
|
@ -145,6 +149,13 @@ class ImportItem(models.Model):
|
|||
self.normalized_data.get("isbn_10")
|
||||
)
|
||||
|
||||
@property
def openlibrary_key(self):
    """the edition key if the row has one, otherwise the work key"""
    edition_key = self.normalized_data.get("openlibrary_key")
    if edition_key:
        return edition_key
    # no (or empty) edition id in this row, so use the work-level identifier
    return self.normalized_data.get("openlibrary_work_key")
|
||||
|
||||
@property
|
||||
def shelf(self):
|
||||
"""the goodreads shelf field"""
|
||||
|
|
|
@ -31,6 +31,9 @@
|
|||
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
|
||||
LibraryThing (TSV)
|
||||
</option>
|
||||
<option value="OpenLibrary" {% if current == 'OpenLibrary' %}selected{% endif %}>
|
||||
OpenLibrary (CSV)
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
|
|
|
@ -105,6 +105,11 @@
|
|||
<th>
|
||||
{% trans "ISBN" %}
|
||||
</th>
|
||||
{% if job.source == "OpenLibrary" %}
|
||||
<th>
|
||||
{% trans "Openlibrary key" %}
|
||||
</th>
|
||||
{% endif %}
|
||||
<th>
|
||||
{% trans "Author" %}
|
||||
</th>
|
||||
|
@ -145,6 +150,11 @@
|
|||
<td>
|
||||
{{ item.isbn|default:'' }}
|
||||
</td>
|
||||
{% if job.source == "OpenLibrary" %}
|
||||
<td>
|
||||
{{ item.openlibrary_key }}
|
||||
</td>
|
||||
{% endif %}
|
||||
<td>
|
||||
{{ item.normalized_data.authors }}
|
||||
</td>
|
||||
|
|
5
bookwyrm/tests/data/openlibrary.csv
Normal file
5
bookwyrm/tests/data/openlibrary.csv
Normal file
|
@ -0,0 +1,5 @@
|
|||
Work Id,Edition Id,Bookshelf
|
||||
OL102749W,,Currently Reading
|
||||
OL361393W,OL7798182M,Currently Reading
|
||||
OL1652392W,OL7194114M,Want to Read
|
||||
OL17062644W,OL25726365M,Already Read
|
|
|
@ -128,7 +128,7 @@ class GenericImporter(TestCase):
|
|||
|
||||
import_item = models.ImportItem.objects.get(job=import_job, index=0)
|
||||
with patch(
|
||||
"bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
|
||||
"bookwyrm.models.import_job.ImportItem.get_book_from_identifier"
|
||||
) as resolve:
|
||||
resolve.return_value = self.book
|
||||
|
||||
|
@ -158,7 +158,7 @@ class GenericImporter(TestCase):
|
|||
).exists()
|
||||
)
|
||||
|
||||
item = items[3]
|
||||
item = items.last()
|
||||
item.fail_reason = "hello"
|
||||
item.save()
|
||||
item.update_job()
|
||||
|
|
85
bookwyrm/tests/importers/test_openlibrary_import.py
Normal file
85
bookwyrm/tests/importers/test_openlibrary_import.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
""" testing import """
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
import datetime
|
||||
import pytz
|
||||
|
||||
from django.test import TestCase
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.importers import OpenLibraryImporter
|
||||
from bookwyrm.importers.importer import handle_imported_book
|
||||
|
||||
|
||||
def make_date(*args):
    """build a UTC-aware datetime from positional datetime arguments"""
    utc_kwargs = {"tzinfo": pytz.UTC}
    return datetime.datetime(*args, **utc_kwargs)
|
||||
|
||||
|
||||
# pylint: disable=consider-using-with
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class OpenLibraryImport(TestCase):
    """importing from openlibrary csv"""

    def setUp(self):
        """open the test csv and create a local user and a book to import"""
        self.importer = OpenLibraryImporter()
        datafile = pathlib.Path(__file__).parent.joinpath("../data/openlibrary.csv")
        self.csv = open(datafile, "r", encoding=self.importer.encoding)
        # the handle has to stay open for the test methods, so it cannot live
        # in a `with` block; register the close so it is not leaked per test
        self.addCleanup(self.csv.close)

        # the class-level patches only wrap the test_* methods, so the tasks
        # triggered by creating a user have to be patched here explicitly
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ):
            self.local_user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )

        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 4)

        # raw csv columns are stored untouched
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].data["Work Id"], "OL102749W")
        self.assertEqual(import_items[1].data["Work Id"], "OL361393W")
        self.assertEqual(import_items[1].data["Edition Id"], "OL7798182M")

        # normalized fields: shelf names and openlibrary identifiers
        self.assertEqual(import_items[0].normalized_data["shelf"], "reading")
        self.assertEqual(import_items[0].normalized_data["openlibrary_key"], "")
        self.assertEqual(
            import_items[0].normalized_data["openlibrary_work_key"], "OL102749W"
        )
        self.assertEqual(
            import_items[1].normalized_data["openlibrary_key"], "OL7798182M"
        )
        self.assertEqual(import_items[2].normalized_data["shelf"], "to-read")
        self.assertEqual(import_items[3].normalized_data["shelf"], "read")

    def test_handle_imported_book(self, *_):
        """openlibrary import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(identifier="reading").first()
        self.assertIsNone(shelf.books.first())

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
|
|
@ -139,7 +139,7 @@ class ImportJob(TestCase):
|
|||
self.assertEqual(item.reads, expected)
|
||||
|
||||
@responses.activate
|
||||
def test_get_book_from_isbn(self):
|
||||
def test_get_book_from_identifier(self):
|
||||
"""search and load books by isbn (9780356506999)"""
|
||||
item = models.ImportItem.objects.create(
|
||||
index=1,
|
||||
|
@ -197,6 +197,6 @@ class ImportJob(TestCase):
|
|||
with patch(
|
||||
"bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data"
|
||||
):
|
||||
book = item.get_book_from_isbn()
|
||||
book = item.get_book_from_identifier()
|
||||
|
||||
self.assertEqual(book.title, "Sabriel")
|
||||
|
|
|
@ -14,6 +14,7 @@ from bookwyrm.importers import (
|
|||
LibrarythingImporter,
|
||||
GoodreadsImporter,
|
||||
StorygraphImporter,
|
||||
OpenLibraryImporter,
|
||||
)
|
||||
|
||||
# pylint: disable= no-self-use
|
||||
|
@ -49,6 +50,8 @@ class Import(View):
|
|||
importer = LibrarythingImporter()
|
||||
elif source == "Storygraph":
|
||||
importer = StorygraphImporter()
|
||||
elif source == "OpenLibrary":
|
||||
importer = OpenLibraryImporter()
|
||||
else:
|
||||
# Default : Goodreads
|
||||
importer = GoodreadsImporter()
|
||||
|
|
Loading…
Reference in a new issue