forked from mirrors/bookwyrm
Merge pull request #1680 from bookwyrm-social/openlibrary-import
Support csv import from Openlibrary
This commit is contained in:
commit
f5b7fcd0c7
13 changed files with 153 additions and 20 deletions
|
@ -256,9 +256,7 @@ def get_data(url, params=None, timeout=10):
|
||||||
params=params,
|
params=params,
|
||||||
headers={ # pylint: disable=line-too-long
|
headers={ # pylint: disable=line-too-long
|
||||||
"Accept": (
|
"Accept": (
|
||||||
"application/activity+json,"
|
'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
|
||||||
' application/ld+json; profile="https://www.w3.org/ns/activitystreams",'
|
|
||||||
" application/json; charset=utf-8"
|
|
||||||
),
|
),
|
||||||
"User-Agent": settings.USER_AGENT,
|
"User-Agent": settings.USER_AGENT,
|
||||||
},
|
},
|
||||||
|
@ -266,7 +264,7 @@ def get_data(url, params=None, timeout=10):
|
||||||
)
|
)
|
||||||
except RequestException as err:
|
except RequestException as err:
|
||||||
logger.exception(err)
|
logger.exception(err)
|
||||||
raise ConnectorException()
|
raise ConnectorException(err)
|
||||||
|
|
||||||
if not resp.ok:
|
if not resp.ok:
|
||||||
raise ConnectorException()
|
raise ConnectorException()
|
||||||
|
@ -274,7 +272,7 @@ def get_data(url, params=None, timeout=10):
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
logger.exception(err)
|
logger.exception(err)
|
||||||
raise ConnectorException()
|
raise ConnectorException(err)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -3,4 +3,5 @@
|
||||||
from .importer import Importer
|
from .importer import Importer
|
||||||
from .goodreads_import import GoodreadsImporter
|
from .goodreads_import import GoodreadsImporter
|
||||||
from .librarything_import import LibrarythingImporter
|
from .librarything_import import LibrarythingImporter
|
||||||
|
from .openlibrary_import import OpenLibraryImporter
|
||||||
from .storygraph_import import StorygraphImporter
|
from .storygraph_import import StorygraphImporter
|
||||||
|
|
|
@ -26,7 +26,7 @@ class Importer:
|
||||||
("authors", ["author", "authors", "primary author"]),
|
("authors", ["author", "authors", "primary author"]),
|
||||||
("isbn_10", ["isbn10", "isbn"]),
|
("isbn_10", ["isbn10", "isbn"]),
|
||||||
("isbn_13", ["isbn13", "isbn", "isbns"]),
|
("isbn_13", ["isbn13", "isbn", "isbns"]),
|
||||||
("shelf", ["shelf", "exclusive shelf", "read status"]),
|
("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
|
||||||
("review_name", ["review name"]),
|
("review_name", ["review name"]),
|
||||||
("review_body", ["my review", "review"]),
|
("review_body", ["my review", "review"]),
|
||||||
("rating", ["my rating", "rating", "star rating"]),
|
("rating", ["my rating", "rating", "star rating"]),
|
||||||
|
@ -36,9 +36,9 @@ class Importer:
|
||||||
]
|
]
|
||||||
date_fields = ["date_added", "date_started", "date_finished"]
|
date_fields = ["date_added", "date_started", "date_finished"]
|
||||||
shelf_mapping_guesses = {
|
shelf_mapping_guesses = {
|
||||||
"to-read": ["to-read"],
|
"to-read": ["to-read", "want to read"],
|
||||||
"read": ["read"],
|
"read": ["read", "already read"],
|
||||||
"reading": ["currently-reading", "reading"],
|
"reading": ["currently-reading", "reading", "currently reading"],
|
||||||
}
|
}
|
||||||
|
|
||||||
def create_job(self, user, csv_file, include_reviews, privacy):
|
def create_job(self, user, csv_file, include_reviews, privacy):
|
||||||
|
@ -90,7 +90,10 @@ class Importer:
|
||||||
|
|
||||||
def get_shelf(self, normalized_row):
|
def get_shelf(self, normalized_row):
|
||||||
"""determine which shelf to use"""
|
"""determine which shelf to use"""
|
||||||
shelf_name = normalized_row["shelf"]
|
shelf_name = normalized_row.get("shelf")
|
||||||
|
if not shelf_name:
|
||||||
|
return None
|
||||||
|
shelf_name = shelf_name.lower()
|
||||||
shelf = [
|
shelf = [
|
||||||
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
|
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
|
||||||
]
|
]
|
||||||
|
@ -106,6 +109,7 @@ class Importer:
|
||||||
user=user,
|
user=user,
|
||||||
include_reviews=original_job.include_reviews,
|
include_reviews=original_job.include_reviews,
|
||||||
privacy=original_job.privacy,
|
privacy=original_job.privacy,
|
||||||
|
source=original_job.source,
|
||||||
# TODO: allow users to adjust mappings
|
# TODO: allow users to adjust mappings
|
||||||
mappings=original_job.mappings,
|
mappings=original_job.mappings,
|
||||||
retry=True,
|
retry=True,
|
||||||
|
|
13
bookwyrm/importers/openlibrary_import.py
Normal file
13
bookwyrm/importers/openlibrary_import.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
""" handle reading a csv from openlibrary"""
|
||||||
|
from . import Importer
|
||||||
|
|
||||||
|
|
||||||
|
class OpenLibraryImporter(Importer):
    """csv downloads from OpenLibrary"""

    service = "OpenLibrary"

    def __init__(self, *args, **kwargs):
        """add the OpenLibrary-specific column guesses for this instance only"""
        # ``row_mappings_guesses`` is not defined on this class, so appending
        # to it in place would mutate the list inherited from ``Importer``:
        # every instantiation would add duplicate entries and the OpenLibrary
        # columns would leak into the other importers. Build a new list and
        # bind it to the instance instead.
        self.row_mappings_guesses = [
            *self.row_mappings_guesses,
            ("openlibrary_key", ["edition id"]),
            ("openlibrary_work_key", ["work id"]),
        ]
        super().__init__(*args, **kwargs)
|
|
@ -3,6 +3,6 @@ from . import Importer
|
||||||
|
|
||||||
|
|
||||||
class StorygraphImporter(Importer):
|
class StorygraphImporter(Importer):
|
||||||
"""csv downloads from librarything"""
|
"""csv downloads from Storygraph"""
|
||||||
|
|
||||||
service = "Storygraph"
|
service = "Storygraph"
|
||||||
|
|
|
@ -25,7 +25,7 @@ def construct_search_term(title, author):
|
||||||
# Strip brackets (usually series title from search term)
|
# Strip brackets (usually series title from search term)
|
||||||
title = re.sub(r"\s*\([^)]*\)\s*", "", title)
|
title = re.sub(r"\s*\([^)]*\)\s*", "", title)
|
||||||
# Open library doesn't like including author initials in search term.
|
# Open library doesn't like including author initials in search term.
|
||||||
author = re.sub(r"(\w\.)+\s*", "", author)
|
author = re.sub(r"(\w\.)+\s*", "", author) if author else ""
|
||||||
|
|
||||||
return " ".join([title, author])
|
return " ".join([title, author])
|
||||||
|
|
||||||
|
@ -88,7 +88,9 @@ class ImportItem(models.Model):
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.isbn:
|
if self.isbn:
|
||||||
self.book = self.get_book_from_isbn()
|
self.book = self.get_book_from_identifier()
|
||||||
|
elif self.openlibrary_key:
|
||||||
|
self.book = self.get_book_from_identifier(field="openlibrary_key")
|
||||||
else:
|
else:
|
||||||
# don't fall back on title/author search if isbn is present.
|
# don't fall back on title/author search if isbn is present.
|
||||||
# you're too likely to mismatch
|
# you're too likely to mismatch
|
||||||
|
@ -98,10 +100,10 @@ class ImportItem(models.Model):
|
||||||
else:
|
else:
|
||||||
self.book_guess = book
|
self.book_guess = book
|
||||||
|
|
||||||
def get_book_from_isbn(self):
|
def get_book_from_identifier(self, field="isbn"):
|
||||||
"""search by isbn"""
|
"""search by isbn or other unique identifier"""
|
||||||
search_result = connector_manager.first_search_result(
|
search_result = connector_manager.first_search_result(
|
||||||
self.isbn, min_confidence=0.999
|
getattr(self, field), min_confidence=0.999
|
||||||
)
|
)
|
||||||
if search_result:
|
if search_result:
|
||||||
# it's already in the right format
|
# it's already in the right format
|
||||||
|
@ -114,6 +116,8 @@ class ImportItem(models.Model):
|
||||||
|
|
||||||
def get_book_from_title_author(self):
|
def get_book_from_title_author(self):
|
||||||
"""search by title and author"""
|
"""search by title and author"""
|
||||||
|
if not self.title:
|
||||||
|
return None, 0
|
||||||
search_term = construct_search_term(self.title, self.author)
|
search_term = construct_search_term(self.title, self.author)
|
||||||
search_result = connector_manager.first_search_result(
|
search_result = connector_manager.first_search_result(
|
||||||
search_term, min_confidence=0.1
|
search_term, min_confidence=0.1
|
||||||
|
@ -145,6 +149,13 @@ class ImportItem(models.Model):
|
||||||
self.normalized_data.get("isbn_10")
|
self.normalized_data.get("isbn_10")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@property
def openlibrary_key(self):
    """prefer the edition identifier, falling back on the work key"""
    edition_key = self.normalized_data.get("openlibrary_key")
    if edition_key:
        return edition_key
    return self.normalized_data.get("openlibrary_work_key")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def shelf(self):
|
def shelf(self):
|
||||||
"""the goodreads shelf field"""
|
"""the goodreads shelf field"""
|
||||||
|
|
|
@ -31,6 +31,9 @@
|
||||||
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
|
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
|
||||||
LibraryThing (TSV)
|
LibraryThing (TSV)
|
||||||
</option>
|
</option>
|
||||||
|
<option value="OpenLibrary" {% if current == 'OpenLibrary' %}selected{% endif %}>
|
||||||
|
OpenLibrary (CSV)
|
||||||
|
</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
|
|
|
@ -105,6 +105,11 @@
|
||||||
<th>
|
<th>
|
||||||
{% trans "ISBN" %}
|
{% trans "ISBN" %}
|
||||||
</th>
|
</th>
|
||||||
|
{% if job.source == "OpenLibrary" %}
|
||||||
|
<th>
|
||||||
|
{% trans "Openlibrary key" %}
|
||||||
|
</th>
|
||||||
|
{% endif %}
|
||||||
<th>
|
<th>
|
||||||
{% trans "Author" %}
|
{% trans "Author" %}
|
||||||
</th>
|
</th>
|
||||||
|
@ -145,6 +150,11 @@
|
||||||
<td>
|
<td>
|
||||||
{{ item.isbn|default:'' }}
|
{{ item.isbn|default:'' }}
|
||||||
</td>
|
</td>
|
||||||
|
{% if job.source == "OpenLibrary" %}
|
||||||
|
<td>
|
||||||
|
{{ item.openlibrary_key }}
|
||||||
|
</td>
|
||||||
|
{% endif %}
|
||||||
<td>
|
<td>
|
||||||
{{ item.normalized_data.authors }}
|
{{ item.normalized_data.authors }}
|
||||||
</td>
|
</td>
|
||||||
|
|
5
bookwyrm/tests/data/openlibrary.csv
Normal file
5
bookwyrm/tests/data/openlibrary.csv
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Work Id,Edition Id,Bookshelf
|
||||||
|
OL102749W,,Currently Reading
|
||||||
|
OL361393W,OL7798182M,Currently Reading
|
||||||
|
OL1652392W,OL7194114M,Want to Read
|
||||||
|
OL17062644W,OL25726365M,Already Read
|
|
|
@ -128,7 +128,7 @@ class GenericImporter(TestCase):
|
||||||
|
|
||||||
import_item = models.ImportItem.objects.get(job=import_job, index=0)
|
import_item = models.ImportItem.objects.get(job=import_job, index=0)
|
||||||
with patch(
|
with patch(
|
||||||
"bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
|
"bookwyrm.models.import_job.ImportItem.get_book_from_identifier"
|
||||||
) as resolve:
|
) as resolve:
|
||||||
resolve.return_value = self.book
|
resolve.return_value = self.book
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ class GenericImporter(TestCase):
|
||||||
).exists()
|
).exists()
|
||||||
)
|
)
|
||||||
|
|
||||||
item = items[3]
|
item = items.last()
|
||||||
item.fail_reason = "hello"
|
item.fail_reason = "hello"
|
||||||
item.save()
|
item.save()
|
||||||
item.update_job()
|
item.update_job()
|
||||||
|
|
85
bookwyrm/tests/importers/test_openlibrary_import.py
Normal file
85
bookwyrm/tests/importers/test_openlibrary_import.py
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
""" testing import """
|
||||||
|
import pathlib
|
||||||
|
from unittest.mock import patch
|
||||||
|
import datetime
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
from bookwyrm import models
|
||||||
|
from bookwyrm.importers import OpenLibraryImporter
|
||||||
|
from bookwyrm.importers.importer import handle_imported_book
|
||||||
|
|
||||||
|
|
||||||
|
def make_date(*parts):
    """build a UTC-aware datetime from positional date components"""
    utc_kwargs = {"tzinfo": pytz.UTC}
    return datetime.datetime(*parts, **utc_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# pylint: disable=consider-using-with
|
||||||
|
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class OpenLibraryImport(TestCase):
    """importing from openlibrary csv"""

    def setUp(self):
        """use a test csv"""
        self.importer = OpenLibraryImporter()
        datafile = pathlib.Path(__file__).parent.joinpath("../data/openlibrary.csv")
        self.csv = open(datafile, "r", encoding=self.importer.encoding)
        # the handle has to outlive setUp, so it cannot live in a ``with``
        # block; register a cleanup so it is closed after every test
        self.addCleanup(self.csv.close)
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ):
            self.local_user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )

        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 4)
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].data["Work Id"], "OL102749W")
        self.assertEqual(import_items[1].data["Work Id"], "OL361393W")
        self.assertEqual(import_items[1].data["Edition Id"], "OL7798182M")

        # the first fixture row has no edition id, only a work id
        self.assertEqual(import_items[0].normalized_data["shelf"], "reading")
        self.assertEqual(import_items[0].normalized_data["openlibrary_key"], "")
        self.assertEqual(
            import_items[0].normalized_data["openlibrary_work_key"], "OL102749W"
        )
        self.assertEqual(
            import_items[1].normalized_data["openlibrary_key"], "OL7798182M"
        )
        self.assertEqual(import_items[2].normalized_data["shelf"], "to-read")
        self.assertEqual(import_items[3].normalized_data["shelf"], "read")

    def test_handle_imported_book(self, *_):
        """openlibrary import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(identifier="reading").first()
        self.assertIsNone(shelf.books.first())

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
|
|
@ -139,7 +139,7 @@ class ImportJob(TestCase):
|
||||||
self.assertEqual(item.reads, expected)
|
self.assertEqual(item.reads, expected)
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_get_book_from_isbn(self):
|
def test_get_book_from_identifier(self):
|
||||||
"""search and load books by isbn (9780356506999)"""
|
"""search and load books by isbn (9780356506999)"""
|
||||||
item = models.ImportItem.objects.create(
|
item = models.ImportItem.objects.create(
|
||||||
index=1,
|
index=1,
|
||||||
|
@ -197,6 +197,6 @@ class ImportJob(TestCase):
|
||||||
with patch(
|
with patch(
|
||||||
"bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data"
|
"bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data"
|
||||||
):
|
):
|
||||||
book = item.get_book_from_isbn()
|
book = item.get_book_from_identifier()
|
||||||
|
|
||||||
self.assertEqual(book.title, "Sabriel")
|
self.assertEqual(book.title, "Sabriel")
|
||||||
|
|
|
@ -14,6 +14,7 @@ from bookwyrm.importers import (
|
||||||
LibrarythingImporter,
|
LibrarythingImporter,
|
||||||
GoodreadsImporter,
|
GoodreadsImporter,
|
||||||
StorygraphImporter,
|
StorygraphImporter,
|
||||||
|
OpenLibraryImporter,
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: disable= no-self-use
|
# pylint: disable= no-self-use
|
||||||
|
@ -49,6 +50,8 @@ class Import(View):
|
||||||
importer = LibrarythingImporter()
|
importer = LibrarythingImporter()
|
||||||
elif source == "Storygraph":
|
elif source == "Storygraph":
|
||||||
importer = StorygraphImporter()
|
importer = StorygraphImporter()
|
||||||
|
elif source == "OpenLibrary":
|
||||||
|
importer = OpenLibraryImporter()
|
||||||
else:
|
else:
|
||||||
# Default : Goodreads
|
# Default : Goodreads
|
||||||
importer = GoodreadsImporter()
|
importer = GoodreadsImporter()
|
||||||
|
|
Loading…
Reference in a new issue