Merge pull request #1680 from bookwyrm-social/openlibrary-import

Support csv import from Openlibrary
2021-12-14 20:00:55 -08:00 · 2021-12-14 20:00:55 -08:00 · f5b7fcd0c7
commit f5b7fcd0c7
parent 778027688a e6d500df6b
13 changed files with 153 additions and 20 deletions
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@ -256,9 +256,7 @@ def get_data(url, params=None, timeout=10):
            params=params,
            headers={  # pylint: disable=line-too-long
                "Accept": (
-                    "application/activity+json,"
+                    'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
                    ' application/ld+json; profile="https://www.w3.org/ns/activitystreams",'
                    " application/json; charset=utf-8"
                ),
                "User-Agent": settings.USER_AGENT,
            },
@ -266,7 +264,7 @@ def get_data(url, params=None, timeout=10):
        )
    except RequestException as err:
        logger.exception(err)
-        raise ConnectorException()
+        raise ConnectorException(err)
    if not resp.ok:
        raise ConnectorException()
@ -274,7 +272,7 @@ def get_data(url, params=None, timeout=10):
        data = resp.json()
    except ValueError as err:
        logger.exception(err)
-        raise ConnectorException()
+        raise ConnectorException(err)
    return data
--- a/bookwyrm/importers/init.py
+++ b/bookwyrm/importers/init.py
@ -3,4 +3,5 @@
 from .importer import Importer
 from .goodreads_import import GoodreadsImporter
 from .librarything_import import LibrarythingImporter
 from .openlibrary_import import OpenLibraryImporter
 from .storygraph_import import StorygraphImporter
--- a/bookwyrm/importers/importer.py
+++ b/bookwyrm/importers/importer.py
@ -26,7 +26,7 @@ class Importer:
        ("authors", ["author", "authors", "primary author"]),
        ("isbn_10", ["isbn10", "isbn"]),
        ("isbn_13", ["isbn13", "isbn", "isbns"]),
-        ("shelf", ["shelf", "exclusive shelf", "read status"]),
+        ("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
        ("review_name", ["review name"]),
        ("review_body", ["my review", "review"]),
        ("rating", ["my rating", "rating", "star rating"]),
@ -36,9 +36,9 @@ class Importer:
    ]
    date_fields = ["date_added", "date_started", "date_finished"]
    shelf_mapping_guesses = {
-        "to-read": ["to-read"],
+        "to-read": ["to-read", "want to read"],
-        "read": ["read"],
+        "read": ["read", "already read"],
-        "reading": ["currently-reading", "reading"],
+        "reading": ["currently-reading", "reading", "currently reading"],
    }
    def create_job(self, user, csv_file, include_reviews, privacy):
@ -90,7 +90,10 @@ class Importer:
    def get_shelf(self, normalized_row):
        """determine which shelf to use"""
-        shelf_name = normalized_row["shelf"]
+        shelf_name = normalized_row.get("shelf")
        if not shelf_name:
            return None
        shelf_name = shelf_name.lower()
        shelf = [
            s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
        ]
@ -106,6 +109,7 @@ class Importer:
            user=user,
            include_reviews=original_job.include_reviews,
            privacy=original_job.privacy,
            source=original_job.source,
            # TODO: allow users to adjust mappings
            mappings=original_job.mappings,
            retry=True,
--- a/bookwyrm/importers/openlibrary_import.py
+++ b/bookwyrm/importers/openlibrary_import.py
@ -0,0 +1,13 @@
 """ handle reading a csv from openlibrary"""
 from . import Importer
 class OpenLibraryImporter(Importer):
    """csv downloads from OpenLibrary"""
    service = "OpenLibrary"
    def __init__(self, *args, **kwargs):
        """register the OpenLibrary-specific csv columns for this importer

        All positional and keyword arguments are passed through to the
        parent initializer unchanged.
        """
        # Build a fresh list instead of calling .append() on the inherited
        # one: the inherited list is reached through the class, so appending
        # in place would add duplicate entries on every instantiation and
        # make these columns visible to every other importer as well.
        self.row_mappings_guesses = [
            *self.row_mappings_guesses,
            ("openlibrary_key", ["edition id"]),
            ("openlibrary_work_key", ["work id"]),
        ]
        super().__init__(*args, **kwargs)
--- a/bookwyrm/importers/storygraph_import.py
+++ b/bookwyrm/importers/storygraph_import.py
@ -3,6 +3,6 @@ from . import Importer
 class StorygraphImporter(Importer):
-    """csv downloads from librarything"""
+    """csv downloads from Storygraph"""
    service = "Storygraph"
--- a/bookwyrm/models/import_job.py
+++ b/bookwyrm/models/import_job.py
@ -25,7 +25,7 @@ def construct_search_term(title, author):
    # Strip brackets (usually series title from search term)
    title = re.sub(r"\s*\([^)]*\)\s*", "", title)
    # Open library doesn't like including author initials in search term.
-    author = re.sub(r"(\w\.)+\s*", "", author)
+    author = re.sub(r"(\w\.)+\s*", "", author) if author else ""
    return " ".join([title, author])
@ -88,7 +88,9 @@ class ImportItem(models.Model):
            return
        if self.isbn:
-            self.book = self.get_book_from_isbn()
+            self.book = self.get_book_from_identifier()
        elif self.openlibrary_key:
            self.book = self.get_book_from_identifier(field="openlibrary_key")
        else:
            # don't fall back on title/author search if isbn is present.
            # you're too likely to mismatch
@ -98,10 +100,10 @@ class ImportItem(models.Model):
            else:
                self.book_guess = book
-    def get_book_from_isbn(self):
+    def get_book_from_identifier(self, field="isbn"):
-        """search by isbn"""
+        """search by isbn or other unique identifier"""
        search_result = connector_manager.first_search_result(
-            self.isbn, min_confidence=0.999
+            getattr(self, field), min_confidence=0.999
        )
        if search_result:
            # it's already in the right format
@ -114,6 +116,8 @@ class ImportItem(models.Model):
    def get_book_from_title_author(self):
        """search by title and author"""
        if not self.title:
            return None, 0
        search_term = construct_search_term(self.title, self.author)
        search_result = connector_manager.first_search_result(
            search_term, min_confidence=0.1
@ -145,6 +149,13 @@ class ImportItem(models.Model):
            self.normalized_data.get("isbn_10")
        )
    @property
    def openlibrary_key(self):
        """the edition identifier when the row has one, otherwise the work key"""
        edition_key = self.normalized_data.get("openlibrary_key")
        if edition_key:
            return edition_key
        # missing or empty edition key: fall back on the work-level identifier
        return self.normalized_data.get("openlibrary_work_key")
    @property
    def shelf(self):
        """the goodreads shelf field"""
--- a/bookwyrm/templates/import/import.html
+++ b/bookwyrm/templates/import/import.html
@ -31,6 +31,9 @@
                    <option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
                        LibraryThing (TSV)
                    </option>
                    <option value="OpenLibrary" {% if current == 'OpenLibrary' %}selected{% endif %}>
                        OpenLibrary (CSV)
                    </option>
                </select>
            </div>
            <div class="field">
--- a/bookwyrm/templates/import/import_status.html
+++ b/bookwyrm/templates/import/import_status.html
@ -105,6 +105,11 @@
                <th>
                    {% trans "ISBN" %}
                </th>
                {% if job.source == "OpenLibrary" %}
                <th>
                    {% trans "Openlibrary key" %}
                </th>
                {% endif %}
                <th>
                    {% trans "Author" %}
                </th>
@ -145,6 +150,11 @@
                <td>
                    {{ item.isbn|default:'' }}
                </td>
                {% if job.source == "OpenLibrary" %}
                <td>
                    {{ item.openlibrary_key }}
                </td>
                {% endif %}
                <td>
                    {{ item.normalized_data.authors }}
                </td>
--- a/bookwyrm/tests/data/openlibrary.csv
+++ b/bookwyrm/tests/data/openlibrary.csv
@ -0,0 +1,5 @@
 Work Id,Edition Id,Bookshelf
 OL102749W,,Currently Reading
 OL361393W,OL7798182M,Currently Reading
 OL1652392W,OL7194114M,Want to Read
 OL17062644W,OL25726365M,Already Read
--- a/bookwyrm/tests/importers/test_importer.py
+++ b/bookwyrm/tests/importers/test_importer.py
@ -128,7 +128,7 @@ class GenericImporter(TestCase):
        import_item = models.ImportItem.objects.get(job=import_job, index=0)
        with patch(
-            "bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
+            "bookwyrm.models.import_job.ImportItem.get_book_from_identifier"
        ) as resolve:
            resolve.return_value = self.book
@ -158,7 +158,7 @@ class GenericImporter(TestCase):
                ).exists()
            )
-        item = items[3]
+        item = items.last()
        item.fail_reason = "hello"
        item.save()
        item.update_job()
--- a/bookwyrm/tests/importers/test_openlibrary_import.py
+++ b/bookwyrm/tests/importers/test_openlibrary_import.py
@ -0,0 +1,85 @@
 """ testing import """
 import pathlib
 from unittest.mock import patch
 import datetime
 import pytz
 from django.test import TestCase
 from bookwyrm import models
 from bookwyrm.importers import OpenLibraryImporter
 from bookwyrm.importers.importer import handle_imported_book
 def make_date(*args):
    """build a UTC-aware datetime from positional date parts"""
    utc_zone = pytz.UTC
    return datetime.datetime(*args, tzinfo=utc_zone)
 # pylint: disable=consider-using-with
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
 class OpenLibraryImport(TestCase):
    """importing from openlibrary csv"""
    def setUp(self):
        """use a test csv"""
        self.importer = OpenLibraryImporter()
        datafile = pathlib.Path(__file__).parent.joinpath("../data/openlibrary.csv")
        self.csv = open(datafile, "r", encoding=self.importer.encoding)
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ):
            self.local_user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )
        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )
    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 4)
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].data["Work Id"], "OL102749W")
        self.assertEqual(import_items[1].data["Work Id"], "OL361393W")
        self.assertEqual(import_items[1].data["Edition Id"], "OL7798182M")
        self.assertEqual(import_items[0].normalized_data["shelf"], "reading")
        self.assertEqual(import_items[0].normalized_data["openlibrary_key"], "")
        self.assertEqual(
            import_items[0].normalized_data["openlibrary_work_key"], "OL102749W"
        )
        self.assertEqual(
            import_items[1].normalized_data["openlibrary_key"], "OL7798182M"
        )
        self.assertEqual(import_items[2].normalized_data["shelf"], "to-read")
        self.assertEqual(import_items[3].normalized_data["shelf"], "read")
    def test_handle_imported_book(self, *_):
        """openlibrary import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(identifier="reading").first()
        self.assertIsNone(shelf.books.first())
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.first()
        import_item.book = self.book
        import_item.save()
        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)
        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
--- a/bookwyrm/tests/models/test_import_model.py
+++ b/bookwyrm/tests/models/test_import_model.py
@ -139,7 +139,7 @@ class ImportJob(TestCase):
        self.assertEqual(item.reads, expected)
    @responses.activate
-    def test_get_book_from_isbn(self):
+    def test_get_book_from_identifier(self):
        """search and load books by isbn (9780356506999)"""
        item = models.ImportItem.objects.create(
            index=1,
@ -197,6 +197,6 @@ class ImportJob(TestCase):
                with patch(
                    "bookwyrm.connectors.openlibrary.Connector." "get_authors_from_data"
                ):
-                    book = item.get_book_from_isbn()
+                    book = item.get_book_from_identifier()
        self.assertEqual(book.title, "Sabriel")
--- a/bookwyrm/views/imports/import_data.py
+++ b/bookwyrm/views/imports/import_data.py
@ -14,6 +14,7 @@ from bookwyrm.importers import (
    LibrarythingImporter,
    GoodreadsImporter,
    StorygraphImporter,
    OpenLibraryImporter,
 )
 # pylint: disable= no-self-use
@ -49,6 +50,8 @@ class Import(View):
            importer = LibrarythingImporter()
        elif source == "Storygraph":
            importer = StorygraphImporter()
        elif source == "OpenLibrary":
            importer = OpenLibraryImporter()
        else:
            # Default : Goodreads
            importer = GoodreadsImporter()