From 539a9fa212bacb3fcab0a0c75bc0b2d1bce62c98 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 25 Nov 2023 17:34:12 +1100 Subject: [PATCH 1/6] csv import and export fixes Adds shelved and published dates for books and their imported reviews. Provides option to create new (custom) shelves when importing books. fixes #3004 fixes #2846 fixes #2666 fixes #2411 --- bookwyrm/importers/__init__.py | 1 + bookwyrm/importers/bookwyrm_import.py | 14 ++++++ bookwyrm/importers/importer.py | 19 +++++--- .../0189_importjob_create_shelves.py | 18 ++++++++ bookwyrm/models/import_job.py | 46 ++++++++++++++++--- bookwyrm/templates/import/import.html | 10 +++- bookwyrm/views/imports/import_data.py | 11 ++++- bookwyrm/views/preferences/export.py | 27 ++++++++++- 8 files changed, 128 insertions(+), 18 deletions(-) create mode 100644 bookwyrm/importers/bookwyrm_import.py create mode 100644 bookwyrm/migrations/0189_importjob_create_shelves.py diff --git a/bookwyrm/importers/__init__.py b/bookwyrm/importers/__init__.py index 6ce50f160..daf9bae62 100644 --- a/bookwyrm/importers/__init__.py +++ b/bookwyrm/importers/__init__.py @@ -1,6 +1,7 @@ """ import classes """ from .importer import Importer +from .bookwyrm_import import BookwyrmBooksImporter from .calibre_import import CalibreImporter from .goodreads_import import GoodreadsImporter from .librarything_import import LibrarythingImporter diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py new file mode 100644 index 000000000..ed25166f9 --- /dev/null +++ b/bookwyrm/importers/bookwyrm_import.py @@ -0,0 +1,14 @@ +""" handle reading a csv from BookWyrm """ + +from typing import Any +from . import Importer + + +class BookwyrmBooksImporter(Importer): + """Goodreads is the default importer, we basically just use the same structure""" + + service = "BookWyrm" + + def __init__(self, *args: Any, **kwargs: Any): + self.row_mappings_guesses.append(("shelf_name", ["shelf_name"])) + super().__init__(*args, **kwargs) diff --git a/bookwyrm/importers/importer.py b/bookwyrm/importers/importer.py index 5b3192fa5..8e60f7c5f 100644 --- a/bookwyrm/importers/importer.py +++ b/bookwyrm/importers/importer.py @@ -18,14 +18,14 @@ class Importer: row_mappings_guesses = [ ("id", ["id", "book id"]), ("title", ["title"]), - ("authors", ["author", "authors", "primary author"]), + ("authors", ["author_text", "author", "authors", "primary author"]), ("isbn_10", ["isbn10", "isbn", "isbn/uid"]), ("isbn_13", ["isbn13", "isbn", "isbns", "isbn/uid"]), ("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]), - ("review_name", ["review name"]), - ("review_body", ["my review", "review"]), + ("review_name", ["review_name", "review name"]), + ("review_body", ["review_content", "my review", "review"]), ("rating", ["my rating", "rating", "star rating"]), - ("date_added", ["date added", "entry date", "added"]), + ("date_added", ["date_added", "date added", "entry date", "added"]), ("date_started", ["date started", "started"]), ("date_finished", ["date finished", "last date read", "date read", "finished"]), ] @@ -38,7 +38,12 @@ class Importer: # pylint: disable=too-many-locals def create_job( - self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str + self, + user: User, + csv_file: Iterable[str], + include_reviews: bool, + create_shelves: bool, + privacy: str, ) -> ImportJob: """check over a csv and creates a database entry for the job""" csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter) @@ -55,6 +60,7 @@ class Importer: job = ImportJob.objects.create( user=user, include_reviews=include_reviews, + create_shelves=create_shelves, privacy=privacy, mappings=mappings, source=self.service, @@ -114,7 +120,7 @@ class Importer: shelf = [ s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs ] - return shelf[0] if shelf else None + return shelf[0] if shelf else normalized_row.get("shelf") or None # pylint: disable=no-self-use def normalize_row( @@ -149,6 +155,7 @@ class Importer: job = ImportJob.objects.create( user=user, include_reviews=original_job.include_reviews, + create_shelves=original_job.create_shelves, privacy=original_job.privacy, source=original_job.source, # TODO: allow users to adjust mappings diff --git a/bookwyrm/migrations/0189_importjob_create_shelves.py b/bookwyrm/migrations/0189_importjob_create_shelves.py new file mode 100644 index 000000000..a1b1fc512 --- /dev/null +++ b/bookwyrm/migrations/0189_importjob_create_shelves.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.23 on 2023-11-25 05:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0188_theme_loads"), + ] + + operations = [ + migrations.AddField( + model_name="importjob", + name="create_shelves", + field=models.BooleanField(default=True), + ), + ] diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index f5d86ad2e..6e1cae1ee 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -4,6 +4,7 @@ import math import re import dateutil.parser +from django.core.exceptions import ObjectDoesNotExist from django.db import models from django.utils import timezone from django.utils.translation import gettext_lazy as _ @@ -59,6 +60,7 @@ class ImportJob(models.Model): created_date = models.DateTimeField(default=timezone.now) updated_date = models.DateTimeField(default=timezone.now) include_reviews: bool = models.BooleanField(default=True) + create_shelves: bool = models.BooleanField(default=True) mappings = models.JSONField() source = models.CharField(max_length=100) privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels) @@ -245,6 +247,11 @@ class ImportItem(models.Model): """the goodreads shelf field""" return self.normalized_data.get("shelf") + @property + def shelf_name(self): + """the goodreads shelf field""" + return self.normalized_data.get("shelf_name") + @property def review(self): """a user-written review, to be imported with the book data""" @@ -388,11 +395,36 @@ def handle_imported_book(item): # shelve the book if it hasn't been shelved already if item.shelf and not existing_shelf: - desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user) + shelved_date = item.date_added or timezone.now() - ShelfBook( - book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date - ).save(priority=IMPORT_TRIGGERED) + + try: + + desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user) + shelved_date = item.date_added or timezone.now() + ShelfBook( + book=item.book, + shelf=desired_shelf, + user=user, + shelved_date=shelved_date, + ).save(priority=IMPORT_TRIGGERED) + + except ObjectDoesNotExist: + if job.create_shelves: + shelfname = getattr(item, "shelf_name", item.shelf) + new_shelf = Shelf.objects.create( + user=user, + identifier=item.shelf, + name=shelfname, + privacy=job.privacy, + ) + + ShelfBook( + book=item.book, + shelf=new_shelf, + user=user, + shelved_date=shelved_date, + ).save(priority=IMPORT_TRIGGERED) for read in item.reads: # check for an existing readthrough with the same dates @@ -408,9 +440,9 @@ def handle_imported_book(item): read.save() if job.include_reviews and (item.rating or item.review) and not item.linked_review: - # we don't know the publication date of the review, - # but "now" is a bad guess - published_date_guess = item.date_read or item.date_added + # we don't necessarily know the publication date of the review, + # but "now" is a bad guess unless we have no choice + published_date_guess = item.date_read or item.date_added or timezone.now() if item.review: # pylint: disable=consider-using-f-string review_title = "Review of {!r} on {!r}".format( diff --git a/bookwyrm/templates/import/import.html b/bookwyrm/templates/import/import.html index 2c3be9e07..4781dca83 100644 --- a/bookwyrm/templates/import/import.html +++ b/bookwyrm/templates/import/import.html @@ -70,6 +70,9 @@ + @@ -94,9 +97,14 @@ {% trans "Include reviews" %} +
+ +
{% include 'snippets/privacy_select.html' with no_label=True privacy_uuid="import" %}
diff --git a/bookwyrm/views/imports/import_data.py b/bookwyrm/views/imports/import_data.py index 01812e1d5..e2327ce73 100644 --- a/bookwyrm/views/imports/import_data.py +++ b/bookwyrm/views/imports/import_data.py @@ -15,6 +15,7 @@ from django.views import View from bookwyrm import forms, models from bookwyrm.importers import ( + BookwyrmBooksImporter, CalibreImporter, LibrarythingImporter, GoodreadsImporter, @@ -67,7 +68,7 @@ class Import(View): return TemplateResponse(request, "import/import.html", data) def post(self, request): - """ingest a goodreads csv""" + """ingest a book data csv""" site = models.SiteSettings.objects.get() if not site.imports_enabled: raise PermissionDenied() @@ -77,11 +78,16 @@ class Import(View): return HttpResponseBadRequest() include_reviews = request.POST.get("include_reviews") == "on" + create_shelves = request.POST.get("create_shelves") == "on" privacy = request.POST.get("privacy") source = request.POST.get("source") importer = None - if source == "LibraryThing": + + if source == "BookWyrm": + importer = BookwyrmBooksImporter() + print("BookwyrmBooksImporter") + elif source == "LibraryThing": importer = LibrarythingImporter() elif source == "Storygraph": importer = StorygraphImporter() @@ -98,6 +104,7 @@ class Import(View): request.user, TextIOWrapper(request.FILES["csv_file"], encoding=importer.encoding), include_reviews, + create_shelves, privacy, ) except (UnicodeDecodeError, ValueError, KeyError): diff --git a/bookwyrm/views/preferences/export.py b/bookwyrm/views/preferences/export.py index 6880318bc..ef6dd27bf 100644 --- a/bookwyrm/views/preferences/export.py +++ b/bookwyrm/views/preferences/export.py @@ -48,7 +48,16 @@ class Export(View): fields = ( ["title", "author_text"] + deduplication_fields - + ["rating", "review_name", "review_cw", "review_content"] + + [ + "rating", + "review_published", + "review_name", + "review_cw", + "review_content", + "shelf", + "shelf_name", + "date_added", + ] ) writer.writerow(fields) @@ -72,9 +81,23 @@ class Export(View): .first() ) if review: + book.review_published = review.published_date book.review_name = review.name book.review_cw = review.content_warning - book.review_content = review.raw_content + book.review_content = ( + review.raw_content + ) # do imported reviews not have raw content? + + shelfbook = ( + models.ShelfBook.objects.filter(book=book, user=request.user) + .order_by("shelved_date") + .last() + ) + if shelfbook: + book.shelf = shelfbook.shelf.identifier + book.shelf_name = shelfbook.shelf.name + book.date_added = shelfbook.shelved_date + writer.writerow([getattr(book, field, "") or "" for field in fields]) return HttpResponse( From 06d6360082b6164526dee75d8fb03bc5674c85ab Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 30 Jun 2024 18:54:59 +1000 Subject: [PATCH 2/6] merge latest changes and add tests --- bookwyrm/importers/__init__.py | 2 +- bookwyrm/importers/bookwyrm_import.py | 16 ++ bookwyrm/importers/importer.py | 23 ++- .../migrations/0207_merge_20240629_0626.py | 13 ++ bookwyrm/models/import_job.py | 39 ++-- bookwyrm/tests/data/bookwyrm.csv | 4 + .../tests/importers/test_bookwyrm_import.py | 173 ++++++++++++++++++ bookwyrm/views/imports/import_data.py | 3 +- 8 files changed, 241 insertions(+), 32 deletions(-) create mode 100644 bookwyrm/migrations/0207_merge_20240629_0626.py create mode 100644 bookwyrm/tests/data/bookwyrm.csv create mode 100644 bookwyrm/tests/importers/test_bookwyrm_import.py diff --git a/bookwyrm/importers/__init__.py b/bookwyrm/importers/__init__.py index 8e92872f2..3c895741b 100644 --- a/bookwyrm/importers/__init__.py +++ b/bookwyrm/importers/__init__.py @@ -1,7 +1,7 @@ """ import classes """ from .importer import Importer -from .bookwyrm_import import BookwyrmImporter +from .bookwyrm_import import BookwyrmImporter, BookwyrmBooksImporter from .calibre_import import CalibreImporter from .goodreads_import import GoodreadsImporter from .librarything_import import LibrarythingImporter diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index 206cd6219..7b343d5c9 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -1,8 +1,10 @@ """Import data from Bookwyrm export files""" +from typing import Any from django.http import QueryDict from bookwyrm.models import User from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob +from . import Importer class BookwyrmImporter: @@ -22,3 +24,17 @@ class BookwyrmImporter: user=user, archive_file=archive_file, required=required ) return job + + +class BookwyrmBooksImporter(Importer): + """ + Handle reading a csv from BookWyrm. + Goodreads is the default importer, we basically just use the same structure + But BookWyrm has a shelf.id (shelf) and a shelf.name (shelf_name) + """ + + service = "BookWyrm" + + def __init__(self, *args: Any, **kwargs: Any): + self.row_mappings_guesses.append(("shelf_name", ["shelf_name"])) + super().__init__(*args, **kwargs) diff --git a/bookwyrm/importers/importer.py b/bookwyrm/importers/importer.py index 8e60f7c5f..fc8d4ac13 100644 --- a/bookwyrm/importers/importer.py +++ b/bookwyrm/importers/importer.py @@ -19,16 +19,25 @@ class Importer: ("id", ["id", "book id"]), ("title", ["title"]), ("authors", ["author_text", "author", "authors", "primary author"]), - ("isbn_10", ["isbn10", "isbn", "isbn/uid"]), - ("isbn_13", ["isbn13", "isbn", "isbns", "isbn/uid"]), + ("isbn_10", ["isbn_10", "isbn10", "isbn", "isbn/uid"]), + ("isbn_13", ["isbn_13", "isbn13", "isbn", "isbns", "isbn/uid"]), ("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]), ("review_name", ["review_name", "review name"]), ("review_body", ["review_content", "my review", "review"]), ("rating", ["my rating", "rating", "star rating"]), - ("date_added", ["date_added", "date added", "entry date", "added"]), - ("date_started", ["date started", "started"]), - ("date_finished", ["date finished", "last date read", "date read", "finished"]), + ( + "date_added", + ["shelf_date", "date_added", "date added", "entry date", "added"], + ), + ("date_started", ["start_date", "date started", "started"]), + ( + "date_finished", + ["finish_date", "date finished", "last date read", "date read", "finished"], + ), ] + + # TODO: stopped + date_fields = ["date_added", "date_started", "date_finished"] shelf_mapping_guesses = { "to-read": ["to-read", "want to read"], @@ -36,14 +45,14 @@ class Importer: "reading": ["currently-reading", "reading", "currently reading"], } - # pylint: disable=too-many-locals + # pylint: disable=too-many-locals.too-many-arguments def create_job( self, user: User, csv_file: Iterable[str], include_reviews: bool, - create_shelves: bool, privacy: str, + create_shelves: bool = True, ) -> ImportJob: """check over a csv and creates a database entry for the job""" csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter) diff --git a/bookwyrm/migrations/0207_merge_20240629_0626.py b/bookwyrm/migrations/0207_merge_20240629_0626.py new file mode 100644 index 000000000..b5a1a4556 --- /dev/null +++ b/bookwyrm/migrations/0207_merge_20240629_0626.py @@ -0,0 +1,13 @@ +# Generated by Django 4.2.11 on 2024-06-29 06:26 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0189_importjob_create_shelves"), + ("bookwyrm", "0206_merge_20240415_1537"), + ] + + operations = [] diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 6e1cae1ee..8ca4c346d 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -375,7 +375,7 @@ def import_item_task(item_id): item.update_job() -def handle_imported_book(item): +def handle_imported_book(item): # pylint: disable=too-many-branches """process a csv and then post about it""" job = item.job if job.complete: @@ -392,39 +392,32 @@ def handle_imported_book(item): item.book = item.book.edition existing_shelf = ShelfBook.objects.filter(book=item.book, user=user).exists() - - # shelve the book if it hasn't been shelved already - if item.shelf and not existing_shelf: + if job.create_shelves and item.shelf and not existing_shelf: + # shelve the book if it hasn't been shelved already shelved_date = item.date_added or timezone.now() + shelfname = getattr(item, "shelf_name", item.shelf) try: - - desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user) - shelved_date = item.date_added or timezone.now() - ShelfBook( - book=item.book, - shelf=desired_shelf, - user=user, - shelved_date=shelved_date, - ).save(priority=IMPORT_TRIGGERED) - + shelf = Shelf.objects.get(name=shelfname, user=user) except ObjectDoesNotExist: - if job.create_shelves: - shelfname = getattr(item, "shelf_name", item.shelf) - new_shelf = Shelf.objects.create( + try: + shelf = Shelf.objects.get(identifier=item.shelf, user=user) + except ObjectDoesNotExist: + + shelf = Shelf.objects.create( user=user, identifier=item.shelf, name=shelfname, privacy=job.privacy, ) - ShelfBook( - book=item.book, - shelf=new_shelf, - user=user, - shelved_date=shelved_date, - ).save(priority=IMPORT_TRIGGERED) + ShelfBook( + book=item.book, + shelf=shelf, + user=user, + shelved_date=shelved_date, + ).save(priority=IMPORT_TRIGGERED) for read in item.reads: # check for an existing readthrough with the same dates diff --git a/bookwyrm/tests/data/bookwyrm.csv b/bookwyrm/tests/data/bookwyrm.csv new file mode 100644 index 000000000..d770f1dcf --- /dev/null +++ b/bookwyrm/tests/data/bookwyrm.csv @@ -0,0 +1,4 @@ +title,author_text,remote_id,openlibrary_key,inventaire_id,librarything_key,goodreads_key,bnf_id,viaf,wikidata,asin,aasin,isfdb,isbn_10,isbn_13,oclc_number,start_date,finish_date,stopped_date,rating,review_name,review_cw,review_content,review_published,shelf,shelf_name,shelf_date +Gideon the Ninth (The Locked Tomb #1),Tamsyn Muir,https://example.com/book2,,,,,,,,,,,1250313198,9781250313195,,2020-10-21,2020-10-25,,3,,,,,read,Read,2020-10-21 +Subcutanean,Aaron A. Reed,https://example.com/book3,,,,,,,,,,,,,,2020-03-05,2020-03-06,,0,,,,,read,Read,2020-03-05 +Patisserie at Home,Mélanie Dupuis,https://example.com/book4,,,,,,,,,,,0062445316,9780062445315,,2019-07-08,,,2,,,mixed feelings,2019-07-08,cooking,Cooking,2019-07-08 diff --git a/bookwyrm/tests/importers/test_bookwyrm_import.py b/bookwyrm/tests/importers/test_bookwyrm_import.py new file mode 100644 index 000000000..3a9169c99 --- /dev/null +++ b/bookwyrm/tests/importers/test_bookwyrm_import.py @@ -0,0 +1,173 @@ +""" testing bookwyrm csv import """ +import pathlib +from unittest.mock import patch +import datetime + +from django.test import TestCase + +from bookwyrm import models +from bookwyrm.importers import BookwyrmBooksImporter +from bookwyrm.models.import_job import handle_imported_book + + +def make_date(*args): + """helper function to easily generate a date obj""" + return datetime.datetime(*args, tzinfo=datetime.timezone.utc) + + +@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay") +@patch("bookwyrm.activitystreams.populate_stream_task.delay") +@patch("bookwyrm.activitystreams.add_book_statuses_task.delay") +class BookwyrmBooksImport(TestCase): + """importing from BookWyrm csv""" + + def setUp(self): + """use a test csv""" + self.importer = BookwyrmBooksImporter() + datafile = pathlib.Path(__file__).parent.joinpath("../data/bookwyrm.csv") + # pylint: disable-next=consider-using-with + self.csv = open(datafile, "r", encoding=self.importer.encoding) + + def tearDown(self): + """close test csv""" + self.csv.close() + + @classmethod + def setUpTestData(cls): + """populate database""" + with ( + patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), + patch("bookwyrm.activitystreams.populate_stream_task.delay"), + patch("bookwyrm.lists_stream.populate_lists_task.delay"), + ): + cls.local_user = models.User.objects.create_user( + "mouse", "mouse@mouse.mouse", "password", local=True + ) + models.SiteSettings.objects.create() + work = models.Work.objects.create(title="Test Work") + cls.book = models.Edition.objects.create( + title="Example Edition", + remote_id="https://example.com/book/1", + parent_work=work, + ) + + def test_create_job(self, *_): + """creates the import job entry and checks csv""" + import_job = self.importer.create_job( + self.local_user, self.csv, False, "public" + ) + + import_items = models.ImportItem.objects.filter(job=import_job).all() + self.assertEqual(len(import_items), 3) + self.assertEqual(import_items[0].index, 0) + self.assertEqual(import_items[0].normalized_data["isbn_13"], "9781250313195") + self.assertEqual(import_items[0].normalized_data["isbn_10"], "1250313198") + self.assertEqual(import_items[1].index, 1) + self.assertEqual(import_items[2].index, 2) + self.assertEqual(import_items[2].shelf_name, "Cooking") + + def test_create_retry_job(self, *_): + """trying again with items that didn't import""" + import_job = self.importer.create_job( + self.local_user, self.csv, False, "unlisted" + ) + import_items = models.ImportItem.objects.filter(job=import_job).all()[:2] + + retry = self.importer.create_retry_job( + self.local_user, import_job, import_items + ) + self.assertNotEqual(import_job, retry) + self.assertEqual(retry.user, self.local_user) + self.assertEqual(retry.include_reviews, False) + self.assertEqual(retry.privacy, "unlisted") + + retry_items = models.ImportItem.objects.filter(job=retry).all() + self.assertEqual(len(retry_items), 2) + self.assertEqual(retry_items[0].index, 0) + self.assertEqual(retry_items[1].index, 1) + + def test_handle_imported_book(self, *_): + """import added a book, this adds related connections""" + shelf = self.local_user.shelf_set.filter( + identifier=models.Shelf.READ_FINISHED + ).first() + self.assertIsNone(shelf.books.first()) + + import_job = self.importer.create_job( + self.local_user, self.csv, False, "public" + ) + import_item = import_job.items.first() + import_item.book = self.book + import_item.save() + + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + handle_imported_book(import_item) + + shelf.refresh_from_db() + self.assertEqual(shelf.books.first(), self.book) + self.assertEqual( + shelf.shelfbook_set.first().shelved_date, make_date(2020, 10, 21) + ) + + readthrough = models.ReadThrough.objects.get(user=self.local_user) + self.assertEqual(readthrough.book, self.book) + self.assertEqual(readthrough.start_date, make_date(2020, 10, 21)) + self.assertEqual(readthrough.finish_date, make_date(2020, 10, 25)) + + def test_create_new_shelf(self, *_): + """import added a book, was a new shelf created?""" + shelf = self.local_user.shelf_set.filter(identifier="cooking").first() + self.assertIsNone(shelf) + + import_job = self.importer.create_job( + self.local_user, self.csv, False, "public" + ) + import_item = models.ImportItem.objects.filter(job=import_job).all()[2] + import_item.book = self.book + import_item.save() + + # this doesn't pick up 'shelf_name' when running all tests + # but does when only running this test...???? + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + handle_imported_book(import_item) + + shelf_after = self.local_user.shelf_set.filter(identifier="cooking").first() + self.assertEqual(shelf_after.books.first(), self.book) + + @patch("bookwyrm.activitystreams.add_status_task.delay") + def test_handle_imported_book_review(self, *_): + """review import""" + import_job = self.importer.create_job( + self.local_user, self.csv, True, "unlisted" + ) + import_item = import_job.items.get(index=2) + import_item.book = self.book + import_item.save() + + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + handle_imported_book(import_item) + + review = models.Review.objects.get(book=self.book, user=self.local_user) + self.assertEqual(review.content, "mixed feelings") + self.assertEqual(review.rating, 2) + self.assertEqual(review.published_date, make_date(2019, 7, 8)) + self.assertEqual(review.privacy, "unlisted") + + @patch("bookwyrm.activitystreams.add_status_task.delay") + def test_handle_imported_book_rating(self, *_): + """rating import""" + import_job = self.importer.create_job( + self.local_user, self.csv, True, "unlisted" + ) + import_item = import_job.items.filter(index=0).first() + import_item.book = self.book + import_item.save() + + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + handle_imported_book(import_item) + + review = models.ReviewRating.objects.get(book=self.book, user=self.local_user) + self.assertIsInstance(review, models.ReviewRating) + self.assertEqual(review.rating, 3) + self.assertEqual(review.published_date, make_date(2020, 10, 25)) + self.assertEqual(review.privacy, "unlisted") diff --git a/bookwyrm/views/imports/import_data.py b/bookwyrm/views/imports/import_data.py index 839c5cc8b..59686c1f0 100644 --- a/bookwyrm/views/imports/import_data.py +++ b/bookwyrm/views/imports/import_data.py @@ -16,6 +16,7 @@ from django.views import View from bookwyrm import forms, models from bookwyrm.importers import ( BookwyrmImporter, + BookwyrmBooksImporter, CalibreImporter, LibrarythingImporter, GoodreadsImporter, @@ -105,8 +106,8 @@ class Import(View): request.user, TextIOWrapper(request.FILES["csv_file"], encoding=importer.encoding), include_reviews, - create_shelves, privacy, + create_shelves, ) except (UnicodeDecodeError, ValueError, KeyError): return self.get(request, invalid=True) From 94dfbbcc0526bd50bb756c5c7ee80fd9dee920a3 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 27 Jul 2024 12:55:15 +1000 Subject: [PATCH 3/6] fix BookwyrmBooksImporter and tests - change class attribute to instance attribute for mappings - remove comment from test - order import retry jobs in generic importer test This last change seems innocuous but I may be missing something more fundamental - it was otherwise failing when multiple tests are run, I think because running tests in parallel led to import jobs getting out of order? --- bookwyrm/importers/bookwyrm_import.py | 7 ++-- .../tests/importers/test_bookwyrm_import.py | 38 +++++++++---------- bookwyrm/tests/importers/test_importer.py | 2 +- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index 7b343d5c9..d6b43f1ce 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -34,7 +34,6 @@ class BookwyrmBooksImporter(Importer): """ service = "BookWyrm" - - def __init__(self, *args: Any, **kwargs: Any): - self.row_mappings_guesses.append(("shelf_name", ["shelf_name"])) - super().__init__(*args, **kwargs) + row_mappings_guesses = Importer.row_mappings_guesses + [ + ("shelf_name", ["shelf_name"]), + ] \ No newline at end of file diff --git a/bookwyrm/tests/importers/test_bookwyrm_import.py b/bookwyrm/tests/importers/test_bookwyrm_import.py index 3a9169c99..0d45b5fda 100644 --- a/bookwyrm/tests/importers/test_bookwyrm_import.py +++ b/bookwyrm/tests/importers/test_bookwyrm_import.py @@ -66,25 +66,27 @@ class BookwyrmBooksImport(TestCase): self.assertEqual(import_items[2].index, 2) self.assertEqual(import_items[2].shelf_name, "Cooking") - def test_create_retry_job(self, *_): - """trying again with items that didn't import""" - import_job = self.importer.create_job( - self.local_user, self.csv, False, "unlisted" - ) - import_items = models.ImportItem.objects.filter(job=import_job).all()[:2] + # def test_create_retry_job(self, *_): + # """trying again with items that didn't import""" + # import_job = self.importer.create_job( + # self.local_user, self.csv, False, "unlisted" + # ) + # import_items = models.ImportItem.objects.filter(job=import_job).all()[:2] - retry = self.importer.create_retry_job( - self.local_user, import_job, import_items - ) - self.assertNotEqual(import_job, retry) - self.assertEqual(retry.user, self.local_user) - self.assertEqual(retry.include_reviews, False) - self.assertEqual(retry.privacy, "unlisted") + # retry = self.importer.create_retry_job( + # self.local_user, import_job, import_items + # ) + # self.assertNotEqual(import_job, retry) + # self.assertEqual(retry.user, self.local_user) + # self.assertEqual(retry.include_reviews, False) + # self.assertEqual(retry.privacy, "unlisted") - retry_items = models.ImportItem.objects.filter(job=retry).all() - self.assertEqual(len(retry_items), 2) - self.assertEqual(retry_items[0].index, 0) - self.assertEqual(retry_items[1].index, 1) + # retry_items = models.ImportItem.objects.filter(job=retry).all() + # self.assertEqual(len(retry_items), 2) + # self.assertEqual(retry_items[0].index, 0) + # self.assertEqual(retry_items[0].data["title"], "Gideon the Ninth (The Locked Tomb #1)") + # self.assertEqual(retry_items[1].index, 1) + # self.assertEqual(retry_items[1].data["author_text"], "Aaron A. Reed") def test_handle_imported_book(self, *_): """import added a book, this adds related connections""" @@ -126,8 +128,6 @@ class BookwyrmBooksImport(TestCase): import_item.book = self.book import_item.save() - # this doesn't pick up 'shelf_name' when running all tests - # but does when only running this test...???? with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): handle_imported_book(import_item) diff --git a/bookwyrm/tests/importers/test_importer.py b/bookwyrm/tests/importers/test_importer.py index da2e1b3d8..c562038af 100644 --- a/bookwyrm/tests/importers/test_importer.py +++ b/bookwyrm/tests/importers/test_importer.py @@ -100,7 +100,7 @@ class GenericImporter(TestCase): self.assertEqual(retry.include_reviews, False) self.assertEqual(retry.privacy, "unlisted") - retry_items = models.ImportItem.objects.filter(job=retry).all() + retry_items = models.ImportItem.objects.filter(job=retry).order_by("index") self.assertEqual(len(retry_items), 2) self.assertEqual(retry_items[0].index, 0) self.assertEqual(retry_items[0].normalized_data["id"], "38") From 1608ca640165a9786d224807ce321a070402c603 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 27 Jul 2024 13:16:34 +1000 Subject: [PATCH 4/6] improve formatting --- bookwyrm/importers/bookwyrm_import.py | 3 +- .../tests/importers/test_bookwyrm_import.py | 40 ++++++++++--------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index d6b43f1ce..fb46966bf 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -1,5 +1,4 @@ """Import data from Bookwyrm export files""" -from typing import Any from django.http import QueryDict from bookwyrm.models import User @@ -36,4 +35,4 @@ class BookwyrmBooksImporter(Importer): service = "BookWyrm" row_mappings_guesses = Importer.row_mappings_guesses + [ ("shelf_name", ["shelf_name"]), - ] \ No newline at end of file + ] diff --git a/bookwyrm/tests/importers/test_bookwyrm_import.py b/bookwyrm/tests/importers/test_bookwyrm_import.py index 0d45b5fda..8e940c096 100644 --- a/bookwyrm/tests/importers/test_bookwyrm_import.py +++ b/bookwyrm/tests/importers/test_bookwyrm_import.py @@ -66,27 +66,29 @@ class BookwyrmBooksImport(TestCase): self.assertEqual(import_items[2].index, 2) self.assertEqual(import_items[2].shelf_name, "Cooking") - # def test_create_retry_job(self, *_): - # """trying again with items that didn't import""" - # import_job = self.importer.create_job( - # self.local_user, self.csv, False, "unlisted" - # ) - # import_items = models.ImportItem.objects.filter(job=import_job).all()[:2] + def test_create_retry_job(self, *_): + """trying again with items that didn't import""" + import_job = self.importer.create_job( + self.local_user, self.csv, False, "unlisted" + ) + import_items = models.ImportItem.objects.filter(job=import_job).all()[:2] - # retry = self.importer.create_retry_job( - # self.local_user, import_job, import_items - # ) - # self.assertNotEqual(import_job, retry) - # self.assertEqual(retry.user, self.local_user) - # self.assertEqual(retry.include_reviews, False) - # self.assertEqual(retry.privacy, "unlisted") + retry = self.importer.create_retry_job( + self.local_user, import_job, import_items + ) + self.assertNotEqual(import_job, retry) + self.assertEqual(retry.user, self.local_user) + self.assertEqual(retry.include_reviews, False) + self.assertEqual(retry.privacy, "unlisted") - # retry_items = models.ImportItem.objects.filter(job=retry).all() - # self.assertEqual(len(retry_items), 2) - # self.assertEqual(retry_items[0].index, 0) - # self.assertEqual(retry_items[0].data["title"], "Gideon the Ninth (The Locked Tomb #1)") - # self.assertEqual(retry_items[1].index, 1) - # self.assertEqual(retry_items[1].data["author_text"], "Aaron A. Reed") + retry_items = models.ImportItem.objects.filter(job=retry).all() + self.assertEqual(len(retry_items), 2) + self.assertEqual(retry_items[0].index, 0) + self.assertEqual( + retry_items[0].data["title"], "Gideon the Ninth (The Locked Tomb #1)" + ) + self.assertEqual(retry_items[1].index, 1) + self.assertEqual(retry_items[1].data["author_text"], "Aaron A. Reed") def test_handle_imported_book(self, *_): """import added a book, this adds related connections""" From 8aba8caae9edbe000b80ac3478be0307bac3795e Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 28 Jul 2024 21:08:14 +1000 Subject: [PATCH 5/6] merge migration --- ...0207_merge_20240629_0626_0207_sqlparse_update.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 bookwyrm/migrations/0208_merge_0207_merge_20240629_0626_0207_sqlparse_update.py diff --git a/bookwyrm/migrations/0208_merge_0207_merge_20240629_0626_0207_sqlparse_update.py b/bookwyrm/migrations/0208_merge_0207_merge_20240629_0626_0207_sqlparse_update.py new file mode 100644 index 000000000..24ef28e04 --- /dev/null +++ b/bookwyrm/migrations/0208_merge_0207_merge_20240629_0626_0207_sqlparse_update.py @@ -0,0 +1,13 @@ +# Generated by Django 4.2.11 on 2024-07-28 11:07 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0207_merge_20240629_0626"), + ("bookwyrm", "0207_sqlparse_update"), + ] + + operations = [] From 7fc54c509c707f46acfbd9107f4d9b7ac16a02f2 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 10 Aug 2024 16:37:30 +1000 Subject: [PATCH 6/6] fixes for bookwyrm csv import - fix tests - revert change to GenericImporter tests - import the review name - add extra properties to ImportItem --- bookwyrm/importers/bookwyrm_import.py | 3 +- bookwyrm/importers/importer.py | 1 + bookwyrm/models/import_job.py | 22 ++++++-- bookwyrm/tests/data/bookwyrm.csv | 6 +-- .../tests/importers/test_bookwyrm_import.py | 51 +++++++++++-------- bookwyrm/tests/importers/test_importer.py | 2 +- 6 files changed, 55 insertions(+), 30 deletions(-) diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index fb46966bf..8afc1abf7 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -29,10 +29,11 @@ class BookwyrmBooksImporter(Importer): """ Handle reading a csv from BookWyrm. Goodreads is the default importer, we basically just use the same structure - But BookWyrm has a shelf.id (shelf) and a shelf.name (shelf_name) + But BookWyrm has additional attributes in the csv """ service = "BookWyrm" row_mappings_guesses = Importer.row_mappings_guesses + [ ("shelf_name", ["shelf_name"]), + ("review_published", ["review_published"]), ] diff --git a/bookwyrm/importers/importer.py b/bookwyrm/importers/importer.py index 3e01c6abc..d2a11d7f2 100644 --- a/bookwyrm/importers/importer.py +++ b/bookwyrm/importers/importer.py @@ -45,6 +45,7 @@ class Importer: "reading": ["currently-reading", "reading", "currently reading"], } + # pylint: disable=too-many-arguments def create_job( self, user: User, diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 351d722f7..5a6ba3f51 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -257,6 +257,16 @@ class ImportItem(models.Model): """a user-written review, to be imported with the book data""" return self.normalized_data.get("review_body") + @property + def review_name(self): + """a user-written review name, to be imported with the book data""" + return self.normalized_data.get("review_name") + + @property + def review_published(self): + """date the review was published - included in BookWyrm export csv""" + return self.normalized_data.get("review_published", None) + @property def rating(self): """x/5 star rating for a book""" @@ -435,17 +445,23 @@ def handle_imported_book(item): # pylint: disable=too-many-branches if job.include_reviews and (item.rating or item.review) and not item.linked_review: # we don't necessarily know the publication date of the review, # but "now" is a bad guess unless we have no choice - published_date_guess = item.date_read or item.date_added or timezone.now() + + published_date_guess = ( + item.review_published or item.date_read or item.date_added or timezone.now() + ) if item.review: + # pylint: disable=consider-using-f-string review_title = "Review of {!r} on {!r}".format( item.book.title, job.source, ) + review_name = getattr(item, "review_name", review_title) + review = Review.objects.filter( user=user, book=item.book, - name=review_title, + name=review_name, rating=item.rating, published_date=published_date_guess, ).first() @@ -453,7 +469,7 @@ def handle_imported_book(item): # pylint: disable=too-many-branches review = Review( user=user, book=item.book, - name=review_title, + name=review_name, content=item.review, rating=item.rating, published_date=published_date_guess, diff --git a/bookwyrm/tests/data/bookwyrm.csv b/bookwyrm/tests/data/bookwyrm.csv index d770f1dcf..9af375c0b 100644 --- a/bookwyrm/tests/data/bookwyrm.csv +++ b/bookwyrm/tests/data/bookwyrm.csv @@ -1,4 +1,4 @@ title,author_text,remote_id,openlibrary_key,inventaire_id,librarything_key,goodreads_key,bnf_id,viaf,wikidata,asin,aasin,isfdb,isbn_10,isbn_13,oclc_number,start_date,finish_date,stopped_date,rating,review_name,review_cw,review_content,review_published,shelf,shelf_name,shelf_date -Gideon the Ninth (The Locked Tomb #1),Tamsyn Muir,https://example.com/book2,,,,,,,,,,,1250313198,9781250313195,,2020-10-21,2020-10-25,,3,,,,,read,Read,2020-10-21 -Subcutanean,Aaron A. Reed,https://example.com/book3,,,,,,,,,,,,,,2020-03-05,2020-03-06,,0,,,,,read,Read,2020-03-05 -Patisserie at Home,Mélanie Dupuis,https://example.com/book4,,,,,,,,,,,0062445316,9780062445315,,2019-07-08,,,2,,,mixed feelings,2019-07-08,cooking,Cooking,2019-07-08 +我穿我自己,琅俨,https://example.com/book/2010,,,,,,,,,,,,,,,,,,,,,,to-read,To Read,2024-08-10 +Ottolenghi Simple,Yotam Ottolenghi,https://example.com/book/2,OL43065148M,,,,,,,,,,0449017036,9780449017036,,2022-08-10,2022-10-10,,4,Too much tahini,,...in his hummus,2022-11-10,cooking-9,Cooking,2024-08-10 +The Blue Bedspread,Raj Kamal Jha,https://example.com/book/270,OL7425890M,,,,,,,,,,0375503129,9780375503122,41754476,2001-06-01,2001-07-10,,5,,,,,read,Read,2024-08-10 diff --git a/bookwyrm/tests/importers/test_bookwyrm_import.py b/bookwyrm/tests/importers/test_bookwyrm_import.py index 8e940c096..1f50e78a6 100644 --- a/bookwyrm/tests/importers/test_bookwyrm_import.py +++ b/bookwyrm/tests/importers/test_bookwyrm_import.py @@ -60,11 +60,19 @@ class BookwyrmBooksImport(TestCase): import_items = models.ImportItem.objects.filter(job=import_job).all() self.assertEqual(len(import_items), 3) self.assertEqual(import_items[0].index, 0) - self.assertEqual(import_items[0].normalized_data["isbn_13"], "9781250313195") - self.assertEqual(import_items[0].normalized_data["isbn_10"], "1250313198") + self.assertEqual(import_items[0].normalized_data["isbn_13"], "") + self.assertEqual(import_items[0].normalized_data["isbn_10"], "") + self.assertEqual(import_items[0].shelf_name, "To Read") + self.assertEqual(import_items[1].index, 1) + self.assertEqual(import_items[1].normalized_data["isbn_13"], "9780449017036") + self.assertEqual(import_items[1].normalized_data["isbn_10"], "0449017036") + self.assertEqual(import_items[1].shelf_name, "Cooking") + self.assertEqual(import_items[2].index, 2) - self.assertEqual(import_items[2].shelf_name, "Cooking") + self.assertEqual(import_items[2].normalized_data["isbn_13"], "9780375503122") + self.assertEqual(import_items[2].normalized_data["isbn_10"], "0375503129") + self.assertEqual(import_items[2].shelf_name, "Read") def test_create_retry_job(self, *_): """trying again with items that didn't import""" @@ -84,11 +92,9 @@ class BookwyrmBooksImport(TestCase): retry_items = models.ImportItem.objects.filter(job=retry).all() self.assertEqual(len(retry_items), 2) self.assertEqual(retry_items[0].index, 0) - self.assertEqual( - retry_items[0].data["title"], "Gideon the Ninth (The Locked Tomb #1)" - ) + self.assertEqual(retry_items[0].data["title"], "我穿我自己") self.assertEqual(retry_items[1].index, 1) - self.assertEqual(retry_items[1].data["author_text"], "Aaron A. Reed") + self.assertEqual(retry_items[1].data["author_text"], "Yotam Ottolenghi") def test_handle_imported_book(self, *_): """import added a book, this adds related connections""" @@ -100,7 +106,7 @@ class BookwyrmBooksImport(TestCase): import_job = self.importer.create_job( self.local_user, self.csv, False, "public" ) - import_item = import_job.items.first() + import_item = import_job.items.last() import_item.book = self.book import_item.save() @@ -110,13 +116,13 @@ class BookwyrmBooksImport(TestCase): shelf.refresh_from_db() self.assertEqual(shelf.books.first(), self.book) self.assertEqual( - shelf.shelfbook_set.first().shelved_date, make_date(2020, 10, 21) + shelf.shelfbook_set.first().shelved_date, make_date(2024, 8, 10) ) readthrough = models.ReadThrough.objects.get(user=self.local_user) self.assertEqual(readthrough.book, self.book) - self.assertEqual(readthrough.start_date, make_date(2020, 10, 21)) - self.assertEqual(readthrough.finish_date, make_date(2020, 10, 25)) + self.assertEqual(readthrough.start_date, make_date(2001, 6, 1)) + self.assertEqual(readthrough.finish_date, make_date(2001, 7, 10)) def test_create_new_shelf(self, *_): """import added a book, was a new shelf created?""" @@ -126,14 +132,14 @@ class BookwyrmBooksImport(TestCase): import_job = self.importer.create_job( self.local_user, self.csv, False, "public" ) - import_item = models.ImportItem.objects.filter(job=import_job).all()[2] + import_item = models.ImportItem.objects.filter(job=import_job).all()[1] import_item.book = self.book import_item.save() with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): handle_imported_book(import_item) - shelf_after = self.local_user.shelf_set.filter(identifier="cooking").first() + shelf_after = self.local_user.shelf_set.filter(identifier="cooking-9").first() self.assertEqual(shelf_after.books.first(), self.book) @patch("bookwyrm.activitystreams.add_status_task.delay") @@ -142,7 +148,7 @@ class BookwyrmBooksImport(TestCase): import_job = self.importer.create_job( self.local_user, self.csv, True, "unlisted" ) - import_item = import_job.items.get(index=2) + import_item = import_job.items.get(index=1) import_item.book = self.book import_item.save() @@ -150,18 +156,19 @@ class BookwyrmBooksImport(TestCase): handle_imported_book(import_item) review = models.Review.objects.get(book=self.book, user=self.local_user) - self.assertEqual(review.content, "mixed feelings") - self.assertEqual(review.rating, 2) - self.assertEqual(review.published_date, make_date(2019, 7, 8)) + self.assertEqual(review.name, "Too much tahini") + self.assertEqual(review.content, "...in his hummus") + self.assertEqual(review.rating, 4) + self.assertEqual(review.published_date, make_date(2022, 11, 10)) self.assertEqual(review.privacy, "unlisted") @patch("bookwyrm.activitystreams.add_status_task.delay") def test_handle_imported_book_rating(self, *_): """rating import""" import_job = self.importer.create_job( - self.local_user, self.csv, True, "unlisted" + self.local_user, self.csv, True, "followers" ) - import_item = import_job.items.filter(index=0).first() + import_item = import_job.items.filter(index=2).first() import_item.book = self.book import_item.save() @@ -170,6 +177,6 @@ class BookwyrmBooksImport(TestCase): review = models.ReviewRating.objects.get(book=self.book, user=self.local_user) self.assertIsInstance(review, models.ReviewRating) - self.assertEqual(review.rating, 3) - self.assertEqual(review.published_date, make_date(2020, 10, 25)) - self.assertEqual(review.privacy, "unlisted") + self.assertEqual(review.rating, 5) + self.assertEqual(review.published_date, make_date(2001, 7, 10)) + self.assertEqual(review.privacy, "followers") diff --git a/bookwyrm/tests/importers/test_importer.py b/bookwyrm/tests/importers/test_importer.py index c562038af..da2e1b3d8 100644 --- a/bookwyrm/tests/importers/test_importer.py +++ b/bookwyrm/tests/importers/test_importer.py @@ -100,7 +100,7 @@ class GenericImporter(TestCase): self.assertEqual(retry.include_reviews, False) self.assertEqual(retry.privacy, "unlisted") - retry_items = models.ImportItem.objects.filter(job=retry).order_by("index") + retry_items = models.ImportItem.objects.filter(job=retry).all() self.assertEqual(len(retry_items), 2) self.assertEqual(retry_items[0].index, 0) self.assertEqual(retry_items[0].normalized_data["id"], "38")