Merge pull request #3135 from hughrun/csv

csv import and export fixes
This commit is contained in:
Mouse Reeve 2024-08-23 16:29:04 -07:00 committed by GitHub
commit 413c26bc5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 342 additions and 24 deletions

View file

@ -1,7 +1,7 @@
""" import classes """
from .importer import Importer
from .bookwyrm_import import BookwyrmImporter
from .bookwyrm_import import BookwyrmImporter, BookwyrmBooksImporter
from .calibre_import import CalibreImporter
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter

View file

@ -3,6 +3,7 @@ from django.http import QueryDict
from bookwyrm.models import User
from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob
from . import Importer
class BookwyrmImporter:
@ -22,3 +23,17 @@ class BookwyrmImporter:
user=user, archive_file=archive_file, required=required
)
return job
class BookwyrmBooksImporter(Importer):
    """
    Read a csv file that was exported from BookWyrm.

    The column guesses of the base importer are reused unchanged; this class
    only appends the extra columns that a BookWyrm export carries.
    """

    service = "BookWyrm"
    row_mappings_guesses = [
        *Importer.row_mappings_guesses,
        ("shelf_name", ["shelf_name"]),
        ("review_published", ["review_published"]),
    ]

View file

@ -18,17 +18,26 @@ class Importer:
row_mappings_guesses = [
("id", ["id", "book id"]),
("title", ["title"]),
("authors", ["author", "authors", "primary author"]),
("isbn_10", ["isbn10", "isbn", "isbn/uid"]),
("isbn_13", ["isbn13", "isbn", "isbns", "isbn/uid"]),
("authors", ["author_text", "author", "authors", "primary author"]),
("isbn_10", ["isbn_10", "isbn10", "isbn", "isbn/uid"]),
("isbn_13", ["isbn_13", "isbn13", "isbn", "isbns", "isbn/uid"]),
("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
("review_name", ["review name"]),
("review_body", ["my review", "review"]),
("review_name", ["review_name", "review name"]),
("review_body", ["review_content", "my review", "review"]),
("rating", ["my rating", "rating", "star rating"]),
("date_added", ["date added", "entry date", "added"]),
("date_started", ["date started", "started"]),
("date_finished", ["date finished", "last date read", "date read", "finished"]),
(
"date_added",
["shelf_date", "date_added", "date added", "entry date", "added"],
),
("date_started", ["start_date", "date started", "started"]),
(
"date_finished",
["finish_date", "date finished", "last date read", "date read", "finished"],
),
]
# TODO: stopped
date_fields = ["date_added", "date_started", "date_finished"]
shelf_mapping_guesses = {
"to-read": ["to-read", "want to read"],
@ -36,8 +45,14 @@ class Importer:
"reading": ["currently-reading", "reading", "currently reading"],
}
# pylint: disable=too-many-arguments
def create_job(
self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str
self,
user: User,
csv_file: Iterable[str],
include_reviews: bool,
privacy: str,
create_shelves: bool = True,
) -> ImportJob:
"""check over a csv and creates a database entry for the job"""
csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
@ -54,6 +69,7 @@ class Importer:
job = ImportJob.objects.create(
user=user,
include_reviews=include_reviews,
create_shelves=create_shelves,
privacy=privacy,
mappings=mappings,
source=self.service,
@ -113,7 +129,7 @@ class Importer:
shelf = [
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
]
return shelf[0] if shelf else None
return shelf[0] if shelf else normalized_row.get("shelf") or None
# pylint: disable=no-self-use
def normalize_row(
@ -148,6 +164,7 @@ class Importer:
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
create_shelves=original_job.create_shelves,
privacy=original_job.privacy,
source=original_job.source,
# TODO: allow users to adjust mappings

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.23 on 2023-11-25 05:49
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the boolean ``create_shelves`` field (default True) to ``importjob``."""

    dependencies = [("bookwyrm", "0188_theme_loads")]

    operations = [
        migrations.AddField(
            model_name="importjob",
            name="create_shelves",
            field=models.BooleanField(default=True),
        ),
    ]

View file

@ -0,0 +1,13 @@
# Generated by Django 4.2.11 on 2024-06-29 06:26
from django.db import migrations
class Migration(migrations.Migration):
    """Join the two listed migration branches; performs no operations itself."""

    dependencies = [
        ("bookwyrm", "0189_importjob_create_shelves"),
        ("bookwyrm", "0206_merge_20240415_1537"),
    ]

    operations = []

View file

@ -0,0 +1,13 @@
# Generated by Django 4.2.11 on 2024-07-28 11:07
from django.db import migrations
class Migration(migrations.Migration):
    """Join the two listed migration branches; performs no operations itself."""

    dependencies = [
        ("bookwyrm", "0207_merge_20240629_0626"),
        ("bookwyrm", "0207_sqlparse_update"),
    ]

    operations = []

View file

@ -4,6 +4,7 @@ import math
import re
import dateutil.parser
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@ -59,6 +60,7 @@ class ImportJob(models.Model):
created_date = models.DateTimeField(default=timezone.now)
updated_date = models.DateTimeField(default=timezone.now)
include_reviews: bool = models.BooleanField(default=True)
create_shelves: bool = models.BooleanField(default=True)
mappings = models.JSONField()
source = models.CharField(max_length=100)
privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels)
@ -245,11 +247,26 @@ class ImportItem(models.Model):
"""the goodreads shelf field"""
return self.normalized_data.get("shelf")
@property
def shelf_name(self):
    """the shelf's name, read from the "shelf_name" key of the normalized row"""
    row = self.normalized_data
    return row.get("shelf_name")
@property
def review(self):
    """the text of the user's review, stored under "review_body" in the row"""
    row = self.normalized_data
    return row.get("review_body")
@property
def review_name(self):
    """the title the user gave their review, or None when the row has none"""
    row = self.normalized_data
    return row.get("review_name")
@property
def review_published(self):
    """publication date of the review - a column of the BookWyrm export csv"""
    row = self.normalized_data
    return row.get("review_published")
@property
def rating(self):
"""x/5 star rating for a book"""
@ -368,7 +385,7 @@ def import_item_task(item_id):
item.update_job()
def handle_imported_book(item):
def handle_imported_book(item): # pylint: disable=too-many-branches
"""process a csv and then post about it"""
job = item.job
if job.complete:
@ -385,13 +402,31 @@ def handle_imported_book(item):
item.book = item.book.edition
existing_shelf = ShelfBook.objects.filter(book=item.book, user=user).exists()
if job.create_shelves and item.shelf and not existing_shelf:
# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user)
shelved_date = item.date_added or timezone.now()
shelfname = getattr(item, "shelf_name", item.shelf)
try:
shelf = Shelf.objects.get(name=shelfname, user=user)
except ObjectDoesNotExist:
try:
shelf = Shelf.objects.get(identifier=item.shelf, user=user)
except ObjectDoesNotExist:
shelf = Shelf.objects.create(
user=user,
identifier=item.shelf,
name=shelfname,
privacy=job.privacy,
)
ShelfBook(
book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date
book=item.book,
shelf=shelf,
user=user,
shelved_date=shelved_date,
).save(priority=IMPORT_TRIGGERED)
for read in item.reads:
@ -408,19 +443,25 @@ def handle_imported_book(item):
read.save()
if job.include_reviews and (item.rating or item.review) and not item.linked_review:
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
# we don't necessarily know the publication date of the review,
# but "now" is a bad guess unless we have no choice
published_date_guess = (
item.review_published or item.date_read or item.date_added or timezone.now()
)
if item.review:
# pylint: disable=consider-using-f-string
review_title = "Review of {!r} on {!r}".format(
item.book.title,
job.source,
)
review_name = getattr(item, "review_name", review_title)
review = Review.objects.filter(
user=user,
book=item.book,
name=review_title,
name=review_name,
rating=item.rating,
published_date=published_date_guess,
).first()
@ -428,7 +469,7 @@ def handle_imported_book(item):
review = Review(
user=user,
book=item.book,
name=review_title,
name=review_name,
content=item.review,
rating=item.rating,
published_date=published_date_guess,

View file

@ -69,6 +69,9 @@
<option value="Calibre" {% if current == 'Calibre' %}selected{% endif %}>
{% trans "Calibre (CSV)" %}
</option>
<option value="BookWyrm" {% if current == 'BookWyrm' %}selected{% endif %}>
{% trans "BookWyrm (CSV)" %}
</option>
</select>
</div>
@ -93,9 +96,14 @@
<input type="checkbox" name="include_reviews" checked> {% trans "Include reviews" %}
</label>
</div>
<div class="field">
<label class="label">
<input type="checkbox" name="create_shelves" checked> {% trans "Create new shelves if they do not exist" %}
</label>
</div>
<div class="field">
<label class="label" for="privacy_import">
{% trans "Privacy setting for imported reviews:" %}
{% trans "Privacy setting for imported reviews and shelves:" %}
</label>
{% include 'snippets/privacy_select.html' with no_label=True privacy_uuid="import" %}
</div>

View file

@ -0,0 +1,4 @@
title,author_text,remote_id,openlibrary_key,inventaire_id,librarything_key,goodreads_key,bnf_id,viaf,wikidata,asin,aasin,isfdb,isbn_10,isbn_13,oclc_number,start_date,finish_date,stopped_date,rating,review_name,review_cw,review_content,review_published,shelf,shelf_name,shelf_date
我穿我自己,琅俨,https://example.com/book/2010,,,,,,,,,,,,,,,,,,,,,,to-read,To Read,2024-08-10
Ottolenghi Simple,Yotam Ottolenghi,https://example.com/book/2,OL43065148M,,,,,,,,,,0449017036,9780449017036,,2022-08-10,2022-10-10,,4,Too much tahini,,...in his hummus,2022-11-10,cooking-9,Cooking,2024-08-10
The Blue Bedspread,Raj Kamal Jha,https://example.com/book/270,OL7425890M,,,,,,,,,,0375503129,9780375503122,41754476,2001-06-01,2001-07-10,,5,,,,,read,Read,2024-08-10
1 title author_text remote_id openlibrary_key inventaire_id librarything_key goodreads_key bnf_id viaf wikidata asin aasin isfdb isbn_10 isbn_13 oclc_number start_date finish_date stopped_date rating review_name review_cw review_content review_published shelf shelf_name shelf_date
2 我穿我自己 琅俨 https://example.com/book/2010 to-read To Read 2024-08-10
3 Ottolenghi Simple Yotam Ottolenghi https://example.com/book/2 OL43065148M 0449017036 9780449017036 2022-08-10 2022-10-10 4 Too much tahini ...in his hummus 2022-11-10 cooking-9 Cooking 2024-08-10
4 The Blue Bedspread Raj Kamal Jha https://example.com/book/270 OL7425890M 0375503129 9780375503122 41754476 2001-06-01 2001-07-10 5 read Read 2024-08-10

View file

@ -0,0 +1,182 @@
""" testing bookwyrm csv import """
import pathlib
from unittest.mock import patch
import datetime
from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import BookwyrmBooksImporter
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
    """build a UTC-aware datetime from positional ``datetime`` arguments"""
    utc = datetime.timezone.utc
    return datetime.datetime(*args, tzinfo=utc)
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class BookwyrmBooksImport(TestCase):
    """importing from BookWyrm csv"""

    def setUp(self):
        """use a test csv"""
        self.importer = BookwyrmBooksImporter()
        datafile = pathlib.Path(__file__).parent.joinpath("../data/bookwyrm.csv")
        # the handle has to outlive this method; tearDown closes it
        # pylint: disable-next=consider-using-with
        self.csv = open(datafile, "r", encoding=self.importer.encoding)

    def tearDown(self):
        """close test csv"""
        self.csv.close()

    @classmethod
    def setUpTestData(cls):
        """populate database"""
        with (
            patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"),
            patch("bookwyrm.activitystreams.populate_stream_task.delay"),
            patch("bookwyrm.lists_stream.populate_lists_task.delay"),
        ):
            cls.local_user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )
        models.SiteSettings.objects.create()
        work = models.Work.objects.create(title="Test Work")
        cls.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 3)
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].normalized_data["isbn_13"], "")
        self.assertEqual(import_items[0].normalized_data["isbn_10"], "")
        self.assertEqual(import_items[0].shelf_name, "To Read")

        self.assertEqual(import_items[1].index, 1)
        self.assertEqual(import_items[1].normalized_data["isbn_13"], "9780449017036")
        self.assertEqual(import_items[1].normalized_data["isbn_10"], "0449017036")
        self.assertEqual(import_items[1].shelf_name, "Cooking")

        self.assertEqual(import_items[2].index, 2)
        self.assertEqual(import_items[2].normalized_data["isbn_13"], "9780375503122")
        self.assertEqual(import_items[2].normalized_data["isbn_10"], "0375503129")
        self.assertEqual(import_items[2].shelf_name, "Read")

    def test_create_retry_job(self, *_):
        """trying again with items that didn't import"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "unlisted"
        )
        import_items = models.ImportItem.objects.filter(job=import_job).all()[:2]

        retry = self.importer.create_retry_job(
            self.local_user, import_job, import_items
        )
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.local_user)
        self.assertEqual(retry.include_reviews, False)
        self.assertEqual(retry.privacy, "unlisted")

        retry_items = models.ImportItem.objects.filter(job=retry).all()
        self.assertEqual(len(retry_items), 2)
        self.assertEqual(retry_items[0].index, 0)
        self.assertEqual(retry_items[0].data["title"], "我穿我自己")
        self.assertEqual(retry_items[1].index, 1)
        self.assertEqual(retry_items[1].data["author_text"], "Yotam Ottolenghi")

    def test_handle_imported_book(self, *_):
        """import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(
            identifier=models.Shelf.READ_FINISHED
        ).first()
        self.assertIsNone(shelf.books.first())

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.last()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertEqual(
            shelf.shelfbook_set.first().shelved_date, make_date(2024, 8, 10)
        )

        readthrough = models.ReadThrough.objects.get(user=self.local_user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2001, 6, 1))
        self.assertEqual(readthrough.finish_date, make_date(2001, 7, 10))

    def test_create_new_shelf(self, *_):
        """import added a book, was a new shelf created?"""
        # the second csv row is shelved on "cooking-9"; check the same identifier
        # before and after, otherwise the pre-condition proves nothing
        shelf_identifier = "cooking-9"
        shelf = self.local_user.shelf_set.filter(identifier=shelf_identifier).first()
        self.assertIsNone(shelf)

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = models.ImportItem.objects.filter(job=import_job).all()[1]
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf_after = self.local_user.shelf_set.filter(
            identifier=shelf_identifier
        ).first()
        # fail with a readable message instead of an AttributeError on None
        self.assertIsNotNone(shelf_after)
        self.assertEqual(shelf_after.books.first(), self.book)

    @patch("bookwyrm.activitystreams.add_status_task.delay")
    def test_handle_imported_book_review(self, *_):
        """review import"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, True, "unlisted"
        )
        import_item = import_job.items.get(index=1)
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        review = models.Review.objects.get(book=self.book, user=self.local_user)
        self.assertEqual(review.name, "Too much tahini")
        self.assertEqual(review.content, "...in his hummus")
        self.assertEqual(review.rating, 4)
        self.assertEqual(review.published_date, make_date(2022, 11, 10))
        self.assertEqual(review.privacy, "unlisted")

    @patch("bookwyrm.activitystreams.add_status_task.delay")
    def test_handle_imported_book_rating(self, *_):
        """rating import"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, True, "followers"
        )
        import_item = import_job.items.filter(index=2).first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        review = models.ReviewRating.objects.get(book=self.book, user=self.local_user)
        self.assertIsInstance(review, models.ReviewRating)
        self.assertEqual(review.rating, 5)
        self.assertEqual(review.published_date, make_date(2001, 7, 10))
        self.assertEqual(review.privacy, "followers")

View file

@ -16,6 +16,7 @@ from django.views import View
from bookwyrm import forms, models
from bookwyrm.importers import (
BookwyrmImporter,
BookwyrmBooksImporter,
CalibreImporter,
LibrarythingImporter,
GoodreadsImporter,
@ -69,7 +70,7 @@ class Import(View):
return TemplateResponse(request, "import/import.html", data)
def post(self, request):
"""ingest a goodreads csv"""
"""ingest a book data csv"""
site = models.SiteSettings.objects.get()
if not site.imports_enabled:
raise PermissionDenied()
@ -79,11 +80,16 @@ class Import(View):
return HttpResponseBadRequest()
include_reviews = request.POST.get("include_reviews") == "on"
create_shelves = request.POST.get("create_shelves") == "on"
privacy = request.POST.get("privacy")
source = request.POST.get("source")
importer = None
if source == "LibraryThing":
if source == "BookWyrm":
importer = BookwyrmBooksImporter()
print("BookwyrmBooksImporter")
elif source == "LibraryThing":
importer = LibrarythingImporter()
elif source == "Storygraph":
importer = StorygraphImporter()
@ -101,6 +107,7 @@ class Import(View):
TextIOWrapper(request.FILES["csv_file"], encoding=importer.encoding),
include_reviews,
privacy,
create_shelves,
)
except (UnicodeDecodeError, ValueError, KeyError):
return self.get(request, invalid=True)