csv import and export fixes

Adds shelved and published dates for books and their imported reviews.
Provides option to create new (custom) shelves when importing books.

fixes #3004
fixes #2846
fixes #2666
fixes #2411
This commit is contained in:
Hugh Rundle 2023-11-25 17:34:12 +11:00
parent 7c2de92df3
commit 539a9fa212
No known key found for this signature in database
GPG key ID: A7E35779918253F9
8 changed files with 128 additions and 18 deletions

View file

@ -1,6 +1,7 @@
""" import classes """
from .importer import Importer
from .bookwyrm_import import BookwyrmBooksImporter
from .calibre_import import CalibreImporter
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter

View file

@ -0,0 +1,14 @@
""" handle reading a csv from BookWyrm """
from typing import Any
from . import Importer
class BookwyrmBooksImporter(Importer):
    """Import books from a CSV file that was exported by BookWyrm.

    Re-uses the column-name guesses inherited from ``Importer`` and
    additionally recognises the ``shelf_name`` column.
    """

    service = "BookWyrm"

    def __init__(self, *args: Any, **kwargs: Any):
        """Set up this importer's column guesses, then defer to ``Importer``."""
        # ``row_mappings_guesses`` is a class-level list on ``Importer``.
        # Appending to it in place would leak the "shelf_name" guess into
        # every other importer and add a duplicate entry each time this class
        # is instantiated, so build a per-instance copy instead.
        self.row_mappings_guesses = [
            *self.row_mappings_guesses,
            ("shelf_name", ["shelf_name"]),
        ]
        super().__init__(*args, **kwargs)

View file

@ -18,14 +18,14 @@ class Importer:
row_mappings_guesses = [
("id", ["id", "book id"]),
("title", ["title"]),
("authors", ["author", "authors", "primary author"]),
("authors", ["author_text", "author", "authors", "primary author"]),
("isbn_10", ["isbn10", "isbn", "isbn/uid"]),
("isbn_13", ["isbn13", "isbn", "isbns", "isbn/uid"]),
("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
("review_name", ["review name"]),
("review_body", ["my review", "review"]),
("review_name", ["review_name", "review name"]),
("review_body", ["review_content", "my review", "review"]),
("rating", ["my rating", "rating", "star rating"]),
("date_added", ["date added", "entry date", "added"]),
("date_added", ["date_added", "date added", "entry date", "added"]),
("date_started", ["date started", "started"]),
("date_finished", ["date finished", "last date read", "date read", "finished"]),
]
@ -38,7 +38,12 @@ class Importer:
# pylint: disable=too-many-locals
def create_job(
self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str
self,
user: User,
csv_file: Iterable[str],
include_reviews: bool,
create_shelves: bool,
privacy: str,
) -> ImportJob:
"""check over a csv and creates a database entry for the job"""
csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
@ -55,6 +60,7 @@ class Importer:
job = ImportJob.objects.create(
user=user,
include_reviews=include_reviews,
create_shelves=create_shelves,
privacy=privacy,
mappings=mappings,
source=self.service,
@ -114,7 +120,7 @@ class Importer:
shelf = [
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
]
return shelf[0] if shelf else None
return shelf[0] if shelf else normalized_row.get("shelf") or None
# pylint: disable=no-self-use
def normalize_row(
@ -149,6 +155,7 @@ class Importer:
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
create_shelves=original_job.create_shelves,
privacy=original_job.privacy,
source=original_job.source,
# TODO: allow users to adjust mappings

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.23 on 2023-11-25 05:49
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``create_shelves`` boolean flag to the ``importjob`` model."""

    # Must be applied after migration 0188_theme_loads of the bookwyrm app.
    dependencies = [
        ("bookwyrm", "0188_theme_loads"),
    ]

    operations = [
        # New column defaults to True, so existing import jobs get that value.
        migrations.AddField(
            model_name="importjob",
            name="create_shelves",
            field=models.BooleanField(default=True),
        ),
    ]

View file

@ -4,6 +4,7 @@ import math
import re
import dateutil.parser
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@ -59,6 +60,7 @@ class ImportJob(models.Model):
created_date = models.DateTimeField(default=timezone.now)
updated_date = models.DateTimeField(default=timezone.now)
include_reviews: bool = models.BooleanField(default=True)
create_shelves: bool = models.BooleanField(default=True)
mappings = models.JSONField()
source = models.CharField(max_length=100)
privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels)
@ -245,6 +247,11 @@ class ImportItem(models.Model):
"""the goodreads shelf field"""
return self.normalized_data.get("shelf")
@property
def shelf_name(self):
    """the display name of the shelf, or None if the import data has none"""
    return self.normalized_data.get("shelf_name")
@property
def review(self):
"""a user-written review, to be imported with the book data"""
@ -388,11 +395,36 @@ def handle_imported_book(item):
# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user)
shelved_date = item.date_added or timezone.now()
ShelfBook(
book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date
).save(priority=IMPORT_TRIGGERED)
try:
desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user)
shelved_date = item.date_added or timezone.now()
ShelfBook(
book=item.book,
shelf=desired_shelf,
user=user,
shelved_date=shelved_date,
).save(priority=IMPORT_TRIGGERED)
except ObjectDoesNotExist:
if job.create_shelves:
shelfname = getattr(item, "shelf_name", item.shelf)
new_shelf = Shelf.objects.create(
user=user,
identifier=item.shelf,
name=shelfname,
privacy=job.privacy,
)
ShelfBook(
book=item.book,
shelf=new_shelf,
user=user,
shelved_date=shelved_date,
).save(priority=IMPORT_TRIGGERED)
for read in item.reads:
# check for an existing readthrough with the same dates
@ -408,9 +440,9 @@ def handle_imported_book(item):
read.save()
if job.include_reviews and (item.rating or item.review) and not item.linked_review:
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
# we don't necessarily know the publication date of the review,
# but "now" is a bad guess unless we have no choice
published_date_guess = item.date_read or item.date_added or timezone.now()
if item.review:
# pylint: disable=consider-using-f-string
review_title = "Review of {!r} on {!r}".format(

View file

@ -70,6 +70,9 @@
<option value="Calibre" {% if current == 'Calibre' %}selected{% endif %}>
{% trans "Calibre (CSV)" %}
</option>
<option value="BookWyrm" {% if current == 'BookWyrm' %}selected{% endif %}>
{% trans "BookWyrm (CSV)" %}
</option>
</select>
</div>
@ -94,9 +97,14 @@
<input type="checkbox" name="include_reviews" checked> {% trans "Include reviews" %}
</label>
</div>
<div class="field">
<label class="label">
<input type="checkbox" name="create_shelves" checked> {% trans "Create new shelves if they do not exist" %}
</label>
</div>
<div class="field">
<label class="label" for="privacy_import">
{% trans "Privacy setting for imported reviews:" %}
{% trans "Privacy setting for imported reviews and shelves:" %}
</label>
{% include 'snippets/privacy_select.html' with no_label=True privacy_uuid="import" %}
</div>

View file

@ -15,6 +15,7 @@ from django.views import View
from bookwyrm import forms, models
from bookwyrm.importers import (
BookwyrmBooksImporter,
CalibreImporter,
LibrarythingImporter,
GoodreadsImporter,
@ -67,7 +68,7 @@ class Import(View):
return TemplateResponse(request, "import/import.html", data)
def post(self, request):
"""ingest a goodreads csv"""
"""ingest a book data csv"""
site = models.SiteSettings.objects.get()
if not site.imports_enabled:
raise PermissionDenied()
@ -77,11 +78,16 @@ class Import(View):
return HttpResponseBadRequest()
include_reviews = request.POST.get("include_reviews") == "on"
create_shelves = request.POST.get("create_shelves") == "on"
privacy = request.POST.get("privacy")
source = request.POST.get("source")
importer = None
if source == "LibraryThing":
if source == "BookWyrm":
importer = BookwyrmBooksImporter()
print("BookwyrmBooksImporter")
elif source == "LibraryThing":
importer = LibrarythingImporter()
elif source == "Storygraph":
importer = StorygraphImporter()
@ -98,6 +104,7 @@ class Import(View):
request.user,
TextIOWrapper(request.FILES["csv_file"], encoding=importer.encoding),
include_reviews,
create_shelves,
privacy,
)
except (UnicodeDecodeError, ValueError, KeyError):

View file

@ -48,7 +48,16 @@ class Export(View):
fields = (
["title", "author_text"]
+ deduplication_fields
+ ["rating", "review_name", "review_cw", "review_content"]
+ [
"rating",
"review_published",
"review_name",
"review_cw",
"review_content",
"shelf",
"shelf_name",
"date_added",
]
)
writer.writerow(fields)
@ -72,9 +81,23 @@ class Export(View):
.first()
)
if review:
book.review_published = review.published_date
book.review_name = review.name
book.review_cw = review.content_warning
book.review_content = review.raw_content
book.review_content = (
review.raw_content
) # do imported reviews not have raw content?
shelfbook = (
models.ShelfBook.objects.filter(book=book, user=request.user)
.order_by("shelved_date")
.last()
)
if shelfbook:
book.shelf = shelfbook.shelf.identifier
book.shelf_name = shelfbook.shelf.name
book.date_added = shelfbook.shelved_date
writer.writerow([getattr(book, field, "") or "" for field in fields])
return HttpResponse(