""" handle reading a csv from an external service, defaults are from Goodreads """ import csv import logging from django.utils import timezone from django.utils.translation import gettext_lazy as _ from bookwyrm import models from bookwyrm.models import ImportJob, ImportItem from bookwyrm.tasks import app, LOW logger = logging.getLogger(__name__) class Importer: """Generic class for csv data import from an outside service""" service = "Import" delimiter = "," encoding = "UTF-8" # these are from Goodreads row_mappings_guesses = { "id": ["id", "book id"], "title": ["title"], "authors": ["author", "authors", "primary author"], "isbn_13": ["isbn13", "isbn"], "isbn_10": ["isbn10", "isbn"], "shelf": ["shelf", "exclusive shelf", "read status"], "review_name": ["review name"], "review_body": ["my review", "review"], "rating": ["my rating", "rating", "star rating"], "date_added": ["date added", "entry date", "added"], "date_started": ["date started", "started"], "date_finished": ["date finished", "last date read", "date read", "finished"], } date_fields = ["date_added", "date_started", "date_finished"] shelf_mapping_guesses = { "to-read": ["to-read"], "read": ["read"], "reading": ["currently-reading", "reading"], } def create_job(self, user, csv_file, include_reviews, privacy): """check over a csv and creates a database entry for the job""" csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter) rows = enumerate(list(csv_reader)) job = ImportJob.objects.create( user=user, include_reviews=include_reviews, privacy=privacy, mappings=self.create_row_mappings(csv_reader.fieldnames), source=self.service, ) for index, entry in rows: self.create_item(job, index, entry) return job def create_row_mappings(self, headers): """guess what the headers mean""" mappings = {} for (key, guesses) in self.row_mappings_guesses.items(): value = [h for h in headers if h.lower() in guesses] value = value[0] if len(value) else None if value: headers.remove(value) mappings[key] = value return mappings def create_item(self, job, index, data): """creates and saves an import item""" normalized = self.normalize_row(data, job.mappings) normalized["shelf"] = self.get_shelf(normalized) ImportItem(job=job, index=index, data=data, normalized_data=normalized).save() def get_shelf(self, normalized_row): """determine which shelf to use""" shelf_name = normalized_row["shelf"] shelf = [ s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs ] return shelf[0] if shelf else None def normalize_row(self, entry, mappings): # pylint: disable=no-self-use """use the dataclass to create the formatted row of data""" return {k: entry.get(v) for k, v in mappings.items()} def create_retry_job(self, user, original_job, items): """retry items that didn't import""" job = ImportJob.objects.create( user=user, include_reviews=original_job.include_reviews, privacy=original_job.privacy, # TODO: allow users to adjust mappings mappings=original_job.mappings, retry=True, ) for item in items: # this will re-normalize the raw data self.create_item(job, item.index, item.data) return job def start_import(self, job): # pylint: disable=no-self-use """initalizes a csv import job""" result = start_import_task.delay(job.id) job.task_id = result.id job.save() @app.task(queue="low_priority") def start_import_task(job_id): """trigger the child tasks for each row""" job = ImportJob.objects.get(id=job_id) # these are sub-tasks so that one big task doesn't use up all the memory in celery for item in job.items.values_list("id", flat=True).all(): import_item_task.delay(item) @app.task(queue="low_priority") def import_item_task(item_id): """resolve a row into a book""" item = models.ImportItem.objects.get(id=item_id) try: item.resolve() except Exception as err: # pylint: disable=broad-except item.fail_reason = _("Error loading book") item.save() raise err if item.book: # shelves book and handles reviews handle_imported_book(item) else: item.fail_reason = _("Could not find a match for book") item.save() def handle_imported_book(item): """process a csv and then post about it""" job = item.job user = job.user if isinstance(item.book, models.Work): item.book = item.book.default_edition if not item.book: return existing_shelf = models.ShelfBook.objects.filter(book=item.book, user=user).exists() # shelve the book if it hasn't been shelved already if item.shelf and not existing_shelf: desired_shelf = models.Shelf.objects.get(identifier=item.shelf, user=user) shelved_date = item.date_added or timezone.now() models.ShelfBook( book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date ).save(priority=LOW) for read in item.reads: # check for an existing readthrough with the same dates if models.ReadThrough.objects.filter( user=user, book=item.book, start_date=read.start_date, finish_date=read.finish_date, ).exists(): continue read.book = item.book read.user = user read.save() if job.include_reviews and (item.rating or item.review): # we don't know the publication date of the review, # but "now" is a bad guess published_date_guess = item.date_read or item.date_added if item.review: # pylint: disable=consider-using-f-string review_title = ( "Review of {!r} on {!r}".format( item.book.title, job.source, ) if item.review else "" ) review = models.Review( user=user, book=item.book, name=review_title, content=item.review, rating=item.rating, published_date=published_date_guess, privacy=job.privacy, ) else: # just a rating review = models.ReviewRating( user=user, book=item.book, rating=item.rating, published_date=published_date_guess, privacy=job.privacy, ) # only broadcast this review to other bookwyrm instances review.save(software="bookwyrm", priority=LOW)