2020-03-29 07:05:09 +00:00
|
|
|
''' handle reading a csv from goodreads '''
|
2020-03-25 12:29:21 +00:00
|
|
|
import csv
|
2020-11-13 17:47:35 +00:00
|
|
|
import logging
|
2020-03-25 12:29:21 +00:00
|
|
|
|
2021-01-13 21:36:01 +00:00
|
|
|
from bookwyrm import models
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm.models import ImportJob, ImportItem
|
2021-01-13 21:36:01 +00:00
|
|
|
from bookwyrm.tasks import app
|
2020-03-29 07:05:09 +00:00
|
|
|
|
2020-11-13 17:47:35 +00:00
|
|
|
# module-level logger, named after this module
logger = logging.getLogger(__name__)
|
2020-03-25 12:58:27 +00:00
|
|
|
|
2020-03-29 07:05:09 +00:00
|
|
|
|
2020-10-30 18:21:02 +00:00
|
|
|
def create_job(user, csv_file, include_reviews, privacy):
    ''' check over a csv and create a database entry for the job

    Every row is parsed and validated before anything is written, so a
    malformed file raises without leaving a partial job behind.

    user, include_reviews and privacy are stored on the new ImportJob.
    csv_file is handed directly to csv.DictReader.

    Returns the created ImportJob; one ImportItem is saved per csv row,
    with the row's position in the file as its index.

    Raises ValueError when a row lacks an ISBN13, Title or Author key.
    '''
    required_fields = ('ISBN13', 'Title', 'Author')
    entries = list(csv.DictReader(csv_file))
    # validate up front: creating the job first would strand an incomplete
    # ImportJob (and some of its items) in the database on a bad file
    for entry in entries:
        if not all(field in entry for field in required_fields):
            raise ValueError('Author, title, and isbn must be in data.')

    job = ImportJob.objects.create(
        user=user,
        include_reviews=include_reviews,
        privacy=privacy
    )
    for index, entry in enumerate(entries):
        ImportItem(job=job, index=index, data=entry).save()
    return job
|
2020-03-25 12:29:21 +00:00
|
|
|
|
2021-01-02 19:29:50 +00:00
|
|
|
|
2020-11-13 17:02:41 +00:00
|
|
|
def create_retry_job(user, original_job, items):
    ''' build a new job that re-attempts the given items

    The new ImportJob copies include_reviews and privacy from original_job
    and is flagged with retry=True. Each given item is copied into it with
    the same index and data. Returns the new job.
    '''
    retry_job = ImportJob.objects.create(
        user=user,
        include_reviews=original_job.include_reviews,
        privacy=original_job.privacy,
        retry=True
    )
    for failed in items:
        retry_item = ImportItem(
            job=retry_job, index=failed.index, data=failed.data)
        retry_item.save()
    return retry_job
|
2020-05-09 21:26:27 +00:00
|
|
|
|
2021-01-02 19:29:50 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
def start_import(job):
    ''' queue the import_data task for a job and record its task id '''
    # .delay() hands back a result object whose id is stored on the job
    queued = import_data.delay(job.id)
    job.task_id = queued.id
    job.save()
|
2020-04-20 16:10:19 +00:00
|
|
|
|
2020-05-09 21:26:27 +00:00
|
|
|
|
2020-04-20 16:10:19 +00:00
|
|
|
@app.task
def import_data(job_id):
    ''' task body: resolve each item of an import job and import matches

    Items whose lookup raises, or that resolve to no book, get a
    fail_reason and are saved. Items with a book are saved and passed to
    handle_imported_book. The job is always marked complete at the end.
    '''
    import_job = ImportJob.objects.get(id=job_id)
    try:
        for entry in import_job.items.all():
            try:
                entry.resolve()
            except Exception as err:# pylint: disable=broad-except
                # a failed lookup only fails this item; move on to the next
                logger.exception(err)
                entry.fail_reason = 'Error loading book'
                entry.save()
            else:
                if not entry.book:
                    entry.fail_reason = 'Could not find a match for book'
                    entry.save()
                    continue

                entry.save()
                # shelves book and handles reviews
                handle_imported_book(
                    import_job.user,
                    entry,
                    import_job.include_reviews,
                    import_job.privacy)
    finally:
        # runs even when the loop above raised
        import_job.complete = True
        import_job.save()
|
2021-01-13 21:36:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
def handle_imported_book(user, item, include_reviews, privacy):
    ''' shelve an imported book, save its readthroughs and create a review

    Does nothing when the item has no book. A review is only created when
    include_reviews is set and the item has a rating or review text.
    '''
    # a work is swapped for its default edition before anything is stored
    if isinstance(item.book, models.Work):
        item.book = item.book.default_edition
    if not item.book:
        return

    already_shelved = models.ShelfBook.objects.filter(
        book=item.book, user=user).exists()

    # only shelve when the item names a shelf and the user has not
    # shelved this book yet
    if item.shelf and not already_shelved:
        target_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        models.ShelfBook.objects.create(
            book=item.book, shelf=target_shelf, user=user)

    for readthrough in item.reads:
        # skip readthroughs the user already has with the same dates
        same_dates = models.ReadThrough.objects.filter(
            user=user, book=item.book,
            start_date=readthrough.start_date,
            finish_date=readthrough.finish_date
        )
        if not same_dates.exists():
            readthrough.book = item.book
            readthrough.user = user
            readthrough.save()

    if not include_reviews or not (item.rating or item.review):
        return

    # a rating without review text gets an empty title
    if item.review:
        review_title = 'Review of {!r} on Goodreads'.format(
            item.book.title,
        )
    else:
        review_title = ''

    # the review's real publication date is unknown; the read/added date
    # is used because "now" is a bad guess
    published_guess = item.date_read or item.date_added
    models.Review.objects.create(
        user=user,
        book=item.book,
        name=review_title,
        content=item.review,
        rating=item.rating,
        published_date=published_guess,
        privacy=privacy,
    )
|