moviewyrm/bookwyrm/importers/importer.py

154 lines
5 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" handle reading a csv from an external service, defaults are from GoodReads """
2021-02-20 16:02:36 +00:00
import csv
import logging
from django.utils import timezone
2021-02-20 16:02:36 +00:00
from bookwyrm import models
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.tasks import app
# module-level logger, named after this module; import_data uses it to record
# exceptions raised while resolving individual import items
logger = logging.getLogger(__name__)
2021-03-08 16:49:10 +00:00
2021-02-20 16:02:36 +00:00
class Importer:
    """Generic class for csv data import from an outside service"""

    # label stored on every imported row under the "import_source" key and
    # passed to the import task by start_import
    service = "Unknown"
    # column separator handed to csv.DictReader
    delimiter = ","
    # NOTE(review): not read inside this class; presumably used by callers
    # when decoding the uploaded file -- confirm
    encoding = "UTF-8"
    # column names that every parsed row must contain
    mandatory_fields = ["Title", "Author"]

    def create_job(self, user, csv_file, include_reviews, privacy):
        """check over a csv and creates a database entry for the job

        The whole file is parsed and validated *before* anything is written
        to the database, so a malformed upload no longer leaves behind an
        ImportJob with no (or only some) items.

        :param user: owner of the new ImportJob
        :param csv_file: iterable of text lines, as accepted by csv.DictReader
        :param include_reviews: stored on the job as-is
        :param privacy: stored on the job as-is
        :return: the newly created ImportJob
        :raises ValueError: if a row lacks one of ``mandatory_fields``
        """
        entries = list(csv.DictReader(csv_file, delimiter=self.delimiter))

        # validate up front: raising after ImportJob.objects.create() would
        # orphan the job row
        for entry in entries:
            if not all(field in entry for field in self.mandatory_fields):
                raise ValueError("Author and title must be in data.")

        job = ImportJob.objects.create(
            user=user, include_reviews=include_reviews, privacy=privacy
        )
        for index, entry in enumerate(entries):
            self.save_item(job, index, self.parse_fields(entry))
        return job

    def save_item(self, job, index, data):  # pylint: disable=no-self-use
        """creates and saves an import item"""
        ImportItem.objects.create(job=job, index=index, data=data)

    def parse_fields(self, entry):
        """updates csv data with additional info

        Mutates ``entry`` in place (adds the "import_source" key) and returns
        the same mapping.
        """
        entry.update({"import_source": self.service})
        return entry

    def create_retry_job(self, user, original_job, items):
        """retry items that didn't import

        Creates a new job flagged ``retry=True`` that copies the review and
        privacy settings of ``original_job``, then re-saves each of ``items``
        (keeping its original index and data) under the new job.

        :return: the newly created ImportJob
        """
        job = ImportJob.objects.create(
            user=user,
            include_reviews=original_job.include_reviews,
            privacy=original_job.privacy,
            retry=True,
        )
        for item in items:
            self.save_item(job, item.index, item.data)
        return job

    def start_import(self, job):
        """initializes a csv import job

        Queues the import_data task for ``job`` and records the id of the
        queued task on the job.
        """
        result = import_data.delay(self.service, job.id)
        job.task_id = result.id
        job.save()
2021-09-08 00:04:10 +00:00
@app.task(queue="low_priority")
def import_data(source, job_id):
    """does the actual lookup work in a celery task

    Walks every item of the ImportJob with id ``job_id``: resolves it, and
    either hands it to handle_imported_book or records why it failed. The job
    is marked complete when the loop ends, including when it ends because of
    an exception.
    """
    job = ImportJob.objects.get(id=job_id)
    try:
        for import_item in job.items.all():
            try:
                import_item.resolve()
            except Exception as error:  # pylint: disable=broad-except
                # a failed lookup only fails this item, not the whole job
                logger.exception(error)
                import_item.fail_reason = "Error loading book"
                import_item.save()
                continue

            if not import_item.book:
                import_item.fail_reason = "Could not find a match for book"
                import_item.save()
                continue

            import_item.save()
            # shelves book and handles reviews
            handle_imported_book(
                source, job.user, import_item, job.include_reviews, job.privacy
            )
    finally:
        # runs whether or not the loop above raised
        job.complete = True
        job.save()
def handle_imported_book(source, user, item, include_reviews, privacy):
    """process a csv and then post about it

    For one resolved import item: shelves the book for ``user`` (unless the
    user already has it on a shelf), saves any readthroughs that do not
    already exist, and, when ``include_reviews`` is set, creates either a
    Review (if the item has review text) or a ReviewRating (rating only).

    :param source: name of the service the data came from; used in the
        generated review title
    :param user: the user the book is imported for
    :param item: the import item; its ``book`` attribute may be replaced by
        the work's default edition
    :param include_reviews: whether reviews/ratings should be created
    :param privacy: privacy level applied to created reviews/ratings
    """
    # a Work is swapped for its default edition before anything is created
    if isinstance(item.book, models.Work):
        item.book = item.book.default_edition
    if not item.book:
        return

    existing_shelf = models.ShelfBook.objects.filter(book=item.book, user=user).exists()

    # shelve the book if it hasn't been shelved already
    if item.shelf and not existing_shelf:
        desired_shelf = models.Shelf.objects.get(identifier=item.shelf, user=user)
        shelved_date = item.date_added or timezone.now()
        models.ShelfBook.objects.create(
            book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date
        )

    for read in item.reads:
        # check for an existing readthrough with the same dates
        if models.ReadThrough.objects.filter(
            user=user,
            book=item.book,
            start_date=read.start_date,
            finish_date=read.finish_date,
        ).exists():
            continue
        read.book = item.book
        read.user = user
        read.save()

    if not include_reviews or not (item.rating or item.review):
        return

    # we don't know the publication date of the review,
    # but "now" is a bad guess
    published_date_guess = item.date_read or item.date_added

    if item.review:
        # the title is only built when there is review text, so no
        # empty-title fallback is needed here
        review_title = "Review of {!r} on {!r}".format(
            item.book.title,
            source,
        )
        models.Review.objects.create(
            user=user,
            book=item.book,
            name=review_title,
            content=item.review,
            rating=item.rating,
            published_date=published_date_guess,
            privacy=privacy,
        )
    else:
        # just a rating
        models.ReviewRating.objects.create(
            user=user,
            book=item.book,
            rating=item.rating,
            published_date=published_date_guess,
            privacy=privacy,
        )