moviewyrm/bookwyrm/models/import_job.py
2021-01-02 08:14:28 -08:00

163 lines
4.8 KiB
Python

''' track progress of goodreads imports '''
import re
import dateutil.parser
from django.contrib.postgres.fields import JSONField
from django.db import models
from django.utils import timezone
from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels
# Mapping goodreads -> bookwyrm shelf titles.
GOODREADS_SHELVES = {
'read': 'read',
'currently-reading': 'reading',
'to-read': 'to-read',
}
def unquote_string(text):
''' resolve csv quote weirdness '''
match = re.match(r'="([^"]*)"', text)
if match:
return match.group(1)
return text
def construct_search_term(title, author):
''' formulate a query for the data connector '''
# Strip brackets (usually series title from search term)
title = re.sub(r'\s*\([^)]*\)\s*', '', title)
# Open library doesn't like including author initials in search term.
author = re.sub(r'(\w\.)+\s*', '', author)
return ' '.join([title, author])
class ImportJob(models.Model):
''' entry for a specific request for book data import '''
user = models.ForeignKey(User, on_delete=models.CASCADE)
created_date = models.DateTimeField(default=timezone.now)
task_id = models.CharField(max_length=100, null=True)
include_reviews = models.BooleanField(default=True)
privacy = models.CharField(
max_length=255,
default='public',
choices=PrivacyLevels.choices
)
retry = models.BooleanField(default=False)
class ImportItem(models.Model):
''' a single line of a csv being imported '''
job = models.ForeignKey(
ImportJob,
on_delete=models.CASCADE,
related_name='items')
index = models.IntegerField()
data = JSONField()
book = models.ForeignKey(
Book, on_delete=models.SET_NULL, null=True, blank=True)
fail_reason = models.TextField(null=True)
def resolve(self):
''' try various ways to lookup a book '''
self.book = (
self.get_book_from_isbn() or
self.get_book_from_title_author()
)
def get_book_from_isbn(self):
''' search by isbn '''
search_result = connector_manager.first_search_result(
self.isbn, min_confidence=0.999
)
if search_result:
# raises ConnectorException
return search_result.connector.get_or_create_book(search_result.key)
return None
def get_book_from_title_author(self):
''' search by title and author '''
search_term = construct_search_term(
self.data['Title'],
self.data['Author']
)
search_result = connector_manager.first_search_result(
search_term, min_confidence=0.999
)
if search_result:
# raises ConnectorException
return search_result.connector.get_or_create_book(search_result.key)
return None
@property
def title(self):
''' get the book title '''
return self.data['Title']
@property
def author(self):
''' get the book title '''
return self.data['Author']
@property
def isbn(self):
''' pulls out the isbn13 field from the csv line data '''
return unquote_string(self.data['ISBN13'])
@property
def shelf(self):
''' the goodreads shelf field '''
if self.data['Exclusive Shelf']:
return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
return None
@property
def review(self):
''' a user-written review, to be imported with the book data '''
return self.data['My Review']
@property
def rating(self):
''' x/5 star rating for a book '''
return int(self.data['My Rating'])
@property
def date_added(self):
''' when the book was added to this dataset '''
if self.data['Date Added']:
return timezone.make_aware(
dateutil.parser.parse(self.data['Date Added']))
return None
@property
def date_read(self):
''' the date a book was completed '''
if self.data['Date Read']:
return timezone.make_aware(
dateutil.parser.parse(self.data['Date Read']))
return None
@property
def reads(self):
''' formats a read through dataset for the book in this line '''
if (self.shelf == 'reading'
and self.date_added and not self.date_read):
return [ReadThrough(start_date=self.date_added)]
if self.date_read:
return [ReadThrough(
start_date=self.date_added,
finish_date=self.date_read,
)]
return []
def __repr__(self):
return "<GoodreadsItem {!r}>".format(self.data['Title'])
def __str__(self):
return "{} by {}".format(self.data['Title'], self.data['Author'])