moviewyrm/bookwyrm/models/import_job.py

164 lines
4.8 KiB
Python
Raw Normal View History

2020-05-09 21:26:27 +00:00
''' track progress of goodreads imports '''
import re
import dateutil.parser
from django.contrib.postgres.fields import JSONField
from django.db import models
from django.utils import timezone
from bookwyrm import books_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels
# Lookup table from a goodreads "Exclusive Shelf" value to the bookwyrm
# shelf name it corresponds to. Shelves that are not listed have no mapping.
GOODREADS_SHELVES = {
    'read': 'read',
    'currently-reading': 'reading',
    'to-read': 'to-read',
}
def unquote_string(text):
    ''' undo the ="value" wrapping found in some csv cells

    When `text` starts with `="..."`, the part between the quotes is
    returned; any other string is handed back unchanged.
    '''
    wrapped = re.match(r'="([^"]*)"', text)
    return wrapped.group(1) if wrapped else text
def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title: book title; any parenthesised part is removed.
    author: author name; dotted initials such as "J.R.R." are removed.
    Returns the cleaned title and author joined by a single space.
    '''
    # Strip brackets (usually series title from search term). A space is
    # substituted so that a bracket in the middle of a title does not glue
    # the surrounding words together; the strip drops the space again when
    # the bracket sat at either end.
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title).strip()
    # Open library doesn't like including author initials in search term.
    # The leading \b restricts the match to single-letter initials, so an
    # abbreviation like "Jr." keeps its final letter instead of becoming "J".
    author = re.sub(r'\b(\w\.)+\s*', '', author)
    return ' '.join([title, author])
2020-05-09 21:26:27 +00:00
class ImportJob(models.Model):
    ''' one request to import book data on behalf of a user '''
    # the requesting user; CASCADE removes the job when the user is deleted
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    # defaults to the moment the row is created
    created_date = models.DateTimeField(default=timezone.now)
    # optional; presumably the id of the background task running the import
    task_id = models.CharField(max_length=100, null=True)
    include_reviews = models.BooleanField(default=True)
    # one of PrivacyLevels.choices, 'public' unless set otherwise
    privacy = models.CharField(
        max_length=255, default='public', choices=PrivacyLevels.choices)
    retry = models.BooleanField(default=False)
class ImportItem(models.Model):
    ''' a single line of a csv being imported '''
    # owning job; reachable from the job as `job.items`
    job = models.ForeignKey(
        ImportJob,
        on_delete=models.CASCADE,
        related_name='items')
    # presumably the row's position within the imported csv
    index = models.IntegerField()
    # the csv row, keyed by column header ('Title', 'ISBN13', ...)
    data = JSONField()
    # the matched book, filled in by resolve(); nulled if the book is deleted
    book = models.ForeignKey(
        Book, on_delete=models.SET_NULL, null=True, blank=True)
    fail_reason = models.TextField(null=True)

    def resolve(self):
        ''' try various ways to lookup a book

        Stores the first hit on `self.book` (isbn first, then title and
        author), or None when neither lookup finds anything. Does not save.
        '''
        self.book = (
            self.get_book_from_isbn() or
            self.get_book_from_title_author()
        )

    def get_book_from_isbn(self):
        ''' search by isbn; returns a book or None '''
        isbn = self.isbn
        if not isbn:
            # an empty ISBN13 cell (e.g. `=""`) gives nothing to search for
            return None
        search_result = books_manager.first_search_result(
            isbn, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return books_manager.get_or_create_book(search_result.key)
        return None

    def get_book_from_title_author(self):
        ''' search by title and author; returns a book or None '''
        search_term = construct_search_term(self.title, self.author)
        search_result = books_manager.first_search_result(
            search_term, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return books_manager.get_or_create_book(search_result.key)
        return None

    def _parse_date(self, field):
        ''' read a csv date column as a timezone-aware datetime, or None '''
        raw_value = self.data[field]
        if not raw_value:
            return None
        parsed = dateutil.parser.parse(raw_value)
        if timezone.is_aware(parsed):
            # make_aware rejects values that already carry a timezone,
            # so keep the one that came with the data
            return parsed
        return timezone.make_aware(parsed)

    @property
    def title(self):
        ''' get the book title '''
        return self.data['Title']

    @property
    def author(self):
        ''' get the book author '''
        return self.data['Author']

    @property
    def isbn(self):
        ''' pulls out the isbn13 field from the csv line data '''
        return unquote_string(self.data['ISBN13'])

    @property
    def shelf(self):
        ''' the goodreads shelf field, translated via GOODREADS_SHELVES

        None when the field is empty or the shelf has no mapping.
        '''
        if self.data['Exclusive Shelf']:
            return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
        return None

    @property
    def review(self):
        ''' a user-written review, to be imported with the book data '''
        return self.data['My Review']

    @property
    def rating(self):
        ''' x/5 star rating for a book '''
        return int(self.data['My Rating'])

    @property
    def date_added(self):
        ''' when the book was added to this dataset '''
        return self._parse_date('Date Added')

    @property
    def date_read(self):
        ''' the date a book was completed '''
        return self._parse_date('Date Read')

    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line

        Returns a list holding at most one unsaved ReadThrough: a finished
        one when a read date exists, an open one for a book on the
        'reading' shelf with an added date, otherwise an empty list.
        '''
        finished = self.date_read
        if finished:
            started = self.date_added
            if started and started > finished:
                # the book was shelved after it was read, so the added date
                # cannot serve as the start of this read through
                started = None
            return [ReadThrough(
                start_date=started,
                finish_date=finished,
            )]
        if self.shelf == 'reading':
            started = self.date_added
            if started:
                return [ReadThrough(start_date=started)]
        return []

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.title)

    def __str__(self):
        return "{} by {}".format(self.title, self.author)