2020-05-09 21:26:27 +00:00
|
|
|
''' track progress of goodreads imports '''
|
2020-04-21 14:09:21 +00:00
|
|
|
import re
|
|
|
|
import dateutil.parser
|
|
|
|
|
|
|
|
from django.db import models
|
|
|
|
from django.utils import timezone
|
|
|
|
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm import books_manager
|
2020-09-30 17:27:40 +00:00
|
|
|
from bookwyrm.connectors import ConnectorException
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm.models import ReadThrough, User, Book
|
|
|
|
from bookwyrm.utils.fields import JSONField
|
2020-10-30 18:21:02 +00:00
|
|
|
from .base_model import PrivacyLevels
|
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
|
2020-09-21 15:10:37 +00:00
|
|
|
# Mapping goodreads -> bookwyrm shelf titles.
# Keys are the values found in the csv's 'Exclusive Shelf' column (see
# ImportItem.shelf, which looks them up with .get(), so any shelf name that
# is not listed here resolves to None).
GOODREADS_SHELVES = {
    'read': 'read',
    'currently-reading': 'reading',
    'to-read': 'to-read',
}
|
|
|
|
|
|
|
|
def unquote_string(text):
    ''' strip the ="..." wrapper that the csv export puts around a value

    Returns the text between the double quotes when `text` starts with
    ="...", otherwise returns `text` unchanged.
    '''
    wrapped = re.match(r'="([^"]*)"', text)
    # group(1) is whatever sits between the quotes (possibly empty)
    return wrapped.group(1) if wrapped else text
|
|
|
|
|
|
|
|
|
|
|
|
def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title:  book title, possibly with a bracketed suffix such as a series
            name, e.g. 'Dune (Dune #1)'
    author: author name, possibly containing initials, e.g. 'J.R.R. Tolkien'

    Returns the cleaned title and author joined by a single space; an
    empty part is left out so the result has no stray whitespace.
    '''
    # Strip brackets (usually series title from search term). Replace with a
    # space rather than nothing so words on either side are not glued together.
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title).strip()
    # Open library doesn't like including author initials in search term.
    # The \b anchor restricts the match to stand-alone initials; without it
    # the pattern also ate the tail of abbreviations ('Dr. Seuss' -> 'DSeuss').
    author = re.sub(r'\b(\w\.)+\s*', '', author).strip()

    return ' '.join(part for part in (title, author) if part)
|
|
|
|
|
2020-05-09 21:26:27 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
class ImportJob(models.Model):
    ''' entry for a specific request for book data import '''
    # the user the import belongs to; deleting the user deletes their jobs
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    # defaults to the time the row is created
    created_date = models.DateTimeField(default=timezone.now)
    # NOTE(review): presumably the id of the background task that processes
    # this job -- nothing in this file sets it, confirm against the caller
    task_id = models.CharField(max_length=100, null=True)
    # NOTE(review): looks like a switch for importing reviews along with the
    # books -- it is not read anywhere in this file, confirm against the caller
    include_reviews = models.BooleanField(default=True)
    # one of PrivacyLevels.choices; 'public' unless set otherwise
    privacy = models.CharField(
        max_length=255,
        default='public',
        choices=PrivacyLevels.choices
    )
|
2020-10-29 21:29:31 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
|
|
|
|
class ImportItem(models.Model):
    ''' a single line of a csv being imported '''
    # the import request this line belongs to (reverse accessor: job.items)
    job = models.ForeignKey(
        ImportJob,
        on_delete=models.CASCADE,
        related_name='items')
    index = models.IntegerField()
    # the csv row, accessed below by its column headers ('Title', 'Author',
    # 'ISBN13', 'Exclusive Shelf', 'My Review', 'My Rating', 'Date Added',
    # 'Date Read')
    data = JSONField()
    # the matched book, filled in by resolve(); stays null when nothing matched
    book = models.ForeignKey(
        Book, on_delete=models.SET_NULL, null=True, blank=True)
    fail_reason = models.TextField(null=True)

    def resolve(self):
        ''' try various ways to lookup a book '''
        # isbn is tried first; title/author is the fallback. Only sets
        # self.book -- saving the item is left to the caller.
        self.book = (
            self.get_book_from_isbn() or
            self.get_book_from_title_author()
        )

    @staticmethod
    def _find_book(query):
        ''' run a high-confidence search and load the first result

        Returns the book for the first search result, or None when the search
        finds nothing or the connector fails while loading the book.
        '''
        search_result = books_manager.first_search_result(
            query, min_confidence=0.995
        )
        if search_result:
            try:
                # don't crash the import when the connector fails
                return books_manager.get_or_create_book(search_result.key)
            except ConnectorException:
                pass
        return None

    def get_book_from_isbn(self):
        ''' search by isbn '''
        isbn = self.isbn
        if not isbn:
            # rows without an isbn export it as an empty value; searching for
            # an empty string cannot identify a book, so skip the lookup
            return None
        return self._find_book(isbn)

    def get_book_from_title_author(self):
        ''' search by title and author '''
        search_term = construct_search_term(
            self.data['Title'],
            self.data['Author']
        )
        return self._find_book(search_term)

    @staticmethod
    def _parse_date(value):
        ''' turn a csv date string into a datetime, or None when blank

        The parsed value is made timezone-aware (current timezone) when the
        project runs with USE_TZ, so it can be stored in a DateTimeField
        without a naive-datetime warning.
        '''
        if not value:
            return None
        # local import: keeps this block self-contained without touching the
        # module's import section
        from django.conf import settings
        parsed = dateutil.parser.parse(value)
        if settings.USE_TZ and timezone.is_naive(parsed):
            parsed = timezone.make_aware(parsed)
        return parsed

    @property
    def isbn(self):
        ''' pulls out the isbn13 field from the csv line data '''
        return unquote_string(self.data['ISBN13'])

    @property
    def shelf(self):
        ''' the goodreads shelf field '''
        # translated to the bookwyrm shelf name; None when the row has no
        # exclusive shelf or the shelf is not in GOODREADS_SHELVES
        if self.data['Exclusive Shelf']:
            return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
        return None

    @property
    def review(self):
        ''' a user-written review, to be imported with the book data '''
        return self.data['My Review']

    @property
    def rating(self):
        ''' x/5 star rating for a book '''
        return int(self.data['My Rating'])

    @property
    def date_added(self):
        ''' when the book was added to this dataset '''
        return self._parse_date(self.data['Date Added'])

    @property
    def date_read(self):
        ''' the date a book was completed '''
        return self._parse_date(self.data['Date Read'])

    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line '''
        # the ReadThrough objects are built in memory only, not saved here
        # evaluate each date property once; every access re-parses the string
        added = self.date_added
        finished = self.date_read
        if self.shelf == 'reading' and added and not finished:
            # still in progress: a start date but no finish date
            return [ReadThrough(start_date=added)]
        if finished:
            return [ReadThrough(
                start_date=added,
                finish_date=finished,
            )]
        return []

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.data['Title'])

    def __str__(self):
        return "{} by {}".format(self.data['Title'], self.data['Author'])
|