2021-03-08 16:49:10 +00:00
|
|
|
""" track progress of goodreads imports """
|
2020-04-21 14:09:21 +00:00
|
|
|
import re
|
|
|
|
import dateutil.parser
|
|
|
|
|
2021-02-10 22:18:55 +00:00
|
|
|
from django.apps import apps
|
2020-04-21 14:09:21 +00:00
|
|
|
from django.db import models
|
|
|
|
from django.utils import timezone
|
|
|
|
|
2021-01-02 16:14:28 +00:00
|
|
|
from bookwyrm.connectors import connector_manager
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm.models import ReadThrough, User, Book
|
2020-12-13 21:03:17 +00:00
|
|
|
from .fields import PrivacyLevels
|
2020-10-30 18:21:02 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
|
2020-09-21 15:10:37 +00:00
|
|
|
# Goodreads "Exclusive Shelf" value -> bookwyrm shelf identifier.
# Shelf names missing from this mapping have no bookwyrm equivalent.
GOODREADS_SHELVES = {
    "read": "read",  # finished books
    "currently-reading": "reading",  # books in progress
    "to-read": "to-read",  # books queued up for later
}
|
|
|
|
|
2021-03-08 16:49:10 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
def unquote_string(text):
    """resolve csv quote weirdness

    A value that starts with the form ``="..."`` is unwrapped to the text
    between the quotes; any other string is returned as-is.

    Values that are not strings (``None``, numbers, ...) are returned
    unchanged instead of raising ``TypeError`` from the regex engine.
    """
    if not isinstance(text, str):
        return text
    match = re.match(r'="([^"]*)"', text)
    if match:
        return match.group(1)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
def construct_search_term(title, author):
    """formulate a query for the data connector

    Bracketed text is removed from the title and dotted initials are removed
    from the author, then the two are joined with a single space. A missing
    (``None`` or empty) title or author contributes nothing to the result.
    """
    # Strip brackets (usually series title from search term). A bracket is
    # replaced by a space so the words on either side of it stay separate.
    title = re.sub(r"\s*\([^)]*\)\s*", " ", title or "")
    # Open library doesn't like including author initials in search term.
    # The \b anchor restricts this to standalone initials ("J.R.R."), so the
    # tail of an abbreviation such as "Jr." is left intact.
    author = re.sub(r"\b(\w\.)+\s*", "", author or "")

    # split/join drops empty parts and collapses runs of whitespace
    return " ".join("{} {}".format(title, author).split())
|
2020-04-21 14:09:21 +00:00
|
|
|
|
2020-05-09 21:26:27 +00:00
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
class ImportJob(models.Model):
    """a user's request to import book data, tracked until it completes"""

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    created_date = models.DateTimeField(default=timezone.now)
    task_id = models.CharField(max_length=100, null=True)
    include_reviews = models.BooleanField(default=True)
    complete = models.BooleanField(default=False)
    privacy = models.CharField(
        max_length=255, default="public", choices=PrivacyLevels.choices
    )
    retry = models.BooleanField(default=False)

    def save(self, *args, **kwargs):
        """store the job, then create an IMPORT notification if it is complete"""
        super().save(*args, **kwargs)
        if not self.complete:
            return

        # The notification model is looked up through the app registry by
        # name rather than imported at the top of this module.
        # NOTE(review): this branch runs on every save while `complete` is
        # true, so each such save creates another notification.
        notification_model = apps.get_model(
            "bookwyrm.Notification", require_ready=True
        )
        notification_fields = {
            "user": self.user,
            "notification_type": "IMPORT",
            "related_import": self,
        }
        notification_model.objects.create(**notification_fields)
|
|
|
|
|
|
|
|
|
2020-04-21 14:09:21 +00:00
|
|
|
class ImportItem(models.Model):
    """a single line of a csv being imported"""

    job = models.ForeignKey(ImportJob, on_delete=models.CASCADE, related_name="items")
    index = models.IntegerField()
    data = models.JSONField()
    book = models.ForeignKey(Book, on_delete=models.SET_NULL, null=True, blank=True)
    fail_reason = models.TextField(null=True)

    def resolve(self):
        """try various ways to lookup a book"""
        # The title/author search is only used when the line has no isbn.
        # When an isbn is present we don't fall back on title/author:
        # you're too likely to mismatch.
        if self.isbn:
            self.book = self.get_book_from_isbn()
        else:
            self.book = self.get_book_from_title_author()

    def _first_match(self, query):
        """return the book for the first search result for `query`, or None"""
        search_result = connector_manager.first_search_result(
            query, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return search_result.connector.get_or_create_book(search_result.key)
        return None

    def _parse_date(self, field):
        """read the csv column `field` as a timezone-aware datetime

        Returns None when the column is missing or empty. Unparseable values
        still raise whatever dateutil raises for them.
        """
        raw_value = self.data.get(field)
        if not raw_value:
            return None
        parsed = dateutil.parser.parse(raw_value)
        # make_aware rejects a datetime that already carries a timezone,
        # so keep the timezone the import supplied in that case
        if timezone.is_aware(parsed):
            return parsed
        return timezone.make_aware(parsed)

    def get_book_from_isbn(self):
        """search by isbn"""
        return self._first_match(self.isbn)

    def get_book_from_title_author(self):
        """search by title and author"""
        return self._first_match(construct_search_term(self.title, self.author))

    @property
    def title(self):
        """get the book title"""
        return self.data["Title"]

    @property
    def author(self):
        """get the book author"""
        return self.data["Author"]

    @property
    def isbn(self):
        """pulls out the isbn13 field from the csv line data"""
        return unquote_string(self.data["ISBN13"])

    @property
    def shelf(self):
        """the goodreads shelf field, translated to a bookwyrm shelf

        Returns None when the field is empty or the shelf is not one of the
        shelves listed in GOODREADS_SHELVES.
        """
        if self.data["Exclusive Shelf"]:
            return GOODREADS_SHELVES.get(self.data["Exclusive Shelf"])
        return None

    @property
    def review(self):
        """a user-written review, to be imported with the book data"""
        return self.data["My Review"]

    @property
    def rating(self):
        """x/5 star rating for a book, or None when no rating is given"""
        if self.data.get("My Rating", None):
            return int(self.data["My Rating"])
        return None

    @property
    def date_added(self):
        """when the book was added to this dataset"""
        return self._parse_date("Date Added")

    @property
    def date_started(self):
        """when the book was started"""
        return self._parse_date("Date Started")

    @property
    def date_read(self):
        """the date a book was completed"""
        return self._parse_date("Date Read")

    @property
    def reads(self):
        """formats a read through dataset for the book in this line

        Returns a list holding at most one unsaved ReadThrough: an open one
        (start date only) for a book in progress, a finished one when a read
        date is present, or an empty list when no dates apply.
        """
        # parse once; each date property re-parses the raw value on access
        finish_date = self.date_read
        start_date = self.date_started

        # Goodreads special case (no 'date started' field): use the date the
        # book was added as the start date for books being read or finished
        if not start_date:
            shelf = self.shelf
            if shelf == "reading" or (shelf == "read" and finish_date):
                start_date = self.date_added

        if not finish_date:
            if start_date:
                return [ReadThrough(start_date=start_date)]
            return []

        # Only keep a start date that is strictly before the finish date.
        # start_date may be None here, which must not be compared.
        if start_date and start_date >= finish_date:
            start_date = None
        return [
            ReadThrough(
                start_date=start_date,
                finish_date=finish_date,
            )
        ]

    def __repr__(self):
        return "<{!r}Item {!r}>".format(self.data["import_source"], self.data["Title"])

    def __str__(self):
        return "{} by {}".format(self.data["Title"], self.data["Author"])