bookwyrm/bookwyrm/models/import_job.py
2021-09-18 11:32:00 -07:00

206 lines
6.4 KiB
Python

""" track progress of goodreads imports """
import re
import dateutil.parser
from django.apps import apps
from django.db import models
from django.utils import timezone
from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels
# Mapping goodreads -> bookwyrm shelf titles.
GOODREADS_SHELVES = {
"read": "read",
"currently-reading": "reading",
"to-read": "to-read",
}
def unquote_string(text):
"""resolve csv quote weirdness"""
match = re.match(r'="([^"]*)"', text)
if match:
return match.group(1)
return text
def construct_search_term(title, author):
"""formulate a query for the data connector"""
# Strip brackets (usually series title from search term)
title = re.sub(r"\s*\([^)]*\)\s*", "", title)
# Open library doesn't like including author initials in search term.
author = re.sub(r"(\w\.)+\s*", "", author)
return " ".join([title, author])
class ImportJob(models.Model):
"""entry for a specific request for book data import"""
user = models.ForeignKey(User, on_delete=models.CASCADE)
created_date = models.DateTimeField(default=timezone.now)
task_id = models.CharField(max_length=100, null=True)
include_reviews = models.BooleanField(default=True)
complete = models.BooleanField(default=False)
privacy = models.CharField(
max_length=255, default="public", choices=PrivacyLevels.choices
)
retry = models.BooleanField(default=False)
def save(self, *args, **kwargs):
"""save and notify"""
super().save(*args, **kwargs)
if self.complete:
notification_model = apps.get_model(
"bookwyrm.Notification", require_ready=True
)
notification_model.objects.create(
user=self.user,
notification_type="IMPORT",
related_import=self,
)
class ImportItem(models.Model):
"""a single line of a csv being imported"""
job = models.ForeignKey(ImportJob, on_delete=models.CASCADE, related_name="items")
index = models.IntegerField()
data = models.JSONField()
book = models.ForeignKey(Book, on_delete=models.SET_NULL, null=True, blank=True)
book_guess = models.ForeignKey(
Book,
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="book_guess",
)
fail_reason = models.TextField(null=True)
def resolve(self):
"""try various ways to lookup a book"""
if self.isbn:
self.book = self.get_book_from_isbn()
else:
# don't fall back on title/author search if isbn is present.
# you're too likely to mismatch
book, confidence = self.get_book_from_title_author()
if confidence > 0.999:
self.book = book
else:
self.book_guess = book
def get_book_from_isbn(self):
"""search by isbn"""
search_result = connector_manager.first_search_result(
self.isbn, min_confidence=0.999
)
if search_result:
# raises ConnectorException
return search_result.connector.get_or_create_book(search_result.key)
return None
def get_book_from_title_author(self):
"""search by title and author"""
search_term = construct_search_term(self.title, self.author)
search_result = connector_manager.first_search_result(
search_term, min_confidence=0.1
)
if search_result:
# raises ConnectorException
return (
search_result.connector.get_or_create_book(search_result.key),
search_result.confidence,
)
return None, 0
@property
def title(self):
"""get the book title"""
return self.data["Title"]
@property
def author(self):
"""get the book title"""
return self.data["Author"]
@property
def isbn(self):
"""pulls out the isbn13 field from the csv line data"""
return unquote_string(self.data["ISBN13"])
@property
def shelf(self):
"""the goodreads shelf field"""
if self.data["Exclusive Shelf"]:
return GOODREADS_SHELVES.get(self.data["Exclusive Shelf"])
return None
@property
def review(self):
"""a user-written review, to be imported with the book data"""
return self.data["My Review"]
@property
def rating(self):
"""x/5 star rating for a book"""
if self.data.get("My Rating", None):
return int(self.data["My Rating"])
return None
@property
def date_added(self):
"""when the book was added to this dataset"""
if self.data["Date Added"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Added"]))
return None
@property
def date_started(self):
"""when the book was started"""
if "Date Started" in self.data and self.data["Date Started"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Started"]))
return None
@property
def date_read(self):
"""the date a book was completed"""
if self.data["Date Read"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Read"]))
return None
@property
def reads(self):
"""formats a read through dataset for the book in this line"""
start_date = self.date_started
# Goodreads special case (no 'date started' field)
if (
(self.shelf == "reading" or (self.shelf == "read" and self.date_read))
and self.date_added
and not start_date
):
start_date = self.date_added
if start_date and start_date is not None and not self.date_read:
return [ReadThrough(start_date=start_date)]
if self.date_read:
start_date = start_date if start_date < self.date_read else None
return [
ReadThrough(
start_date=start_date,
finish_date=self.date_read,
)
]
return []
def __repr__(self):
# pylint: disable=consider-using-f-string
return "<{!r}Item {!r}>".format(self.data["import_source"], self.data["Title"])
def __str__(self):
# pylint: disable=consider-using-f-string
return "{} by {}".format(self.data["Title"], self.data["Author"])