moviewyrm/bookwyrm/models/import_job.py

''' track progress of goodreads imports '''
import re
import dateutil.parser

from django.contrib.postgres.fields import JSONField
from django.db import models
from django.utils import timezone

from bookwyrm import books_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels


# Mapping goodreads -> bookwyrm shelf titles.
# Keys are the values looked up from a row's 'Exclusive Shelf' column (see
# ImportItem.shelf); that lookup uses .get, so a goodreads shelf that isn't
# listed here yields None instead of raising.
GOODREADS_SHELVES = {
    'read': 'read',
    'currently-reading': 'reading',
    'to-read': 'to-read',
}

def unquote_string(text):
    ''' unwrap a csv value written as ="..."

    When text starts with ="..." the part between the quotes is returned;
    any other text is handed back unchanged.
    '''
    wrapped = re.match(r'="([^"]*)"', text)
    return wrapped.group(1) if wrapped else text


def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title: book title, possibly with bracketed sections (usually a series)
    author: author name, possibly with initials

    Returns the cleaned title and cleaned author separated by one space.
    Whitespace is normalised and an empty part is left out, so the result
    never has leading, trailing or doubled spaces.
    '''
    # Strip brackets (usually series title from search term). A space is
    # substituted, not nothing, so a bracketed section in the middle of a
    # title doesn't glue the words either side of it together; splitting
    # and re-joining then collapses whatever whitespace is left over.
    title = ' '.join(re.sub(r'\([^)]*\)', ' ', title).split())
    # Open library doesn't like including author initials in search term.
    # The \b makes sure only stand-alone initials are removed, and not the
    # last letter of an abbreviation such as "Jr." or "Dr.".
    author = ' '.join(re.sub(r'\b(\w\.)+\s*', '', author).split())

    return ' '.join(part for part in (title, author) if part)


class ImportJob(models.Model):
    ''' entry for a specific request for book data import

    The individual csv lines belonging to a job are ImportItem rows,
    reachable through the ``items`` reverse relation.
    '''
    # the user the job belongs to; deleting the user deletes their jobs
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    # filled in with the current time when no value is supplied
    created_date = models.DateTimeField(default=timezone.now)
    # NOTE(review): presumably the id of the background task that runs the
    # import - nothing in this module sets it, confirm against the caller
    task_id = models.CharField(max_length=100, null=True)
    # NOTE(review): presumably whether the reviews in the csv should be
    # imported along with the books - it is read outside this module
    include_reviews = models.BooleanField(default=True)
    # one of PrivacyLevels.choices, 'public' unless something else is given
    privacy = models.CharField(
        max_length=255,
        default='public',
        choices=PrivacyLevels.choices
    )
    # NOTE(review): presumably flags a job that re-runs an earlier import;
    # confirm against the code that creates jobs
    retry = models.BooleanField(default=False)


class ImportItem(models.Model):
    ''' a single line of a csv being imported

    The row is kept as-is in ``data``, keyed by column title, and the
    properties below expose the columns in a usable form. ``book`` is
    only assigned by ``resolve``.
    '''
    job = models.ForeignKey(
        ImportJob,
        on_delete=models.CASCADE,
        related_name='items')
    # NOTE(review): presumably the position of this line in the csv;
    # confirm against the code that creates items
    index = models.IntegerField()
    # the csv line, looked up by column title ('Title', 'ISBN13', ...)
    data = JSONField()
    # the book this line was matched to; becomes null if that book is deleted
    book = models.ForeignKey(
        Book, on_delete=models.SET_NULL, null=True, blank=True)
    # NOTE(review): never written in this module - presumably filled in by
    # the importer when a line can't be imported
    fail_reason = models.TextField(null=True)

    def resolve(self):
        ''' try various ways to lookup a book

        Assigns the isbn match to ``self.book`` if there is one, otherwise
        the title/author match, otherwise None. Nothing is saved here.
        '''
        self.book = (
            self.get_book_from_isbn() or
            self.get_book_from_title_author()
        )

    def get_book_from_isbn(self):
        ''' search by isbn

        Returns the matching book, or None when the line has no isbn or
        the search finds nothing with high enough confidence.
        '''
        isbn = self.isbn
        if not isbn:
            # nothing to search for - an empty query can't identify a book
            return None
        search_result = books_manager.first_search_result(
            isbn, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return books_manager.get_or_create_book(search_result.key)
        return None

    def get_book_from_title_author(self):
        ''' search by title and author

        Returns the matching book, or None when the search finds nothing
        with high enough confidence.
        '''
        search_term = construct_search_term(self.title, self.author)
        search_result = books_manager.first_search_result(
            search_term, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return books_manager.get_or_create_book(search_result.key)
        return None

    def _parse_date(self, column):
        ''' read a date column as an aware datetime, None if it is blank '''
        raw_date = self.data[column]
        if not raw_date:
            return None
        parsed = dateutil.parser.parse(raw_date)
        if timezone.is_aware(parsed):
            # make_aware raises ValueError when handed a datetime that
            # already has a timezone, so keep the one from the data
            return parsed
        return timezone.make_aware(parsed)

    @property
    def title(self):
        ''' get the book title '''
        return self.data['Title']

    @property
    def author(self):
        ''' get the book author '''
        return self.data['Author']

    @property
    def isbn(self):
        ''' pulls out the isbn13 field from the csv line data '''
        return unquote_string(self.data['ISBN13'])

    @property
    def shelf(self):
        ''' the goodreads shelf field, translated to a bookwyrm shelf

        None when the line has no shelf or the shelf isn't recognised.
        '''
        if self.data['Exclusive Shelf']:
            return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
        return None

    @property
    def review(self):
        ''' a user-written review, to be imported with the book data '''
        return self.data['My Review']

    @property
    def rating(self):
        ''' x/5 star rating for a book '''
        return int(self.data['My Rating'])

    @property
    def date_added(self):
        ''' when the book was added to this dataset '''
        return self._parse_date('Date Added')

    @property
    def date_read(self):
        ''' the date a book was completed '''
        return self._parse_date('Date Read')

    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line

        Returns a list holding at most one unsaved ReadThrough:
        - a finished one when the line has a date read
        - an open one when the book is on the reading shelf and the line
          has a date added
        - nothing otherwise
        '''
        finished = self.date_read
        if finished:
            started = self.date_added
            if started and started > finished:
                # the book was added after it was read, so the date added
                # says nothing about when reading began
                started = None
            return [ReadThrough(
                start_date=started,
                finish_date=finished,
            )]
        if self.shelf == 'reading':
            started = self.date_added
            if started:
                return [ReadThrough(start_date=started)]
        return []

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.data['Title'])

    def __str__(self):
        return "{} by {}".format(self.data['Title'], self.data['Author'])
code style cleanup 2020-05-09 21:26:27 +00:00			`''' track progress of goodreads imports '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`import re`
			`import dateutil.parser`

Removes sqlite support 😢 RIP, things have gotten too complicated for this I think 2020-12-13 04:11:23 +00:00			`from django.contrib.postgres.fields import JSONField`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`from django.db import models`
			`from django.utils import timezone`

Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`from bookwyrm import books_manager`
			`from bookwyrm.models import ReadThrough, User, Book`
Creates Privacy field that handles setting to/cc 2020-12-13 21:03:17 +00:00			`from .fields import PrivacyLevels`
Allow users to set privacy on imported reviews or not import them at all. Fixes #252 2020-10-30 18:21:02 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`# Mapping goodreads -> bookwyrm shelf titles.`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`GOODREADS_SHELVES = {`
			`'read': 'read',`
			`'currently-reading': 'reading',`
			`'to-read': 'to-read',`
			`}`

			`def unquote_string(text):`
			`''' resolve csv quote weirdness '''`
			`match = re.match(r'="([^"]*)"', text)`
			`if match:`
			`return match.group(1)`
			`return text`


			`def construct_search_term(title, author):`
			`''' formulate a query for the data connector '''`
			`# Strip brackets (usually series title from search term)`
			`title = re.sub(r'\s\([^)]\)\s*', '', title)`
			`# Open library doesn't like including author initials in search term.`
			`author = re.sub(r'(\w\.)+\s*', '', author)`

			`return ' '.join([title, author])`

code style cleanup 2020-05-09 21:26:27 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`class ImportJob(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' entry for a specific request for book data import '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`user = models.ForeignKey(User, on_delete=models.CASCADE)`
			`created_date = models.DateTimeField(default=timezone.now)`
			`task_id = models.CharField(max_length=100, null=True)`
Allow users to set privacy on imported reviews or not import them at all. Fixes #252 2020-10-30 18:21:02 +00:00			`include_reviews = models.BooleanField(default=True)`
			`privacy = models.CharField(`
			`max_length=255,`
			`default='public',`
			`choices=PrivacyLevels.choices`
			`)`
Allow import retry 2020-11-13 17:02:41 +00:00			`retry = models.BooleanField(default=False)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`class ImportItem(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a single line of a csv being imported '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`job = models.ForeignKey(`
			`ImportJob,`
			`on_delete=models.CASCADE,`
			`related_name='items')`
			`index = models.IntegerField()`
			`data = JSONField()`
			`book = models.ForeignKey(`
			`Book, on_delete=models.SET_NULL, null=True, blank=True)`
			`fail_reason = models.TextField(null=True)`

			`def resolve(self):`
			`''' try various ways to lookup a book '''`
			`self.book = (`
			`self.get_book_from_isbn() or`
			`self.get_book_from_title_author()`
			`)`

			`def get_book_from_isbn(self):`
			`''' search by isbn '''`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`search_result = books_manager.first_search_result(`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`self.isbn, min_confidence=0.999`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`# raises ConnectorException`
			`return books_manager.get_or_create_book(search_result.key)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def get_book_from_title_author(self):`
			`''' search by title and author '''`
			`search_term = construct_search_term(`
			`self.data['Title'],`
			`self.data['Author']`
			`)`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`search_result = books_manager.first_search_result(`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`search_term, min_confidence=0.999`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
fixes import matching with local books 2020-11-13 17:47:35 +00:00			`# raises ConnectorException`
			`return books_manager.get_or_create_book(search_result.key)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
Allow import retry 2020-11-13 17:02:41 +00:00			`@property`
			`def title(self):`
			`''' get the book title '''`
			`return self.data['Title']`

			`@property`
			`def author(self):`
			`''' get the book title '''`
			`return self.data['Author']`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`@property`
			`def isbn(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' pulls out the isbn13 field from the csv line data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return unquote_string(self.data['ISBN13'])`

			`@property`
			`def shelf(self):`
			`''' the goodreads shelf field '''`
			`if self.data['Exclusive Shelf']:`
Don't crash if we don't recognise the exclusive shelf. 2020-04-28 14:16:41 +00:00			`return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def review(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a user-written review, to be imported with the book data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return self.data['My Review']`

			`@property`
			`def rating(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' x/5 star rating for a book '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return int(self.data['My Rating'])`

			`@property`
			`def date_added(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' when the book was added to this dataset '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Added']:`
Replace naive datetimes with aware ones 2020-11-28 00:24:53 +00:00			`return timezone.make_aware(`
			`dateutil.parser.parse(self.data['Date Added']))`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def date_read(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' the date a book was completed '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Read']:`
Replace naive datetimes with aware ones 2020-11-28 00:24:53 +00:00			`return timezone.make_aware(`
			`dateutil.parser.parse(self.data['Date Read']))`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def reads(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' formats a read through dataset for the book in this line '''`
Only use added_date as start_date for books being read right now. 2020-04-25 10:29:30 +00:00			`if (self.shelf == 'reading'`
			`and self.date_added and not self.date_read):`
			`return [ReadThrough(start_date=self.date_added)]`
			`if self.date_read:`
			`return [ReadThrough(`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`start_date=self.date_added,`
Only use added_date as start_date for books being read right now. 2020-04-25 10:29:30 +00:00			`finish_date=self.date_read,`
			`)]`
			`return []`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def __repr__(self):`
			`return "<GoodreadsItem {!r}>".format(self.data['Title'])`

			`def __str__(self):`
			`return "{} by {}".format(self.data['Title'], self.data['Author'])`