moviewyrm/bookwyrm/models/import_job.py

''' track progress of goodreads imports '''
import re
import dateutil.parser

from django.db import models
from django.utils import timezone

from bookwyrm import books_manager
from bookwyrm.models import ReadThrough, User, Book
from bookwyrm.utils.fields import JSONField

# Lookup table: goodreads "Exclusive Shelf" value -> bookwyrm shelf title.
# Shelves that are not listed here have no bookwyrm equivalent.
GOODREADS_SHELVES = dict((
    ('read', 'read'),
    ('currently-reading', 'reading'),
    ('to-read', 'to-read'),
))

def unquote_string(text):
    ''' unwrap a csv value written as ="..." and return what is inside

    values that do not start with that wrapper are returned untouched
    '''
    wrapped = re.match(r'="([^"]*)"', text)
    return text if wrapped is None else wrapped.group(1)


def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title and author are the raw strings from the csv row. returns them
    joined by a single space, with no leading or trailing whitespace; a
    part that ends up empty is left out instead of leaving a stray space.
    '''
    # Strip brackets (usually series title from search term). A space is
    # substituted so the words on either side of the brackets are not glued
    # together; split/join then collapses any doubled spaces.
    title = ' '.join(re.sub(r'\s*\([^)]*\)\s*', ' ', title).split())
    # Open library doesn't like including author initials in search term.
    # The \b anchors the match at the start of a word, so abbreviations such
    # as "Jr." or "St." (which merely end in letter + full stop) are kept.
    author = re.sub(r'\b(\w\.)+\s*', '', author).strip()

    return ' '.join(part for part in (title, author) if part)


class ImportJob(models.Model):
    ''' one request to import a batch of book data '''
    # owner of the import; the job row is removed together with the user
    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
    )
    # filled in from timezone.now when no value is supplied
    created_date = models.DateTimeField(
        default=timezone.now,
    )
    # nullable identifier of at most 100 characters
    # presumably the id of the background task -- TODO confirm with caller
    task_id = models.CharField(
        null=True,
        max_length=100,
    )
    # optional reference to a 'Status' row; PROTECT refuses deletion of that
    # status while this job still points at it
    import_status = models.ForeignKey(
        'Status',
        on_delete=models.PROTECT,
        null=True,
    )

class ImportItem(models.Model):
    ''' a single line of a csv being imported

    the raw row is kept in `data` and read as a mapping keyed by column
    header; the properties below expose the columns this class uses
    ('Title', 'Author', 'ISBN13', 'Exclusive Shelf', 'My Review',
    'My Rating', 'Date Added', 'Date Read').
    '''
    # the import request this row belongs to; reachable as job.items
    job = models.ForeignKey(
        ImportJob,
        on_delete=models.CASCADE,
        related_name='items')
    # integer position of the row (presumably its order in the csv)
    index = models.IntegerField()
    # the csv row itself
    data = JSONField()
    # the matched book, assigned by resolve(); nulled if the book is deleted
    book = models.ForeignKey(
        Book, on_delete=models.SET_NULL, null=True, blank=True)
    # optional free text; nothing in this class writes it
    fail_reason = models.TextField(null=True)

    def resolve(self):
        ''' try various ways to lookup a book

        assigns the first match to self.book: the isbn lookup is tried
        first, then the title/author search. self.book ends up None when
        neither finds anything. the instance is not saved here.
        '''
        self.book = (
            self.get_book_from_isbn() or
            self.get_book_from_title_author()
        )

    def get_book_from_isbn(self):
        ''' search by isbn; returns the book, or None without a match '''
        isbn = self.isbn
        if not isbn:
            # a row without an isbn yields an empty string here; searching
            # on it cannot identify this book, so skip the lookup entirely
            return None
        search_result = books_manager.first_search_result(isbn)
        if search_result:
            return books_manager.get_or_create_book(search_result.key)
        return None

    def get_book_from_title_author(self):
        ''' search by title and author; returns the book, or None '''
        search_term = construct_search_term(
            self.data['Title'],
            self.data['Author']
        )
        search_result = books_manager.first_search_result(search_term)
        if search_result:
            return books_manager.get_or_create_book(search_result.key)
        return None

    @property
    def isbn(self):
        ''' pulls out the isbn13 field from the csv line data '''
        # the value may be wrapped as ="..."; unquote_string removes that
        return unquote_string(self.data['ISBN13'])

    @property
    def shelf(self):
        ''' the goodreads shelf field, translated to a bookwyrm shelf

        returns None when the field is empty or the shelf is not one of
        the keys of GOODREADS_SHELVES.
        '''
        exclusive_shelf = self.data['Exclusive Shelf']
        if exclusive_shelf:
            return GOODREADS_SHELVES.get(exclusive_shelf)
        return None

    @property
    def review(self):
        ''' a user-written review, to be imported with the book data '''
        return self.data['My Review']

    @property
    def rating(self):
        ''' x/5 star rating for a book, as an int

        raises ValueError when the field is not an integer string.
        '''
        return int(self.data['My Rating'])

    @property
    def date_added(self):
        ''' when the book was added to this dataset

        a datetime parsed from the 'Date Added' field, or None if empty.
        '''
        raw_date = self.data['Date Added']
        if raw_date:
            return dateutil.parser.parse(raw_date)
        return None

    @property
    def date_read(self):
        ''' the date a book was completed

        a datetime parsed from the 'Date Read' field, or None if empty.
        '''
        raw_date = self.data['Date Read']
        if raw_date:
            return dateutil.parser.parse(raw_date)
        return None

    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line

        returns a list holding at most one unsaved ReadThrough:
        a finished read when there is a 'Date Read', otherwise a started
        read (using the date added) for books on the 'reading' shelf,
        otherwise an empty list.
        '''
        # each property access re-parses the date string, so read them once
        date_read = self.date_read
        if date_read:
            return [ReadThrough(finish_date=date_read)]
        date_added = self.date_added
        # the added date only stands in for a start date while the book
        # is still being read
        if self.shelf == 'reading' and date_added:
            return [ReadThrough(start_date=date_added)]
        return []

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.data['Title'])

    def __str__(self):
        return "{} by {}".format(self.data['Title'], self.data['Author'])
code style cleanup 2020-05-09 21:26:27 +00:00			`''' track progress of goodreads imports '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`import re`
			`import dateutil.parser`

			`from django.db import models`
			`from django.utils import timezone`

Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`from bookwyrm import books_manager`
			`from bookwyrm.models import ReadThrough, User, Book`
			`from bookwyrm.utils.fields import JSONField`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`# Mapping goodreads -> bookwyrm shelf titles.`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`GOODREADS_SHELVES = {`
			`'read': 'read',`
			`'currently-reading': 'reading',`
			`'to-read': 'to-read',`
			`}`

			`def unquote_string(text):`
			`''' resolve csv quote weirdness '''`
			`match = re.match(r'="([^"]*)"', text)`
			`if match:`
			`return match.group(1)`
			`return text`


			`def construct_search_term(title, author):`
			`''' formulate a query for the data connector '''`
			`# Strip brackets (usually series title from search term)`
			`title = re.sub(r'\s\([^)]\)\s*', '', title)`
			`# Open library doesn't like including author initials in search term.`
			`author = re.sub(r'(\w\.)+\s*', '', author)`

			`return ' '.join([title, author])`

code style cleanup 2020-05-09 21:26:27 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`class ImportJob(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' entry for a specific request for book data import '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`user = models.ForeignKey(User, on_delete=models.CASCADE)`
			`created_date = models.DateTimeField(default=timezone.now)`
			`task_id = models.CharField(max_length=100, null=True)`
Link import job to import status and display status on status page. 2020-04-22 13:16:46 +00:00			`import_status = models.ForeignKey(`
			`'Status', null=True, on_delete=models.PROTECT)`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`class ImportItem(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a single line of a csv being imported '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`job = models.ForeignKey(`
			`ImportJob,`
			`on_delete=models.CASCADE,`
			`related_name='items')`
			`index = models.IntegerField()`
			`data = JSONField()`
			`book = models.ForeignKey(`
			`Book, on_delete=models.SET_NULL, null=True, blank=True)`
			`fail_reason = models.TextField(null=True)`

			`def resolve(self):`
			`''' try various ways to lookup a book '''`
			`self.book = (`
			`self.get_book_from_isbn() or`
			`self.get_book_from_title_author()`
			`)`

			`def get_book_from_isbn(self):`
			`''' search by isbn '''`
Search cleanup 2020-05-03 22:26:47 +00:00			`search_result = books_manager.first_search_result(self.isbn)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
			`return books_manager.get_or_create_book(search_result.key)`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def get_book_from_title_author(self):`
			`''' search by title and author '''`
			`search_term = construct_search_term(`
			`self.data['Title'],`
			`self.data['Author']`
			`)`
Search cleanup 2020-05-03 22:26:47 +00:00			`search_result = books_manager.first_search_result(search_term)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
			`return books_manager.get_or_create_book(search_result.key)`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def isbn(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' pulls out the isbn13 field from the csv line data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return unquote_string(self.data['ISBN13'])`

			`@property`
			`def shelf(self):`
			`''' the goodreads shelf field '''`
			`if self.data['Exclusive Shelf']:`
Don't crash if we don't recognise the exclusive shelf. 2020-04-28 14:16:41 +00:00			`return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def review(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a user-written review, to be imported with the book data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return self.data['My Review']`

			`@property`
			`def rating(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' x/5 star rating for a book '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return int(self.data['My Rating'])`

			`@property`
			`def date_added(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' when the book was added to this dataset '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Added']:`
			`return dateutil.parser.parse(self.data['Date Added'])`

			`@property`
			`def date_read(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' the date a book was completed '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Read']:`
			`return dateutil.parser.parse(self.data['Date Read'])`

			`@property`
			`def reads(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' formats a read through dataset for the book in this line '''`
Only use added_date as start_date for books being read right now. 2020-04-25 10:29:30 +00:00			`if (self.shelf == 'reading'`
			`and self.date_added and not self.date_read):`
			`return [ReadThrough(start_date=self.date_added)]`
			`if self.date_read:`
			`return [ReadThrough(`
			`finish_date=self.date_read,`
			`)]`
			`return []`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def __repr__(self):`
			`return "<GoodreadsItem {!r}>".format(self.data['Title'])`

			`def __str__(self):`
			`return "{} by {}".format(self.data['Title'], self.data['Author'])`