# moviewyrm/bookwyrm/models/import_job.py

''' track progress of goodreads imports '''
import re
import dateutil.parser

from django.apps import apps
from django.contrib.postgres.fields import JSONField
from django.db import models
from django.utils import timezone

from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels


# Mapping goodreads -> bookwyrm shelf titles.
# Keys are values of the csv's 'Exclusive Shelf' column; ImportItem.shelf
# looks them up with .get(), so a shelf not listed here resolves to None.
GOODREADS_SHELVES = {
    'read': 'read',
    'currently-reading': 'reading',
    'to-read': 'to-read',
}

def unquote_string(text):
    ''' unwrap a csv value written as ="..."; other text is returned as-is '''
    # only a wrapper at the very start of the text counts (re.match anchors
    # at the beginning); anything after the closing quote is discarded
    wrapped = re.match(r'="([^"]*)"', text)
    return wrapped.group(1) if wrapped else text


def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title: the book title, possibly carrying a bracketed series name
    author: the author name, possibly containing initials
    returns the cleaned title and author joined by a single space; a part
    that ends up empty is left out so the term has no stray whitespace
    '''
    # Strip brackets (usually series title) from the search term. They are
    # swapped for a space so a bracket in the middle of a title does not fuse
    # the words around it ('Foo (Bar) Baz' -> 'Foo Baz', not 'FooBaz').
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title).strip()
    # Open library doesn't like including author initials in search term.
    # The \b keeps this to standalone initials ('J.R.R.', 'K.'); without it
    # the last letter of an abbreviation such as 'Dr.' or 'Jr.' is eaten.
    author = re.sub(r'\b(\w\.)+\s*', '', author).strip()

    return ' '.join(part for part in (title, author) if part)


class ImportJob(models.Model):
    ''' a single request to import book data on behalf of a user '''
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    created_date = models.DateTimeField(default=timezone.now)
    task_id = models.CharField(max_length=100, null=True)
    include_reviews = models.BooleanField(default=True)
    complete = models.BooleanField(default=False)
    privacy = models.CharField(
        max_length=255, default='public', choices=PrivacyLevels.choices)
    retry = models.BooleanField(default=False)

    def save(self, *args, **kwargs):
        ''' store the job, then notify its user once it is marked complete '''
        super().save(*args, **kwargs)
        if not self.complete:
            return

        # the notification model is resolved by name at call time rather
        # than imported at module level
        notification_cls = apps.get_model(
            'bookwyrm.Notification', require_ready=True)
        # NOTE(review): this runs on every save() while complete is True, so
        # saving a finished job again creates another notification
        notification_cls.objects.create(
            related_import=self,
            notification_type='IMPORT',
            user=self.user,
        )


class ImportItem(models.Model):
    ''' a single line of a csv being imported '''
    job = models.ForeignKey(
        ImportJob,
        on_delete=models.CASCADE,
        related_name='items')
    index = models.IntegerField()
    data = JSONField()
    book = models.ForeignKey(
        Book, on_delete=models.SET_NULL, null=True, blank=True)
    fail_reason = models.TextField(null=True)

    def resolve(self):
        ''' try various ways to lookup a book '''
        # isbn lookup is tried first, title/author search is the fallback;
        # self.book ends up None when neither finds anything
        self.book = (
            self.get_book_from_isbn() or
            self.get_book_from_title_author()
        )

    @staticmethod
    def _lookup_book(query):
        ''' load the book behind the first high-confidence search result

        returns None when the search finds nothing
        '''
        search_result = connector_manager.first_search_result(
            query, min_confidence=0.999
        )
        if search_result:
            # raises ConnectorException
            return search_result.connector.get_or_create_book(
                search_result.key)
        return None

    def _parse_date(self, field):
        ''' read a date column of the csv line as an aware datetime

        returns None when the column is missing or blank
        '''
        raw_date = self.data.get(field)
        if not raw_date:
            return None
        parsed = dateutil.parser.parse(raw_date)
        # make_aware refuses datetimes that already carry a timezone, so a
        # date string with an explicit offset is passed through untouched
        if timezone.is_aware(parsed):
            return parsed
        return timezone.make_aware(parsed)

    def get_book_from_isbn(self):
        ''' search by isbn '''
        isbn = self.isbn
        if not isbn:
            # a blank isbn cannot identify a book, skip the search
            return None
        return self._lookup_book(isbn)

    def get_book_from_title_author(self):
        ''' search by title and author '''
        search_term = construct_search_term(
            self.title,
            self.author
        )
        return self._lookup_book(search_term)

    @property
    def title(self):
        ''' get the book title '''
        return self.data['Title']

    @property
    def author(self):
        ''' get the book author '''
        return self.data['Author']

    @property
    def isbn(self):
        ''' pulls out the isbn13 field from the csv line data '''
        return unquote_string(self.data['ISBN13'])

    @property
    def shelf(self):
        ''' the goodreads shelf field '''
        if self.data['Exclusive Shelf']:
            # shelves without an entry in GOODREADS_SHELVES give None
            return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
        return None

    @property
    def review(self):
        ''' a user-written review, to be imported with the book data '''
        return self.data['My Review']

    @property
    def rating(self):
        ''' x/5 star rating for a book '''
        return int(self.data['My Rating'])

    @property
    def date_added(self):
        ''' when the book was added to this dataset '''
        return self._parse_date('Date Added')

    @property
    def date_started(self):
        ''' when the book was started '''
        return self._parse_date('Date Started')

    @property
    def date_read(self):
        ''' the date a book was completed '''
        return self._parse_date('Date Read')

    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line

        returns a list holding at most one unsaved ReadThrough
        '''
        # each date property parses its column on access, so read them once
        start_date = self.date_started
        finish_date = self.date_read

        # Goodreads special case (no 'date started' field)
        if not start_date:
            shelf = self.shelf
            if shelf == 'reading' or (shelf == 'read' and finish_date):
                # date_added may itself be None, leaving start_date unset
                start_date = self.date_added

        if finish_date:
            return [ReadThrough(
                start_date=start_date,
                finish_date=finish_date,
            )]
        if start_date:
            return [ReadThrough(start_date=start_date)]
        return []

    def __repr__(self):
        # .get() so that repr() never raises on a line whose data has no
        # 'import_source' or 'Title' entry
        return "<{!r}Item {!r}>".format(
            self.data.get('import_source'), self.data.get('Title'))

    def __str__(self):
        return "{} by {}".format(self.data['Title'], self.data['Author'])
code style cleanup 2020-05-09 21:26:27 +00:00			`''' track progress of goodreads imports '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`import re`
			`import dateutil.parser`

Moves import complete notification to model 2021-02-10 22:18:55 +00:00			`from django.apps import apps`
Removes sqlite support 😢 RIP, things have gotten too complicated for this I think 2020-12-13 04:11:23 +00:00			`from django.contrib.postgres.fields import JSONField`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`from django.db import models`
			`from django.utils import timezone`

Remove books manager at long last 2021-01-02 16:14:28 +00:00			`from bookwyrm.connectors import connector_manager`
Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`from bookwyrm.models import ReadThrough, User, Book`
Creates Privacy field that handles setting to/cc 2020-12-13 21:03:17 +00:00			`from .fields import PrivacyLevels`
Allow users to set privacy on imported reviews or not import them at all. Fixes #252 2020-10-30 18:21:02 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
Updates migrations To get the app working again I ran resetdb, let it crash in initdb, then ran the migration, then re-ran initdb 2020-09-21 15:10:37 +00:00			`# Mapping goodreads -> bookwyrm shelf titles.`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`GOODREADS_SHELVES = {`
			`'read': 'read',`
			`'currently-reading': 'reading',`
			`'to-read': 'to-read',`
			`}`

			`def unquote_string(text):`
			`''' resolve csv quote weirdness '''`
			`match = re.match(r'="([^"]*)"', text)`
			`if match:`
			`return match.group(1)`
			`return text`


			`def construct_search_term(title, author):`
			`''' formulate a query for the data connector '''`
			`# Strip brackets (usually series title from search term)`
			`title = re.sub(r'\s*\([^)]*\)\s*', '', title)`
			`# Open library doesn't like including author initials in search term.`
			`author = re.sub(r'(\w\.)+\s*', '', author)`

			`return ' '.join([title, author])`

code style cleanup 2020-05-09 21:26:27 +00:00
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`class ImportJob(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' entry for a specific request for book data import '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`user = models.ForeignKey(User, on_delete=models.CASCADE)`
			`created_date = models.DateTimeField(default=timezone.now)`
			`task_id = models.CharField(max_length=100, null=True)`
Allow users to set privacy on imported reviews or not import them at all. Fixes #252 2020-10-30 18:21:02 +00:00			`include_reviews = models.BooleanField(default=True)`
Adds field on import job to check if job completed the task isn't a reliable indicator, unfortunately. 2021-01-07 16:08:12 +00:00			`complete = models.BooleanField(default=False)`
Allow users to set privacy on imported reviews or not import them at all. Fixes #252 2020-10-30 18:21:02 +00:00			`privacy = models.CharField(`
			`max_length=255,`
			`default='public',`
			`choices=PrivacyLevels.choices`
			`)`
Allow import retry 2020-11-13 17:02:41 +00:00			`retry = models.BooleanField(default=False)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00
Moves import complete notification to model 2021-02-10 22:18:55 +00:00			`def save(self, *args, **kwargs):`
			`''' save and notify '''`
			`super().save(*args, **kwargs)`
			`if self.complete:`
			`notification_model = apps.get_model(`
			`'bookwyrm.Notification', require_ready=True)`
			`notification_model.objects.create(`
			`user=self.user,`
			`notification_type='IMPORT',`
			`related_import=self,`
			`)`


Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`class ImportItem(models.Model):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a single line of a csv being imported '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`job = models.ForeignKey(`
			`ImportJob,`
			`on_delete=models.CASCADE,`
			`related_name='items')`
			`index = models.IntegerField()`
			`data = JSONField()`
			`book = models.ForeignKey(`
			`Book, on_delete=models.SET_NULL, null=True, blank=True)`
			`fail_reason = models.TextField(null=True)`

			`def resolve(self):`
			`''' try various ways to lookup a book '''`
			`self.book = (`
			`self.get_book_from_isbn() or`
			`self.get_book_from_title_author()`
			`)`

			`def get_book_from_isbn(self):`
			`''' search by isbn '''`
Remove books manager at long last 2021-01-02 16:14:28 +00:00			`search_result = connector_manager.first_search_result(`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`self.isbn, min_confidence=0.999`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`# raises ConnectorException`
Send connector with search result also fix typo in get_work_from_edition_data function 2020-12-27 22:27:18 +00:00			`return search_result.connector.get_or_create_book(search_result.key)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def get_book_from_title_author(self):`
			`''' search by title and author '''`
			`search_term = construct_search_term(`
librarything import 2021-02-20 16:02:36 +00:00			`self.title,`
			`self.author`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`)`
Remove books manager at long last 2021-01-02 16:14:28 +00:00			`search_result = connector_manager.first_search_result(`
tweaks search rankings for better results 2020-11-13 19:03:39 +00:00			`search_term, min_confidence=0.999`
Stop assuming every book is Hamlet 2020-10-29 22:29:23 +00:00			`)`
Separate out local and remote search results 2020-05-03 19:59:06 +00:00			`if search_result:`
fixes import matching with local books 2020-11-13 17:47:35 +00:00			`# raises ConnectorException`
Send connector with search result also fix typo in get_work_from_edition_data function 2020-12-27 22:27:18 +00:00			`return search_result.connector.get_or_create_book(search_result.key)`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
Allow import retry 2020-11-13 17:02:41 +00:00			`@property`
			`def title(self):`
			`''' get the book title '''`
			`return self.data['Title']`

			`@property`
			`def author(self):`
			`''' get the book title '''`
			`return self.data['Author']`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`@property`
			`def isbn(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' pulls out the isbn13 field from the csv line data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return unquote_string(self.data['ISBN13'])`

			`@property`
			`def shelf(self):`
			`''' the goodreads shelf field '''`
			`if self.data['Exclusive Shelf']:`
Don't crash if we don't recognise the exclusive shelf. 2020-04-28 14:16:41 +00:00			`return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def review(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' a user-written review, to be imported with the book data '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return self.data['My Review']`

			`@property`
			`def rating(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' x/5 star rating for a book '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`return int(self.data['My Rating'])`

			`@property`
			`def date_added(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' when the book was added to this dataset '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Added']:`
Replace naive datetimes with aware ones 2020-11-28 00:24:53 +00:00			`return timezone.make_aware(`
			`dateutil.parser.parse(self.data['Date Added']))`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
librarything import 2021-02-20 16:02:36 +00:00			`@property`
			`def date_started(self):`
			`''' when the book was started '''`
			`if "Date Started" in self.data and self.data['Date Started']:`
			`return timezone.make_aware(`
			`dateutil.parser.parse(self.data['Date Started']))`
			`return None`

Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`@property`
			`def date_read(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' the date a book was completed '''`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00			`if self.data['Date Read']:`
Replace naive datetimes with aware ones 2020-11-28 00:24:53 +00:00			`return timezone.make_aware(`
			`dateutil.parser.parse(self.data['Date Read']))`
Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 2020-10-29 21:29:31 +00:00			`return None`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`@property`
			`def reads(self):`
Fixes linter issues 2020-09-21 17:25:26 +00:00			`''' formats a read through dataset for the book in this line '''`
librarything import 2021-02-20 16:02:36 +00:00			`start_date = self.date_started`

			`# Goodreads special case (no 'date started' field)`
			`if ((self.shelf == 'reading' or (self.shelf == 'read' and self.date_read))`
			`and self.date_added and not start_date):`
			`start_date = self.date_added`

			`if (start_date and start_date is not None and not self.date_read):`
			`return [ReadThrough(start_date=start_date)]`
Only use added_date as start_date for books being read right now. 2020-04-25 10:29:30 +00:00			`if self.date_read:`
			`return [ReadThrough(`
librarything import 2021-02-20 16:02:36 +00:00			`start_date=start_date,`
Only use added_date as start_date for books being read right now. 2020-04-25 10:29:30 +00:00			`finish_date=self.date_read,`
			`)]`
			`return []`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def __repr__(self):`
librarything import 2021-02-20 16:02:36 +00:00			`return "<{!r}Item {!r}>".format(self.data['import_source'], self.data['Title'])`
Store csv in the database and then import via celery. 2020-04-21 14:09:21 +00:00
			`def __str__(self):`
			`return "{} by {}".format(self.data['Title'], self.data['Author'])`