librarything import

2024-11-26 19:41:11 +00:00 · 2021-02-20 17:02:36 +01:00 · 2021-02-20 17:02:36 +01:00 · b85fed3595
commit b85fed3595
parent dcd4baed82
9 changed files with 510 additions and 152 deletions
--- a/bookwyrm/goodreads_import.py
+++ b/bookwyrm/goodreads_import.py
@ -1,121 +1,13 @@
 ''' handle reading a csv from goodreads '''
-import csv
+from bookwyrm.importer import Importer
 import logging
-from bookwyrm import models
+# GoodReads is the default importer, thus Importer follows its structure. For a more complete example of overriding see librarything_import.py  
 from bookwyrm.models import ImportJob, ImportItem
 from bookwyrm.tasks import app
-logger = logging.getLogger(__name__)
+class GoodreadsImporter(Importer):
    service = 'GoodReads'
-
+    def parse_fields(self, data):
-def create_job(user, csv_file, include_reviews, privacy):
+        data.update({'import_source': self.service }) 
-    ''' check over a csv and creates a database entry for the job'''
+        # add missing 'Date Started' field
-    job = ImportJob.objects.create(
+        data.update({'Date Started': None }) 
-        user=user,
+        return data
        include_reviews=include_reviews,
        privacy=privacy
    )
    for index, entry in enumerate(list(csv.DictReader(csv_file))):
        if not all(x in entry for x in ('ISBN13', 'Title', 'Author')):
            raise ValueError('Author, title, and isbn must be in data.')
        ImportItem(job=job, index=index, data=entry).save()
    return job
 def create_retry_job(user, original_job, items):
    ''' retry items that didn't import '''
    job = ImportJob.objects.create(
        user=user,
        include_reviews=original_job.include_reviews,
        privacy=original_job.privacy,
        retry=True
    )
    for item in items:
        ImportItem(job=job, index=item.index, data=item.data).save()
    return job
 def start_import(job):
    ''' initalizes a csv import job '''
    result = import_data.delay(job.id)
    job.task_id = result.id
    job.save()
@app.task
 def import_data(job_id):
    ''' does the actual lookup work in a celery task '''
    job = ImportJob.objects.get(id=job_id)
    try:
        for item in job.items.all():
            try:
                item.resolve()
            except Exception as e:# pylint: disable=broad-except
                logger.exception(e)
                item.fail_reason = 'Error loading book'
                item.save()
                continue
            if item.book:
                item.save()
                # shelves book and handles reviews
                handle_imported_book(
                    job.user, item, job.include_reviews, job.privacy)
            else:
                item.fail_reason = 'Could not find a match for book'
                item.save()
    finally:
        job.complete = True
        job.save()
 def handle_imported_book(user, item, include_reviews, privacy):
    ''' process a goodreads csv and then post about it '''
    if isinstance(item.book, models.Work):
        item.book = item.book.default_edition
    if not item.book:
        return
    existing_shelf = models.ShelfBook.objects.filter(
        book=item.book, user=user).exists()
    # shelve the book if it hasn't been shelved already
    if item.shelf and not existing_shelf:
        desired_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        models.ShelfBook.objects.create(
            book=item.book, shelf=desired_shelf, user=user)
    for read in item.reads:
        # check for an existing readthrough with the same dates
        if models.ReadThrough.objects.filter(
                user=user, book=item.book,
                start_date=read.start_date,
                finish_date=read.finish_date
            ).exists():
            continue
        read.book = item.book
        read.user = user
        read.save()
    if include_reviews and (item.rating or item.review):
        review_title = 'Review of {!r} on Goodreads'.format(
            item.book.title,
        ) if item.review else ''
        # we don't know the publication date of the review,
        # but "now" is a bad guess
        published_date_guess = item.date_read or item.date_added
        models.Review.objects.create(
            user=user,
            book=item.book,
            name=review_title,
            content=item.review,
            rating=item.rating,
            published_date=published_date_guess,
            privacy=privacy,
        )
--- a/bookwyrm/importer.py
+++ b/bookwyrm/importer.py
@ -0,0 +1,135 @@
 ''' handle reading a csv from an external service, defaults are from GoodReads '''
 import csv
 import logging
 from bookwyrm import models
 from bookwyrm.models import ImportJob, ImportItem
 from bookwyrm.tasks import app
 logger = logging.getLogger(__name__)
 class Importer:
    ''' Generic importer for a delimited export of a reading service.

    The class attributes describe the GoodReads layout; importers for other
    services override them and `parse_fields`. '''
    # name of the service: stored on every row and passed to the import task
    service = 'Unknown'
    # column separator handed to csv.DictReader
    delimiter = ','
    # text encoding of the export; not read in this class, callers use it
    # when they open the uploaded file
    encoding = 'UTF-8'
    # columns every row has to contain
    mandatory_fields = ['ISBN13', 'Title', 'Author']

    def create_job(self, user, csv_file, include_reviews, privacy):
        ''' create an import job and store one item per row of the csv

        Raises ValueError as soon as a row lacks one of `mandatory_fields`;
        the job and the items saved before that row are not removed. '''
        job = ImportJob.objects.create(
            user=user,
            include_reviews=include_reviews,
            privacy=privacy
        )
        # the whole file is parsed before the first item is written
        rows = list(csv.DictReader(csv_file, delimiter=self.delimiter))
        for index, row in enumerate(rows):
            for field in self.mandatory_fields:
                if field not in row:
                    raise ValueError(
                        'Author, title, and isbn must be in data.')
            self.save_item(job, index, self.parse_fields(row))
        return job

    def save_item(self, job, index, data):
        ''' persist one row of the export as an item of the job '''
        new_item = ImportItem(job=job, index=index, data=data)
        new_item.save()

    def parse_fields(self, entry):
        ''' tag the row with the service name, modifies and returns `entry` '''
        entry['import_source'] = self.service
        return entry

    def create_retry_job(self, user, original_job, items):
        ''' build a new job out of items that failed in an earlier one

        The review and privacy settings are taken from `original_job`; the
        items keep their index and data. '''
        retry_job = ImportJob.objects.create(
            user=user,
            include_reviews=original_job.include_reviews,
            privacy=original_job.privacy,
            retry=True
        )
        for failed in items:
            self.save_item(retry_job, failed.index, failed.data)
        return retry_job

    def start_import(self, job):
        ''' queue the celery task for the job and remember the task id '''
        task = import_data.delay(self.service, job.id)
        job.task_id = task.id
        job.save()
@app.task
def import_data(source, job_id):
    ''' celery task: resolve every item of a job and process the matches

    source is the name of the service the data came from, it is handed on to
    handle_imported_book. The job is marked complete even when processing
    raises. '''
    job = ImportJob.objects.get(id=job_id)
    try:
        for item in job.items.all():
            try:
                item.resolve()
            except Exception as err:# pylint: disable=broad-except
                # one broken row must not stop the rest of the import
                logger.exception(err)
                item.fail_reason = 'Error loading book'
                item.save()
                continue

            if not item.book:
                item.fail_reason = 'Could not find a match for book'
                item.save()
                continue

            item.save()
            # shelves book and handles reviews
            handle_imported_book(
                source, job.user, item, job.include_reviews, job.privacy)
    finally:
        job.complete = True
        job.save()
 def handle_imported_book(source, user, item, include_reviews, privacy):
    ''' shelve an imported book, store its readthroughs and, when asked for,
    create the review that came with it

    source is the name of the service the data came from and only shows up in
    the title of the review; privacy is stored on the review. '''
    if isinstance(item.book, models.Work):
        # a work was matched: continue with its default edition
        item.book = item.book.default_edition
    if not item.book:
        return

    already_shelved = models.ShelfBook.objects.filter(
        book=item.book, user=user).exists()
    # books the user has on any shelf already are not shelved again
    if item.shelf and not already_shelved:
        target_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        models.ShelfBook.objects.create(
            book=item.book, shelf=target_shelf, user=user)

    for read in item.reads:
        # a readthrough with the same dates is treated as a duplicate
        is_duplicate = models.ReadThrough.objects.filter(
            user=user, book=item.book,
            start_date=read.start_date,
            finish_date=read.finish_date
        ).exists()
        if is_duplicate:
            continue
        read.book = item.book
        read.user = user
        read.save()

    if not include_reviews:
        return
    if not (item.rating or item.review):
        return

    # a rating without text gets no title
    if item.review:
        review_title = 'Review of {!r} on {!r}'.format(
            item.book.title,
            source,
        )
    else:
        review_title = ''
    # the publication date of the review is unknown, the date the book was
    # read (or added) is a better guess than "now"
    published_date_guess = item.date_read or item.date_added
    models.Review.objects.create(
        user=user,
        book=item.book,
        name=review_title,
        content=item.review,
        rating=item.rating,
        published_date=published_date_guess,
        privacy=privacy,
    )
--- a/bookwyrm/librarything_import.py
+++ b/bookwyrm/librarything_import.py
@ -0,0 +1,39 @@
 ''' handle reading a csv from librarything '''
 import csv
 import re
 import math
 from bookwyrm import models
 from bookwyrm.models import ImportItem
 from bookwyrm.importer import Importer
 class LibrarythingImporter(Importer):
    ''' Importer for the tab separated export of LibraryThing.

    `parse_fields` renames the LibraryThing columns to the GoodReads column
    names that the base Importer uses as its defaults. '''
    service = 'LibraryThing'
    delimiter = '\t'
    encoding = 'ISO-8859-1'
    # mandatory_fields : fields matching the book ISBN13, title and author
    mandatory_fields = ['ISBN', 'Title', 'Primary Author']

    @staticmethod
    def _strip_brackets(value):
        ''' drop the square brackets LibraryThing puts around dates and ISBNs

        A missing value (None, as csv.DictReader yields for short rows) is
        returned as an empty string instead of raising TypeError. '''
        return re.sub(r'\[|\]', '', value or '')

    def parse_fields(self, initial):
        ''' build the row that is stored on the import item

        initial is one row of the export as a dict keyed by column name.
        Returns a new dict; `initial` is left untouched. '''
        data = {}
        data['import_source'] = self.service
        data['Book Id'] = initial['Book Id']
        data['Title'] = initial['Title']
        data['Author'] = initial['Primary Author']
        # the export writes the ISBN as "[2070291340]"
        data['ISBN13'] = self._strip_brackets(initial['ISBN'])
        data['My Review'] = initial['Review']

        # LibraryThing allows half stars, they are rounded up. A book without
        # rating has an empty cell, which float() can't parse: store 0.
        # NOTE(review): assumes 0 is read as "not rated" downstream - confirm
        # against ImportItem.rating
        rating = (initial['Rating'] or '').strip()
        data['My Rating'] = math.ceil(float(rating)) if rating else 0

        data['Date Added'] = self._strip_brackets(initial['Entry Date'])
        data['Date Started'] = self._strip_brackets(initial['Date Started'])
        data['Date Read'] = self._strip_brackets(initial['Date Read'])

        # the shelf is derived from the dates of the row
        if data['Date Read']:
            data['Exclusive Shelf'] = "read"
        elif data['Date Started']:
            data['Exclusive Shelf'] = "reading"
        else:
            data['Exclusive Shelf'] = "to-read"
        return data
--- a/bookwyrm/models/import_job.py
+++ b/bookwyrm/models/import_job.py
@ -97,8 +97,8 @@ class ImportItem(models.Model):
    def get_book_from_title_author(self):
        ''' search by title and author '''
        search_term = construct_search_term(
-            self.data['Title'],
+            self.title,
-            self.data['Author']
+            self.author
        )
        search_result = connector_manager.first_search_result(
            search_term, min_confidence=0.999
@ -149,6 +149,14 @@ class ImportItem(models.Model):
                dateutil.parser.parse(self.data['Date Added']))
        return None
    @property
    def date_started(self):
        ''' when the book was started, None if the row has no such date '''
        raw_date = self.data.get('Date Started')
        if not raw_date:
            return None
        parsed = dateutil.parser.parse(raw_date)
        return timezone.make_aware(parsed)
    @property
    def date_read(self):
        ''' the date a book was completed '''
@ -160,18 +168,24 @@ class ImportItem(models.Model):
    @property
    def reads(self):
        ''' formats a read through dataset for the book in this line '''
-        if (self.shelf == 'reading'
+        start_date = self.date_started
-                and self.date_added and not self.date_read):
+
-            return [ReadThrough(start_date=self.date_added)]
+        # Goodreads special case (no 'date started' field)
        if ((self.shelf == 'reading' or (self.shelf == 'read' and self.date_read))
                and self.date_added and not start_date):
            start_date = self.date_added
        if (start_date and start_date is not None and not self.date_read):
            return [ReadThrough(start_date=start_date)]
        if self.date_read:
            return [ReadThrough(
-                start_date=self.date_added,
+                start_date=start_date,
                finish_date=self.date_read,
            )]
        return []
    def __repr__(self):
-        return "<GoodreadsItem {!r}>".format(self.data['Title'])
+        return "<{!r}Item {!r}>".format(self.data['import_source'], self.data['Title'])
    def __str__(self):
        return "{} by {}".format(self.data['Title'], self.data['Author'])
--- a/bookwyrm/templates/import.html
+++ b/bookwyrm/templates/import.html
@ -2,9 +2,24 @@
 {% load humanize %}
 {% block content %}
 <div class="block">
-    <h1 class="title">Import Books from GoodReads</h1>
+    <h1 class="title">Import Books</h1>
    <form name="import" action="/import" method="post" enctype="multipart/form-data">
        {% csrf_token %}
        <label class="label" for="source">
          <p>Data source</p>
        <div class="select {{ class }}">
          {# each option is preselected when it matches the current source #}
          <select name="source" id="source">
            <option value="GoodReads" {% if current == 'GoodReads' %}selected{% endif %}>
            GoodReads
            </option>
            <option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
            LibraryThing
            </option>
          </select>
        </div>
        </label>
        <div class="field">
            {{ import_form.as_p }}
        </div>
--- a/bookwyrm/tests/data/librarything.tsv
+++ b/bookwyrm/tests/data/librarything.tsv
@ -0,0 +1,4 @@
 Book Id	Title	Sort Character	Primary Author	Primary Author Role	Secondary Author	Secondary Author Roles	Publication	Date	Review	Rating	Comment	Private Comment	Summary	Media	Physical Description	Weight	Height	Thickness	Length	Dimensions	Page Count	LCCN	Acquired	Date Started	Date Read	Barcode	BCID	Tags	Collections	Languages	Original Languages	LC Classification	ISBN	ISBNs	Subjects	Dewey Decimal	Dewey Wording	Other Call Number	Copies	Source	Entry Date	From Where	OCLC	Work id	Lending Patron	Lending Status	Lending Start	Lending End
 5498194	Marelle	1	CortÃ¡zar, Julio				Gallimard (1979), Poche	1979	chef d'oeuvre	4.5			Marelle by Julio CortÃ¡zar (1979)	BrochÃ©	590 p.; 7.24 inches	1.28 pounds	7.24 inches	1.26 inches	4.96 inches	7.24 x 4.96 x 1.26 inches	590 			[2007-04-16]	[2007-05-08]			roman, espagnol, expÃ©rimental, bohÃ¨me, philosophie	Your library	French	Spanish	PQ7797 .C7145	[2070291340]	2070291340, 9782070291342	Cortâazar, Julio. Rayuela	863	Literature > Spanish And Portuguese > Spanish fiction		1	Amazon.fr	[2006-08-09]			57814				
 5015319	Le grand incendie de Londres: RÃ©cit, avec incises et bifurcations, 1985-1987 (Fiction & Cie)	1	Roubaud, Jacques				Seuil (1989), Unknown Binding	1989		5			Le grand incendie de Londres: RÃ©cit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) by Jacques Roubaud (1989)	BrochÃ©	411 p.; 7.72 inches	0.88 pounds	7.72 inches	1.02 inches	5.43 inches	7.72 x 5.43 x 1.02 inches	411 								Your library	English		PQ2678 .O77	[2020104725]	2020104725, 9782020104722	Autobiographical fiction|Roubaud, Jacques > Fiction	813	American And Canadian > Fiction > Literature		1	Amazon.com	[2006-07-25]			478910				
 5015399	Le MaÃ®tre et Marguerite	1	Boulgakov, MikhaÃ¯l				Pocket (1994), Poche	1994		5			Le MaÃ®tre et Marguerite by MikhaÃ¯l Boulgakov (1994)	BrochÃ©	579 p.; 7.09 inches	0.66 pounds	7.09 inches	1.18 inches	4.33 inches	7.09 x 4.33 x 1.18 inches	579 								Your library	French		PG3476 .B78	[2266062328]	2266062328, 9782266062329	Allegories|Bulgakov|Good and evil > Fiction|Humanities|Jerusalem > Fiction|Jesus Christ > Fiction|Literature|Mental illness > Fiction|Moscow (Russia) > Fiction|Novel|Pilate, Pontius, 1st cent. > Fiction|Political fiction|Russia > Fiction|Russian fiction|Russian publications (Form Entry)|Soviet Union > History > 1925-1953 > Fiction|literature	891.7342	1917-1945 > 1917-1991 (USSR) > Literature > Literature of other Indo-European languages > Other Languages > Russian > Russian Fiction		1	Amazon.fr	[2006-07-25]			10151				
--- a/bookwyrm/tests/test_goodreads_import.py
+++ b/bookwyrm/tests/test_goodreads_import.py
@ -7,16 +7,19 @@ from unittest.mock import patch
 from django.test import TestCase
 import responses
-from bookwyrm import goodreads_import, models
+from bookwyrm import models, importer
 from bookwyrm.goodreads_import import GoodreadsImporter
 from bookwyrm import importer
 from bookwyrm.settings import DOMAIN
 class GoodreadsImport(TestCase):
    ''' importing from goodreads csv '''
    def setUp(self):
        self.importer = GoodreadsImporter()
        ''' use a test csv '''
        datafile = pathlib.Path(__file__).parent.joinpath(
            'data/goodreads.csv')
-        self.csv = open(datafile, 'r')
+        self.csv = open(datafile, 'r', encoding=self.importer.encoding)
        self.user = models.User.objects.create_user(
            'mouse', 'mouse@mouse.mouse', 'password', local=True)
@ -41,7 +44,7 @@ class GoodreadsImport(TestCase):
    def test_create_job(self):
        ''' creates the import job entry and checks csv '''
-        import_job = goodreads_import.create_job(
+        import_job = self.importer.create_job(
            self.user, self.csv, False, 'public')
        self.assertEqual(import_job.user, self.user)
        self.assertEqual(import_job.include_reviews, False)
@ -59,13 +62,13 @@ class GoodreadsImport(TestCase):
    def test_create_retry_job(self):
        ''' trying again with items that didn't import '''
-        import_job = goodreads_import.create_job(
+        import_job = self.importer.create_job(
            self.user, self.csv, False, 'unlisted')
        import_items = models.ImportItem.objects.filter(
            job=import_job
            ).all()[:2]
-        retry = goodreads_import.create_retry_job(
+        retry = self.importer.create_retry_job(
            self.user, import_job, import_items)
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.user)
@ -82,13 +85,13 @@ class GoodreadsImport(TestCase):
    def test_start_import(self):
        ''' begin loading books '''
-        import_job = goodreads_import.create_job(
+        import_job = self.importer.create_job(
            self.user, self.csv, False, 'unlisted')
        MockTask = namedtuple('Task', ('id'))
        mock_task = MockTask(7)
-        with patch('bookwyrm.goodreads_import.import_data.delay') as start:
+        with patch('bookwyrm.importer.import_data.delay') as start:
            start.return_value = mock_task
-            goodreads_import.start_import(import_job)
+            self.importer.start_import(import_job)
        import_job.refresh_from_db()
        self.assertEqual(import_job.task_id, '7')
@ -96,7 +99,7 @@ class GoodreadsImport(TestCase):
    @responses.activate
    def test_import_data(self):
        ''' resolve entry '''
-        import_job = goodreads_import.create_job(
+        import_job = self.importer.create_job(
            self.user, self.csv, False, 'unlisted')
        book = models.Edition.objects.create(title='Test Book')
@ -104,8 +107,8 @@ class GoodreadsImport(TestCase):
                'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
                ) as resolve:
            resolve.return_value = book
-            with patch('bookwyrm.goodreads_import.handle_imported_book'):
+            with patch('bookwyrm.importer.handle_imported_book'):
-                goodreads_import.import_data(import_job.id)
+                importer.import_data(self.importer.service, import_job.id)
        import_item = models.ImportItem.objects.get(job=import_job, index=0)
        self.assertEqual(import_item.book.id, book.id)
@ -120,13 +123,14 @@ class GoodreadsImport(TestCase):
        datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
        csv_file = open(datafile, 'r')
        for index, entry in enumerate(list(csv.DictReader(csv_file))):
            entry = self.importer.parse_fields(entry)
            import_item = models.ImportItem.objects.create(
                job_id=import_job.id, index=index, data=entry, book=self.book)
            break
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, False, 'public')
+                self.importer.service, self.user, import_item, False, 'public')
        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
@ -153,13 +157,14 @@ class GoodreadsImport(TestCase):
        datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
        csv_file = open(datafile, 'r')
        for index, entry in enumerate(list(csv.DictReader(csv_file))):
            entry = self.importer.parse_fields(entry)
            import_item = models.ImportItem.objects.create(
                job_id=import_job.id, index=index, data=entry, book=self.book)
            break
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, False, 'public')
+                self.importer.service, self.user, import_item, False, 'public')
        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
@ -182,15 +187,16 @@ class GoodreadsImport(TestCase):
        datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
        csv_file = open(datafile, 'r')
        for index, entry in enumerate(list(csv.DictReader(csv_file))):
            entry = self.importer.parse_fields(entry)
            import_item = models.ImportItem.objects.create(
                job_id=import_job.id, index=index, data=entry, book=self.book)
            break
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, False, 'public')
+                self.importer.service, self.user, import_item, False, 'public')
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, False, 'public')
+                self.importer.service, self.user, import_item, False, 'public')
        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
@ -212,12 +218,13 @@ class GoodreadsImport(TestCase):
        datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
        csv_file = open(datafile, 'r')
        entry = list(csv.DictReader(csv_file))[2]
        entry = self.importer.parse_fields(entry)
        import_item = models.ImportItem.objects.create(
            job_id=import_job.id, index=0, data=entry, book=self.book)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, True, 'unlisted')
+                self.importer.service, self.user, import_item, True, 'unlisted')
        review = models.Review.objects.get(book=self.book, user=self.user)
        self.assertEqual(review.content, 'mixed feelings')
        self.assertEqual(review.rating, 2)
@ -233,12 +240,13 @@ class GoodreadsImport(TestCase):
        datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
        csv_file = open(datafile, 'r')
        entry = list(csv.DictReader(csv_file))[2]
        entry = self.importer.parse_fields(entry)
        import_item = models.ImportItem.objects.create(
            job_id=import_job.id, index=0, data=entry, book=self.book)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
-            goodreads_import.handle_imported_book(
+            importer.handle_imported_book(
-                self.user, import_item, False, 'unlisted')
+                self.importer.service, self.user, import_item, False, 'unlisted')
        self.assertFalse(models.Review.objects.filter(
            book=self.book, user=self.user
        ).exists())
--- a/bookwyrm/tests/test_librarything_import.py
+++ b/bookwyrm/tests/test_librarything_import.py
@ -0,0 +1,240 @@
 ''' testing import '''
 from collections import namedtuple
 import csv
 import pathlib
 from unittest.mock import patch
 from django.test import TestCase
 import responses
 from bookwyrm import models, importer
 from bookwyrm.librarything_import import LibrarythingImporter
 from bookwyrm.settings import DOMAIN
 class LibrarythingImport(TestCase):
    ''' importing from librarything tsv '''
    def setUp(self):
        ''' use a test tsv '''
        self.importer = LibrarythingImporter()
        self.datafile = pathlib.Path(__file__).parent.joinpath(
            'data/librarything.tsv')
        # Librarything generates latin encoded exports...
        self.csv = open(self.datafile, 'r', encoding=self.importer.encoding)
        # the handle is used by the tests, close it once each test is over
        self.addCleanup(self.csv.close)
        self.user = models.User.objects.create_user(
            'mmai', 'mmai@mmai.mmai', 'password', local=True)

        models.Connector.objects.create(
            identifier=DOMAIN,
            name='Local',
            local=True,
            connector_file='self_connector',
            base_url='https://%s' % DOMAIN,
            books_url='https://%s/book' % DOMAIN,
            covers_url='https://%s/images/covers' % DOMAIN,
            search_url='https://%s/search?q=' % DOMAIN,
            priority=1,
        )
        work = models.Work.objects.create(title='Test Work')
        self.book = models.Edition.objects.create(
            title='Example Edition',
            remote_id='https://example.com/book/1',
            parent_work=work
        )

    def _create_import_item(self, row):
        ''' store row number `row` of the test tsv as the item of a new job

        The item is linked to self.book, so no book lookup is needed. '''
        import_job = models.ImportJob.objects.create(user=self.user)
        with open(
                self.datafile, 'r', encoding=self.importer.encoding
            ) as tsv_file:
            entries = list(csv.DictReader(
                tsv_file, delimiter=self.importer.delimiter))
        entry = self.importer.parse_fields(entries[row])
        return models.ImportItem.objects.create(
            job_id=import_job.id, index=0, data=entry, book=self.book)

    def _assert_first_row_readthrough(self):
        ''' the user has exactly one readthrough, with the dates of row 0 '''
        readthrough = models.ReadThrough.objects.get(user=self.user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date.year, 2007)
        self.assertEqual(readthrough.start_date.month, 4)
        self.assertEqual(readthrough.start_date.day, 16)
        self.assertEqual(readthrough.finish_date.year, 2007)
        self.assertEqual(readthrough.finish_date.month, 5)
        self.assertEqual(readthrough.finish_date.day, 8)

    def test_create_job(self):
        ''' creates the import job entry and checks csv '''
        import_job = self.importer.create_job(
            self.user, self.csv, False, 'public')
        self.assertEqual(import_job.user, self.user)
        self.assertEqual(import_job.include_reviews, False)
        self.assertEqual(import_job.privacy, 'public')

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 3)
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].data['Book Id'], '5498194')
        self.assertEqual(import_items[1].index, 1)
        self.assertEqual(import_items[1].data['Book Id'], '5015319')
        self.assertEqual(import_items[2].index, 2)
        self.assertEqual(import_items[2].data['Book Id'], '5015399')

    def test_create_retry_job(self):
        ''' trying again with items that didn't import '''
        import_job = self.importer.create_job(
            self.user, self.csv, False, 'unlisted')
        import_items = models.ImportItem.objects.filter(
            job=import_job
            ).all()[:2]

        retry = self.importer.create_retry_job(
            self.user, import_job, import_items)
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.user)
        self.assertEqual(retry.include_reviews, False)
        self.assertEqual(retry.privacy, 'unlisted')

        retry_items = models.ImportItem.objects.filter(job=retry).all()
        self.assertEqual(len(retry_items), 2)
        self.assertEqual(retry_items[0].index, 0)
        # check the copied item, not the item of the original job
        self.assertEqual(retry_items[0].data['Book Id'], '5498194')
        self.assertEqual(retry_items[1].index, 1)
        self.assertEqual(retry_items[1].data['Book Id'], '5015319')

    @responses.activate
    def test_import_data(self):
        ''' resolve entry '''
        import_job = self.importer.create_job(
            self.user, self.csv, False, 'unlisted')
        book = models.Edition.objects.create(title='Test Book')

        with patch(
                'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
                ) as resolve:
            resolve.return_value = book
            with patch('bookwyrm.importer.handle_imported_book'):
                importer.import_data(self.importer.service, import_job.id)

        import_item = models.ImportItem.objects.get(job=import_job, index=0)
        self.assertEqual(import_item.book.id, book.id)

    def test_handle_imported_book(self):
        ''' librarything import added a book, this adds related connections '''
        shelf = self.user.shelf_set.filter(identifier='read').first()
        self.assertIsNone(shelf.books.first())

        import_item = self._create_import_item(0)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, False, 'public')

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self._assert_first_row_readthrough()

    def test_handle_imported_book_already_shelved(self):
        ''' a book that is on a shelf already stays on that shelf '''
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            shelf = self.user.shelf_set.filter(identifier='to-read').first()
            models.ShelfBook.objects.create(
                shelf=shelf, user=self.user, book=self.book)

        import_item = self._create_import_item(0)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, False, 'public')

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertIsNone(
            self.user.shelf_set.get(identifier='read').books.first())
        self._assert_first_row_readthrough()

    def test_handle_import_twice(self):
        ''' re-importing books '''
        shelf = self.user.shelf_set.filter(identifier='read').first()

        import_item = self._create_import_item(0)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, False, 'public')
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, False, 'public')

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        # the lookup fails if the second import added another readthrough
        self._assert_first_row_readthrough()

    def test_handle_imported_book_review(self):
        ''' librarything review import '''
        import_item = self._create_import_item(0)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, True, 'unlisted')

        review = models.Review.objects.get(book=self.book, user=self.user)
        self.assertEqual(review.content, 'chef d\'oeuvre')
        # the export has 4.5 stars, the importer rounds up
        self.assertEqual(review.rating, 5)
        self.assertEqual(review.published_date.year, 2007)
        self.assertEqual(review.published_date.month, 5)
        self.assertEqual(review.published_date.day, 8)
        self.assertEqual(review.privacy, 'unlisted')

    def test_handle_imported_book_reviews_disabled(self):
        ''' librarything review import, reviews not requested '''
        import_item = self._create_import_item(2)
        with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
            importer.handle_imported_book(
                self.importer.service, self.user, import_item, False, 'unlisted')

        self.assertFalse(models.Review.objects.filter(
            book=self.book, user=self.user
        ).exists())
--- a/bookwyrm/views/import_data.py
+++ b/bookwyrm/views/import_data.py
@ -9,7 +9,7 @@ from django.template.response import TemplateResponse
 from django.utils.decorators import method_decorator
 from django.views import View
-from bookwyrm import forms, goodreads_import, models
+from bookwyrm import forms, goodreads_import, librarything_import, models
 from bookwyrm.tasks import app
 # pylint: disable= no-self-use
@ -31,18 +31,29 @@ class Import(View):
        if form.is_valid():
            include_reviews = request.POST.get('include_reviews') == 'on'
            privacy = request.POST.get('privacy')
            source = request.POST.get('source')
            importer = None
            if source == 'LibraryThing':
                importer = librarything_import.LibrarythingImporter()
            else:
                # Default : GoodReads
                importer = goodreads_import.GoodreadsImporter()
            try:
-                job = goodreads_import.create_job(
+                job = importer.create_job(
                    request.user,
                    TextIOWrapper(
                        request.FILES['csv_file'],
-                        encoding=request.encoding),
+                        encoding=importer.encoding),
                    include_reviews,
                    privacy,
                )
            except (UnicodeDecodeError, ValueError):
                return HttpResponseBadRequest('Not a valid csv file')
-            goodreads_import.start_import(job)
+
            importer.start_import(job)
            return redirect('/import/%d' % job.id)
        return HttpResponseBadRequest()