librarything import

This commit is contained in:
Henri Bourcereau 2021-02-20 17:02:36 +01:00
parent dcd4baed82
commit b85fed3595
9 changed files with 510 additions and 152 deletions

View file

@ -1,121 +1,13 @@
''' handle reading a csv from goodreads ''' ''' handle reading a csv from goodreads '''
import csv from bookwyrm.importer import Importer
import logging
from bookwyrm import models # GoodReads is the default importer, thus Importer follows its structure. For a more complete example of overriding see librarything_import.py
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.tasks import app
logger = logging.getLogger(__name__) class GoodreadsImporter(Importer):
service = 'GoodReads'
def parse_fields(self, data):
def create_job(user, csv_file, include_reviews, privacy): data.update({'import_source': self.service })
''' check over a csv and creates a database entry for the job''' # add missing 'Date Started' field
job = ImportJob.objects.create( data.update({'Date Started': None })
user=user, return data
include_reviews=include_reviews,
privacy=privacy
)
for index, entry in enumerate(list(csv.DictReader(csv_file))):
if not all(x in entry for x in ('ISBN13', 'Title', 'Author')):
raise ValueError('Author, title, and isbn must be in data.')
ImportItem(job=job, index=index, data=entry).save()
return job
def create_retry_job(user, original_job, items):
''' retry items that didn't import '''
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
privacy=original_job.privacy,
retry=True
)
for item in items:
ImportItem(job=job, index=item.index, data=item.data).save()
return job
def start_import(job):
''' initalizes a csv import job '''
result = import_data.delay(job.id)
job.task_id = result.id
job.save()
@app.task
def import_data(job_id):
''' does the actual lookup work in a celery task '''
job = ImportJob.objects.get(id=job_id)
try:
for item in job.items.all():
try:
item.resolve()
except Exception as e:# pylint: disable=broad-except
logger.exception(e)
item.fail_reason = 'Error loading book'
item.save()
continue
if item.book:
item.save()
# shelves book and handles reviews
handle_imported_book(
job.user, item, job.include_reviews, job.privacy)
else:
item.fail_reason = 'Could not find a match for book'
item.save()
finally:
job.complete = True
job.save()
def handle_imported_book(user, item, include_reviews, privacy):
''' process a goodreads csv and then post about it '''
if isinstance(item.book, models.Work):
item.book = item.book.default_edition
if not item.book:
return
existing_shelf = models.ShelfBook.objects.filter(
book=item.book, user=user).exists()
# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
desired_shelf = models.Shelf.objects.get(
identifier=item.shelf,
user=user
)
models.ShelfBook.objects.create(
book=item.book, shelf=desired_shelf, user=user)
for read in item.reads:
# check for an existing readthrough with the same dates
if models.ReadThrough.objects.filter(
user=user, book=item.book,
start_date=read.start_date,
finish_date=read.finish_date
).exists():
continue
read.book = item.book
read.user = user
read.save()
if include_reviews and (item.rating or item.review):
review_title = 'Review of {!r} on Goodreads'.format(
item.book.title,
) if item.review else ''
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
models.Review.objects.create(
user=user,
book=item.book,
name=review_title,
content=item.review,
rating=item.rating,
published_date=published_date_guess,
privacy=privacy,
)

135
bookwyrm/importer.py Normal file
View file

@ -0,0 +1,135 @@
''' handle reading a csv from an external service, defaults are from GoodReads '''
import csv
import logging
from bookwyrm import models
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.tasks import app
logger = logging.getLogger(__name__)
class Importer:
    ''' generic csv importer; the defaults describe a GoodReads export '''
    # name of the originating service, stored on every imported row
    service = 'Unknown'
    # column separator handed to csv.DictReader
    delimiter = ','
    # not read inside this class; exposed so callers can open the export
    # with the matching encoding
    encoding = 'UTF-8'
    # columns every row has to provide
    mandatory_fields = ['ISBN13', 'Title', 'Author']

    def create_job(self, user, csv_file, include_reviews, privacy):
        ''' check over a csv and creates a database entry for the job

        Raises ValueError when a row lacks one of the mandatory fields.
        Every row is validated and parsed before anything is written, so a
        rejected file does not leave a partially filled job behind. '''
        rows = list(csv.DictReader(csv_file, delimiter=self.delimiter))
        for row in rows:
            if not all(field in row for field in self.mandatory_fields):
                raise ValueError('Author, title, and isbn must be in data.')
        entries = [self.parse_fields(row) for row in rows]

        job = ImportJob.objects.create(
            user=user,
            include_reviews=include_reviews,
            privacy=privacy
        )
        for index, entry in enumerate(entries):
            self.save_item(job, index, entry)
        return job

    def save_item(self, job, index, data):
        ''' store one parsed row as an item of the job '''
        ImportItem(job=job, index=index, data=data).save()

    def parse_fields(self, entry):
        ''' tag the row with its source service; updates and returns entry '''
        entry.update({'import_source': self.service})
        return entry

    def create_retry_job(self, user, original_job, items):
        ''' retry items that didn't import '''
        job = ImportJob.objects.create(
            user=user,
            include_reviews=original_job.include_reviews,
            privacy=original_job.privacy,
            retry=True
        )
        for item in items:
            # item data was already parsed when the original job was created
            self.save_item(job, item.index, item.data)
        return job

    def start_import(self, job):
        ''' initializes a csv import job '''
        result = import_data.delay(self.service, job.id)
        job.task_id = result.id
        job.save()
@app.task
def import_data(source, job_id):
    ''' celery task: resolve every item of a job and process the matches '''
    job = ImportJob.objects.get(id=job_id)
    try:
        for item in job.items.all():
            try:
                item.resolve()
            except Exception as err:  # pylint: disable=broad-except
                # one failing lookup must not abort the remaining items
                logger.exception(err)
                item.fail_reason = 'Error loading book'
                item.save()
                continue

            if not item.book:
                item.fail_reason = 'Could not find a match for book'
                item.save()
                continue

            item.save()
            # shelves book and handles reviews
            handle_imported_book(
                source, job.user, item, job.include_reviews, job.privacy)
    finally:
        # the job is flagged complete even when the loop raised
        job.complete = True
        job.save()
def handle_imported_book(source, user, item, include_reviews, privacy):
    ''' shelve an imported book, save its readthroughs and post its review

    source -- name of the service the row came from, used in the review title
    user -- owner of the shelf entry, readthroughs and review that get created
    item -- import item; nothing happens unless item.book yields an edition
    include_reviews -- when falsy no review is created
    privacy -- privacy setting stored on the created review
    '''
    if isinstance(item.book, models.Work):
        # a work was matched: continue with its default edition
        item.book = item.book.default_edition
    if not item.book:
        return

    existing_shelf = models.ShelfBook.objects.filter(
        book=item.book, user=user).exists()

    # shelve the book if it hasn't been shelved already
    if item.shelf and not existing_shelf:
        desired_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        models.ShelfBook.objects.create(
            book=item.book, shelf=desired_shelf, user=user)

    for read in item.reads:
        # check for an existing readthrough with the same dates
        already_recorded = models.ReadThrough.objects.filter(
            user=user, book=item.book,
            start_date=read.start_date,
            finish_date=read.finish_date
        ).exists()
        if already_recorded:
            continue
        read.book = item.book
        read.user = user
        read.save()

    if include_reviews and (item.rating or item.review):
        # the book title is quoted via !r; the service name is inserted as
        # plain text so the title does not read "on 'GoodReads'"
        review_title = 'Review of {!r} on {}'.format(
            item.book.title,
            source,
        ) if item.review else ''

        # we don't know the publication date of the review,
        # but "now" is a bad guess
        published_date_guess = item.date_read or item.date_added
        models.Review.objects.create(
            user=user,
            book=item.book,
            name=review_title,
            content=item.review,
            rating=item.rating,
            published_date=published_date_guess,
            privacy=privacy,
        )

View file

@ -0,0 +1,39 @@
''' handle reading a csv from librarything '''
import csv
import re
import math
from bookwyrm import models
from bookwyrm.models import ImportItem
from bookwyrm.importer import Importer
class LibrarythingImporter(Importer):
    ''' importer for LibraryThing tab separated exports '''
    service = 'LibraryThing'
    delimiter = '\t'
    encoding = 'ISO-8859-1'
    # mandatory_fields : fields matching the book ISBN13, title and author
    mandatory_fields = ['ISBN', 'Title', 'Primary Author']

    @staticmethod
    def _strip_brackets(value):
        ''' remove the square brackets the export puts around dates;
        a missing value becomes an empty string '''
        return re.sub(r'\[|\]', '', value or '')

    def parse_fields(self, initial):
        ''' translate a LibraryThing row into the GoodReads style field
        names; returns a new dict and leaves the input row untouched '''
        data = {}
        data['import_source'] = self.service
        data['Book Id'] = initial['Book Id']
        data['Title'] = initial['Title']
        data['Author'] = initial['Primary Author']
        data['ISBN13'] = initial['ISBN']
        data['My Review'] = initial['Review']

        # ratings may be fractional (e.g. 4.5) and are rounded up; an empty
        # rating would make float() raise, so it is stored as 0
        # NOTE(review): assumes 0 is read back as "not rated" - confirm
        # against ImportItem.rating
        rating = initial['Rating']
        data['My Rating'] = math.ceil(float(rating)) if rating else 0

        data['Date Added'] = self._strip_brackets(initial['Entry Date'])
        data['Date Started'] = self._strip_brackets(initial['Date Started'])
        data['Date Read'] = self._strip_brackets(initial['Date Read'])

        # the shelf is derived from whichever reading dates are filled in
        if data['Date Read']:
            data['Exclusive Shelf'] = "read"
        elif data['Date Started']:
            data['Exclusive Shelf'] = "reading"
        else:
            data['Exclusive Shelf'] = "to-read"
        return data

View file

@ -97,8 +97,8 @@ class ImportItem(models.Model):
def get_book_from_title_author(self): def get_book_from_title_author(self):
''' search by title and author ''' ''' search by title and author '''
search_term = construct_search_term( search_term = construct_search_term(
self.data['Title'], self.title,
self.data['Author'] self.author
) )
search_result = connector_manager.first_search_result( search_result = connector_manager.first_search_result(
search_term, min_confidence=0.999 search_term, min_confidence=0.999
@ -149,6 +149,14 @@ class ImportItem(models.Model):
dateutil.parser.parse(self.data['Date Added'])) dateutil.parser.parse(self.data['Date Added']))
return None return None
@property
def date_started(self):
''' when the book was started '''
if "Date Started" in self.data and self.data['Date Started']:
return timezone.make_aware(
dateutil.parser.parse(self.data['Date Started']))
return None
@property @property
def date_read(self): def date_read(self):
''' the date a book was completed ''' ''' the date a book was completed '''
@ -160,18 +168,24 @@ class ImportItem(models.Model):
@property @property
def reads(self): def reads(self):
''' formats a read through dataset for the book in this line ''' ''' formats a read through dataset for the book in this line '''
if (self.shelf == 'reading' start_date = self.date_started
and self.date_added and not self.date_read):
return [ReadThrough(start_date=self.date_added)] # Goodreads special case (no 'date started' field)
if ((self.shelf == 'reading' or (self.shelf == 'read' and self.date_read))
and self.date_added and not start_date):
start_date = self.date_added
if (start_date and start_date is not None and not self.date_read):
return [ReadThrough(start_date=start_date)]
if self.date_read: if self.date_read:
return [ReadThrough( return [ReadThrough(
start_date=self.date_added, start_date=start_date,
finish_date=self.date_read, finish_date=self.date_read,
)] )]
return [] return []
def __repr__(self): def __repr__(self):
return "<GoodreadsItem {!r}>".format(self.data['Title']) return "<{!r}Item {!r}>".format(self.data['import_source'], self.data['Title'])
def __str__(self): def __str__(self):
return "{} by {}".format(self.data['Title'], self.data['Author']) return "{} by {}".format(self.data['Title'], self.data['Author'])

View file

@ -2,9 +2,24 @@
{% load humanize %} {% load humanize %}
{% block content %} {% block content %}
<div class="block"> <div class="block">
<h1 class="title">Import Books from GoodReads</h1> <h1 class="title">Import Books</h1>
<form name="import" action="/import" method="post" enctype="multipart/form-data"> <form name="import" action="/import" method="post" enctype="multipart/form-data">
{% csrf_token %} {% csrf_token %}
{# pick the service the uploaded export comes from; preselects `current` #}
<label class="label" for="source">
    <p>Data source</p>
    <div class="select {{ class }}">
        <select name="source" id="source">
            <option value="GoodReads" {% if current == 'GoodReads' %}selected{% endif %}>
                GoodReads
            </option>
            <option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
                LibraryThing
            </option>
        </select>
    </div>
</label>
<div class="field"> <div class="field">
{{ import_form.as_p }} {{ import_form.as_p }}
</div> </div>

View file

@ -0,0 +1,4 @@
Book Id Title Sort Character Primary Author Primary Author Role Secondary Author Secondary Author Roles Publication Date Review Rating Comment Private Comment Summary Media Physical Description Weight Height Thickness Length Dimensions Page Count LCCN Acquired Date Started Date Read Barcode BCID Tags Collections Languages Original Languages LC Classification ISBN ISBNs Subjects Dewey Decimal Dewey Wording Other Call Number Copies Source Entry Date From Where OCLC Work id Lending Patron Lending Status Lending Start Lending End
5498194 Marelle 1 Cortázar, Julio Gallimard (1979), Poche 1979 chef d'oeuvre 4.5 Marelle by Julio Cortázar (1979) Broché 590 p.; 7.24 inches 1.28 pounds 7.24 inches 1.26 inches 4.96 inches 7.24 x 4.96 x 1.26 inches 590 [2007-04-16] [2007-05-08] roman, espagnol, expérimental, bohème, philosophie Your library French Spanish PQ7797 .C7145 [2070291340] 2070291340, 9782070291342 Cortâazar, Julio. Rayuela 863 Literature > Spanish And Portuguese > Spanish fiction 1 Amazon.fr [2006-08-09] 57814
5015319 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) 1 Roubaud, Jacques Seuil (1989), Unknown Binding 1989 5 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) by Jacques Roubaud (1989) Broché 411 p.; 7.72 inches 0.88 pounds 7.72 inches 1.02 inches 5.43 inches 7.72 x 5.43 x 1.02 inches 411 Your library English PQ2678 .O77 [2020104725] 2020104725, 9782020104722 Autobiographical fiction|Roubaud, Jacques > Fiction 813 American And Canadian > Fiction > Literature 1 Amazon.com [2006-07-25] 478910
5015399 Le Maître et Marguerite 1 Boulgakov, Mikhaïl Pocket (1994), Poche 1994 5 Le Maître et Marguerite by Mikhaïl Boulgakov (1994) Broché 579 p.; 7.09 inches 0.66 pounds 7.09 inches 1.18 inches 4.33 inches 7.09 x 4.33 x 1.18 inches 579 Your library French PG3476 .B78 [2266062328] 2266062328, 9782266062329 Allegories|Bulgakov|Good and evil > Fiction|Humanities|Jerusalem > Fiction|Jesus Christ > Fiction|Literature|Mental illness > Fiction|Moscow (Russia) > Fiction|Novel|Pilate, Pontius, 1st cent. > Fiction|Political fiction|Russia > Fiction|Russian fiction|Russian publications (Form Entry)|Soviet Union > History > 1925-1953 > Fiction|literature 891.7342 1917-1945 > 1917-1991 (USSR) > Literature > Literature of other Indo-European languages > Other Languages > Russian > Russian Fiction 1 Amazon.fr [2006-07-25] 10151
1 Book Id Title Sort Character Primary Author Primary Author Role Secondary Author Secondary Author Roles Publication Date Review Rating Comment Private Comment Summary Media Physical Description Weight Height Thickness Length Dimensions Page Count LCCN Acquired Date Started Date Read Barcode BCID Tags Collections Languages Original Languages LC Classification ISBN ISBNs Subjects Dewey Decimal Dewey Wording Other Call Number Copies Source Entry Date From Where OCLC Work id Lending Patron Lending Status Lending Start Lending End
2 5498194 Marelle 1 Cortázar, Julio Gallimard (1979), Poche 1979 chef d'oeuvre 4.5 Marelle by Julio Cortázar (1979) Broché 590 p.; 7.24 inches 1.28 pounds 7.24 inches 1.26 inches 4.96 inches 7.24 x 4.96 x 1.26 inches 590 [2007-04-16] [2007-05-08] roman, espagnol, expérimental, bohème, philosophie Your library French Spanish PQ7797 .C7145 [2070291340] 2070291340, 9782070291342 Cortâazar, Julio. Rayuela 863 Literature > Spanish And Portuguese > Spanish fiction 1 Amazon.fr [2006-08-09] 57814
3 5015319 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) 1 Roubaud, Jacques Seuil (1989), Unknown Binding 1989 5 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) by Jacques Roubaud (1989) Broché 411 p.; 7.72 inches 0.88 pounds 7.72 inches 1.02 inches 5.43 inches 7.72 x 5.43 x 1.02 inches 411 Your library English PQ2678 .O77 [2020104725] 2020104725, 9782020104722 Autobiographical fiction|Roubaud, Jacques > Fiction 813 American And Canadian > Fiction > Literature 1 Amazon.com [2006-07-25] 478910
4 5015399 Le Maître et Marguerite 1 Boulgakov, Mikhaïl Pocket (1994), Poche 1994 5 Le Maître et Marguerite by Mikhaïl Boulgakov (1994) Broché 579 p.; 7.09 inches 0.66 pounds 7.09 inches 1.18 inches 4.33 inches 7.09 x 4.33 x 1.18 inches 579 Your library French PG3476 .B78 [2266062328] 2266062328, 9782266062329 Allegories|Bulgakov|Good and evil > Fiction|Humanities|Jerusalem > Fiction|Jesus Christ > Fiction|Literature|Mental illness > Fiction|Moscow (Russia) > Fiction|Novel|Pilate, Pontius, 1st cent. > Fiction|Political fiction|Russia > Fiction|Russian fiction|Russian publications (Form Entry)|Soviet Union > History > 1925-1953 > Fiction|literature 891.7342 1917-1945 > 1917-1991 (USSR) > Literature > Literature of other Indo-European languages > Other Languages > Russian > Russian Fiction 1 Amazon.fr [2006-07-25] 10151

View file

@ -7,16 +7,19 @@ from unittest.mock import patch
from django.test import TestCase from django.test import TestCase
import responses import responses
from bookwyrm import goodreads_import, models from bookwyrm import models, importer
from bookwyrm.goodreads_import import GoodreadsImporter
from bookwyrm import importer
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
class GoodreadsImport(TestCase): class GoodreadsImport(TestCase):
''' importing from goodreads csv ''' ''' importing from goodreads csv '''
def setUp(self): def setUp(self):
self.importer = GoodreadsImporter()
''' use a test csv ''' ''' use a test csv '''
datafile = pathlib.Path(__file__).parent.joinpath( datafile = pathlib.Path(__file__).parent.joinpath(
'data/goodreads.csv') 'data/goodreads.csv')
self.csv = open(datafile, 'r') self.csv = open(datafile, 'r', encoding=self.importer.encoding)
self.user = models.User.objects.create_user( self.user = models.User.objects.create_user(
'mouse', 'mouse@mouse.mouse', 'password', local=True) 'mouse', 'mouse@mouse.mouse', 'password', local=True)
@ -41,7 +44,7 @@ class GoodreadsImport(TestCase):
def test_create_job(self): def test_create_job(self):
''' creates the import job entry and checks csv ''' ''' creates the import job entry and checks csv '''
import_job = goodreads_import.create_job( import_job = self.importer.create_job(
self.user, self.csv, False, 'public') self.user, self.csv, False, 'public')
self.assertEqual(import_job.user, self.user) self.assertEqual(import_job.user, self.user)
self.assertEqual(import_job.include_reviews, False) self.assertEqual(import_job.include_reviews, False)
@ -59,13 +62,13 @@ class GoodreadsImport(TestCase):
def test_create_retry_job(self): def test_create_retry_job(self):
''' trying again with items that didn't import ''' ''' trying again with items that didn't import '''
import_job = goodreads_import.create_job( import_job = self.importer.create_job(
self.user, self.csv, False, 'unlisted') self.user, self.csv, False, 'unlisted')
import_items = models.ImportItem.objects.filter( import_items = models.ImportItem.objects.filter(
job=import_job job=import_job
).all()[:2] ).all()[:2]
retry = goodreads_import.create_retry_job( retry = self.importer.create_retry_job(
self.user, import_job, import_items) self.user, import_job, import_items)
self.assertNotEqual(import_job, retry) self.assertNotEqual(import_job, retry)
self.assertEqual(retry.user, self.user) self.assertEqual(retry.user, self.user)
@ -82,13 +85,13 @@ class GoodreadsImport(TestCase):
def test_start_import(self): def test_start_import(self):
''' begin loading books ''' ''' begin loading books '''
import_job = goodreads_import.create_job( import_job = self.importer.create_job(
self.user, self.csv, False, 'unlisted') self.user, self.csv, False, 'unlisted')
MockTask = namedtuple('Task', ('id')) MockTask = namedtuple('Task', ('id'))
mock_task = MockTask(7) mock_task = MockTask(7)
with patch('bookwyrm.goodreads_import.import_data.delay') as start: with patch('bookwyrm.importer.import_data.delay') as start:
start.return_value = mock_task start.return_value = mock_task
goodreads_import.start_import(import_job) self.importer.start_import(import_job)
import_job.refresh_from_db() import_job.refresh_from_db()
self.assertEqual(import_job.task_id, '7') self.assertEqual(import_job.task_id, '7')
@ -96,7 +99,7 @@ class GoodreadsImport(TestCase):
@responses.activate @responses.activate
def test_import_data(self): def test_import_data(self):
''' resolve entry ''' ''' resolve entry '''
import_job = goodreads_import.create_job( import_job = self.importer.create_job(
self.user, self.csv, False, 'unlisted') self.user, self.csv, False, 'unlisted')
book = models.Edition.objects.create(title='Test Book') book = models.Edition.objects.create(title='Test Book')
@ -104,8 +107,8 @@ class GoodreadsImport(TestCase):
'bookwyrm.models.import_job.ImportItem.get_book_from_isbn' 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
) as resolve: ) as resolve:
resolve.return_value = book resolve.return_value = book
with patch('bookwyrm.goodreads_import.handle_imported_book'): with patch('bookwyrm.importer.handle_imported_book'):
goodreads_import.import_data(import_job.id) importer.import_data(self.importer.service, import_job.id)
import_item = models.ImportItem.objects.get(job=import_job, index=0) import_item = models.ImportItem.objects.get(job=import_job, index=0)
self.assertEqual(import_item.book.id, book.id) self.assertEqual(import_item.book.id, book.id)
@ -120,13 +123,14 @@ class GoodreadsImport(TestCase):
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
csv_file = open(datafile, 'r') csv_file = open(datafile, 'r')
for index, entry in enumerate(list(csv.DictReader(csv_file))): for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
import_item = models.ImportItem.objects.create( import_item = models.ImportItem.objects.create(
job_id=import_job.id, index=index, data=entry, book=self.book) job_id=import_job.id, index=index, data=entry, book=self.book)
break break
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, False, 'public') self.importer.service, self.user, import_item, False, 'public')
shelf.refresh_from_db() shelf.refresh_from_db()
self.assertEqual(shelf.books.first(), self.book) self.assertEqual(shelf.books.first(), self.book)
@ -153,13 +157,14 @@ class GoodreadsImport(TestCase):
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
csv_file = open(datafile, 'r') csv_file = open(datafile, 'r')
for index, entry in enumerate(list(csv.DictReader(csv_file))): for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
import_item = models.ImportItem.objects.create( import_item = models.ImportItem.objects.create(
job_id=import_job.id, index=index, data=entry, book=self.book) job_id=import_job.id, index=index, data=entry, book=self.book)
break break
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, False, 'public') self.importer.service, self.user, import_item, False, 'public')
shelf.refresh_from_db() shelf.refresh_from_db()
self.assertEqual(shelf.books.first(), self.book) self.assertEqual(shelf.books.first(), self.book)
@ -182,15 +187,16 @@ class GoodreadsImport(TestCase):
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
csv_file = open(datafile, 'r') csv_file = open(datafile, 'r')
for index, entry in enumerate(list(csv.DictReader(csv_file))): for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
import_item = models.ImportItem.objects.create( import_item = models.ImportItem.objects.create(
job_id=import_job.id, index=index, data=entry, book=self.book) job_id=import_job.id, index=index, data=entry, book=self.book)
break break
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, False, 'public') self.importer.service, self.user, import_item, False, 'public')
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, False, 'public') self.importer.service, self.user, import_item, False, 'public')
shelf.refresh_from_db() shelf.refresh_from_db()
self.assertEqual(shelf.books.first(), self.book) self.assertEqual(shelf.books.first(), self.book)
@ -212,12 +218,13 @@ class GoodreadsImport(TestCase):
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
csv_file = open(datafile, 'r') csv_file = open(datafile, 'r')
entry = list(csv.DictReader(csv_file))[2] entry = list(csv.DictReader(csv_file))[2]
entry = self.importer.parse_fields(entry)
import_item = models.ImportItem.objects.create( import_item = models.ImportItem.objects.create(
job_id=import_job.id, index=0, data=entry, book=self.book) job_id=import_job.id, index=0, data=entry, book=self.book)
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, True, 'unlisted') self.importer.service, self.user, import_item, True, 'unlisted')
review = models.Review.objects.get(book=self.book, user=self.user) review = models.Review.objects.get(book=self.book, user=self.user)
self.assertEqual(review.content, 'mixed feelings') self.assertEqual(review.content, 'mixed feelings')
self.assertEqual(review.rating, 2) self.assertEqual(review.rating, 2)
@ -233,12 +240,13 @@ class GoodreadsImport(TestCase):
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
csv_file = open(datafile, 'r') csv_file = open(datafile, 'r')
entry = list(csv.DictReader(csv_file))[2] entry = list(csv.DictReader(csv_file))[2]
entry = self.importer.parse_fields(entry)
import_item = models.ImportItem.objects.create( import_item = models.ImportItem.objects.create(
job_id=import_job.id, index=0, data=entry, book=self.book) job_id=import_job.id, index=0, data=entry, book=self.book)
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
goodreads_import.handle_imported_book( importer.handle_imported_book(
self.user, import_item, False, 'unlisted') self.importer.service, self.user, import_item, False, 'unlisted')
self.assertFalse(models.Review.objects.filter( self.assertFalse(models.Review.objects.filter(
book=self.book, user=self.user book=self.book, user=self.user
).exists()) ).exists())

View file

@ -0,0 +1,240 @@
''' testing import '''
from collections import namedtuple
import csv
import pathlib
from unittest.mock import patch
from django.test import TestCase
import responses
from bookwyrm import models, importer
from bookwyrm.librarything_import import LibrarythingImporter
from bookwyrm.settings import DOMAIN
class LibrarythingImport(TestCase):
''' importing from librarything tsv '''
def setUp(self):
    ''' use a test tsv '''
    self.importer = LibrarythingImporter()
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')
    # Librarything generates latin encoded exports...
    self.csv = open(datafile, 'r', encoding=self.importer.encoding)
    # release the file handle once the test is over, pass or fail
    self.addCleanup(self.csv.close)

    self.user = models.User.objects.create_user(
        'mmai', 'mmai@mmai.mmai', 'password', local=True)

    models.Connector.objects.create(
        identifier=DOMAIN,
        name='Local',
        local=True,
        connector_file='self_connector',
        base_url='https://%s' % DOMAIN,
        books_url='https://%s/book' % DOMAIN,
        covers_url='https://%s/images/covers' % DOMAIN,
        search_url='https://%s/search?q=' % DOMAIN,
        priority=1,
    )
    work = models.Work.objects.create(title='Test Work')
    self.book = models.Edition.objects.create(
        title='Example Edition',
        remote_id='https://example.com/book/1',
        parent_work=work
    )
def test_create_job(self):
    ''' a job is created with one item per row of the export '''
    job = self.importer.create_job(self.user, self.csv, False, 'public')
    self.assertEqual(job.user, self.user)
    self.assertEqual(job.include_reviews, False)
    self.assertEqual(job.privacy, 'public')

    items = models.ImportItem.objects.filter(job=job).all()
    self.assertEqual(len(items), 3)
    # items keep the order of the rows in the file
    expected_book_ids = ['5498194', '5015319', '5015399']
    for position, book_id in enumerate(expected_book_ids):
        self.assertEqual(items[position].index, position)
        self.assertEqual(items[position].data['Book Id'], book_id)
def test_create_retry_job(self):
    ''' trying again with items that didn't import '''
    import_job = self.importer.create_job(
        self.user, self.csv, False, 'unlisted')
    import_items = models.ImportItem.objects.filter(
        job=import_job
    ).all()[:2]

    retry = self.importer.create_retry_job(
        self.user, import_job, import_items)
    self.assertNotEqual(import_job, retry)
    self.assertEqual(retry.user, self.user)
    self.assertEqual(retry.include_reviews, False)
    self.assertEqual(retry.privacy, 'unlisted')

    retry_items = models.ImportItem.objects.filter(job=retry).all()
    self.assertEqual(len(retry_items), 2)
    self.assertEqual(retry_items[0].index, 0)
    # check the copied item, not the item of the original job
    self.assertEqual(retry_items[0].data['Book Id'], '5498194')
    self.assertEqual(retry_items[1].index, 1)
    self.assertEqual(retry_items[1].data['Book Id'], '5015319')
@responses.activate
def test_import_data(self):
    ''' running the task attaches the resolved book to the item '''
    job = self.importer.create_job(
        self.user, self.csv, False, 'unlisted')
    book = models.Edition.objects.create(title='Test Book')

    isbn_lookup = 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
    with patch(isbn_lookup) as resolve, \
            patch('bookwyrm.importer.handle_imported_book'):
        resolve.return_value = book
        importer.import_data(self.importer.service, job.id)

    first_item = models.ImportItem.objects.get(job=job, index=0)
    self.assertEqual(first_item.book.id, book.id)
def test_handle_imported_book(self):
    ''' librarything import added a book, this adds related connections '''
    shelf = self.user.shelf_set.filter(identifier='read').first()
    self.assertIsNone(shelf.books.first())

    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # only the first row is needed; the context manager closes the file
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)

    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    # dates are compared field by field: 2007-04-16 to 2007-05-08
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
def test_handle_imported_book_already_shelved(self):
    ''' a book already on the to-read shelf is not moved to the read shelf '''
    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        shelf = self.user.shelf_set.filter(identifier='to-read').first()
        models.ShelfBook.objects.create(
            shelf=shelf, user=self.user, book=self.book)

    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')
    # only the first row of the fixture is needed; the context manager
    # closes the file handle as soon as that row has been read
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    # the book is still on the shelf it was put on before the import ...
    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)
    # ... and the 'read' shelf has no books
    self.assertIsNone(
        self.user.shelf_set.get(identifier='read').books.first())

    # the readthrough dates are still recorded
    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
def test_handle_import_twice(self):
    ''' handling the same import item twice does not duplicate records '''
    shelf = self.user.shelf_set.filter(identifier='read').first()
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')
    # only the first row of the fixture is needed; the context manager
    # closes the file handle as soon as that row has been read
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        # same item, handled two times in a row
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)

    # .get() raises if more than one readthrough exists for the user, so
    # this lookup also guards against a duplicate from the second call
    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    # the dates are checked component by component instead of being
    # compared against constructed datetime objects
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
def test_handle_imported_book_review(self):
    ''' librarything review import: the row's review is saved as a Review '''
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')
    # the first fixture row is the one used; the context manager closes
    # the file handle as soon as that row has been read
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        # fourth argument is True here -- presumably the include-reviews
        # switch; confirm against importer.handle_imported_book
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, True, 'unlisted')

    review = models.Review.objects.get(book=self.book, user=self.user)
    self.assertEqual(review.content, 'chef d\'oeuvre')
    self.assertEqual(review.rating, 5)
    self.assertEqual(review.published_date.year, 2007)
    self.assertEqual(review.published_date.month, 5)
    self.assertEqual(review.published_date.day, 8)
    # the review carries the privacy level passed to the handler
    self.assertEqual(review.privacy, 'unlisted')
def test_handle_imported_book_reviews_disabled(self):
    ''' librarything import creates no Review when reviews are turned off '''
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')
    # read the fixture inside a context manager so the file handle is
    # closed once the rows are loaded
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        rows = list(csv.DictReader(
            csv_file, delimiter=self.importer.delimiter))
    # this test uses the third row of the fixture
    entry = self.importer.parse_fields(rows[2])
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        # fourth argument is False here -- presumably the include-reviews
        # switch; confirm against importer.handle_imported_book
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'unlisted')

    self.assertFalse(models.Review.objects.filter(
        book=self.book, user=self.user
    ).exists())

View file

@ -9,7 +9,7 @@ from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator from django.utils.decorators import method_decorator
from django.views import View from django.views import View
from bookwyrm import forms, goodreads_import, models from bookwyrm import forms, goodreads_import, librarything_import, models
from bookwyrm.tasks import app from bookwyrm.tasks import app
# pylint: disable= no-self-use # pylint: disable= no-self-use
@ -31,18 +31,29 @@ class Import(View):
if form.is_valid(): if form.is_valid():
include_reviews = request.POST.get('include_reviews') == 'on' include_reviews = request.POST.get('include_reviews') == 'on'
privacy = request.POST.get('privacy') privacy = request.POST.get('privacy')
source = request.POST.get('source')
importer = None
if source == 'LibraryThing':
importer = librarything_import.LibrarythingImporter()
else:
# Default : GoodReads
importer = goodreads_import.GoodreadsImporter()
try: try:
job = goodreads_import.create_job( job = importer.create_job(
request.user, request.user,
TextIOWrapper( TextIOWrapper(
request.FILES['csv_file'], request.FILES['csv_file'],
encoding=request.encoding), encoding=importer.encoding),
include_reviews, include_reviews,
privacy, privacy,
) )
except (UnicodeDecodeError, ValueError): except (UnicodeDecodeError, ValueError):
return HttpResponseBadRequest('Not a valid csv file') return HttpResponseBadRequest('Not a valid csv file')
goodreads_import.start_import(job)
importer.start_import(job)
return redirect('/import/%d' % job.id) return redirect('/import/%d' % job.id)
return HttpResponseBadRequest() return HttpResponseBadRequest()