mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-26 11:31:08 +00:00
librarything import
This commit is contained in:
parent
dcd4baed82
commit
b85fed3595
9 changed files with 510 additions and 152 deletions
|
@ -1,121 +1,13 @@
|
|||
''' handle reading a csv from goodreads '''
|
||||
import csv
|
||||
import logging
|
||||
from bookwyrm.importer import Importer
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.models import ImportJob, ImportItem
|
||||
from bookwyrm.tasks import app
|
||||
# GoodReads is the default importer, thus Importer follows its structure. For a more complete example of overriding see librarything_import.py
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
class GoodreadsImporter(Importer):
    ''' csv importer for GoodReads exports; relies on the Importer defaults '''
    # name of the service, written into every row as 'import_source'
    service = 'GoodReads'
|
||||
|
||||
|
||||
def create_job(user, csv_file, include_reviews, privacy):
|
||||
''' check over a csv and creates a database entry for the job'''
|
||||
job = ImportJob.objects.create(
|
||||
user=user,
|
||||
include_reviews=include_reviews,
|
||||
privacy=privacy
|
||||
)
|
||||
for index, entry in enumerate(list(csv.DictReader(csv_file))):
|
||||
if not all(x in entry for x in ('ISBN13', 'Title', 'Author')):
|
||||
raise ValueError('Author, title, and isbn must be in data.')
|
||||
ImportItem(job=job, index=index, data=entry).save()
|
||||
return job
|
||||
|
||||
|
||||
def create_retry_job(user, original_job, items):
|
||||
''' retry items that didn't import '''
|
||||
job = ImportJob.objects.create(
|
||||
user=user,
|
||||
include_reviews=original_job.include_reviews,
|
||||
privacy=original_job.privacy,
|
||||
retry=True
|
||||
)
|
||||
for item in items:
|
||||
ImportItem(job=job, index=item.index, data=item.data).save()
|
||||
return job
|
||||
|
||||
|
||||
def start_import(job):
|
||||
''' initalizes a csv import job '''
|
||||
result = import_data.delay(job.id)
|
||||
job.task_id = result.id
|
||||
job.save()
|
||||
|
||||
|
||||
@app.task
|
||||
def import_data(job_id):
|
||||
''' does the actual lookup work in a celery task '''
|
||||
job = ImportJob.objects.get(id=job_id)
|
||||
try:
|
||||
for item in job.items.all():
|
||||
try:
|
||||
item.resolve()
|
||||
except Exception as e:# pylint: disable=broad-except
|
||||
logger.exception(e)
|
||||
item.fail_reason = 'Error loading book'
|
||||
item.save()
|
||||
continue
|
||||
|
||||
if item.book:
|
||||
item.save()
|
||||
|
||||
# shelves book and handles reviews
|
||||
handle_imported_book(
|
||||
job.user, item, job.include_reviews, job.privacy)
|
||||
else:
|
||||
item.fail_reason = 'Could not find a match for book'
|
||||
item.save()
|
||||
finally:
|
||||
job.complete = True
|
||||
job.save()
|
||||
|
||||
|
||||
def handle_imported_book(user, item, include_reviews, privacy):
|
||||
''' process a goodreads csv and then post about it '''
|
||||
if isinstance(item.book, models.Work):
|
||||
item.book = item.book.default_edition
|
||||
if not item.book:
|
||||
return
|
||||
|
||||
existing_shelf = models.ShelfBook.objects.filter(
|
||||
book=item.book, user=user).exists()
|
||||
|
||||
# shelve the book if it hasn't been shelved already
|
||||
if item.shelf and not existing_shelf:
|
||||
desired_shelf = models.Shelf.objects.get(
|
||||
identifier=item.shelf,
|
||||
user=user
|
||||
)
|
||||
models.ShelfBook.objects.create(
|
||||
book=item.book, shelf=desired_shelf, user=user)
|
||||
|
||||
for read in item.reads:
|
||||
# check for an existing readthrough with the same dates
|
||||
if models.ReadThrough.objects.filter(
|
||||
user=user, book=item.book,
|
||||
start_date=read.start_date,
|
||||
finish_date=read.finish_date
|
||||
).exists():
|
||||
continue
|
||||
read.book = item.book
|
||||
read.user = user
|
||||
read.save()
|
||||
|
||||
if include_reviews and (item.rating or item.review):
|
||||
review_title = 'Review of {!r} on Goodreads'.format(
|
||||
item.book.title,
|
||||
) if item.review else ''
|
||||
|
||||
# we don't know the publication date of the review,
|
||||
# but "now" is a bad guess
|
||||
published_date_guess = item.date_read or item.date_added
|
||||
models.Review.objects.create(
|
||||
user=user,
|
||||
book=item.book,
|
||||
name=review_title,
|
||||
content=item.review,
|
||||
rating=item.rating,
|
||||
published_date=published_date_guess,
|
||||
privacy=privacy,
|
||||
)
|
||||
def parse_fields(self, data):
    ''' tag a csv row with its source and reset its start date

    The row is modified in place and returned.
    '''
    data['import_source'] = self.service
    # add missing 'Date Started' field
    data['Date Started'] = None
    return data
|
||||
|
|
135
bookwyrm/importer.py
Normal file
135
bookwyrm/importer.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
''' handle reading a csv from an external service, defaults are from GoodReads '''
|
||||
import csv
|
||||
import logging
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.models import ImportJob, ImportItem
|
||||
from bookwyrm.tasks import app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Importer:
    ''' generic csv importer; the defaults describe a GoodReads export '''
    # name of the service, written into every row as 'import_source'
    service = 'Unknown'
    # column separator handed to csv.DictReader
    delimiter = ','
    # not read by this class; available to code that opens the export file
    encoding = 'UTF-8'
    # columns every row has to provide
    mandatory_fields = ['ISBN13', 'Title', 'Author']

    def create_job(self, user, csv_file, include_reviews, privacy):
        ''' check over a csv and creates a database entry for the job

        All rows are validated and run through parse_fields before anything
        is written, so a malformed export raises ValueError without leaving
        an empty or half-filled job behind.
        '''
        entries = []
        reader = csv.DictReader(csv_file, delimiter=self.delimiter)
        for entry in reader:
            if not all(x in entry for x in self.mandatory_fields):
                raise ValueError('Author, title, and isbn must be in data.')
            entries.append(self.parse_fields(entry))

        job = ImportJob.objects.create(
            user=user,
            include_reviews=include_reviews,
            privacy=privacy
        )
        for index, entry in enumerate(entries):
            self.save_item(job, index, entry)
        return job

    def save_item(self, job, index, data):
        ''' store one csv row as an ImportItem of the job '''
        ImportItem(job=job, index=index, data=data).save()

    def parse_fields(self, entry):
        ''' add the service name to the row; subclasses may remap columns '''
        entry.update({'import_source': self.service})
        return entry

    def create_retry_job(self, user, original_job, items):
        ''' retry items that didn't import '''
        job = ImportJob.objects.create(
            user=user,
            include_reviews=original_job.include_reviews,
            privacy=original_job.privacy,
            retry=True
        )
        # the rows keep their original index and data
        for item in items:
            self.save_item(job, item.index, item.data)
        return job

    def start_import(self, job):
        ''' initializes a csv import job '''
        result = import_data.delay(self.service, job.id)
        # remember the id of the queued task on the job
        job.task_id = result.id
        job.save()
|
||||
|
||||
|
||||
@app.task
def import_data(source, job_id):
    ''' does the actual lookup work in a celery task

    source is handed on to handle_imported_book; the job is flagged as
    complete even when the loop is interrupted by an error.
    '''
    job = ImportJob.objects.get(id=job_id)
    try:
        for item in job.items.all():
            try:
                item.resolve()
            except Exception as err:  # pylint: disable=broad-except
                # one broken row must not abort the rest of the import
                logger.exception(err)
                item.fail_reason = 'Error loading book'
                item.save()
                continue

            if not item.book:
                item.fail_reason = 'Could not find a match for book'
                item.save()
                continue

            item.save()
            # shelves book and handles reviews
            handle_imported_book(
                source, job.user, item, job.include_reviews, job.privacy)
    finally:
        job.complete = True
        job.save()
|
||||
|
||||
|
||||
def handle_imported_book(source, user, item, include_reviews, privacy):
    ''' shelve an imported book, store its readthroughs and its review

    source is the service name used in the review title. A review is only
    created when include_reviews is set and the row has a rating or a
    review text.
    '''
    # a resolved work is swapped for its default edition
    if isinstance(item.book, models.Work):
        item.book = item.book.default_edition
    if not item.book:
        return

    already_shelved = models.ShelfBook.objects.filter(
        book=item.book, user=user).exists()

    # shelve the book if it hasn't been shelved already
    if item.shelf and not already_shelved:
        target_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        models.ShelfBook.objects.create(
            book=item.book, shelf=target_shelf, user=user)

    for read in item.reads:
        # check for an existing readthrough with the same dates
        duplicate = models.ReadThrough.objects.filter(
            user=user,
            book=item.book,
            start_date=read.start_date,
            finish_date=read.finish_date
        ).exists()
        if duplicate:
            continue
        read.book = item.book
        read.user = user
        read.save()

    if not include_reviews or not (item.rating or item.review):
        return

    # rating-only entries get an empty title
    review_title = ''
    if item.review:
        review_title = 'Review of {!r} on {!r}'.format(
            item.book.title,
            source,
        )

    # we don't know the publication date of the review,
    # but "now" is a bad guess
    published_date_guess = item.date_read or item.date_added
    models.Review.objects.create(
        user=user,
        book=item.book,
        name=review_title,
        content=item.review,
        rating=item.rating,
        published_date=published_date_guess,
        privacy=privacy,
    )
|
39
bookwyrm/librarything_import.py
Normal file
39
bookwyrm/librarything_import.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
''' handle reading a csv from librarything '''
|
||||
import csv
|
||||
import re
|
||||
import math
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.models import ImportItem
|
||||
from bookwyrm.importer import Importer
|
||||
|
||||
|
||||
class LibrarythingImporter(Importer):
    ''' csv importer for LibraryThing exports '''
    service = 'LibraryThing'
    delimiter = '\t'
    encoding = 'ISO-8859-1'
    # mandatory_fields : fields matching the book ISBN13, title and author
    mandatory_fields = ['ISBN', 'Title', 'Primary Author']

    @staticmethod
    def _strip_brackets(value):
        ''' remove the square brackets wrapped around a cell value '''
        # a missing cell (None) is treated like a blank one
        return re.sub(r'\[|\]', '', value or '')

    def parse_fields(self, initial):
        ''' rebuild a LibraryThing row under the default column names

        Returns a new dict; the row that was passed in is left untouched.
        '''
        data = {}
        data['import_source'] = self.service
        data['Book Id'] = initial['Book Id']
        data['Title'] = initial['Title']
        data['Author'] = initial['Primary Author']
        # NOTE(review): the sample export wraps this column in brackets too;
        # confirm the ISBN lookup copes with that
        data['ISBN13'] = initial['ISBN']
        data['My Review'] = initial['Review']

        # unrated books have a blank cell, which float() cannot parse
        rating = initial['Rating']
        data['My Rating'] = math.ceil(float(rating)) if rating else ''

        data['Date Added'] = self._strip_brackets(initial['Entry Date'])
        data['Date Started'] = self._strip_brackets(initial['Date Started'])
        data['Date Read'] = self._strip_brackets(initial['Date Read'])

        # the shelf is derived from the reading dates
        if data['Date Read']:
            data['Exclusive Shelf'] = "read"
        elif data['Date Started']:
            data['Exclusive Shelf'] = "reading"
        else:
            data['Exclusive Shelf'] = "to-read"

        return data
|
|
@ -97,8 +97,8 @@ class ImportItem(models.Model):
|
|||
def get_book_from_title_author(self):
|
||||
''' search by title and author '''
|
||||
search_term = construct_search_term(
|
||||
self.data['Title'],
|
||||
self.data['Author']
|
||||
self.title,
|
||||
self.author
|
||||
)
|
||||
search_result = connector_manager.first_search_result(
|
||||
search_term, min_confidence=0.999
|
||||
|
@ -149,6 +149,14 @@ class ImportItem(models.Model):
|
|||
dateutil.parser.parse(self.data['Date Added']))
|
||||
return None
|
||||
|
||||
@property
def date_started(self):
    ''' when the book was started, or None if the row has no such date '''
    raw_date = self.data.get('Date Started')
    if not raw_date:
        return None
    return timezone.make_aware(dateutil.parser.parse(raw_date))
|
||||
|
||||
@property
|
||||
def date_read(self):
|
||||
''' the date a book was completed '''
|
||||
|
@ -160,18 +168,24 @@ class ImportItem(models.Model):
|
|||
@property
def reads(self):
    ''' formats a read through dataset for the book in this line

    Returns a list with at most one unsaved ReadThrough, or an empty list
    when the row has neither a start nor a finish date to go on.
    '''
    # each date is read once; the properties parse the raw value per access
    start_date = self.date_started
    finish_date = self.date_read

    # Goodreads special case (no 'date started' field)
    if not start_date:
        date_added = self.date_added
        on_reading_shelf = self.shelf == 'reading'
        finished_read = self.shelf == 'read' and finish_date
        if date_added and (on_reading_shelf or finished_read):
            start_date = date_added

    if finish_date:
        return [ReadThrough(
            start_date=start_date,
            finish_date=finish_date,
        )]
    if start_date:
        return [ReadThrough(start_date=start_date)]
    return []
|
||||
|
||||
def __repr__(self):
    ''' debug representation naming the source service and the title '''
    # rows stored without an 'import_source' key must not break repr()
    source = self.data.get('import_source', 'Unknown')
    return "<{!r}Item {!r}>".format(source, self.data['Title'])
|
||||
|
||||
def __str__(self):
    ''' title and author of the row as one readable string '''
    title = self.data['Title']
    author = self.data['Author']
    return "{} by {}".format(title, author)
|
||||
|
|
|
@ -2,9 +2,24 @@
|
|||
{% load humanize %}
|
||||
{% block content %}
|
||||
<div class="block">
|
||||
<h1 class="title">Import Books from GoodReads</h1>
|
||||
<h1 class="title">Import Books</h1>
|
||||
<form name="import" action="/import" method="post" enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
|
||||
<label class="label" for="source">
    <p>Data source</p>
    <div class="select {{ class }}">
        <select name="source" id="source">
            {# each option is preselected only when it is the current source #}
            <option value="GoodReads" {% if current == 'GoodReads' %}selected{% endif %}>
                GoodReads
            </option>
            <option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
                LibraryThing
            </option>
        </select>
    </div>
</label>
|
||||
|
||||
<div class="field">
|
||||
{{ import_form.as_p }}
|
||||
</div>
|
||||
|
|
4
bookwyrm/tests/data/librarything.tsv
Normal file
4
bookwyrm/tests/data/librarything.tsv
Normal file
|
@ -0,0 +1,4 @@
|
|||
Book Id Title Sort Character Primary Author Primary Author Role Secondary Author Secondary Author Roles Publication Date Review Rating Comment Private Comment Summary Media Physical Description Weight Height Thickness Length Dimensions Page Count LCCN Acquired Date Started Date Read Barcode BCID Tags Collections Languages Original Languages LC Classification ISBN ISBNs Subjects Dewey Decimal Dewey Wording Other Call Number Copies Source Entry Date From Where OCLC Work id Lending Patron Lending Status Lending Start Lending End
|
||||
5498194 Marelle 1 Cortázar, Julio Gallimard (1979), Poche 1979 chef d'oeuvre 4.5 Marelle by Julio Cortázar (1979) Broché 590 p.; 7.24 inches 1.28 pounds 7.24 inches 1.26 inches 4.96 inches 7.24 x 4.96 x 1.26 inches 590 [2007-04-16] [2007-05-08] roman, espagnol, expérimental, bohème, philosophie Your library French Spanish PQ7797 .C7145 [2070291340] 2070291340, 9782070291342 Cortâazar, Julio. Rayuela 863 Literature > Spanish And Portuguese > Spanish fiction 1 Amazon.fr [2006-08-09] 57814
|
||||
5015319 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) 1 Roubaud, Jacques Seuil (1989), Unknown Binding 1989 5 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) by Jacques Roubaud (1989) Broché 411 p.; 7.72 inches 0.88 pounds 7.72 inches 1.02 inches 5.43 inches 7.72 x 5.43 x 1.02 inches 411 Your library English PQ2678 .O77 [2020104725] 2020104725, 9782020104722 Autobiographical fiction|Roubaud, Jacques > Fiction 813 American And Canadian > Fiction > Literature 1 Amazon.com [2006-07-25] 478910
|
||||
5015399 Le Maître et Marguerite 1 Boulgakov, Mikhaïl Pocket (1994), Poche 1994 5 Le Maître et Marguerite by Mikhaïl Boulgakov (1994) Broché 579 p.; 7.09 inches 0.66 pounds 7.09 inches 1.18 inches 4.33 inches 7.09 x 4.33 x 1.18 inches 579 Your library French PG3476 .B78 [2266062328] 2266062328, 9782266062329 Allegories|Bulgakov|Good and evil > Fiction|Humanities|Jerusalem > Fiction|Jesus Christ > Fiction|Literature|Mental illness > Fiction|Moscow (Russia) > Fiction|Novel|Pilate, Pontius, 1st cent. > Fiction|Political fiction|Russia > Fiction|Russian fiction|Russian publications (Form Entry)|Soviet Union > History > 1925-1953 > Fiction|literature 891.7342 1917-1945 > 1917-1991 (USSR) > Literature > Literature of other Indo-European languages > Other Languages > Russian > Russian Fiction 1 Amazon.fr [2006-07-25] 10151
|
|
|
@ -7,16 +7,19 @@ from unittest.mock import patch
|
|||
from django.test import TestCase
|
||||
import responses
|
||||
|
||||
from bookwyrm import goodreads_import, models
|
||||
from bookwyrm import models, importer
|
||||
from bookwyrm.goodreads_import import GoodreadsImporter
|
||||
from bookwyrm import importer
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
class GoodreadsImport(TestCase):
|
||||
''' importing from goodreads csv '''
|
||||
def setUp(self):
|
||||
self.importer = GoodreadsImporter()
|
||||
''' use a test csv '''
|
||||
datafile = pathlib.Path(__file__).parent.joinpath(
|
||||
'data/goodreads.csv')
|
||||
self.csv = open(datafile, 'r')
|
||||
self.csv = open(datafile, 'r', encoding=self.importer.encoding)
|
||||
self.user = models.User.objects.create_user(
|
||||
'mouse', 'mouse@mouse.mouse', 'password', local=True)
|
||||
|
||||
|
@ -41,7 +44,7 @@ class GoodreadsImport(TestCase):
|
|||
|
||||
def test_create_job(self):
|
||||
''' creates the import job entry and checks csv '''
|
||||
import_job = goodreads_import.create_job(
|
||||
import_job = self.importer.create_job(
|
||||
self.user, self.csv, False, 'public')
|
||||
self.assertEqual(import_job.user, self.user)
|
||||
self.assertEqual(import_job.include_reviews, False)
|
||||
|
@ -59,13 +62,13 @@ class GoodreadsImport(TestCase):
|
|||
|
||||
def test_create_retry_job(self):
|
||||
''' trying again with items that didn't import '''
|
||||
import_job = goodreads_import.create_job(
|
||||
import_job = self.importer.create_job(
|
||||
self.user, self.csv, False, 'unlisted')
|
||||
import_items = models.ImportItem.objects.filter(
|
||||
job=import_job
|
||||
).all()[:2]
|
||||
|
||||
retry = goodreads_import.create_retry_job(
|
||||
retry = self.importer.create_retry_job(
|
||||
self.user, import_job, import_items)
|
||||
self.assertNotEqual(import_job, retry)
|
||||
self.assertEqual(retry.user, self.user)
|
||||
|
@ -82,13 +85,13 @@ class GoodreadsImport(TestCase):
|
|||
|
||||
def test_start_import(self):
|
||||
''' begin loading books '''
|
||||
import_job = goodreads_import.create_job(
|
||||
import_job = self.importer.create_job(
|
||||
self.user, self.csv, False, 'unlisted')
|
||||
MockTask = namedtuple('Task', ('id'))
|
||||
mock_task = MockTask(7)
|
||||
with patch('bookwyrm.goodreads_import.import_data.delay') as start:
|
||||
with patch('bookwyrm.importer.import_data.delay') as start:
|
||||
start.return_value = mock_task
|
||||
goodreads_import.start_import(import_job)
|
||||
self.importer.start_import(import_job)
|
||||
import_job.refresh_from_db()
|
||||
self.assertEqual(import_job.task_id, '7')
|
||||
|
||||
|
@ -96,7 +99,7 @@ class GoodreadsImport(TestCase):
|
|||
@responses.activate
|
||||
def test_import_data(self):
|
||||
''' resolve entry '''
|
||||
import_job = goodreads_import.create_job(
|
||||
import_job = self.importer.create_job(
|
||||
self.user, self.csv, False, 'unlisted')
|
||||
book = models.Edition.objects.create(title='Test Book')
|
||||
|
||||
|
@ -104,8 +107,8 @@ class GoodreadsImport(TestCase):
|
|||
'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
|
||||
) as resolve:
|
||||
resolve.return_value = book
|
||||
with patch('bookwyrm.goodreads_import.handle_imported_book'):
|
||||
goodreads_import.import_data(import_job.id)
|
||||
with patch('bookwyrm.importer.handle_imported_book'):
|
||||
importer.import_data(self.importer.service, import_job.id)
|
||||
|
||||
import_item = models.ImportItem.objects.get(job=import_job, index=0)
|
||||
self.assertEqual(import_item.book.id, book.id)
|
||||
|
@ -120,13 +123,14 @@ class GoodreadsImport(TestCase):
|
|||
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
|
||||
csv_file = open(datafile, 'r')
|
||||
for index, entry in enumerate(list(csv.DictReader(csv_file))):
|
||||
entry = self.importer.parse_fields(entry)
|
||||
import_item = models.ImportItem.objects.create(
|
||||
job_id=import_job.id, index=index, data=entry, book=self.book)
|
||||
break
|
||||
|
||||
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, False, 'public')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, False, 'public')
|
||||
|
||||
shelf.refresh_from_db()
|
||||
self.assertEqual(shelf.books.first(), self.book)
|
||||
|
@ -153,13 +157,14 @@ class GoodreadsImport(TestCase):
|
|||
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
|
||||
csv_file = open(datafile, 'r')
|
||||
for index, entry in enumerate(list(csv.DictReader(csv_file))):
|
||||
entry = self.importer.parse_fields(entry)
|
||||
import_item = models.ImportItem.objects.create(
|
||||
job_id=import_job.id, index=index, data=entry, book=self.book)
|
||||
break
|
||||
|
||||
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, False, 'public')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, False, 'public')
|
||||
|
||||
shelf.refresh_from_db()
|
||||
self.assertEqual(shelf.books.first(), self.book)
|
||||
|
@ -182,15 +187,16 @@ class GoodreadsImport(TestCase):
|
|||
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
|
||||
csv_file = open(datafile, 'r')
|
||||
for index, entry in enumerate(list(csv.DictReader(csv_file))):
|
||||
entry = self.importer.parse_fields(entry)
|
||||
import_item = models.ImportItem.objects.create(
|
||||
job_id=import_job.id, index=index, data=entry, book=self.book)
|
||||
break
|
||||
|
||||
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, False, 'public')
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, False, 'public')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, False, 'public')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, False, 'public')
|
||||
|
||||
shelf.refresh_from_db()
|
||||
self.assertEqual(shelf.books.first(), self.book)
|
||||
|
@ -212,12 +218,13 @@ class GoodreadsImport(TestCase):
|
|||
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
|
||||
csv_file = open(datafile, 'r')
|
||||
entry = list(csv.DictReader(csv_file))[2]
|
||||
entry = self.importer.parse_fields(entry)
|
||||
import_item = models.ImportItem.objects.create(
|
||||
job_id=import_job.id, index=0, data=entry, book=self.book)
|
||||
|
||||
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, True, 'unlisted')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, True, 'unlisted')
|
||||
review = models.Review.objects.get(book=self.book, user=self.user)
|
||||
self.assertEqual(review.content, 'mixed feelings')
|
||||
self.assertEqual(review.rating, 2)
|
||||
|
@ -233,12 +240,13 @@ class GoodreadsImport(TestCase):
|
|||
datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv')
|
||||
csv_file = open(datafile, 'r')
|
||||
entry = list(csv.DictReader(csv_file))[2]
|
||||
entry = self.importer.parse_fields(entry)
|
||||
import_item = models.ImportItem.objects.create(
|
||||
job_id=import_job.id, index=0, data=entry, book=self.book)
|
||||
|
||||
with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
|
||||
goodreads_import.handle_imported_book(
|
||||
self.user, import_item, False, 'unlisted')
|
||||
importer.handle_imported_book(
|
||||
self.importer.service, self.user, import_item, False, 'unlisted')
|
||||
self.assertFalse(models.Review.objects.filter(
|
||||
book=self.book, user=self.user
|
||||
).exists())
|
||||
|
|
240
bookwyrm/tests/test_librarything_import.py
Normal file
240
bookwyrm/tests/test_librarything_import.py
Normal file
|
@ -0,0 +1,240 @@
|
|||
''' testing import '''
|
||||
from collections import namedtuple
|
||||
import csv
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.test import TestCase
|
||||
import responses
|
||||
|
||||
from bookwyrm import models, importer
|
||||
from bookwyrm.librarything_import import LibrarythingImporter
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
class LibrarythingImport(TestCase):
|
||||
''' importing from librarything tsv '''
|
||||
def setUp(self):
    ''' use a test tsv '''
    self.importer = LibrarythingImporter()
    datafile = pathlib.Path(__file__).parent.joinpath(
        'data/librarything.tsv')

    # Librarything generates latin encoded exports...
    self.csv = open(datafile, 'r', encoding=self.importer.encoding)
    # close the handle after each test instead of leaking it
    self.addCleanup(self.csv.close)
    self.user = models.User.objects.create_user(
        'mmai', 'mmai@mmai.mmai', 'password', local=True)

    models.Connector.objects.create(
        identifier=DOMAIN,
        name='Local',
        local=True,
        connector_file='self_connector',
        base_url='https://%s' % DOMAIN,
        books_url='https://%s/book' % DOMAIN,
        covers_url='https://%s/images/covers' % DOMAIN,
        search_url='https://%s/search?q=' % DOMAIN,
        priority=1,
    )
    # edition that the handle_imported_book tests attach to their rows
    work = models.Work.objects.create(title='Test Work')
    self.book = models.Edition.objects.create(
        title='Example Edition',
        remote_id='https://example.com/book/1',
        parent_work=work
    )
|
||||
|
||||
|
||||
def test_create_job(self):
    ''' creates the import job entry and checks csv '''
    import_job = self.importer.create_job(
        self.user, self.csv, False, 'public')
    self.assertEqual(import_job.user, self.user)
    self.assertEqual(import_job.include_reviews, False)
    self.assertEqual(import_job.privacy, 'public')

    import_items = models.ImportItem.objects.filter(job=import_job).all()
    self.assertEqual(len(import_items), 3)
    # the rows are stored in file order under their own index
    expected_ids = ['5498194', '5015319', '5015399']
    for position, book_id in enumerate(expected_ids):
        self.assertEqual(import_items[position].index, position)
        self.assertEqual(import_items[position].data['Book Id'], book_id)
|
||||
|
||||
|
||||
def test_create_retry_job(self):
    ''' trying again with items that didn't import '''
    import_job = self.importer.create_job(
        self.user, self.csv, False, 'unlisted')
    import_items = models.ImportItem.objects.filter(
        job=import_job
    ).all()[:2]

    retry = self.importer.create_retry_job(
        self.user, import_job, import_items)
    # the retry is a new job that inherits the settings of the first one
    self.assertNotEqual(import_job, retry)
    self.assertEqual(retry.user, self.user)
    self.assertEqual(retry.include_reviews, False)
    self.assertEqual(retry.privacy, 'unlisted')

    retry_items = models.ImportItem.objects.filter(job=retry).all()
    self.assertEqual(len(retry_items), 2)
    self.assertEqual(retry_items[0].index, 0)
    # check the copied row itself, not the row of the original job
    self.assertEqual(retry_items[0].data['Book Id'], '5498194')
    self.assertEqual(retry_items[1].index, 1)
    self.assertEqual(retry_items[1].data['Book Id'], '5015319')
|
||||
|
||||
|
||||
@responses.activate
def test_import_data(self):
    ''' resolve entry '''
    import_job = self.importer.create_job(
        self.user, self.csv, False, 'unlisted')
    book = models.Edition.objects.create(title='Test Book')

    # the isbn lookup is stubbed to return the book; shelving is patched out
    isbn_lookup = 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn'
    with patch(isbn_lookup) as resolve, \
            patch('bookwyrm.importer.handle_imported_book'):
        resolve.return_value = book
        importer.import_data(self.importer.service, import_job.id)

    import_item = models.ImportItem.objects.get(job=import_job, index=0)
    self.assertEqual(import_item.book.id, book.id)
|
||||
|
||||
|
||||
def test_handle_imported_book(self):
    ''' librarything import added a book, this adds related connections '''
    shelf = self.user.shelf_set.filter(identifier='read').first()
    self.assertIsNone(shelf.books.first())

    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # only the first row is needed; the file is closed right after reading it
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)

    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    # dates of the first fixture row: started 2007-04-16, read 2007-05-08
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
|
||||
|
||||
|
||||
def test_handle_imported_book_already_shelved(self):
    ''' a book that already sits on a shelf is not shelved a second time '''
    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        shelf = self.user.shelf_set.filter(identifier='to-read').first()
        models.ShelfBook.objects.create(
            shelf=shelf, user=self.user, book=self.book)

    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # only the first row is needed; the file is closed right after reading it
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    # the book stays on 'to-read' and is not added to 'read'
    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)
    self.assertIsNone(
        self.user.shelf_set.get(identifier='read').books.first())

    # the readthrough is still recorded
    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
|
||||
|
||||
|
||||
def test_handle_import_twice(self):
    ''' re-importing books

    Runs handle_imported_book twice for the same import item, then checks
    that the book is first on the "read" shelf and that the user's
    readthrough carries the dates from the first fixture row.
    '''
    shelf = self.user.shelf_set.filter(identifier='read').first()
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # Only the first row of the fixture is needed. The context manager
    # makes sure the file handle is closed once that row has been read.
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'public')

    shelf.refresh_from_db()
    self.assertEqual(shelf.books.first(), self.book)

    # NOTE(review): assuming this is a standard Django manager, get()
    # raises if the second import created a duplicate readthrough.
    readthrough = models.ReadThrough.objects.get(user=self.user)
    self.assertEqual(readthrough.book, self.book)
    # Dates are compared component by component instead of building
    # datetime objects to compare against.
    self.assertEqual(readthrough.start_date.year, 2007)
    self.assertEqual(readthrough.start_date.month, 4)
    self.assertEqual(readthrough.start_date.day, 16)
    self.assertEqual(readthrough.finish_date.year, 2007)
    self.assertEqual(readthrough.finish_date.month, 5)
    self.assertEqual(readthrough.finish_date.day, 8)
|
||||
|
||||
|
||||
def test_handle_imported_book_review(self):
    ''' librarything review import

    Imports the first fixture row with reviews enabled and 'unlisted'
    privacy, then checks the content, rating, published date and privacy
    of the resulting review.
    '''
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # Read just the first row. The context manager closes the file
    # handle instead of leaving it open for the rest of the test run.
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        reader = csv.DictReader(
            csv_file, delimiter=self.importer.delimiter)
        entry = self.importer.parse_fields(next(reader))
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, True, 'unlisted')

    review = models.Review.objects.get(book=self.book, user=self.user)
    self.assertEqual(review.content, 'chef d\'oeuvre')
    self.assertEqual(review.rating, 5)
    self.assertEqual(review.published_date.year, 2007)
    self.assertEqual(review.published_date.month, 5)
    self.assertEqual(review.published_date.day, 8)
    self.assertEqual(review.privacy, 'unlisted')
|
||||
|
||||
|
||||
def test_handle_imported_book_reviews_disabled(self):
    ''' librarything import with reviews turned off

    Imports the third fixture row with include_reviews=False and checks
    that no review exists for the book and user afterwards.
    '''
    import_job = models.ImportJob.objects.create(user=self.user)
    datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv')
    # The context manager closes the file handle once the rows are read.
    with open(datafile, 'r', encoding=self.importer.encoding) as csv_file:
        rows = list(csv.DictReader(
            csv_file, delimiter=self.importer.delimiter))
    # this test uses the third row of the fixture
    entry = self.importer.parse_fields(rows[2])
    import_item = models.ImportItem.objects.create(
        job_id=import_job.id, index=0, data=entry, book=self.book)

    with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'):
        importer.handle_imported_book(
            self.importer.service, self.user, import_item, False, 'unlisted')

    self.assertFalse(models.Review.objects.filter(
        book=self.book, user=self.user
    ).exists())
|
|
@ -9,7 +9,7 @@ from django.template.response import TemplateResponse
|
|||
from django.utils.decorators import method_decorator
|
||||
from django.views import View
|
||||
|
||||
from bookwyrm import forms, goodreads_import, models
|
||||
from bookwyrm import forms, goodreads_import, librarything_import, models
|
||||
from bookwyrm.tasks import app
|
||||
|
||||
# pylint: disable= no-self-use
|
||||
|
@ -31,18 +31,29 @@ class Import(View):
|
|||
if form.is_valid():
|
||||
include_reviews = request.POST.get('include_reviews') == 'on'
|
||||
privacy = request.POST.get('privacy')
|
||||
source = request.POST.get('source')
|
||||
|
||||
importer = None
|
||||
if source == 'LibraryThing':
|
||||
importer = librarything_import.LibrarythingImporter()
|
||||
else:
|
||||
# Default : GoodReads
|
||||
importer = goodreads_import.GoodreadsImporter()
|
||||
|
||||
try:
|
||||
job = goodreads_import.create_job(
|
||||
job = importer.create_job(
|
||||
request.user,
|
||||
TextIOWrapper(
|
||||
request.FILES['csv_file'],
|
||||
encoding=request.encoding),
|
||||
encoding=importer.encoding),
|
||||
include_reviews,
|
||||
privacy,
|
||||
)
|
||||
except (UnicodeDecodeError, ValueError):
|
||||
return HttpResponseBadRequest('Not a valid csv file')
|
||||
goodreads_import.start_import(job)
|
||||
|
||||
importer.start_import(job)
|
||||
|
||||
return redirect('/import/%d' % job.id)
|
||||
return HttpResponseBadRequest()
|
||||
|
||||
|
|
Loading…
Reference in a new issue