From b85fed359574039cfba25bd5ea969885109234eb Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Sat, 20 Feb 2021 17:02:36 +0100 Subject: [PATCH 01/15] librarything import --- bookwyrm/goodreads_import.py | 126 +---------- bookwyrm/importer.py | 135 ++++++++++++ bookwyrm/librarything_import.py | 39 ++++ bookwyrm/models/import_job.py | 28 ++- bookwyrm/templates/import.html | 17 +- bookwyrm/tests/data/librarything.tsv | 4 + bookwyrm/tests/test_goodreads_import.py | 54 +++-- bookwyrm/tests/test_librarything_import.py | 240 +++++++++++++++++++++ bookwyrm/views/import_data.py | 19 +- 9 files changed, 510 insertions(+), 152 deletions(-) create mode 100644 bookwyrm/importer.py create mode 100644 bookwyrm/librarything_import.py create mode 100644 bookwyrm/tests/data/librarything.tsv create mode 100644 bookwyrm/tests/test_librarything_import.py diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 1b2b971c..f5b84e17 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -1,121 +1,13 @@ ''' handle reading a csv from goodreads ''' -import csv -import logging +from bookwyrm.importer import Importer -from bookwyrm import models -from bookwyrm.models import ImportJob, ImportItem -from bookwyrm.tasks import app +# GoodReads is the default importer, thus Importer follows its structure. For a more complete example of overriding see librarything_import.py -logger = logging.getLogger(__name__) +class GoodreadsImporter(Importer): + service = 'GoodReads' - -def create_job(user, csv_file, include_reviews, privacy): - ''' check over a csv and creates a database entry for the job''' - job = ImportJob.objects.create( - user=user, - include_reviews=include_reviews, - privacy=privacy - ) - for index, entry in enumerate(list(csv.DictReader(csv_file))): - if not all(x in entry for x in ('ISBN13', 'Title', 'Author')): - raise ValueError('Author, title, and isbn must be in data.') - ImportItem(job=job, index=index, data=entry).save() - return job - - -def create_retry_job(user, original_job, items): - ''' retry items that didn't import ''' - job = ImportJob.objects.create( - user=user, - include_reviews=original_job.include_reviews, - privacy=original_job.privacy, - retry=True - ) - for item in items: - ImportItem(job=job, index=item.index, data=item.data).save() - return job - - -def start_import(job): - ''' initalizes a csv import job ''' - result = import_data.delay(job.id) - job.task_id = result.id - job.save() - - -@app.task -def import_data(job_id): - ''' does the actual lookup work in a celery task ''' - job = ImportJob.objects.get(id=job_id) - try: - for item in job.items.all(): - try: - item.resolve() - except Exception as e:# pylint: disable=broad-except - logger.exception(e) - item.fail_reason = 'Error loading book' - item.save() - continue - - if item.book: - item.save() - - # shelves book and handles reviews - handle_imported_book( - job.user, item, job.include_reviews, job.privacy) - else: - item.fail_reason = 'Could not find a match for book' - item.save() - finally: - job.complete = True - job.save() - - -def handle_imported_book(user, item, include_reviews, privacy): - ''' process a goodreads csv and then post about it ''' - if isinstance(item.book, models.Work): - item.book = item.book.default_edition - if not item.book: - return - - existing_shelf = models.ShelfBook.objects.filter( - book=item.book, user=user).exists() - - # shelve the book if it hasn't been shelved already - if item.shelf and not existing_shelf: - desired_shelf = models.Shelf.objects.get( - identifier=item.shelf, - user=user - ) - models.ShelfBook.objects.create( - book=item.book, shelf=desired_shelf, user=user) - - for read in item.reads: - # check for an existing readthrough with the same dates - if models.ReadThrough.objects.filter( - user=user, book=item.book, - start_date=read.start_date, - finish_date=read.finish_date - ).exists(): - continue - read.book = item.book - read.user = user - read.save() - - if include_reviews and (item.rating or item.review): - review_title = 'Review of {!r} on Goodreads'.format( - item.book.title, - ) if item.review else '' - - # we don't know the publication date of the review, - # but "now" is a bad guess - published_date_guess = item.date_read or item.date_added - models.Review.objects.create( - user=user, - book=item.book, - name=review_title, - content=item.review, - rating=item.rating, - published_date=published_date_guess, - privacy=privacy, - ) + def parse_fields(self, data): + data.update({'import_source': self.service }) + # add missing 'Date Started' field + data.update({'Date Started': None }) + return data diff --git a/bookwyrm/importer.py b/bookwyrm/importer.py new file mode 100644 index 00000000..b6e959cb --- /dev/null +++ b/bookwyrm/importer.py @@ -0,0 +1,135 @@ +''' handle reading a csv from an external service, defaults are from GoodReads ''' +import csv +import logging + +from bookwyrm import models +from bookwyrm.models import ImportJob, ImportItem +from bookwyrm.tasks import app + +logger = logging.getLogger(__name__) + +class Importer: + service = 'Unknown' + delimiter = ',' + encoding = 'UTF-8' + mandatory_fields = ['ISBN13', 'Title', 'Author'] + + def create_job(self, user, csv_file, include_reviews, privacy): + ''' check over a csv and creates a database entry for the job''' + job = ImportJob.objects.create( + user=user, + include_reviews=include_reviews, + privacy=privacy + ) + for index, entry in enumerate(list(csv.DictReader(csv_file, delimiter=self.delimiter ))): + if not all(x in entry for x in self.mandatory_fields): + raise ValueError('Author, title, and isbn must be in data.') + entry = self.parse_fields(entry) + self.save_item(job, index, entry) + return job + + + def save_item(self, job, index, data): + ImportItem(job=job, index=index, data=data).save() + + def parse_fields(self, entry): + entry.update({'import_source': self.service }) + return entry + + def create_retry_job(self, user, original_job, items): + ''' retry items that didn't import ''' + job = ImportJob.objects.create( + user=user, + include_reviews=original_job.include_reviews, + privacy=original_job.privacy, + retry=True + ) + for item in items: + self.save_item(job, item.index, item.data) + return job + + + def start_import(self, job): + ''' initalizes a csv import job ''' + result = import_data.delay(self.service, job.id) + job.task_id = result.id + job.save() + + +@app.task +def import_data(source, job_id): + ''' does the actual lookup work in a celery task ''' + job = ImportJob.objects.get(id=job_id) + try: + for item in job.items.all(): + try: + item.resolve() + except Exception as e:# pylint: disable=broad-except + logger.exception(e) + item.fail_reason = 'Error loading book' + item.save() + continue + + if item.book: + item.save() + + # shelves book and handles reviews + handle_imported_book(source, + job.user, item, job.include_reviews, job.privacy) + else: + item.fail_reason = 'Could not find a match for book' + item.save() + finally: + job.complete = True + job.save() + + +def handle_imported_book(source, user, item, include_reviews, privacy): + ''' process a csv and then post about it ''' + if isinstance(item.book, models.Work): + item.book = item.book.default_edition + if not item.book: + return + + existing_shelf = models.ShelfBook.objects.filter( + book=item.book, user=user).exists() + + # shelve the book if it hasn't been shelved already + if item.shelf and not existing_shelf: + desired_shelf = models.Shelf.objects.get( + identifier=item.shelf, + user=user + ) + models.ShelfBook.objects.create( + book=item.book, shelf=desired_shelf, user=user) + + for read in item.reads: + # check for an existing readthrough with the same dates + if models.ReadThrough.objects.filter( + user=user, book=item.book, + start_date=read.start_date, + finish_date=read.finish_date + ).exists(): + continue + read.book = item.book + read.user = user + read.save() + + if include_reviews and (item.rating or item.review): + review_title = 'Review of {!r} on {!r}'.format( + item.book.title, + source, + ) if item.review else '' + + # we don't know the publication date of the review, + # but "now" is a bad guess + published_date_guess = item.date_read or item.date_added + models.Review.objects.create( + user=user, + book=item.book, + name=review_title, + content=item.review, + rating=item.rating, + published_date=published_date_guess, + privacy=privacy, + ) diff --git a/bookwyrm/librarything_import.py b/bookwyrm/librarything_import.py new file mode 100644 index 00000000..019573ac --- /dev/null +++ b/bookwyrm/librarything_import.py @@ -0,0 +1,39 @@ +''' handle reading a csv from librarything ''' +import csv +import re +import math + +from bookwyrm import models +from bookwyrm.models import ImportItem +from bookwyrm.importer import Importer + + +class LibrarythingImporter(Importer): + service = 'LibraryThing' + delimiter = '\t' + encoding = 'ISO-8859-1' + # mandatory_fields : fields matching the book ISBN13, title and author + mandatory_fields = ['ISBN', 'Title', 'Primary Author'] + + def parse_fields(self, initial): + data = {} + data['import_source'] = self.service + data['Book Id'] = initial['Book Id'] + data['Title'] = initial['Title'] + data['Author'] = initial['Primary Author'] + data['ISBN13'] = initial['ISBN'] + data['My Review'] = initial['Review'] + data['My Rating'] = math.ceil(float(initial['Rating'])) + data['Date Added'] = re.sub('\[|\]', '', initial['Entry Date']) + data['Date Started'] = re.sub('\[|\]', '', initial['Date Started']) + data['Date Read'] = re.sub('\[|\]', '', initial['Date Read']) + + data['Exclusive Shelf'] = None + if data['Date Read']: + data['Exclusive Shelf'] = "read" + elif data['Date Started']: + data['Exclusive Shelf'] = "reading" + else: + data['Exclusive Shelf'] = "to-read" + + return data diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 407d820b..ca05ddb0 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -97,8 +97,8 @@ class ImportItem(models.Model): def get_book_from_title_author(self): ''' search by title and author ''' search_term = construct_search_term( - self.data['Title'], - self.data['Author'] + self.title, + self.author ) search_result = connector_manager.first_search_result( search_term, min_confidence=0.999 @@ -149,6 +149,14 @@ class ImportItem(models.Model): dateutil.parser.parse(self.data['Date Added'])) return None + @property + def date_started(self): + ''' when the book was started ''' + if "Date Started" in self.data and self.data['Date Started']: + return timezone.make_aware( + dateutil.parser.parse(self.data['Date Started'])) + return None + @property def date_read(self): ''' the date a book was completed ''' @@ -160,18 +168,24 @@ class ImportItem(models.Model): @property def reads(self): ''' formats a read through dataset for the book in this line ''' - if (self.shelf == 'reading' - and self.date_added and not self.date_read): - return [ReadThrough(start_date=self.date_added)] + start_date = self.date_started + + # Goodreads special case (no 'date started' field) + if ((self.shelf == 'reading' or (self.shelf == 'read' and self.date_read)) + and self.date_added and not start_date): + start_date = self.date_added + + if (start_date and start_date is not None and not self.date_read): + return [ReadThrough(start_date=start_date)] if self.date_read: return [ReadThrough( - start_date=self.date_added, + start_date=start_date, finish_date=self.date_read, )] return [] def __repr__(self): - return "".format(self.data['Title']) + return "<{!r}Item {!r}>".format(self.data['import_source'], self.data['Title']) def __str__(self): return "{} by {}".format(self.data['Title'], self.data['Author']) diff --git a/bookwyrm/templates/import.html b/bookwyrm/templates/import.html index 23cd3445..dd60ebff 100644 --- a/bookwyrm/templates/import.html +++ b/bookwyrm/templates/import.html @@ -2,9 +2,24 @@ {% load humanize %} {% block content %}
-

Import Books from GoodReads

+

Import Books

{% csrf_token %} + + +
{{ import_form.as_p }}
diff --git a/bookwyrm/tests/data/librarything.tsv b/bookwyrm/tests/data/librarything.tsv new file mode 100644 index 00000000..f0dca77f --- /dev/null +++ b/bookwyrm/tests/data/librarything.tsv @@ -0,0 +1,4 @@ +Book Id Title Sort Character Primary Author Primary Author Role Secondary Author Secondary Author Roles Publication Date Review Rating Comment Private Comment Summary Media Physical Description Weight Height Thickness Length Dimensions Page Count LCCN Acquired Date Started Date Read Barcode BCID Tags Collections Languages Original Languages LC Classification ISBN ISBNs Subjects Dewey Decimal Dewey Wording Other Call Number Copies Source Entry Date From Where OCLC Work id Lending Patron Lending Status Lending Start Lending End +5498194 Marelle 1 Cortázar, Julio Gallimard (1979), Poche 1979 chef d'oeuvre 4.5 Marelle by Julio Cortázar (1979) Broché 590 p.; 7.24 inches 1.28 pounds 7.24 inches 1.26 inches 4.96 inches 7.24 x 4.96 x 1.26 inches 590 [2007-04-16] [2007-05-08] roman, espagnol, expérimental, bohème, philosophie Your library French Spanish PQ7797 .C7145 [2070291340] 2070291340, 9782070291342 Cortazar, Julio. Rayuela 863 Literature > Spanish And Portuguese > Spanish fiction 1 Amazon.fr [2006-08-09] 57814 +5015319 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) 1 Roubaud, Jacques Seuil (1989), Unknown Binding 1989 5 Le grand incendie de Londres: Récit, avec incises et bifurcations, 1985-1987 (Fiction & Cie) by Jacques Roubaud (1989) Broché 411 p.; 7.72 inches 0.88 pounds 7.72 inches 1.02 inches 5.43 inches 7.72 x 5.43 x 1.02 inches 411 Your library English PQ2678 .O77 [2020104725] 2020104725, 9782020104722 Autobiographical fiction|Roubaud, Jacques > Fiction 813 American And Canadian > Fiction > Literature 1 Amazon.com [2006-07-25] 478910 +5015399 Le Maître et Marguerite 1 Boulgakov, Mikhaïl Pocket (1994), Poche 1994 5 Le Maître et Marguerite by Mikhaïl Boulgakov (1994) Broché 579 p.; 7.09 inches 0.66 pounds 7.09 inches 1.18 inches 4.33 inches 7.09 x 4.33 x 1.18 inches 579 Your library French PG3476 .B78 [2266062328] 2266062328, 9782266062329 Allegories|Bulgakov|Good and evil > Fiction|Humanities|Jerusalem > Fiction|Jesus Christ > Fiction|Literature|Mental illness > Fiction|Moscow (Russia) > Fiction|Novel|Pilate, Pontius, 1st cent. > Fiction|Political fiction|Russia > Fiction|Russian fiction|Russian publications (Form Entry)|Soviet Union > History > 1925-1953 > Fiction|literature 891.7342 1917-1945 > 1917-1991 (USSR) > Literature > Literature of other Indo-European languages > Other Languages > Russian > Russian Fiction 1 Amazon.fr [2006-07-25] 10151 diff --git a/bookwyrm/tests/test_goodreads_import.py b/bookwyrm/tests/test_goodreads_import.py index 6a14368c..aee9afe4 100644 --- a/bookwyrm/tests/test_goodreads_import.py +++ b/bookwyrm/tests/test_goodreads_import.py @@ -7,16 +7,19 @@ from unittest.mock import patch from django.test import TestCase import responses -from bookwyrm import goodreads_import, models +from bookwyrm import models, importer +from bookwyrm.goodreads_import import GoodreadsImporter +from bookwyrm import importer from bookwyrm.settings import DOMAIN class GoodreadsImport(TestCase): ''' importing from goodreads csv ''' def setUp(self): + self.importer = GoodreadsImporter() ''' use a test csv ''' datafile = pathlib.Path(__file__).parent.joinpath( 'data/goodreads.csv') - self.csv = open(datafile, 'r') + self.csv = open(datafile, 'r', encoding=self.importer.encoding) self.user = models.User.objects.create_user( 'mouse', 'mouse@mouse.mouse', 'password', local=True) @@ -41,7 +44,7 @@ class GoodreadsImport(TestCase): def test_create_job(self): ''' creates the import job entry and checks csv ''' - import_job = goodreads_import.create_job( + import_job = self.importer.create_job( self.user, self.csv, False, 'public') self.assertEqual(import_job.user, self.user) self.assertEqual(import_job.include_reviews, False) @@ -59,13 +62,13 @@ class GoodreadsImport(TestCase): def test_create_retry_job(self): ''' trying again with items that didn't import ''' - import_job = goodreads_import.create_job( + import_job = self.importer.create_job( self.user, self.csv, False, 'unlisted') import_items = models.ImportItem.objects.filter( job=import_job ).all()[:2] - retry = goodreads_import.create_retry_job( + retry = self.importer.create_retry_job( self.user, import_job, import_items) self.assertNotEqual(import_job, retry) self.assertEqual(retry.user, self.user) @@ -82,13 +85,13 @@ class GoodreadsImport(TestCase): def test_start_import(self): ''' begin loading books ''' - import_job = goodreads_import.create_job( + import_job = self.importer.create_job( self.user, self.csv, False, 'unlisted') MockTask = namedtuple('Task', ('id')) mock_task = MockTask(7) - with patch('bookwyrm.goodreads_import.import_data.delay') as start: + with patch('bookwyrm.importer.import_data.delay') as start: start.return_value = mock_task - goodreads_import.start_import(import_job) + self.importer.start_import(import_job) import_job.refresh_from_db() self.assertEqual(import_job.task_id, '7') @@ -96,7 +99,7 @@ class GoodreadsImport(TestCase): @responses.activate def test_import_data(self): ''' resolve entry ''' - import_job = goodreads_import.create_job( + import_job = self.importer.create_job( self.user, self.csv, False, 'unlisted') book = models.Edition.objects.create(title='Test Book') @@ -104,8 +107,8 @@ class GoodreadsImport(TestCase): 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn' ) as resolve: resolve.return_value = book - with patch('bookwyrm.goodreads_import.handle_imported_book'): - goodreads_import.import_data(import_job.id) + with patch('bookwyrm.importer.handle_imported_book'): + importer.import_data(self.importer.service, import_job.id) import_item = models.ImportItem.objects.get(job=import_job, index=0) self.assertEqual(import_item.book.id, book.id) @@ -120,13 +123,14 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') csv_file = open(datafile, 'r') for index, entry in enumerate(list(csv.DictReader(csv_file))): + entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( job_id=import_job.id, index=index, data=entry, book=self.book) break with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): - goodreads_import.handle_imported_book( - self.user, import_item, False, 'public') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') shelf.refresh_from_db() self.assertEqual(shelf.books.first(), self.book) @@ -153,13 +157,14 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') csv_file = open(datafile, 'r') for index, entry in enumerate(list(csv.DictReader(csv_file))): + entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( job_id=import_job.id, index=index, data=entry, book=self.book) break with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): - goodreads_import.handle_imported_book( - self.user, import_item, False, 'public') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') shelf.refresh_from_db() self.assertEqual(shelf.books.first(), self.book) @@ -182,15 +187,16 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') csv_file = open(datafile, 'r') for index, entry in enumerate(list(csv.DictReader(csv_file))): + entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( job_id=import_job.id, index=index, data=entry, book=self.book) break with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): - goodreads_import.handle_imported_book( - self.user, import_item, False, 'public') - goodreads_import.handle_imported_book( - self.user, import_item, False, 'public') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') shelf.refresh_from_db() self.assertEqual(shelf.books.first(), self.book) @@ -212,12 +218,13 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') csv_file = open(datafile, 'r') entry = list(csv.DictReader(csv_file))[2] + entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( job_id=import_job.id, index=0, data=entry, book=self.book) with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): - goodreads_import.handle_imported_book( - self.user, import_item, True, 'unlisted') + importer.handle_imported_book( + self.importer.service, self.user, import_item, True, 'unlisted') review = models.Review.objects.get(book=self.book, user=self.user) self.assertEqual(review.content, 'mixed feelings') self.assertEqual(review.rating, 2) @@ -233,12 +240,13 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath('data/goodreads.csv') csv_file = open(datafile, 'r') entry = list(csv.DictReader(csv_file))[2] + entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( job_id=import_job.id, index=0, data=entry, book=self.book) with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): - goodreads_import.handle_imported_book( - self.user, import_item, False, 'unlisted') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'unlisted') self.assertFalse(models.Review.objects.filter( book=self.book, user=self.user ).exists()) diff --git a/bookwyrm/tests/test_librarything_import.py b/bookwyrm/tests/test_librarything_import.py new file mode 100644 index 00000000..2623a504 --- /dev/null +++ b/bookwyrm/tests/test_librarything_import.py @@ -0,0 +1,240 @@ +''' testing import ''' +from collections import namedtuple +import csv +import pathlib +from unittest.mock import patch + +from django.test import TestCase +import responses + +from bookwyrm import models, importer +from bookwyrm.librarything_import import LibrarythingImporter +from bookwyrm.settings import DOMAIN + +class LibrarythingImport(TestCase): + ''' importing from librarything tsv ''' + def setUp(self): + self.importer = LibrarythingImporter() + ''' use a test tsv ''' + datafile = pathlib.Path(__file__).parent.joinpath( + 'data/librarything.tsv') + + # Librarything generates latin encoded exports... + self.csv = open(datafile, 'r', encoding=self.importer.encoding) + self.user = models.User.objects.create_user( + 'mmai', 'mmai@mmai.mmai', 'password', local=True) + + models.Connector.objects.create( + identifier=DOMAIN, + name='Local', + local=True, + connector_file='self_connector', + base_url='https://%s' % DOMAIN, + books_url='https://%s/book' % DOMAIN, + covers_url='https://%s/images/covers' % DOMAIN, + search_url='https://%s/search?q=' % DOMAIN, + priority=1, + ) + work = models.Work.objects.create(title='Test Work') + self.book = models.Edition.objects.create( + title='Example Edition', + remote_id='https://example.com/book/1', + parent_work=work + ) + + + def test_create_job(self): + ''' creates the import job entry and checks csv ''' + import_job = self.importer.create_job( + self.user, self.csv, False, 'public') + self.assertEqual(import_job.user, self.user) + self.assertEqual(import_job.include_reviews, False) + self.assertEqual(import_job.privacy, 'public') + + import_items = models.ImportItem.objects.filter(job=import_job).all() + self.assertEqual(len(import_items), 3) + self.assertEqual(import_items[0].index, 0) + self.assertEqual(import_items[0].data['Book Id'], '5498194') + self.assertEqual(import_items[1].index, 1) + self.assertEqual(import_items[1].data['Book Id'], '5015319') + self.assertEqual(import_items[2].index, 2) + self.assertEqual(import_items[2].data['Book Id'], '5015399') + + + def test_create_retry_job(self): + ''' trying again with items that didn't import ''' + import_job = self.importer.create_job( + self.user, self.csv, False, 'unlisted') + import_items = models.ImportItem.objects.filter( + job=import_job + ).all()[:2] + + retry = self.importer.create_retry_job( + self.user, import_job, import_items) + self.assertNotEqual(import_job, retry) + self.assertEqual(retry.user, self.user) + self.assertEqual(retry.include_reviews, False) + self.assertEqual(retry.privacy, 'unlisted') + + retry_items = models.ImportItem.objects.filter(job=retry).all() + self.assertEqual(len(retry_items), 2) + self.assertEqual(retry_items[0].index, 0) + self.assertEqual(import_items[0].data['Book Id'], '5498194') + self.assertEqual(retry_items[1].index, 1) + self.assertEqual(retry_items[1].data['Book Id'], '5015319') + + + @responses.activate + def test_import_data(self): + ''' resolve entry ''' + import_job = self.importer.create_job( + self.user, self.csv, False, 'unlisted') + book = models.Edition.objects.create(title='Test Book') + + with patch( + 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn' + ) as resolve: + resolve.return_value = book + with patch('bookwyrm.importer.handle_imported_book'): + importer.import_data(self.importer.service, import_job.id) + + import_item = models.ImportItem.objects.get(job=import_job, index=0) + self.assertEqual(import_item.book.id, book.id) + + + def test_handle_imported_book(self): + ''' librarything import added a book, this adds related connections ''' + shelf = self.user.shelf_set.filter(identifier='read').first() + self.assertIsNone(shelf.books.first()) + + import_job = models.ImportJob.objects.create(user=self.user) + datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv') + csv_file = open(datafile, 'r', encoding=self.importer.encoding) + for index, entry in enumerate(list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))): + entry = self.importer.parse_fields(entry) + import_item = models.ImportItem.objects.create( + job_id=import_job.id, index=index, data=entry, book=self.book) + break + + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') + + shelf.refresh_from_db() + self.assertEqual(shelf.books.first(), self.book) + + readthrough = models.ReadThrough.objects.get(user=self.user) + self.assertEqual(readthrough.book, self.book) + # I can't remember how to create dates and I don't want to look it up. + self.assertEqual(readthrough.start_date.year, 2007) + self.assertEqual(readthrough.start_date.month, 4) + self.assertEqual(readthrough.start_date.day, 16) + self.assertEqual(readthrough.finish_date.year, 2007) + self.assertEqual(readthrough.finish_date.month, 5) + self.assertEqual(readthrough.finish_date.day, 8) + + + def test_handle_imported_book_already_shelved(self): + ''' librarything import added a book, this adds related connections ''' + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + shelf = self.user.shelf_set.filter(identifier='to-read').first() + models.ShelfBook.objects.create( + shelf=shelf, user=self.user, book=self.book) + + import_job = models.ImportJob.objects.create(user=self.user) + datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv') + csv_file = open(datafile, 'r', encoding=self.importer.encoding) + for index, entry in enumerate(list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))): + entry = self.importer.parse_fields(entry) + import_item = models.ImportItem.objects.create( + job_id=import_job.id, index=index, data=entry, book=self.book) + break + + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') + + shelf.refresh_from_db() + self.assertEqual(shelf.books.first(), self.book) + self.assertIsNone( + self.user.shelf_set.get(identifier='read').books.first()) + readthrough = models.ReadThrough.objects.get(user=self.user) + self.assertEqual(readthrough.book, self.book) + self.assertEqual(readthrough.start_date.year, 2007) + self.assertEqual(readthrough.start_date.month, 4) + self.assertEqual(readthrough.start_date.day, 16) + self.assertEqual(readthrough.finish_date.year, 2007) + self.assertEqual(readthrough.finish_date.month, 5) + self.assertEqual(readthrough.finish_date.day, 8) + + + def test_handle_import_twice(self): + ''' re-importing books ''' + shelf = self.user.shelf_set.filter(identifier='read').first() + import_job = models.ImportJob.objects.create(user=self.user) + datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv') + csv_file = open(datafile, 'r', encoding=self.importer.encoding) + for index, entry in enumerate(list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))): + entry = self.importer.parse_fields(entry) + import_item = models.ImportItem.objects.create( + job_id=import_job.id, index=index, data=entry, book=self.book) + break + + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'public') + + shelf.refresh_from_db() + self.assertEqual(shelf.books.first(), self.book) + + readthrough = models.ReadThrough.objects.get(user=self.user) + self.assertEqual(readthrough.book, self.book) + # I can't remember how to create dates and I don't want to look it up. + self.assertEqual(readthrough.start_date.year, 2007) + self.assertEqual(readthrough.start_date.month, 4) + self.assertEqual(readthrough.start_date.day, 16) + self.assertEqual(readthrough.finish_date.year, 2007) + self.assertEqual(readthrough.finish_date.month, 5) + self.assertEqual(readthrough.finish_date.day, 8) + + + def test_handle_imported_book_review(self): + ''' librarything review import ''' + import_job = models.ImportJob.objects.create(user=self.user) + datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv') + csv_file = open(datafile, 'r', encoding=self.importer.encoding) + entry = list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))[0] + entry = self.importer.parse_fields(entry) + import_item = models.ImportItem.objects.create( + job_id=import_job.id, index=0, data=entry, book=self.book) + + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + importer.handle_imported_book( + self.importer.service, self.user, import_item, True, 'unlisted') + review = models.Review.objects.get(book=self.book, user=self.user) + self.assertEqual(review.content, 'chef d\'oeuvre') + self.assertEqual(review.rating, 5) + self.assertEqual(review.published_date.year, 2007) + self.assertEqual(review.published_date.month, 5) + self.assertEqual(review.published_date.day, 8) + self.assertEqual(review.privacy, 'unlisted') + + + def test_handle_imported_book_reviews_disabled(self): + ''' librarything review import ''' + import_job = models.ImportJob.objects.create(user=self.user) + datafile = pathlib.Path(__file__).parent.joinpath('data/librarything.tsv') + csv_file = open(datafile, 'r', encoding=self.importer.encoding) + entry = list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))[2] + entry = self.importer.parse_fields(entry) + import_item = models.ImportItem.objects.create( + job_id=import_job.id, index=0, data=entry, book=self.book) + + with patch('bookwyrm.models.activitypub_mixin.broadcast_task.delay'): + importer.handle_imported_book( + self.importer.service, self.user, import_item, False, 'unlisted') + self.assertFalse(models.Review.objects.filter( + book=self.book, user=self.user + ).exists()) diff --git a/bookwyrm/views/import_data.py b/bookwyrm/views/import_data.py index 8c7cee88..675cae3d 100644 --- a/bookwyrm/views/import_data.py +++ b/bookwyrm/views/import_data.py @@ -9,7 +9,7 @@ from django.template.response import TemplateResponse from django.utils.decorators import method_decorator from django.views import View -from bookwyrm import forms, goodreads_import, models +from bookwyrm import forms, goodreads_import, librarything_import, models from bookwyrm.tasks import app # pylint: disable= no-self-use @@ -31,18 +31,29 @@ class Import(View): if form.is_valid(): include_reviews = request.POST.get('include_reviews') == 'on' privacy = request.POST.get('privacy') + source = request.POST.get('source') + + importer = None + if source == 'LibraryThing': + importer = librarything_import.LibrarythingImporter() + else: + # Default : GoodReads + importer = goodreads_import.GoodreadsImporter() + try: - job = goodreads_import.create_job( + job = importer.create_job( request.user, TextIOWrapper( request.FILES['csv_file'], - encoding=request.encoding), + encoding=importer.encoding), include_reviews, privacy, ) except (UnicodeDecodeError, ValueError): return HttpResponseBadRequest('Not a valid csv file') - goodreads_import.start_import(job) + + importer.start_import(job) + return redirect('/import/%d' % job.id) return HttpResponseBadRequest() From bfedb300ca0856c1f00a66435b594625315cd5b4 Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Fri, 26 Feb 2021 13:32:17 +0100 Subject: [PATCH 02/15] librarything import : fix parsing & select box --- bookwyrm/importer.py | 4 ++-- bookwyrm/librarything_import.py | 9 ++++++--- bookwyrm/templates/import.html | 2 +- bookwyrm/tests/data/librarything.tsv | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/bookwyrm/importer.py b/bookwyrm/importer.py index b6e959cb..a1288400 100644 --- a/bookwyrm/importer.py +++ b/bookwyrm/importer.py @@ -12,7 +12,7 @@ class Importer: service = 'Unknown' delimiter = ',' encoding = 'UTF-8' - mandatory_fields = ['ISBN13', 'Title', 'Author'] + mandatory_fields = ['Title', 'Author'] def create_job(self, user, csv_file, include_reviews, privacy): ''' check over a csv and creates a database entry for the job''' @@ -23,7 +23,7 @@ class Importer: ) for index, entry in enumerate(list(csv.DictReader(csv_file, delimiter=self.delimiter ))): if not all(x in entry for x in self.mandatory_fields): - raise ValueError('Author, title, and isbn must be in data.') + raise ValueError('Author and title must be in data.') entry = self.parse_fields(entry) self.save_item(job, index, entry) return job diff --git a/bookwyrm/librarything_import.py b/bookwyrm/librarything_import.py index 019573ac..0584daad 100644 --- a/bookwyrm/librarything_import.py +++ b/bookwyrm/librarything_import.py @@ -12,8 +12,8 @@ class LibrarythingImporter(Importer): service = 'LibraryThing' delimiter = '\t' encoding = 'ISO-8859-1' - # mandatory_fields : fields matching the book ISBN13, title and author - mandatory_fields = ['ISBN', 'Title', 'Primary Author'] + # mandatory_fields : fields matching the book title and author + mandatory_fields = ['Title', 'Primary Author'] def parse_fields(self, initial): data = {} @@ -23,7 +23,10 @@ class LibrarythingImporter(Importer): data['Author'] = initial['Primary Author'] data['ISBN13'] = initial['ISBN'] data['My Review'] = initial['Review'] - data['My Rating'] = math.ceil(float(initial['Rating'])) + if initial['Rating']: + data['My Rating'] = math.ceil(float(initial['Rating'])) + else: + data['My Rating'] = '' data['Date Added'] = re.sub('\[|\]', '', initial['Entry Date']) data['Date Started'] = re.sub('\[|\]', '', initial['Date Started']) data['Date Read'] = re.sub('\[|\]', '', initial['Date Read']) diff --git a/bookwyrm/templates/import.html b/bookwyrm/templates/import.html index dd60ebff..a7fde76d 100644 --- a/bookwyrm/templates/import.html +++ b/bookwyrm/templates/import.html @@ -10,7 +10,7 @@

Data source

- +
{% endif %} @@ -54,21 +55,21 @@
{% if book.isbn_13 %}
-
ISBN:
+
{% trans "ISBN:" %}
{{ book.isbn_13 }}
{% endif %} {% if book.oclc_number %}
-
OCLC Number:
+
{% trans "OCLC Number:" %}
{{ book.oclc_number }}
{% endif %} {% if book.asin %}
-
ASIN:
+
{% trans "ASIN:" %}
{{ book.asin }}
{% endif %} @@ -80,7 +81,7 @@

{% if book.openlibrary_key %} -

View on OpenLibrary

+

{% trans "View on OpenLibrary" %}

{% endif %}
@@ -98,11 +99,11 @@
{% csrf_token %}

- +

- + {% include 'snippets/toggle/close_button.html' with text="Cancel" controls_text="add-description" controls_uid=book.id hide_inactive=True %}
@@ -134,20 +135,20 @@ {% if request.user.is_authenticated %}
-

Your reading activity

+

{% trans "Your reading activity" %}

{% include 'snippets/toggle/open_button.html' with text="Add read dates" icon="plus" class="is-small" controls_text="add-readthrough" %}
{% if not readthroughs.exists %} -

You don't have any reading activity for this book.

+

{% trans "You don't have any reading activity for this book." %}

{% endif %}