From 1f5d95e2be8c0f9db76b2b33fc9b5531a6ec3589 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jan 2021 09:42:33 -0800 Subject: [PATCH 1/4] Removes import limit --- bookwyrm/goodreads_import.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 93fc1c48..b052ce99 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -8,8 +8,6 @@ from bookwyrm.models import ImportJob, ImportItem from bookwyrm.status import create_notification logger = logging.getLogger(__name__) -# TODO: remove or increase once we're confident it's not causing problems. -MAX_ENTRIES = 500 def create_job(user, csv_file, include_reviews, privacy): @@ -19,7 +17,7 @@ def create_job(user, csv_file, include_reviews, privacy): include_reviews=include_reviews, privacy=privacy ) - for index, entry in enumerate(list(csv.DictReader(csv_file))[:MAX_ENTRIES]): + for index, entry in enumerate(list(csv.DictReader(csv_file))): if not all(x in entry for x in ('ISBN13', 'Title', 'Author')): raise ValueError('Author, title, and isbn must be in data.') ImportItem(job=job, index=index, data=entry).save() From 204967e421156cd949079123b9c8c3e1b5c98051 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jan 2021 09:42:50 -0800 Subject: [PATCH 2/4] Adds tests file for goodreads import --- bookwyrm/tests/data/goodreads.csv | 4 +++ bookwyrm/tests/test_goodreads_import.py | 39 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 bookwyrm/tests/data/goodreads.csv create mode 100644 bookwyrm/tests/test_goodreads_import.py diff --git a/bookwyrm/tests/data/goodreads.csv b/bookwyrm/tests/data/goodreads.csv new file mode 100644 index 00000000..b96a0c26 --- /dev/null +++ b/bookwyrm/tests/data/goodreads.csv @@ -0,0 +1,4 @@ +Book Id,Title,Author,Author l-f,Additional Authors,ISBN,ISBN13,My Rating,Average Rating,Publisher,Binding,Number of Pages,Year Published,Original Publication Year,Date Read,Date Added,Bookshelves,Bookshelves with positions,Exclusive Shelf,My Review,Spoiler,Private Notes,Read Count,Recommended For,Recommended By,Owned Copies,Original Purchase Date,Original Purchase Location,Condition,Condition Description,BCID +42036538,Gideon the Ninth (The Locked Tomb #1),Tamsyn Muir,"Muir, Tamsyn",,"=""1250313198""","=""9781250313195""",0,4.20,Tor,Hardcover,448,2019,2019,2020/10/25,2020/10/21,,,read,,,,1,,,0,,,,, +52691223,Subcutanean,Aaron A. Reed,"Reed, Aaron A.",,"=""""","=""""",0,4.45,,Paperback,232,2020,,2020/03/06,2020/03/05,,,read,,,,1,,,0,,,,, +28694510,Patisserie at Home,Mélanie Dupuis,"Dupuis, Mélanie",Anne Cazor,"=""0062445316""","=""9780062445315""",2,4.60,Harper Design,Hardcover,288,2016,,,2019/07/08,,,read,"The good:
- Well illustrated and good photographs
- I loved the organization, with sections for base recipes like pie crust, full recipes, and skills
- I loved the madeleines and sweet pie crust recipe
- Very precise measurements

The bad:
- I found the index very hard to use, I would have much preferred a regular, alphabetical index of everything instead of breaking it down in to categories like a table of contents
- The primary unit of measure is ounces, which I found very hard to work with, and in fraction form which my food scale definitely does not do. One recipe calls for 1/32 oz, which I have absolutely no way of measuring out
- Some of the measurements were baffling, like 1/3 tablespoon. 1/3 tablespoon is 1 teaspoon, why would you write 1/3 tablespoon??
- The croissant dough recipe said to allow the pastry to get to room temperature before rolling which meant the butter squirted out and made a huge mess. I don't know why it said to do this??? Rolling works just fine if it's chilled.
- The financier recipe just tells you to add egg whites and has no other raising agent so if you just add the egg whites it will obviously not rise. Either there should have been a raising agent or the egg whites should have been beaten? I don't know.",,,2,,,0,,,,, diff --git a/bookwyrm/tests/test_goodreads_import.py b/bookwyrm/tests/test_goodreads_import.py new file mode 100644 index 00000000..312538a4 --- /dev/null +++ b/bookwyrm/tests/test_goodreads_import.py @@ -0,0 +1,39 @@ +''' testing import ''' +import pathlib + +from django.test import TestCase +import responses + +from bookwyrm import goodreads_import, models + +class GoodreadsImport(TestCase): + ''' importing from goodreads csv ''' + def setUp(self): + ''' use a test csv ''' + datafile = pathlib.Path(__file__).parent.joinpath( + 'data/goodreads.csv') + self.csv = open(datafile, 'r') + self.user = models.User.objects.create_user( + 'mouse', 'mouse@mouse.mouse', 'password', local=True) + + + def test_create_job(self): + ''' creates the import job entry and checks csv ''' + goodreads_import.create_job(self.user, self.csv, False, 'public') + import_job = models.ImportJob.objects.get() + self.assertEqual(import_job.user, self.user) + self.assertEqual(import_job.include_reviews, False) + self.assertEqual(import_job.privacy, 'public') + + import_items = models.ImportItem.objects.filter(job=import_job).all() + self.assertEqual(import_items[0].index, 0) + self.assertEqual(import_items[0].data['Book Id'], '42036538') + self.assertEqual(import_items[1].index, 1) + self.assertEqual(import_items[1].data['Book Id'], '52691223') + self.assertEqual(import_items[2].index, 2) + self.assertEqual(import_items[2].data['Book Id'], '28694510') + + + @responses.activate + def test_import_data(self): + ''' resolve entry ''' From 97b56e9bc28038a7d382ab56c9d2948dc10d4574 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jan 2021 09:45:51 -0800 Subject: [PATCH 3/4] Removes references to max entries in views --- bookwyrm/templates/import.html | 2 -- bookwyrm/views.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/bookwyrm/templates/import.html b/bookwyrm/templates/import.html index 8e3f5eb4..bfa8d3ec 100644 --- a/bookwyrm/templates/import.html +++ b/bookwyrm/templates/import.html @@ -21,8 +21,6 @@ -

- Imports are limited in size, and only the first {{ limit }} items will be imported.

diff --git a/bookwyrm/views.py b/bookwyrm/views.py index 9a0157b1..10137684 100644 --- a/bookwyrm/views.py +++ b/bookwyrm/views.py @@ -14,7 +14,6 @@ from django.views.decorators.http import require_GET from bookwyrm import outgoing from bookwyrm import forms, models -from bookwyrm import goodreads_import from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.connectors import connector_manager from bookwyrm.settings import PAGE_LENGTH @@ -252,7 +251,6 @@ def import_page(request): 'import_form': forms.ImportForm(), 'jobs': models.ImportJob. objects.filter(user=request.user).order_by('-created_date'), - 'limit': goodreads_import.MAX_ENTRIES, }) From 3344eed3b978ca449f86bf4a75b052b304d4899c Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jan 2021 11:29:50 -0800 Subject: [PATCH 4/4] Tests for goodreads import lookup --- bookwyrm/connectors/self_connector.py | 3 +- bookwyrm/goodreads_import.py | 4 +- bookwyrm/tests/test_goodreads_import.py | 69 ++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index cad98249..7d0298be 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -9,8 +9,7 @@ from .abstract_connector import AbstractConnector, SearchResult class Connector(AbstractConnector): ''' instantiate a connector ''' def search(self, query, min_confidence=0.1): - ''' right now you can't search bookwyrm sorry, but when - that gets implemented it will totally rule ''' + ''' search your local database ''' vector = SearchVector('title', weight='A') +\ SearchVector('subtitle', weight='B') +\ SearchVector('authors__name', weight='C') +\ diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index b052ce99..9b8a4f01 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -23,6 +23,7 @@ def create_job(user, csv_file, include_reviews, privacy): ImportItem(job=job, index=index, data=entry).save() return job + def create_retry_job(user, original_job, items): ''' retry items that didn't import ''' job = ImportJob.objects.create( @@ -35,6 +36,7 @@ def create_retry_job(user, original_job, items): ImportItem(job=job, index=item.index, data=item.data).save() return job + def start_import(job): ''' initalizes a csv import job ''' result = import_data.delay(job.id) @@ -47,7 +49,6 @@ def import_data(job_id): ''' does the actual lookup work in a celery task ''' job = ImportJob.objects.get(id=job_id) try: - results = [] for item in job.items.all(): try: item.resolve() @@ -59,7 +60,6 @@ def import_data(job_id): if item.book: item.save() - results.append(item) # shelves book and handles reviews outgoing.handle_imported_book( diff --git a/bookwyrm/tests/test_goodreads_import.py b/bookwyrm/tests/test_goodreads_import.py index 312538a4..2518ab7b 100644 --- a/bookwyrm/tests/test_goodreads_import.py +++ b/bookwyrm/tests/test_goodreads_import.py @@ -1,10 +1,13 @@ ''' testing import ''' +from collections import namedtuple import pathlib +from unittest.mock import patch from django.test import TestCase import responses from bookwyrm import goodreads_import, models +from bookwyrm.settings import DOMAIN class GoodreadsImport(TestCase): ''' importing from goodreads csv ''' @@ -16,16 +19,29 @@ class GoodreadsImport(TestCase): self.user = models.User.objects.create_user( 'mouse', 'mouse@mouse.mouse', 'password', local=True) + models.Connector.objects.create( + identifier=DOMAIN, + name='Local', + local=True, + connector_file='self_connector', + base_url='https://%s' % DOMAIN, + books_url='https://%s/book' % DOMAIN, + covers_url='https://%s/images/covers' % DOMAIN, + search_url='https://%s/search?q=' % DOMAIN, + priority=1, + ) + def test_create_job(self): ''' creates the import job entry and checks csv ''' - goodreads_import.create_job(self.user, self.csv, False, 'public') - import_job = models.ImportJob.objects.get() + import_job = goodreads_import.create_job( + self.user, self.csv, False, 'public') self.assertEqual(import_job.user, self.user) self.assertEqual(import_job.include_reviews, False) self.assertEqual(import_job.privacy, 'public') import_items = models.ImportItem.objects.filter(job=import_job).all() + self.assertEqual(len(import_items), 3) self.assertEqual(import_items[0].index, 0) self.assertEqual(import_items[0].data['Book Id'], '42036538') self.assertEqual(import_items[1].index, 1) @@ -34,6 +50,55 @@ class GoodreadsImport(TestCase): self.assertEqual(import_items[2].data['Book Id'], '28694510') + def test_create_retry_job(self): + ''' trying again with items that didn't import ''' + import_job = goodreads_import.create_job( + self.user, self.csv, False, 'unlisted') + import_items = models.ImportItem.objects.filter( + job=import_job + ).all()[:2] + + retry = goodreads_import.create_retry_job( + self.user, import_job, import_items) + self.assertNotEqual(import_job, retry) + self.assertEqual(retry.user, self.user) + self.assertEqual(retry.include_reviews, False) + self.assertEqual(retry.privacy, 'unlisted') + + retry_items = models.ImportItem.objects.filter(job=retry).all() + self.assertEqual(len(retry_items), 2) + self.assertEqual(retry_items[0].index, 0) + self.assertEqual(retry_items[0].data['Book Id'], '42036538') + self.assertEqual(retry_items[1].index, 1) + self.assertEqual(retry_items[1].data['Book Id'], '52691223') + + + def test_start_import(self): + ''' begin loading books ''' + import_job = goodreads_import.create_job( + self.user, self.csv, False, 'unlisted') + MockTask = namedtuple('Task', ('id')) + mock_task = MockTask(7) + with patch('bookwyrm.goodreads_import.import_data.delay') as start: + start.return_value = mock_task + goodreads_import.start_import(import_job) + import_job.refresh_from_db() + self.assertEqual(import_job.task_id, '7') + + @responses.activate def test_import_data(self): ''' resolve entry ''' + import_job = goodreads_import.create_job( + self.user, self.csv, False, 'unlisted') + book = models.Edition.objects.create(title='Test Book') + + with patch( + 'bookwyrm.models.import_job.ImportItem.get_book_from_isbn' + ) as resolve: + resolve.return_value = book + with patch('bookwyrm.outgoing.handle_imported_book'): + goodreads_import.import_data(import_job.id) + + import_item = models.ImportItem.objects.get(job=import_job, index=0) + self.assertEqual(import_item.book.id, book.id)