moviewyrm/bookwyrm/tests/importers/test_goodreads_import.py

286 lines
12 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" testing import """
2021-01-02 19:29:50 +00:00
from collections import namedtuple
2021-01-13 21:54:01 +00:00
import csv
2021-01-02 17:42:50 +00:00
import pathlib
2021-01-02 19:29:50 +00:00
from unittest.mock import patch
import datetime
import pytz
2021-01-02 17:42:50 +00:00
from django.test import TestCase
import responses
2021-03-30 15:56:25 +00:00
from bookwyrm import models
2021-03-30 16:04:11 +00:00
from bookwyrm.importers import GoodreadsImporter
from bookwyrm.importers.importer import import_data, handle_imported_book
2021-01-02 19:29:50 +00:00
from bookwyrm.settings import DOMAIN
2021-01-02 17:42:50 +00:00
2021-03-08 16:49:10 +00:00
def make_date(*args):
    """Return a UTC-aware ``datetime`` built from positional components.

    ``args`` are forwarded unchanged to ``datetime.datetime`` (year, month,
    day, then any optional time components); the result carries
    ``pytz.UTC`` as its ``tzinfo``.
    """
    utc = pytz.UTC
    return datetime.datetime(*args, tzinfo=utc)
# pylint: disable=consider-using-with
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class GoodreadsImport(TestCase):
    """Tests for importing a Goodreads csv export.

    The class-level patches replace the listed ``.delay`` entry points with
    mocks for every test method. Each mock is handed to the test as an extra
    positional argument, which the tests absorb with ``*_``.
    """

    @staticmethod
    def _data_path(filename):
        """Return the path of ``filename`` in the tests' ``data`` directory."""
        return pathlib.Path(__file__).parent.joinpath("../data", filename)

    def _create_import_item(self, row_index, filename="goodreads.csv"):
        """Create an import job holding one item built from a sample csv row.

        Args:
            row_index: zero-based index of the csv data row to use.
            filename: name of the csv file inside the tests' data directory.

        Returns:
            The saved ``ImportItem``; it has index 0, carries the parsed row
            as its data and is linked to ``self.book``.
        """
        import_job = models.ImportJob.objects.create(user=self.user)
        # read with the importer's own encoding (as setUp does) and make sure
        # the handle is closed again once the rows are loaded
        with open(
            self._data_path(filename), "r", encoding=self.importer.encoding
        ) as csv_file:
            rows = list(csv.DictReader(csv_file))
        entry = self.importer.parse_fields(rows[row_index])
        return models.ImportItem.objects.create(
            job_id=import_job.id, index=0, data=entry, book=self.book
        )

    def setUp(self):
        """Open the sample csv and create the user, connector and book."""
        self.importer = GoodreadsImporter()
        datafile = self._data_path("goodreads.csv")
        self.csv = open(datafile, "r", encoding=self.importer.encoding)
        # the handle must stay open for the tests, so it cannot live in a
        # `with` block here; register the close so it is not leaked
        self.addCleanup(self.csv.close)

        # the class-level patches only wrap test methods, so the tasks have
        # to be patched explicitly while the user is created
        with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch(
            "bookwyrm.activitystreams.populate_stream_task.delay"
        ):
            self.user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True
            )

        models.Connector.objects.create(
            identifier=DOMAIN,
            name="Local",
            local=True,
            connector_file="self_connector",
            base_url=f"https://{DOMAIN}",
            books_url=f"https://{DOMAIN}/book",
            covers_url=f"https://{DOMAIN}/images/covers",
            search_url=f"https://{DOMAIN}/search?q=",
            priority=1,
        )
        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """create_job stores the job settings and one item per csv row"""
        import_job = self.importer.create_job(self.user, self.csv, False, "public")
        self.assertEqual(import_job.user, self.user)
        self.assertEqual(import_job.include_reviews, False)
        self.assertEqual(import_job.privacy, "public")

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 3)
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].data["Book Id"], "42036538")
        self.assertEqual(import_items[1].index, 1)
        self.assertEqual(import_items[1].data["Book Id"], "52691223")
        self.assertEqual(import_items[2].index, 2)
        self.assertEqual(import_items[2].data["Book Id"], "28694510")

    def test_create_retry_job(self, *_):
        """a retry job is a new job that copies settings and the given items"""
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        import_items = models.ImportItem.objects.filter(job=import_job).all()[:2]

        retry = self.importer.create_retry_job(self.user, import_job, import_items)
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.user)
        self.assertEqual(retry.include_reviews, False)
        self.assertEqual(retry.privacy, "unlisted")

        retry_items = models.ImportItem.objects.filter(job=retry).all()
        self.assertEqual(len(retry_items), 2)
        self.assertEqual(retry_items[0].index, 0)
        self.assertEqual(retry_items[0].data["Book Id"], "42036538")
        self.assertEqual(retry_items[1].index, 1)
        self.assertEqual(retry_items[1].data["Book Id"], "52691223")

    def test_start_import(self, *_):
        """start_import saves the id of the queued task on the job"""
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        # stand-in for the object returned by `.delay`; only `id` is needed
        MockTask = namedtuple("Task", ("id",))
        mock_task = MockTask(7)
        with patch("bookwyrm.importers.importer.import_data.delay") as start:
            start.return_value = mock_task
            self.importer.start_import(import_job)
        import_job.refresh_from_db()
        self.assertEqual(import_job.task_id, "7")

    @responses.activate
    def test_import_data(self, *_):
        """import_data attaches the resolved book to the import item"""
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        book = models.Edition.objects.create(title="Test Book")

        # the isbn lookup is mocked to return `book`, and the follow-up
        # handling is mocked out entirely
        with patch(
            "bookwyrm.models.import_job.ImportItem.get_book_from_isbn",
            return_value=book,
        ), patch("bookwyrm.importers.importer.handle_imported_book"):
            import_data(self.importer.service, import_job.id)

        import_item = models.ImportItem.objects.get(job=import_job, index=0)
        self.assertEqual(import_item.book.id, book.id)

    def test_handle_imported_book(self, *_):
        """an imported book is shelved as read and gets a readthrough"""
        shelf = self.user.shelf_set.filter(identifier="read").first()
        self.assertIsNone(shelf.books.first())

        import_item = self._create_import_item(0)

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertEqual(
            shelf.shelfbook_set.first().shelved_date, make_date(2020, 10, 21)
        )

        readthrough = models.ReadThrough.objects.get(user=self.user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2020, 10, 21))
        self.assertEqual(readthrough.finish_date, make_date(2020, 10, 25))

    def test_handle_imported_book_already_shelved(self, *_):
        """a book that is already shelved keeps its shelf and shelved date"""
        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            shelf = self.user.shelf_set.filter(identifier="to-read").first()
            models.ShelfBook.objects.create(
                shelf=shelf,
                user=self.user,
                book=self.book,
                shelved_date=make_date(2020, 2, 2),
            )

        import_item = self._create_import_item(0)

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertEqual(
            shelf.shelfbook_set.first().shelved_date, make_date(2020, 2, 2)
        )
        # the import must not add the book to the "read" shelf as well
        self.assertIsNone(self.user.shelf_set.get(identifier="read").books.first())

        readthrough = models.ReadThrough.objects.get(user=self.user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2020, 10, 21))
        self.assertEqual(readthrough.finish_date, make_date(2020, 10, 25))

    def test_handle_import_twice(self, *_):
        """handling the same item twice still leaves a single readthrough"""
        shelf = self.user.shelf_set.filter(identifier="read").first()
        import_item = self._create_import_item(0)

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertEqual(
            shelf.shelfbook_set.first().shelved_date, make_date(2020, 10, 21)
        )

        # `.get` raises if the second call had created another readthrough
        readthrough = models.ReadThrough.objects.get(user=self.user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2020, 10, 21))
        self.assertEqual(readthrough.finish_date, make_date(2020, 10, 25))

    @patch("bookwyrm.activitystreams.add_status_task.delay")
    def test_handle_imported_book_review(self, *_):
        """with reviews enabled, a row with review text creates a Review"""
        import_item = self._create_import_item(2)

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, True, "unlisted"
            )

        review = models.Review.objects.get(book=self.book, user=self.user)
        self.assertEqual(review.content, "mixed feelings")
        self.assertEqual(review.rating, 2)
        self.assertEqual(review.published_date, make_date(2019, 7, 8))
        self.assertEqual(review.privacy, "unlisted")

    @patch("bookwyrm.activitystreams.add_status_task.delay")
    def test_handle_imported_book_rating(self, *_):
        """with reviews enabled, the rating sample creates a ReviewRating"""
        import_item = self._create_import_item(2, "goodreads-rating.csv")

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, True, "unlisted"
            )

        review = models.ReviewRating.objects.get(book=self.book, user=self.user)
        self.assertIsInstance(review, models.ReviewRating)
        self.assertEqual(review.rating, 2)
        self.assertEqual(review.published_date, make_date(2019, 7, 8))
        self.assertEqual(review.privacy, "unlisted")

    def test_handle_imported_book_reviews_disabled(self, *_):
        """with reviews disabled, no Review is created for a reviewed row"""
        import_item = self._create_import_item(2)

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "unlisted"
            )

        self.assertFalse(
            models.Review.objects.filter(book=self.book, user=self.user).exists()
        )