moviewyrm/bookwyrm/tests/importers/test_goodreads_import.py

249 lines
11 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" testing import """
2021-01-02 19:29:50 +00:00
from collections import namedtuple
2021-01-13 21:54:01 +00:00
import csv
2021-01-02 17:42:50 +00:00
import pathlib
2021-01-02 19:29:50 +00:00
from unittest.mock import patch
2021-01-02 17:42:50 +00:00
from django.test import TestCase
import responses
2021-03-30 15:56:25 +00:00
from bookwyrm import models
2021-03-30 16:04:11 +00:00
from bookwyrm.importers import GoodreadsImporter
from bookwyrm.importers.importer import import_data, handle_imported_book
2021-01-02 19:29:50 +00:00
from bookwyrm.settings import DOMAIN
2021-01-02 17:42:50 +00:00
2021-03-08 16:49:10 +00:00
2021-01-02 17:42:50 +00:00
class GoodreadsImport(TestCase):
    """ importing from goodreads csv """

    # patch target that keeps the tests from queueing broadcast tasks
    BROADCAST_TASK = "bookwyrm.models.activitypub_mixin.broadcast_task.delay"

    def setUp(self):
        """ use a test csv """
        self.importer = GoodreadsImporter()
        self.datafile = pathlib.Path(__file__).parent.joinpath(
            "../data/goodreads.csv"
        )
        self.csv = open(self.datafile, "r", encoding=self.importer.encoding)
        # the handle is shared with the tests, so close it once each test ends
        self.addCleanup(self.csv.close)

        self.user = models.User.objects.create_user(
            "mouse", "mouse@mouse.mouse", "password", local=True
        )

        models.Connector.objects.create(
            identifier=DOMAIN,
            name="Local",
            local=True,
            connector_file="self_connector",
            base_url="https://%s" % DOMAIN,
            books_url="https://%s/book" % DOMAIN,
            covers_url="https://%s/images/covers" % DOMAIN,
            search_url="https://%s/search?q=" % DOMAIN,
            priority=1,
        )
        work = models.Work.objects.create(title="Test Work")
        self.book = models.Edition.objects.create(
            title="Example Edition",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def _create_import_item(self, row):
        """Create an import job and one import item from the test csv.

        ``row`` is the zero-based position of the csv row to load. The row is
        run through the importer's ``parse_fields`` and stored as an
        ``ImportItem`` with index 0 that is already matched to ``self.book``.
        Returns the created ``ImportItem``.
        """
        import_job = models.ImportJob.objects.create(user=self.user)
        # read with the importer's encoding (as setUp does) and close the file
        with open(self.datafile, "r", encoding=self.importer.encoding) as csv_file:
            entry = list(csv.DictReader(csv_file))[row]
        entry = self.importer.parse_fields(entry)
        return models.ImportItem.objects.create(
            job_id=import_job.id, index=0, data=entry, book=self.book
        )

    def _assert_date(self, value, year, month, day):
        """ check only the year, month and day of a date-like value """
        self.assertEqual((value.year, value.month, value.day), (year, month, day))

    def _assert_first_row_readthrough(self):
        """ the user has exactly one readthrough, with the dates of csv row 0 """
        # .get() raises if the import created zero or several readthroughs
        readthrough = models.ReadThrough.objects.get(user=self.user)
        self.assertEqual(readthrough.book, self.book)
        self._assert_date(readthrough.start_date, 2020, 10, 21)
        self._assert_date(readthrough.finish_date, 2020, 10, 25)

    def test_create_job(self):
        """ creates the import job entry and checks csv """
        import_job = self.importer.create_job(self.user, self.csv, False, "public")
        self.assertEqual(import_job.user, self.user)
        self.assertEqual(import_job.include_reviews, False)
        self.assertEqual(import_job.privacy, "public")

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 3)
        expected_book_ids = ["42036538", "52691223", "28694510"]
        for index, book_id in enumerate(expected_book_ids):
            self.assertEqual(import_items[index].index, index)
            self.assertEqual(import_items[index].data["Book Id"], book_id)

    def test_create_retry_job(self):
        """ trying again with items that didn't import """
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        import_items = models.ImportItem.objects.filter(job=import_job).all()[:2]

        retry = self.importer.create_retry_job(self.user, import_job, import_items)
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.user)
        self.assertEqual(retry.include_reviews, False)
        self.assertEqual(retry.privacy, "unlisted")

        retry_items = models.ImportItem.objects.filter(job=retry).all()
        self.assertEqual(len(retry_items), 2)
        expected_book_ids = ["42036538", "52691223"]
        for index, book_id in enumerate(expected_book_ids):
            self.assertEqual(retry_items[index].index, index)
            self.assertEqual(retry_items[index].data["Book Id"], book_id)

    def test_start_import(self):
        """ begin loading books """
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        # stand-in for the object returned by the patched import_data.delay
        MockTask = namedtuple("Task", ["id"])
        mock_task = MockTask(7)
        with patch("bookwyrm.importers.importer.import_data.delay") as start:
            start.return_value = mock_task
            self.importer.start_import(import_job)
        import_job.refresh_from_db()
        self.assertEqual(import_job.task_id, "7")

    @responses.activate
    def test_import_data(self):
        """ resolve entry """
        import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
        book = models.Edition.objects.create(title="Test Book")

        with patch(
            "bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
        ) as resolve:
            resolve.return_value = book
            with patch("bookwyrm.importers.importer.handle_imported_book"):
                import_data(self.importer.service, import_job.id)

        import_item = models.ImportItem.objects.get(job=import_job, index=0)
        self.assertEqual(import_item.book.id, book.id)

    def test_handle_imported_book(self):
        """ goodreads import added a book, this adds related connections """
        shelf = self.user.shelf_set.filter(identifier="read").first()
        self.assertIsNone(shelf.books.first())

        import_item = self._create_import_item(0)

        with patch(self.BROADCAST_TASK):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self._assert_first_row_readthrough()

    def test_handle_imported_book_already_shelved(self):
        """ importing a book that is already on another shelf leaves it there """
        with patch(self.BROADCAST_TASK):
            shelf = self.user.shelf_set.filter(identifier="to-read").first()
            models.ShelfBook.objects.create(shelf=shelf, user=self.user, book=self.book)

        import_item = self._create_import_item(0)

        with patch(self.BROADCAST_TASK):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertIsNone(self.user.shelf_set.get(identifier="read").books.first())
        self._assert_first_row_readthrough()

    def test_handle_import_twice(self):
        """ re-importing books """
        shelf = self.user.shelf_set.filter(identifier="read").first()
        import_item = self._create_import_item(0)

        with patch(self.BROADCAST_TASK):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "public"
            )

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        # a second readthrough would make the .get() in the helper raise
        self._assert_first_row_readthrough()

    @patch("bookwyrm.activitystreams.ActivityStream.add_status")
    def test_handle_imported_book_review(self, _):
        """ goodreads review import """
        import_item = self._create_import_item(2)

        with patch(self.BROADCAST_TASK):
            handle_imported_book(
                self.importer.service, self.user, import_item, True, "unlisted"
            )

        review = models.Review.objects.get(book=self.book, user=self.user)
        self.assertEqual(review.content, "mixed feelings")
        self.assertEqual(review.rating, 2)
        self._assert_date(review.published_date, 2019, 7, 8)
        self.assertEqual(review.privacy, "unlisted")

    def test_handle_imported_book_reviews_disabled(self):
        """ no review is created when the import is run without reviews """
        import_item = self._create_import_item(2)

        with patch(self.BROADCAST_TASK):
            handle_imported_book(
                self.importer.service, self.user, import_item, False, "unlisted"
            )

        self.assertFalse(
            models.Review.objects.filter(book=self.book, user=self.user).exists()
        )