Merge pull request #3511 from ilkka-ollakka/feat/openreads_import

importer: add openreads importer
This commit is contained in:
Hugh Rundle 2025-04-01 06:51:01 +11:00 committed by GitHub
commit 173358867a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 268 additions and 0 deletions

View file

@ -7,3 +7,4 @@ from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter
from .openlibrary_import import OpenLibraryImporter
from .storygraph_import import StorygraphImporter
from .openreads_import import OpenReadsImporter

View file

@ -0,0 +1,60 @@
""" handle reading a csv from openreads"""
from typing import Any, Optional
from datetime import datetime
from bookwyrm.models import Shelf
from . import Importer
def parse_iso_timestamp(iso_date: str | None) -> None | str:
"""Parse iso timestamp and return iso-formated date"""
if not iso_date:
return iso_date
return datetime.fromisoformat(iso_date).date().isoformat()
class OpenReadsImporter(Importer):
"""csv downloads from OpenLibrary"""
service = "OpenReads"
def __init__(self, *args: Any, **kwargs: Any):
self.row_mappings_guesses.append(("openlibrary_key", ["olid"]))
self.row_mappings_guesses.append(("pages", ["pages"]))
self.row_mappings_guesses.append(("description", ["description"]))
self.row_mappings_guesses.append(("physical_format", ["book_format"]))
self.row_mappings_guesses.append(("published_date", ["publication_year"]))
super().__init__(*args, **kwargs)
def normalize_row(
self, entry: dict[str, str], mappings: dict[str, Optional[str]]
) -> dict[str, Optional[str]]:
normalized = {k: entry.get(v) if v else None for k, v in mappings.items()}
reading_list = value.split(";") if (value := entry.get("readings")) else []
if reading_list:
if reading_dates := reading_list[0].split("|"):
normalized["date_started"] = (
parse_iso_timestamp(reading_dates[0]) or None
)
normalized["date_finished"] = (
parse_iso_timestamp(reading_dates[1]) or None
)
if date_added := normalized.get("date_added"):
normalized["date_added"] = parse_iso_timestamp(date_added)
if read_status := entry.get("status"):
match read_status:
case "finished":
normalized["shelf"] = Shelf.READ_FINISHED
case "in_progress":
normalized["shelf"] = Shelf.READING
case "abandoned":
normalized["shelf"] = Shelf.STOPPED_READING
return normalized
def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:
if normalized_row["date_finished"]:
return Shelf.READ_FINISHED
if normalized_row["date_started"]:
return Shelf.READING
return Shelf.TO_READ

View file

@ -66,6 +66,9 @@
<option value="OpenLibrary" {% if current == 'OpenLibrary' %}selected{% endif %}>
{% trans "OpenLibrary (CSV)" %}
</option>
<option value="OpenReads" {% if current == 'OpenReads' %}selected{% endif %}>
{% trans "OpenReads (CSV)" %}
</option>
<option value="Calibre" {% if current == 'Calibre' %}selected{% endif %}>
{% trans "Calibre (CSV)" %}
</option>

View file

@ -0,0 +1,9 @@
title,subtitle,author,description,status,favourite,deleted,rating,pages,publication_year,isbn,olid,tags,my_review,notes,book_format,readings,date_added,date_modified
Wild Flowers Electric Beasts,,Alina Leonova,,planned,false,false,,,2023,,,1|||||Solarpunk,,,paperback,,2024-03-01T13:01:12.937,2024-03-01T13:01:12.937
Permanent Record,,Edward Snowden,"Edward Snowden, the man who risked everything to expose the US governments system of mass surveillance, reveals for the first time the story of his life, including how he helped to build that system and what motivated him to try to bring it down.
In 2013, twenty-nine-year-old Edward Snowden shocked the world when he broke with the American intelligence establishment and revealed that the United States government was secretly pursuing the means to collect every single phone call, text message, and email. The result would be an unprecedented system of mass surveillance with the ability to pry into the private lives of every person on earth. Six years later, Snowden reveals for the very first time how he helped to build this system and why he was moved to expose it.
Spanning the bucolic Beltway suburbs of his childhood and the clandestine CIA and NSA postings of his adulthood, Permanent Record is the extraordinary account of a bright young man who grew up online a man who became a spy, a whistleblower, and, in exile, the Internets conscience. Written with wit, grace, passion, and an unflinching candor, Permanent Record is a crucial memoir of our digital age and destined to be a classic.",finished,true,false,5,,2019,9781529035650,OL27260330M,,,,hardcover,2023-10-27T00:00:00.000|2023-11-28T00:00:00.000|,2024-03-01T13:01:12.938,2024-03-01T13:01:12.938
The Divide,,Jason Hickel,,finished,false,false,4.5,347,2017,9781786090034,OL30405770M,owned,,,paperback,|2023-12-10T00:00:00.000|,2024-03-01T13:01:12.938,2024-04-06T09:13:43.335
The road to winter,,"Smith, Mark","Since a deadly virus and the violence that followed wiped out his parents and most of his community, Finn has lived alone on the rugged coast with only his loyal dog Rowdy for company. He has stayed alive for two winters-hunting and fishing and trading food, and keeping out of sight of the Wilders, an armed and dangerous gang that controls the north, led by a ruthless man named Ramage. But Finn's isolation is shattered when a girl runs onto the beach. Rose is a Siley-an asylum seeker-and she has escaped from Ramage, who had enslaved her and her younger sister, Kas. Rose is desperate, sick, and needs Finn's help. Kas is still missing somewhere out in the bush. And Ramage wants the girls back-at any cost. .... From the author Wilder Country.",finished,false,false,4,233,2016,9781925355123,OL26934980M,,,,paperback,2023-10-12T00:00:00.000|2023-11-15T00:00:00.000|,2024-03-01T13:01:12.939,2024-03-01T13:01:12.939
1 title subtitle author description status favourite deleted rating pages publication_year isbn olid tags my_review notes book_format readings date_added date_modified
2 Wild Flowers Electric Beasts Alina Leonova planned false false 2023 1|||||Solarpunk paperback 2024-03-01T13:01:12.937 2024-03-01T13:01:12.937
3 Permanent Record Edward Snowden Edward Snowden, the man who risked everything to expose the US government’s system of mass surveillance, reveals for the first time the story of his life, including how he helped to build that system and what motivated him to try to bring it down. In 2013, twenty-nine-year-old Edward Snowden shocked the world when he broke with the American intelligence establishment and revealed that the United States government was secretly pursuing the means to collect every single phone call, text message, and email. The result would be an unprecedented system of mass surveillance with the ability to pry into the private lives of every person on earth. Six years later, Snowden reveals for the very first time how he helped to build this system and why he was moved to expose it. Spanning the bucolic Beltway suburbs of his childhood and the clandestine CIA and NSA postings of his adulthood, Permanent Record is the extraordinary account of a bright young man who grew up online – a man who became a spy, a whistleblower, and, in exile, the Internet’s conscience. Written with wit, grace, passion, and an unflinching candor, Permanent Record is a crucial memoir of our digital age and destined to be a classic. finished true false 5 2019 9781529035650 OL27260330M hardcover 2023-10-27T00:00:00.000|2023-11-28T00:00:00.000| 2024-03-01T13:01:12.938 2024-03-01T13:01:12.938
4 The Divide Jason Hickel finished false false 4.5 347 2017 9781786090034 OL30405770M owned paperback |2023-12-10T00:00:00.000| 2024-03-01T13:01:12.938 2024-04-06T09:13:43.335
5 The road to winter Smith, Mark Since a deadly virus and the violence that followed wiped out his parents and most of his community, Finn has lived alone on the rugged coast with only his loyal dog Rowdy for company. He has stayed alive for two winters-hunting and fishing and trading food, and keeping out of sight of the Wilders, an armed and dangerous gang that controls the north, led by a ruthless man named Ramage. But Finn's isolation is shattered when a girl runs onto the beach. Rose is a Siley-an asylum seeker-and she has escaped from Ramage, who had enslaved her and her younger sister, Kas. Rose is desperate, sick, and needs Finn's help. Kas is still missing somewhere out in the bush. And Ramage wants the girls back-at any cost. .... From the author Wilder Country. finished false false 4 233 2016 9781925355123 OL26934980M paperback 2023-10-12T00:00:00.000|2023-11-15T00:00:00.000| 2024-03-01T13:01:12.939 2024-03-01T13:01:12.939

View file

@ -0,0 +1,192 @@
""" testing import """
import pathlib
from unittest.mock import patch
import datetime
from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import OpenReadsImporter
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
    """Build a UTC-aware datetime from positional date/time components."""
    utc = datetime.timezone.utc
    return datetime.datetime(*args, tzinfo=utc)
@patch("bookwyrm.suggested_users.rerank_suggestions_task.delay")
@patch("bookwyrm.activitystreams.populate_stream_task.delay")
@patch("bookwyrm.activitystreams.add_book_statuses_task.delay")
class OpenReadsImport(TestCase):
    """importing from openreads csv"""

    def setUp(self):
        """use a test csv"""
        self.importer = OpenReadsImporter()
        datafile = pathlib.Path(__file__).parent.joinpath(
            "../data/openreads-csv-example.csv"
        )
        # the handle must outlive setUp; it is closed again in tearDown
        # pylint: disable-next=consider-using-with
        self.csv = open(datafile, "r", encoding=self.importer.encoding)

    def tearDown(self):
        """close test csv"""
        self.csv.close()

    @classmethod
    def setUpTestData(cls):
        """populate database"""
        with (
            patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"),
            patch("bookwyrm.activitystreams.populate_stream_task.delay"),
            patch("bookwyrm.lists_stream.populate_lists_task.delay"),
        ):
            cls.local_user = models.User.objects.create_user(
                "mmai", "mmai@mmai.mmai", "password", local=True
            )
        models.SiteSettings.objects.create()
        work = models.Work.objects.create(title="Test Work")
        # same title as the second row of the example csv
        cls.book = models.Edition.objects.create(
            title="Permanent Record",
            remote_id="https://example.com/book/1",
            parent_work=work,
        )

    def test_create_job(self, *_):
        """creates the import job entry and checks csv"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        self.assertEqual(import_job.user, self.local_user)
        self.assertEqual(import_job.include_reviews, False)
        self.assertEqual(import_job.privacy, "public")

        import_items = models.ImportItem.objects.filter(job=import_job).all()
        self.assertEqual(len(import_items), 4)

        # row without isbn, readings or a mapped status
        self.assertEqual(import_items[0].index, 0)
        self.assertEqual(import_items[0].normalized_data["isbn_13"], None)
        self.assertEqual(import_items[0].normalized_data["isbn_10"], "")
        self.assertEqual(
            import_items[0].normalized_data["title"], "Wild Flowers Electric Beasts"
        )
        self.assertEqual(import_items[0].normalized_data["authors"], "Alina Leonova")
        self.assertEqual(import_items[0].normalized_data["date_added"], "2024-03-01")
        self.assertEqual(import_items[0].normalized_data["date_started"], None)
        self.assertEqual(import_items[0].normalized_data["date_finished"], None)
        self.assertEqual(import_items[0].normalized_data["shelf"], "to-read")

        # row with both a start and a finish date
        self.assertEqual(import_items[1].index, 1)
        self.assertEqual(import_items[1].normalized_data["title"], "Permanent Record")
        self.assertEqual(import_items[1].normalized_data["date_started"], "2023-10-27")
        self.assertEqual(import_items[1].normalized_data["date_finished"], "2023-11-28")
        self.assertEqual(import_items[1].normalized_data["shelf"], "read")

        # row with a finish date only
        self.assertEqual(import_items[2].index, 2)
        self.assertEqual(import_items[2].normalized_data["title"], "The Divide")
        self.assertEqual(import_items[2].normalized_data["date_started"], None)
        self.assertEqual(import_items[2].normalized_data["date_finished"], "2023-12-10")
        self.assertEqual(import_items[2].normalized_data["shelf"], "read")

        self.assertEqual(import_items[3].index, 3)
        self.assertEqual(import_items[3].normalized_data["title"], "The road to winter")
        self.assertEqual(import_items[3].normalized_data["date_started"], "2023-10-12")
        self.assertEqual(import_items[3].normalized_data["date_finished"], "2023-11-15")
        self.assertEqual(import_items[3].normalized_data["shelf"], "read")

    def test_create_retry_job(self, *_):
        """trying again with items that didn't import"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "unlisted"
        )
        import_items = models.ImportItem.objects.filter(job=import_job).all()[:2]

        retry = self.importer.create_retry_job(
            self.local_user, import_job, import_items
        )
        self.assertNotEqual(import_job, retry)
        self.assertEqual(retry.user, self.local_user)
        self.assertEqual(retry.include_reviews, False)
        self.assertEqual(retry.privacy, "unlisted")

        retry_items = models.ImportItem.objects.filter(job=retry).all()
        self.assertEqual(len(retry_items), 2)
        self.assertEqual(retry_items[0].index, 0)
        # check the retry job's own item, not the original job's
        self.assertEqual(retry_items[0].data["title"], "Wild Flowers Electric Beasts")
        self.assertEqual(retry_items[1].index, 1)
        self.assertEqual(retry_items[1].data["title"], "Permanent Record")

    def test_handle_imported_book(self, *_):
        """openreads import added a book, this adds related connections"""
        shelf = self.local_user.shelf_set.filter(
            identifier=models.Shelf.READ_FINISHED
        ).first()
        self.assertIsNone(shelf.books.first())

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.filter(index=1).first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)

        readthrough = models.ReadThrough.objects.get(user=self.local_user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2023, 10, 27))
        self.assertEqual(readthrough.finish_date, make_date(2023, 11, 28))

    def test_handle_imported_book_already_shelved(self, *_):
        """openreads import of a book that is already on another shelf"""
        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            shelf = self.local_user.shelf_set.filter(
                identifier=models.Shelf.TO_READ
            ).first()
            models.ShelfBook.objects.create(
                shelf=shelf, user=self.local_user, book=self.book
            )

        import_job = self.importer.create_job(
            self.local_user, self.csv, False, "public"
        )
        import_item = import_job.items.filter(index=1).first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        # the book stays on its existing shelf and is not added to "read"
        shelf.refresh_from_db()
        self.assertEqual(shelf.books.first(), self.book)
        self.assertIsNone(
            self.local_user.shelf_set.get(
                identifier=models.Shelf.READ_FINISHED
            ).books.first()
        )

        readthrough = models.ReadThrough.objects.get(user=self.local_user)
        self.assertEqual(readthrough.book, self.book)
        self.assertEqual(readthrough.start_date, make_date(2023, 10, 27))
        self.assertEqual(readthrough.finish_date, make_date(2023, 11, 28))

    @patch("bookwyrm.activitystreams.add_status_task.delay")
    def test_handle_imported_book_review(self, *_):
        """openreads review import"""
        import_job = self.importer.create_job(
            self.local_user, self.csv, True, "unlisted"
        )
        import_item = import_job.items.filter(index=3).first()
        import_item.book = self.book
        import_item.save()

        with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"):
            handle_imported_book(import_item)

        review = models.Review.objects.get(book=self.book, user=self.local_user)
        self.assertEqual(review.rating, 4)
        self.assertEqual(review.published_date, make_date(2023, 11, 15))
        self.assertEqual(review.privacy, "unlisted")

View file

@ -23,6 +23,7 @@ from bookwyrm.importers import (
GoodreadsImporter,
StorygraphImporter,
OpenLibraryImporter,
OpenReadsImporter,
)
from bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob
from bookwyrm.settings import PAGE_LENGTH
@ -96,6 +97,8 @@ class Import(View):
importer = StorygraphImporter()
elif source == "OpenLibrary":
importer = OpenLibraryImporter()
elif source == "OpenReads":
importer = OpenReadsImporter()
elif source == "Calibre":
importer = CalibreImporter()
else: