From 1a215e9b9eed5f69e32207e095918754eec5365c Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Tue, 1 Aug 2023 11:45:46 +0200 Subject: [PATCH] Convert description from Markdown to HTML when importing from Open Library --- bookwyrm/connectors/openlibrary.py | 9 +++- .../connectors/test_openlibrary_connector.py | 37 ++++++++++++- bookwyrm/tests/data/ol_edition_markdown.json | 54 +++++++++++++++++++ 3 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 bookwyrm/tests/data/ol_edition_markdown.json diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 0fd786660..722e05764 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -1,6 +1,8 @@ """ openlibrary data connector """ import re +from markdown import markdown + from bookwyrm import models from bookwyrm.book_search import SearchResult from .abstract_connector import AbstractConnector, Mapping @@ -235,8 +237,11 @@ def ignore_edition(edition_data): def get_description(description_blob): """descriptions can be a string or a dict""" if isinstance(description_blob, dict): - return description_blob.get("value") - return description_blob + description = description_blob.get("value") + else: + description = description_blob + # Strip the surrounding p tag to keep the description a bit cleaner + return markdown(description).removeprefix("<p>").removesuffix("</p>").strip() def get_openlibrary_key(key): diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index 01b9b9f6a..7075ccdf6 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -34,11 +34,15 @@ class Openlibrary(TestCase): work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json") edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json") + edition_md_file = pathlib.Path(__file__).parent.joinpath( + "../data/ol_edition_markdown.json" + ) edition_list_file = pathlib.Path(__file__).parent.joinpath( "../data/ol_edition_list.json" ) self.work_data = json.loads(work_file.read_bytes()) self.edition_data = json.loads(edition_file.read_bytes()) + self.edition_md_data = json.loads(edition_md_file.read_bytes()) self.edition_list_data = json.loads(edition_list_file.read_bytes()) def test_get_remote_id_from_data(self): @@ -218,13 +222,44 @@ class Openlibrary(TestCase): self.assertEqual(result.parent_work, work) self.assertEqual(result.title, "Sabriel") self.assertEqual(result.isbn_10, "0060273224") - self.assertIsNotNone(result.description) + self.assertEqual(result.description, self.edition_data["description"]["value"]) self.assertEqual(result.languages[0], "English") self.assertEqual(result.publishers[0], "Harper Trophy") self.assertEqual(result.pages, 491) self.assertEqual(result.subjects[0], "Fantasy.") self.assertEqual(result.physical_format, "Hardcover") + @responses.activate + def test_create_edition_markdown_from_data(self): + """okay but can it actually create an edition with proper metadata""" + work = models.Work.objects.create(title="Hello") + responses.add( + responses.GET, + "https://openlibrary.org/authors/OL10183984A", + json={"hi": "there"}, + status=200, + ) + with patch( + "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data" + ) as mock: + mock.return_value = 
[] + result = self.connector.create_edition_from_data(work, self.edition_md_data) + self.assertEqual( + result.description, + '
\n

"She didn\'t choose her garden" opens this chapbook ' + "exploring Black womanhood, mental and physical health, spirituality, and " + "ancestral roots. It is an investigation of how to locate a self amidst " + "complex racial history and how to forge an authentic way forward. There's " + "internal slippage as the subject weaves between the presence and spirits " + "of others, as well as a reckoning with the toll of navigating this world " + "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part " + "of the collective, beyond individuality. The Stars With You " + "gives us a speculative yearning for what is to come and probes what is " + "required to reach it.

\n
\n', + ) + def test_ignore_edition(self): """skip editions with poor metadata""" self.assertFalse(ignore_edition({"isbn_13": "hi"})) diff --git a/bookwyrm/tests/data/ol_edition_markdown.json b/bookwyrm/tests/data/ol_edition_markdown.json new file mode 100644 index 000000000..71633782c --- /dev/null +++ b/bookwyrm/tests/data/ol_edition_markdown.json @@ -0,0 +1,54 @@ +{ + "type": { + "key": "/type/edition" + }, + "authors": [ + { + "key": "/authors/OL10183984A" + } + ], + "languages": [ + { + "key": "/languages/eng" + } + ], + "publish_date": "2022", + "publishers": [ + "Cooper Dillon Books" + ], + "source_records": [ + "bwb:9781943899159" + ], + "subjects": [ + "Poetry (poetic works by one author)", + "Poetry, collections" + ], + "title": "The Stars with You", + "description": { + "type": "/type/text", + "value": ">\"She didn't choose her garden\" opens this chapbook exploring Black womanhood, mental and physical health, spirituality, and ancestral roots. It is an investigation of how to locate a self amidst complex racial history and how to forge an authentic way forward. There's internal slippage as the subject weaves between the presence and spirits of others, as well as a reckoning with the toll of navigating this world as a Black woman. Yet, we also see hopefulness: a refuge in becoming part of the collective, beyond individuality. 
*The Stars With You* gives us a speculative yearning for what is to come and probes what is required to reach it.\r\n\r\n- [publisher](https://store.cooperdillon.com/product/the-stars-with-you-by-stefani-cox)" + }, + "works": [ + { + "key": "/works/OL27172905W" + } + ], + "key": "/books/OL36884359M", + "identifiers": {}, + "isbn_13": [ + "9781943899159" + ], + "classifications": {}, + "physical_format": "Paperback", + "number_of_pages": 36, + "latest_revision": 3, + "revision": 3, + "created": { + "type": "/type/datetime", + "value": "2022-01-28T19:20:08.156459" + }, + "last_modified": { + "type": "/type/datetime", + "value": "2023-07-30T23:42:51.589566" + } +}