Convert description from Markdown to HTML when importing from Open Library

This commit is contained in:
Joeri de Ruiter 2023-08-01 11:45:46 +02:00
parent 07aca2f62c
commit 1a215e9b9e
3 changed files with 97 additions and 3 deletions

View file

@ -1,6 +1,8 @@
""" openlibrary data connector """ """ openlibrary data connector """
import re import re
from markdown import markdown
from bookwyrm import models from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import AbstractConnector, Mapping
@ -235,8 +237,11 @@ def ignore_edition(edition_data):
def get_description(description_blob):
    """Convert an Open Library description from Markdown to HTML.

    Descriptions can be a string or a dict; for a dict the text is read
    from its "value" key.

    Returns the rendered HTML. When the whole description renders as a
    single paragraph, the surrounding <p> tag is stripped to keep the
    description a bit cleaner. A missing or empty description is returned
    unchanged instead of being passed to the Markdown renderer.
    """
    if isinstance(description_blob, dict):
        description = description_blob.get("value")
    else:
        description = description_blob

    # Nothing to render: the blob may be None or a dict without "value",
    # and markdown() expects text.
    if not description:
        return description

    html = markdown(description).strip()

    # Only unwrap when exactly one <p> encloses the entire text. Stripping
    # the outer tags from multi-paragraph output (or from a paragraph that
    # is followed by a list) would leave unbalanced HTML behind.
    if html.startswith("<p>") and html.endswith("</p>") and html.count("<p>") == 1:
        return html.removeprefix("<p>").removesuffix("</p>").strip()
    return html
def get_openlibrary_key(key): def get_openlibrary_key(key):

View file

@ -34,11 +34,15 @@ class Openlibrary(TestCase):
work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json") work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json")
edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json") edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json")
edition_md_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_markdown.json"
)
edition_list_file = pathlib.Path(__file__).parent.joinpath( edition_list_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_list.json" "../data/ol_edition_list.json"
) )
self.work_data = json.loads(work_file.read_bytes()) self.work_data = json.loads(work_file.read_bytes())
self.edition_data = json.loads(edition_file.read_bytes()) self.edition_data = json.loads(edition_file.read_bytes())
self.edition_md_data = json.loads(edition_md_file.read_bytes())
self.edition_list_data = json.loads(edition_list_file.read_bytes()) self.edition_list_data = json.loads(edition_list_file.read_bytes())
def test_get_remote_id_from_data(self): def test_get_remote_id_from_data(self):
@ -218,13 +222,44 @@ class Openlibrary(TestCase):
self.assertEqual(result.parent_work, work) self.assertEqual(result.parent_work, work)
self.assertEqual(result.title, "Sabriel") self.assertEqual(result.title, "Sabriel")
self.assertEqual(result.isbn_10, "0060273224") self.assertEqual(result.isbn_10, "0060273224")
self.assertIsNotNone(result.description) self.assertEqual(result.description, self.edition_data["description"]["value"])
self.assertEqual(result.languages[0], "English") self.assertEqual(result.languages[0], "English")
self.assertEqual(result.publishers[0], "Harper Trophy") self.assertEqual(result.publishers[0], "Harper Trophy")
self.assertEqual(result.pages, 491) self.assertEqual(result.pages, 491)
self.assertEqual(result.subjects[0], "Fantasy.") self.assertEqual(result.subjects[0], "Fantasy.")
self.assertEqual(result.physical_format, "Hardcover") self.assertEqual(result.physical_format, "Hardcover")
@responses.activate
def test_create_edition_markdown_from_data(self):
    """a Markdown description is stored as HTML on the created edition"""
    responses.add(
        responses.GET,
        "https://openlibrary.org/authors/OL10183984A",
        json={"hi": "there"},
        status=200,
    )
    parent_work = models.Work.objects.create(title="Hello")

    # Rendered form of the description in the ol_edition_markdown.json fixture
    expected_html = (
        '<blockquote>\n<p>"She didn\'t choose her garden" opens this chapbook '
        "exploring Black womanhood, mental and physical health, spirituality, and "
        "ancestral roots. It is an investigation of how to locate a self amidst "
        "complex racial history and how to forge an authentic way forward. There's "
        "internal slippage as the subject weaves between the presence and spirits "
        "of others, as well as a reckoning with the toll of navigating this world "
        "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part "
        "of the collective, beyond individuality. <em>The Stars With You</em> "
        "gives us a speculative yearning for what is to come and probes what is "
        "required to reach it.</p>\n</blockquote>\n<ul>\n<li><a "
        'href="https://store.cooperdillon.com/product/the-stars-with-you-by-'
        'stefani-cox">publisher</a></li>\n</ul>'
    )

    # Author lookup is stubbed out so only the edition fields are exercised
    with patch(
        "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data",
        return_value=[],
    ):
        edition = self.connector.create_edition_from_data(
            parent_work, self.edition_md_data
        )

    self.assertEqual(edition.description, expected_html)
def test_ignore_edition(self): def test_ignore_edition(self):
"""skip editions with poor metadata""" """skip editions with poor metadata"""
self.assertFalse(ignore_edition({"isbn_13": "hi"})) self.assertFalse(ignore_edition({"isbn_13": "hi"}))

View file

@ -0,0 +1,54 @@
{
"type": {
"key": "/type/edition"
},
"authors": [
{
"key": "/authors/OL10183984A"
}
],
"languages": [
{
"key": "/languages/eng"
}
],
"publish_date": "2022",
"publishers": [
"Cooper Dillon Books"
],
"source_records": [
"bwb:9781943899159"
],
"subjects": [
"Poetry (poetic works by one author)",
"Poetry, collections"
],
"title": "The Stars with You",
"description": {
"type": "/type/text",
"value": ">\"She didn't choose her garden\" opens this chapbook exploring Black womanhood, mental and physical health, spirituality, and ancestral roots. It is an investigation of how to locate a self amidst complex racial history and how to forge an authentic way forward. There's internal slippage as the subject weaves between the presence and spirits of others, as well as a reckoning with the toll of navigating this world as a Black woman. Yet, we also see hopefulness: a refuge in becoming part of the collective, beyond individuality. *The Stars With You* gives us a speculative yearning for what is to come and probes what is required to reach it.\r\n\r\n- [publisher](https://store.cooperdillon.com/product/the-stars-with-you-by-stefani-cox)"
},
"works": [
{
"key": "/works/OL27172905W"
}
],
"key": "/books/OL36884359M",
"identifiers": {},
"isbn_13": [
"9781943899159"
],
"classifications": {},
"physical_format": "Paperback",
"number_of_pages": 36,
"latest_revision": 3,
"revision": 3,
"created": {
"type": "/type/datetime",
"value": "2022-01-28T19:20:08.156459"
},
"last_modified": {
"type": "/type/datetime",
"value": "2023-07-30T23:42:51.589566"
}
}