mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-11 09:45:27 +00:00
Merge pull request #2935 from jderuiter/markdown-import
Convert description from Markdown when importing from Open Library
This commit is contained in:
commit
861d3b1500
4 changed files with 121 additions and 6 deletions
|
@ -2,8 +2,11 @@
|
|||
import re
|
||||
from typing import Any, Optional, Union, Iterator, Iterable
|
||||
|
||||
from markdown import markdown
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.book_search import SearchResult
|
||||
from bookwyrm.utils.sanitizer import clean
|
||||
from .abstract_connector import AbstractConnector, Mapping, JsonDict
|
||||
from .abstract_connector import get_data, infer_physical_format, unique_physical_format
|
||||
from .connector_manager import ConnectorException, create_edition_task
|
||||
|
@ -235,11 +238,22 @@ def ignore_edition(edition_data: JsonDict) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def get_description(description_blob: Union[JsonDict, str]) -> str:
    """Convert an Open Library description from Markdown to sanitized HTML.

    descriptions can be a string or a dict; for a dict the text is read from
    its "value" key. A missing or empty value is rendered as an empty string.

    The rendered HTML is always passed through ``clean``. When the whole
    result is one paragraph, the wrapping <p> tag is removed and the inner
    markup is returned with surrounding whitespace stripped.
    """
    if isinstance(description_blob, dict):
        source = description_blob.get("value")
    else:
        source = description_blob

    # Sanitize before unwrapping so that every return path is cleaned: the
    # markdown renderer passes raw inline HTML through untouched, and the
    # single-paragraph shortcut below used to return it as-is.
    description = clean(markdown(source or ""))

    if (
        description.startswith("<p>")
        and description.endswith("</p>")
        and description.count("<p>") == 1
    ):
        # If there is just one <p> tag and it is around the text remove it
        return description[len("<p>") : -len("</p>")].strip()

    return description
|
||||
|
||||
|
||||
def get_openlibrary_key(key: str) -> str:
|
||||
|
|
|
@ -14,7 +14,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description
|
|||
from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key
|
||||
from bookwyrm.connectors.connector_manager import ConnectorException
|
||||
|
||||
|
||||
# pylint: disable=too-many-public-methods
|
||||
class Openlibrary(TestCase):
|
||||
"""test loading data from openlibrary.org"""
|
||||
|
||||
|
@ -34,11 +34,15 @@ class Openlibrary(TestCase):
|
|||
|
||||
work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json")
|
||||
edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json")
|
||||
edition_md_file = pathlib.Path(__file__).parent.joinpath(
|
||||
"../data/ol_edition_markdown.json"
|
||||
)
|
||||
edition_list_file = pathlib.Path(__file__).parent.joinpath(
|
||||
"../data/ol_edition_list.json"
|
||||
)
|
||||
self.work_data = json.loads(work_file.read_bytes())
|
||||
self.edition_data = json.loads(edition_file.read_bytes())
|
||||
self.edition_md_data = json.loads(edition_md_file.read_bytes())
|
||||
self.edition_list_data = json.loads(edition_list_file.read_bytes())
|
||||
|
||||
def test_get_remote_id_from_data(self):
|
||||
|
@ -185,6 +189,18 @@ class Openlibrary(TestCase):
|
|||
expected = "First in the Old Kingdom/Abhorsen series."
|
||||
self.assertEqual(description, expected)
|
||||
|
||||
def test_get_description_markdown_paragraphs(self):
    """two markdown paragraphs each keep their own <p> tag"""
    markdown_source = "Paragraph 1\n\nParagraph 2"
    self.assertEqual(
        get_description(markdown_source),
        "<p>Paragraph 1</p>\n<p>Paragraph 2</p>",
    )
|
||||
|
||||
def test_get_description_markdown_blockquote(self):
    """a markdown quote followed by a paragraph becomes a blockquote and a <p>"""
    markdown_source = "> Quote\n\nParagraph 2"
    self.assertEqual(
        get_description(markdown_source),
        "<blockquote>\n<p>Quote</p>\n</blockquote>\n<p>Paragraph 2</p>",
    )
|
||||
|
||||
def test_get_openlibrary_key(self):
|
||||
"""extracts the uuid"""
|
||||
key = get_openlibrary_key("/books/OL27320736M")
|
||||
|
@ -218,13 +234,44 @@ class Openlibrary(TestCase):
|
|||
self.assertEqual(result.parent_work, work)
|
||||
self.assertEqual(result.title, "Sabriel")
|
||||
self.assertEqual(result.isbn_10, "0060273224")
|
||||
self.assertIsNotNone(result.description)
|
||||
self.assertEqual(result.description, self.edition_data["description"]["value"])
|
||||
self.assertEqual(result.languages[0], "English")
|
||||
self.assertEqual(result.publishers[0], "Harper Trophy")
|
||||
self.assertEqual(result.pages, 491)
|
||||
self.assertEqual(result.subjects[0], "Fantasy.")
|
||||
self.assertEqual(result.physical_format, "Hardcover")
|
||||
|
||||
@responses.activate
def test_create_edition_markdown_from_data(self):
    """an edition built from the markdown fixture stores its description as HTML"""
    parent_work = models.Work.objects.create(title="Hello")
    responses.add(
        responses.GET,
        "https://openlibrary.org/authors/OL10183984A",
        json={"hi": "there"},
        status=200,
    )

    # The fixture's quote, emphasis and link list, as rendered to HTML.
    expected_description = (
        '<blockquote>\n<p>"She didn\'t choose her garden" opens this chapbook '
        "exploring Black womanhood, mental and physical health, spirituality, and "
        "ancestral roots. It is an investigation of how to locate a self amidst "
        "complex racial history and how to forge an authentic way forward. There's "
        "internal slippage as the subject weaves between the presence and spirits "
        "of others, as well as a reckoning with the toll of navigating this world "
        "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part "
        "of the collective, beyond individuality. <em>The Stars With You</em> "
        "gives us a speculative yearning for what is to come and probes what is "
        "required to reach it.</p>\n</blockquote>\n<ul>\n<li><a "
        'href="https://store.cooperdillon.com/product/the-stars-with-you-by-'
        'stefani-cox">publisher</a></li>\n</ul>'
    )

    # Author lookup is stubbed out so that no authors are attached.
    with patch(
        "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data",
        return_value=[],
    ):
        edition = self.connector.create_edition_from_data(
            parent_work, self.edition_md_data
        )

    self.assertEqual(edition.description, expected_description)
|
||||
|
||||
def test_ignore_edition(self):
|
||||
"""skip editions with poor metadata"""
|
||||
self.assertFalse(ignore_edition({"isbn_13": "hi"}))
|
||||
|
|
54
bookwyrm/tests/data/ol_edition_markdown.json
Normal file
54
bookwyrm/tests/data/ol_edition_markdown.json
Normal file
|
@ -0,0 +1,54 @@
|
|||
{
|
||||
"type": {
|
||||
"key": "/type/edition"
|
||||
},
|
||||
"authors": [
|
||||
{
|
||||
"key": "/authors/OL10183984A"
|
||||
}
|
||||
],
|
||||
"languages": [
|
||||
{
|
||||
"key": "/languages/eng"
|
||||
}
|
||||
],
|
||||
"publish_date": "2022",
|
||||
"publishers": [
|
||||
"Cooper Dillon Books"
|
||||
],
|
||||
"source_records": [
|
||||
"bwb:9781943899159"
|
||||
],
|
||||
"subjects": [
|
||||
"Poetry (poetic works by one author)",
|
||||
"Poetry, collections"
|
||||
],
|
||||
"title": "The Stars with You",
|
||||
"description": {
|
||||
"type": "/type/text",
|
||||
"value": ">\"She didn't choose her garden\" opens this chapbook exploring Black womanhood, mental and physical health, spirituality, and ancestral roots. It is an investigation of how to locate a self amidst complex racial history and how to forge an authentic way forward. There's internal slippage as the subject weaves between the presence and spirits of others, as well as a reckoning with the toll of navigating this world as a Black woman. Yet, we also see hopefulness: a refuge in becoming part of the collective, beyond individuality. *The Stars With You* gives us a speculative yearning for what is to come and probes what is required to reach it.\r\n\r\n- [publisher](https://store.cooperdillon.com/product/the-stars-with-you-by-stefani-cox)"
|
||||
},
|
||||
"works": [
|
||||
{
|
||||
"key": "/works/OL27172905W"
|
||||
}
|
||||
],
|
||||
"key": "/books/OL36884359M",
|
||||
"identifiers": {},
|
||||
"isbn_13": [
|
||||
"9781943899159"
|
||||
],
|
||||
"classifications": {},
|
||||
"physical_format": "Paperback",
|
||||
"number_of_pages": 36,
|
||||
"latest_revision": 3,
|
||||
"revision": 3,
|
||||
"created": {
|
||||
"type": "/type/datetime",
|
||||
"value": "2022-01-28T19:20:08.156459"
|
||||
},
|
||||
"last_modified": {
|
||||
"type": "/type/datetime",
|
||||
"value": "2023-07-30T23:42:51.589566"
|
||||
}
|
||||
}
|
|
@ -2,7 +2,7 @@
|
|||
import bleach
|
||||
|
||||
|
||||
def clean(input_text):
|
||||
def clean(input_text: str) -> str:
|
||||
"""Run through "bleach" """
|
||||
return bleach.clean(
|
||||
input_text,
|
||||
|
|
Loading…
Reference in a new issue