Only remove surrounding p tags if there are no other p tags

This commit is contained in:
Joeri de Ruiter 2023-08-01 12:17:57 +02:00
parent 1a215e9b9e
commit 1a733746f2
2 changed files with 24 additions and 4 deletions

View file

@ -237,11 +237,19 @@ def ignore_edition(edition_data):
def get_description(description_blob):
    """Render a description to HTML.

    descriptions can be a string or a dict; for a dict the text is taken
    from its "value" key. The text is passed through ``markdown``. When the
    rendered result is exactly one paragraph, the surrounding <p> tag is
    removed to keep the description a bit cleaner; any other result
    (several paragraphs, blockquotes, ...) is returned as rendered.
    """
    if isinstance(description_blob, dict):
        description = markdown(description_blob.get("value"))
    else:
        description = markdown(description_blob)

    if (
        description.startswith("<p>")
        and description.endswith("</p>")
        and description.count("<p>") == 1
        # A paragraph whose opening tag carries attributes (for example
        # <p class="x">) is not matched by the "<p>" count above, but it
        # still has a closing tag. Requiring a single "</p>" as well keeps
        # us from slicing such output into unbalanced HTML.
        and description.count("</p>") == 1
    ):
        # If there is just one <p> tag around the text remove it
        return description[len("<p>") : -len("</p>")].strip()

    return description
def get_openlibrary_key(key):

View file

@ -189,6 +189,18 @@ class Openlibrary(TestCase):
expected = "First in the Old Kingdom/Abhorsen series."
self.assertEqual(description, expected)
def test_get_description_markdown_paragraphs(self):
    """two markdown paragraphs should both keep their p tags"""
    rendered = get_description("Paragraph 1\n\nParagraph 2")
    self.assertEqual(rendered, "<p>Paragraph 1</p>\n<p>Paragraph 2</p>")
def test_get_description_markdown_blockquote(self):
    """a blockquote followed by a paragraph should be returned as rendered"""
    source = "> Quote\n\nParagraph 2"
    rendered = get_description(source)
    self.assertEqual(
        rendered,
        "<blockquote>\n<p>Quote</p>\n</blockquote>\n<p>Paragraph 2</p>",
    )
def test_get_openlibrary_key(self):
"""extracts the uuid"""
key = get_openlibrary_key("/books/OL27320736M")