This simplifies the parser. Having read the spec more closely while investigating bug 855, I realized I was overcomplicating it. The server promises to send HTML, so we should render it as HTML rather than relying on heuristics. (#900)

This puts in line breaks exclusively where there are <br>s and takes out other line breaks that are in the body text.

*Doesn't* fix bug 855
This commit is contained in:
Gareth Simpson 2023-02-17 05:31:24 +00:00 committed by GitHub
parent 16ed93acaf
commit c1205036a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -129,17 +129,7 @@ public struct HTMLString: Codable, Equatable, Hashable {
}
} else if node.nodeName() == "br" {
if asMarkdown.count > 0 { // ignore first opening <br>
// some code to try and stop double carriage returns where they aren't required
// not perfect but effective in almost all cases
if !asMarkdown.hasSuffix("\n") && !asMarkdown.hasSuffix("\u{2028}") {
if let next = node.nextSibling() {
if next.nodeName() == "#text" && (next.description.hasPrefix("\n") || next.description.hasPrefix("\u{2028}")) {
// do nothing
} else {
asMarkdown += "\n"
}
}
}
asMarkdown += "\n"
}
} else if node.nodeName() == "a" {
let href = try node.attr("href")
@ -168,8 +158,8 @@ public struct HTMLString: Codable, Equatable, Hashable {
txt = main_regex.stringByReplacingMatches(in: txt, options: [], range: NSRange(location: 0, length: txt.count), withTemplate: "\\\\$1")
txt = underscore_regex.stringByReplacingMatches(in: txt, options: [], range: NSRange(location: 0, length: txt.count), withTemplate: "\\\\$1")
}
asMarkdown += txt
// Strip newlines and line separators - the server should send line breaks as <br>s
asMarkdown += txt.replacingOccurrences(of: "\n", with: "").replacingOccurrences(of: "\u{2028}", with: "")
}
for n in node.getChildNodes() {