Faster HTML String disk cache

This commit is contained in:
Thomas Ricouard 2023-02-05 21:23:47 +01:00
parent d7bad26f04
commit 0af60c4b1e

View file

@ -2,53 +2,70 @@ import Foundation
import SwiftSoup import SwiftSoup
import SwiftUI import SwiftUI
/// Keys of the keyed (dictionary-style) coded form of `HTMLString`.
///
/// Each case names the stored property that is written under it by
/// `encode(to:)` and read back by `init(from:)`.
fileprivate enum CodingKeys: CodingKey {
  /// The original HTML source.
  case htmlValue
  /// The markdown rendition of the HTML.
  case asMarkdown
  /// The plain-text rendition of the HTML.
  case asRawText
  /// The status URLs stored alongside the text.
  case statusesURLs
}
public struct HTMLString: Codable, Equatable, Hashable { public struct HTMLString: Codable, Equatable, Hashable {
public var htmlValue: String = "" public var htmlValue: String = ""
public var asMarkdown: String = "" public var asMarkdown: String = ""
public var asRawText: String = "" public var asRawText: String = ""
public var statusesURLs = [URL]() public var statusesURLs = [URL]()
public var asSafeMarkdownAttributedString: AttributedString = .init() public var asSafeMarkdownAttributedString: AttributedString = .init()
private var regex: NSRegularExpression? private var regex: NSRegularExpression?
public init(from decoder: Decoder) { public init(from decoder: Decoder) {
var alreadyDecoded: Bool = false
do { do {
let container = try decoder.singleValueContainer() let container = try decoder.singleValueContainer()
htmlValue = try container.decode(String.self) htmlValue = try container.decode(String.self)
} catch { } catch {
htmlValue = "" do {
alreadyDecoded = true
let container = try decoder.container(keyedBy: CodingKeys.self)
htmlValue = try container.decode(String.self, forKey: .htmlValue)
asMarkdown = try container.decode(String.self, forKey: .asMarkdown)
asRawText = try container.decode(String.self, forKey: .asRawText)
statusesURLs = try container.decode([URL].self, forKey: .statusesURLs)
} catch {
htmlValue = ""
}
} }
// https://daringfireball.net/projects/markdown/syntax if !alreadyDecoded {
// Pre-escape \ ` _ * and [ as these are the only // https://daringfireball.net/projects/markdown/syntax
// characters the markdown parser used picks up // Pre-escape \ ` _ * and [ as these are the only
// when it renders to attributed text // characters the markdown parser used picks up
regex = try? NSRegularExpression(pattern: "([\\_\\*\\`\\[\\\\])", options: .caseInsensitive) // when it renders to attributed text
regex = try? NSRegularExpression(pattern: "([\\_\\*\\`\\[\\\\])", options: .caseInsensitive)
asMarkdown = "" asMarkdown = ""
do { do {
let document: Document = try SwiftSoup.parse(htmlValue) let document: Document = try SwiftSoup.parse(htmlValue)
handleNode(node: document) handleNode(node: document)
document.outputSettings(OutputSettings().prettyPrint(pretty: false)) document.outputSettings(OutputSettings().prettyPrint(pretty: false))
try document.select("br").after("\n") try document.select("br").after("\n")
try document.select("p").after("\n\n") try document.select("p").after("\n\n")
let html = try document.html() let html = try document.html()
var text = try SwiftSoup.clean(html, "", Whitelist.none(), OutputSettings().prettyPrint(pretty: false)) ?? "" var text = try SwiftSoup.clean(html, "", Whitelist.none(), OutputSettings().prettyPrint(pretty: false)) ?? ""
// Remove the two last line break added after the last paragraph. // Remove the two last line break added after the last paragraph.
if text.hasSuffix("\n\n") { if text.hasSuffix("\n\n") {
_ = text.removeLast() _ = text.removeLast()
_ = text.removeLast() _ = text.removeLast()
}
asRawText = text
if asMarkdown.hasPrefix("\n") {
_ = asMarkdown.removeFirst()
}
} catch {
asRawText = htmlValue
} }
asRawText = text
if asMarkdown.hasPrefix("\n") {
_ = asMarkdown.removeFirst()
}
} catch {
asRawText = htmlValue
} }
do { do {
let options = AttributedString.MarkdownParsingOptions(allowsExtendedAttributes: true, let options = AttributedString.MarkdownParsingOptions(allowsExtendedAttributes: true,
interpretedSyntax: .inlineOnlyPreservingWhitespace) interpretedSyntax: .inlineOnlyPreservingWhitespace)
@ -67,8 +84,11 @@ public struct HTMLString: Codable, Equatable, Hashable {
} }
/// Encodes this value into a keyed container.
///
/// Besides the original HTML, the already-derived `asMarkdown`, `asRawText`
/// and `statusesURLs` are written too, so that `init(from:)` can read them
/// back from the keyed container instead of parsing the HTML again.
/// `asSafeMarkdownAttributedString` and the private `regex` are not encoded.
///
/// - Parameter encoder: The encoder to write the keyed container to.
/// - Throws: Any error thrown by the encoder while writing a value.
public func encode(to encoder: Encoder) throws {
  var container = encoder.container(keyedBy: CodingKeys.self)
  try container.encode(htmlValue, forKey: .htmlValue)
  try container.encode(asMarkdown, forKey: .asMarkdown)
  try container.encode(asRawText, forKey: .asRawText)
  try container.encode(statusesURLs, forKey: .statusesURLs)
}
private mutating func handleNode(node: SwiftSoup.Node) { private mutating func handleNode(node: SwiftSoup.Node) {