2021-03-28 23:36:25 -07:00

128 lines
4 KiB
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright © 2020 Metabolist. All rights reserved.
import Foundation
#if !os(macOS)
import UIKit
import AppKit
/// A string value paired with an attributed-string representation of it.
public struct HTML: Hashable {
/// The original string, exactly as it was decoded.
public let raw: String
/// The attributed string associated with `raw`.
public let attributed: NSAttributedString
extension HTML: Codable {
/// Decodes a single string value into `raw` and derives `attributed` from it.
///
/// `attributed` is first looked up in `attributedStringCache` using `raw` as the key.
/// On a miss, `raw` is run through `HTMLParser` and the result is stored in the cache
/// under the same key.
///
/// - Parameter decoder: The decoder to read the single string value from.
/// - Throws: Any error raised while obtaining the single-value container or decoding the string.
public init(from decoder: Decoder) throws {
let container = try decoder.singleValueContainer()
raw = try container.decode(String.self)
// Reuse a previously parsed result for an identical raw string.
if let cachedAttributedString = Self.attributedStringCache.object(forKey: raw as NSString) {
attributed = cachedAttributedString
} else {
// Cache miss: parse now and remember the result for later decodes of the same string.
attributed = HTMLParser(string: raw).parse()
Self.attributedStringCache.setObject(attributed, forKey: raw as NSString)
/// Encodes this value as a single string containing `raw`; `attributed` is not written.
///
/// - Parameter encoder: The encoder to write the single string value to.
/// - Throws: Any error raised while encoding `raw`.
public func encode(to encoder: Encoder) throws {
var container = encoder.singleValueContainer()
try container.encode(raw)
private extension HTML {
/// Cache of parsed attributed strings, keyed by the raw string they were parsed from.
///
/// Declared with `let`: the cache is only mutated through its own methods
/// (`object(forKey:)` / `setObject(_:forKey:)`) and the reference itself is never
/// reassigned, so it does not need to be mutable static state.
static let attributedStringCache = NSCache<NSString, NSAttributedString>()
/// Builds an `NSAttributedString` from a markup string by feeding it to an `XMLParser`
/// and acting as that parser's delegate.
private final class HTMLParser: NSObject {
/// A link found during parsing: its target plus the range it occupies in the output text.
private struct Link: Hashable {
/// Target URL, taken from the `href` attribute of an `a` element.
let href: URL
/// Start offset in the constructed string, measured in UTF-16 code units.
let location: Int
/// Length in UTF-16 code units; stays 0 until the closing `a` tag is processed.
var length = 0
/// The input after the preprocessing done in `init(string:)`.
private let rawString: String
/// The XML parser that drives the delegate callbacks.
private let parser: XMLParser
/// Column threshold compared against `parser.columnNumber` when a `p` element ends.
private let parseStopColumn: Int
/// Plain text that becomes the content of the attributed string returned by `parse()`.
private var constructedString = ""
/// Stack of element attribute dictionaries; popped on each element end, and its last
/// entry's `class` value is consulted when character data arrives.
private var attributesStack = [[String: String]]()
/// The `a` element currently being processed, if any.
private var currentLink: Link?
/// Links that `parse()` iterates over when building the result.
private var links = Set<Link>()
/// Name of the synthetic element used to wrap the input before parsing.
private static let containerTag = "com.metabolist.metatext.container-tag"
private static let openingContainerTag = "<\(containerTag)>"
private static let closingContainerTag = "</\(containerTag)>"
/// Prepares `string` for XML parsing and creates the underlying `XMLParser`.
///
/// The input is prefixed with the opening container tag, every `<br>` is rewritten as the
/// self-closing `<br/>` (a bare `<br>` is not well-formed XML), and every `&nbsp;` entity is
/// replaced with the literal passed to the second `replacingOccurrences` call.
///
/// - Parameter string: The markup to parse.
init(string: String) {
rawString = Self.openingContainerTag
.appending(string.replacingOccurrences(of: "<br>", with: "<br/>")
.replacingOccurrences(of: "&nbsp;", with: " "))
parser = XMLParser(data: Data(rawString.utf8))
// NOTE(review): this subtracts the closing container tag's length, but no closing tag is
// appended to `rawString` in the lines shown here — confirm against the full file.
// NOTE(review): `rawString.count` counts Characters; confirm that is the same unit as
// `XMLParser.columnNumber`, which this value is compared against.
parseStopColumn = rawString.count - Self.closingContainerTag.count
parser.delegate = self
/// Produces the attributed string for the input given to `init(string:)`.
///
/// The result starts from `constructedString`; each entry in `links` then contributes its
/// `href` as a value over the range described by its `location` and `length`.
///
/// - Returns: The resulting attributed string.
func parse() -> NSAttributedString {
let attributedString = NSMutableAttributedString(string: constructedString)
for link in links {
// NOTE(review): the call these arguments belong to is not visible in this excerpt —
// presumably an `addAttribute` for the link attribute; confirm against the full file.
value: link.href,
range: .init(location: link.location, length: link.length))
return attributedString
extension HTMLParser: XMLParserDelegate {
/// Delegate callback for the start of an element.
///
/// An `a` element whose `href` attribute converts to a URL via `URL(unicodeString:)` starts a
/// new `currentLink` located at the current end of `constructedString` (in UTF-16 code units).
/// A `br` element is handled by its own branch.
///
/// - Parameters:
///   - parser: The parser reporting the element.
///   - elementName: Name of the element that was opened.
///   - namespaceURI: Unused here.
///   - qName: Unused here.
///   - attributeDict: The element's attributes; only `href` is read in the lines shown.
func parser(_ parser: XMLParser,
didStartElement elementName: String,
namespaceURI: String?,
qualifiedName qName: String?,
attributes attributeDict: [String: String] = [:]) {
// UTF-16 offsets are used so the location lines up with `NSRange` in `parse()`.
if elementName == "a", let hrefString = attributeDict["href"], let href = URL(unicodeString: hrefString) {
currentLink = Link(href: href, location: constructedString.utf16.count)
} else if elementName == "br" {
/// Delegate callback for the end of an element.
///
/// Pops the last entry of `attributesStack` and checks whether its `class` is `ellipsis`.
/// When an `a` element ends while a link is open, the link's length is computed from the
/// text built since its start and `currentLink` is cleared. A `p` element that ends before
/// `parseStopColumn` is handled by its own branch.
///
/// - Parameters:
///   - parser: The parser reporting the element; its `columnNumber` is read for `p` elements.
///   - elementName: Name of the element that was closed.
///   - namespaceURI: Unused here.
///   - qName: Unused here.
func parser(_ parser: XMLParser,
didEndElement elementName: String,
namespaceURI: String?,
qualifiedName qName: String?) {
// NOTE(review): `removeLast()` traps on an empty array; the matching push is not visible
// in this excerpt — confirm every element start pushes exactly one entry.
let attributes = attributesStack.removeLast()
if attributes["class"] == "ellipsis" {
if elementName == "a", var link = currentLink {
// `link` is a local copy of `currentLink`; its length is measured in UTF-16 code units.
// NOTE(review): storing the updated copy into `links` is not visible here — confirm.
link.length = constructedString.utf16.count - link.location
currentLink = nil
} else if elementName == "p", parser.columnNumber < parseStopColumn {
/// Delegate callback for character data.
///
/// The text is only handled when the last entry of `attributesStack` does not have a
/// `class` of `invisible` (an empty stack also passes the check).
///
/// - Parameters:
///   - parser: The parser reporting the text.
///   - string: The characters found.
func parser(_ parser: XMLParser, foundCharacters string: String) {
if attributesStack.last?["class"] != "invisible" {