mirror of
https://github.com/Dimillian/IceCubesApp.git
synced 2024-06-01 07:08:17 +00:00
7caf00d07d
* Resolve escaped characters in a status: escaped characters are now returned to their original form for HTMLString.asRawText. * Unescape the markdown version too: the HTMLString.asMarkdown string is now also unescaped, so &amp; and similar entities are resolved. * Fix an internal fallback: if one of the unescape(...) calls fails, the original text (left escaped) is used instead of an empty string.
294 lines
11 KiB
Swift
294 lines
11 KiB
Swift
import Foundation
|
|
import SwiftSoup
|
|
import SwiftUI
|
|
|
|
/// Keys of the keyed representation that `HTMLString` writes in
/// `encode(to:)` and reads back in `init(from:)`.
private enum CodingKeys: CodingKey {
  case htmlValue
  case asMarkdown
  case asRawText
  case statusesURLs
  case links
}
|
|
|
|
/// An HTML fragment together with the representations derived from it:
/// a markdown rendition, a tag-free plain-text rendition, the links it
/// contains and an `AttributedString` built from the markdown.
///
/// Decoding accepts two shapes:
/// - a single string holding HTML, which is parsed with SwiftSoup to build
///   every derived representation, and
/// - the keyed form produced by `encode(to:)`, whose already-derived values
///   are restored without parsing the HTML again.
public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
  /// The HTML as it was decoded (or the raw string passed to `init(stringValue:)`).
  public var htmlValue: String = ""
  /// Markdown rendition of `htmlValue`. Markdown control characters found in
  /// text nodes are backslash-escaped so they render literally.
  public var asMarkdown: String = ""
  /// Tag-free rendition of `htmlValue` with HTML entities resolved.
  public var asRawText: String = ""
  /// Targets of `<a>` elements whose last path component parses as an integer
  /// (presumably status permalinks - confirm against callers).
  public var statusesURLs = [URL]()
  /// Every `<a>` element whose `href` could be turned into a `URL`, in document order.
  public private(set) var links = [Link]()

  /// `asMarkdown` parsed as inline markdown. Holds the plain `htmlValue` when
  /// markdown parsing fails or was not requested.
  public var asSafeMarkdownAttributedString: AttributedString = .init()

  // Escaping regexes. They are only created when HTML is actually parsed in
  // `init(from:)`; they stay nil for the keyed form and for `init(stringValue:)`.
  private var main_regex: NSRegularExpression?
  private var underscore_regex: NSRegularExpression?

  /// Decodes either a bare HTML string or the keyed form written by `encode(to:)`.
  ///
  /// This initializer never throws: if neither shape can be decoded,
  /// `htmlValue` is set to the empty string.
  public init(from decoder: Decoder) {
    var alreadyDecoded = false
    do {
      let container = try decoder.singleValueContainer()
      htmlValue = try container.decode(String.self)
    } catch {
      do {
        // Set before decoding so that a keyed decode that fails part-way
        // also skips the HTML parsing below.
        alreadyDecoded = true
        let container = try decoder.container(keyedBy: CodingKeys.self)
        htmlValue = try container.decode(String.self, forKey: .htmlValue)
        asMarkdown = try container.decode(String.self, forKey: .asMarkdown)
        asRawText = try container.decode(String.self, forKey: .asRawText)
        statusesURLs = try container.decode([URL].self, forKey: .statusesURLs)
        links = try container.decode([Link].self, forKey: .links)
      } catch {
        // NOTE(review): fields decoded before the failure keep their values;
        // only `htmlValue` is cleared here.
        htmlValue = ""
      }
    }

    if !alreadyDecoded {
      // https://daringfireball.net/projects/markdown/syntax
      // Pre-escape \ ` _ * ~ and [ as these are the only
      // characters the markdown parser uses when it renders
      // to attributed text. Note that ~ for strikethrough is
      // not documented in the syntax docs but is used by
      // AttributedString.
      main_regex = try? NSRegularExpression(pattern: "([\\*\\`\\~\\[\\\\])", options: .caseInsensitive)
      // don't escape underscores that are between colons, they are most likely custom emoji
      underscore_regex = try? NSRegularExpression(pattern: "(?!\\B:[^:]*)(_)(?![^:]*:\\B)", options: .caseInsensitive)

      asMarkdown = ""
      do {
        let document: Document = try SwiftSoup.parse(htmlValue)
        // Builds `asMarkdown`, `links` and `statusesURLs` as a side effect.
        handleNode(node: document)

        // Turn <br> and <p> into explicit line breaks before stripping all
        // tags, so the plain text keeps its paragraph structure.
        document.outputSettings(OutputSettings().prettyPrint(pretty: false))
        try document.select("br").after("\n")
        try document.select("p").after("\n\n")
        let html = try document.html()
        var text = try SwiftSoup.clean(html, "", Whitelist.none(), OutputSettings().prettyPrint(pretty: false)) ?? ""
        // Remove the two last line break added after the last paragraph.
        if text.hasSuffix("\n\n") {
          text.removeLast(2)
        }
        // Keep the still-escaped text rather than an empty string if unescaping fails.
        asRawText = (try? Entities.unescape(text)) ?? text

        if asMarkdown.hasPrefix("\n") {
          _ = asMarkdown.removeFirst()
        }
      } catch {
        asRawText = htmlValue
      }
    }

    asSafeMarkdownAttributedString = Self.makeAttributedString(markdown: asMarkdown, fallback: htmlValue)
  }

  /// Creates a value from a plain string, without any HTML parsing.
  ///
  /// - Parameters:
  ///   - stringValue: Used verbatim for `htmlValue`, `asMarkdown` and `asRawText`.
  ///   - parseMarkdown: When `true`, `stringValue` is parsed as inline markdown
  ///     for `asSafeMarkdownAttributedString`; otherwise (or if parsing fails)
  ///     the attributed string holds `stringValue` unchanged.
  public init(stringValue: String, parseMarkdown: Bool = false) {
    htmlValue = stringValue
    asMarkdown = stringValue
    asRawText = stringValue
    statusesURLs = []

    if parseMarkdown {
      asSafeMarkdownAttributedString = Self.makeAttributedString(markdown: asMarkdown, fallback: htmlValue)
    } else {
      asSafeMarkdownAttributedString = AttributedString(stringLiteral: htmlValue)
    }
  }

  /// Writes the keyed form (HTML plus the derived values) that `init(from:)`
  /// can restore without parsing the HTML again.
  public func encode(to encoder: Encoder) throws {
    var container = encoder.container(keyedBy: CodingKeys.self)
    try container.encode(htmlValue, forKey: .htmlValue)
    try container.encode(asMarkdown, forKey: .asMarkdown)
    try container.encode(asRawText, forKey: .asRawText)
    try container.encode(statusesURLs, forKey: .statusesURLs)
    try container.encode(links, forKey: .links)
  }

  /// Parses `markdown` as inline markdown (whitespace preserved), returning
  /// `fallback` as a plain attributed string when parsing throws.
  private static func makeAttributedString(markdown: String, fallback: String) -> AttributedString {
    do {
      let options = AttributedString.MarkdownParsingOptions(allowsExtendedAttributes: true,
                                                            interpretedSyntax: .inlineOnlyPreservingWhitespace)
      return try AttributedString(markdown: markdown, options: options)
    } catch {
      return AttributedString(stringLiteral: fallback)
    }
  }

  /// Recursively walks `node`, appending its markdown rendition to
  /// `asMarkdown` and recording links in `links` / `statusesURLs`.
  private mutating func handleNode(node: SwiftSoup.Node) {
    do {
      if let className = try? node.attr("class") {
        if className == "invisible" {
          // don't display
          return
        }

        if className == "ellipsis" {
          // descend into this one now and
          // append the ellipsis
          for child in node.getChildNodes() {
            handleNode(node: child)
          }
          asMarkdown += "…"
          return
        }
      }

      switch node.nodeName() {
      case "p":
        if !asMarkdown.isEmpty { // ignore first opening <p>
          asMarkdown += "\n\n"
        }

      case "br":
        if !asMarkdown.isEmpty { // ignore first opening <br>
          asMarkdown += "\n"
        }

      case "a":
        let href = try node.attr("href")
        if !href.isEmpty,
           let url = URL(string: href),
           Int(url.lastPathComponent) != nil
        {
          statusesURLs.append(url)
        }

        asMarkdown += "["
        let start = asMarkdown.endIndex
        // descend into this node now so we can wrap the
        // inner part of the link in the right markup
        for child in node.getChildNodes() {
          handleNode(node: child)
        }
        let finish = asMarkdown.endIndex

        var linkRef = href

        // Try creating a URL from the string. If it fails, try URL encoding
        // the string first.
        if let linkUrl = URL(string: href) ?? URL(string: href, encodePath: true) {
          linkRef = linkUrl.absoluteString
          let displayString = asMarkdown[start ..< finish]
          links.append(Link(linkUrl, displayString: String(displayString)))
        }

        asMarkdown += "]("
        asMarkdown += linkRef
        asMarkdown += ")"
        return

      case "#text":
        var txt = node.description

        // Resolve entities; keep the escaped text if that fails.
        txt = (try? Entities.unescape(txt)) ?? txt

        if let underscore_regex, let main_regex {
          // This is the markdown escaper.
          // NSRange is measured in UTF-16 code units, so the range must be
          // derived from the string itself. Using the Character count would
          // leave the tail of any text containing emoji unescaped.
          txt = main_regex.stringByReplacingMatches(in: txt,
                                                    options: [],
                                                    range: NSRange(txt.startIndex..., in: txt),
                                                    withTemplate: "\\\\$1")
          txt = underscore_regex.stringByReplacingMatches(in: txt,
                                                          options: [],
                                                          range: NSRange(txt.startIndex..., in: txt),
                                                          withTemplate: "\\\\$1")
        }
        // Strip newlines and line separators - they should be being sent as <br>s
        asMarkdown += txt.replacingOccurrences(of: "\n", with: "").replacingOccurrences(of: "\u{2028}", with: "")

      case "ul":
        // Unordered (bulleted) list
        // SwiftUI's Text won't display these in an AttributedString, but we can at least improve the appearance
        asMarkdown += "\n\n"
        for child in node.getChildNodes() {
          asMarkdown += "- "
          handleNode(node: child)
          asMarkdown += "\n"
        }
        return

      case "ol":
        // Ordered (numbered) list
        // Same thing, won't display in a Text, but this is just an attempt to improve the appearance
        asMarkdown += "\n\n"
        var curNumber = 1
        for child in node.getChildNodes() {
          asMarkdown += "\(curNumber). "
          handleNode(node: child)
          asMarkdown += "\n"
          curNumber += 1
        }
        return

      default:
        break
      }

      for child in node.getChildNodes() {
        handleNode(node: child)
      }
    } catch {
      // Best effort: a node whose attributes cannot be read is skipped and
      // whatever has been produced so far is kept.
    }
  }

  /// A link found in the HTML, classified by how its visible text starts.
  public struct Link: Codable, Hashable, Identifiable {
    public var id: Int { hashValue }
    /// The link target.
    public let url: URL
    /// The markdown text produced for the link's content.
    public let displayString: String
    /// `.mention` for text starting with "@", `.hashtag` for "#", `.url` otherwise.
    public let type: LinkType
    /// The display text for mentions, the text without its leading "#" for
    /// hashtags, and the host (without a leading "www.") for other links.
    public let title: String

    init(_ url: URL, displayString: String) {
      self.url = url
      self.displayString = displayString

      switch displayString.first {
      case "@":
        type = .mention
        title = displayString
      case "#":
        type = .hashtag
        title = String(displayString.dropFirst())
      default:
        type = .url
        var hostNameUrl = url.host ?? url.absoluteString
        if hostNameUrl.hasPrefix("www.") {
          hostNameUrl = String(hostNameUrl.dropFirst(4))
        }
        title = hostNameUrl
      }
    }

    public enum LinkType: String, Codable {
      case url
      case mention
      case hashtag
    }
  }
}
|
|
|
|
public extension URL {
  /// Creates a URL from a string that may contain characters which are not
  /// valid in a URL, percent-encoding them first.
  ///
  /// It's common to use non-ASCII characters in URLs even though they're
  /// technically invalid characters. Every modern browser handles this by
  /// silently encoding the invalid characters on the user's behalf. However,
  /// trying to create a URL object with un-encoded characters can result in
  /// nil, so the invalid characters are encoded before creating the URL
  /// object. The unencoded version should still be shown in the displayed
  /// status.
  ///
  /// Encoding is only applied when `encodePath` is `true` and the string
  /// starts with `http://` or `https://`; otherwise the string is handed to
  /// `URL(string:)` unchanged. The string is split at the first `#` and, before
  /// that, at the first `?`, and each part is encoded with its own allowed
  /// character set, so the `?` and `#` delimiters are never escaped.
  ///
  /// - Parameters:
  ///   - string: The URL string, possibly containing un-encoded characters.
  ///   - encodePath: Whether to percent-encode the string before parsing it.
  init?(string: String, encodePath: Bool) {
    guard encodePath,
          let scheme = ["http://", "https://"].first(where: { string.starts(with: $0) })
    else {
      self.init(string: string)
      return
    }

    let remainder = string.dropFirst(scheme.count)

    // A "#" always starts the fragment; a "?" before it starts the query.
    // Splitting here (instead of on every "/") keeps slashes that appear in
    // the query or fragment from dragging the "?" into a path segment.
    let fragmentStart = remainder.firstIndex(of: "#") ?? remainder.endIndex
    let beforeFragment = remainder[..<fragmentStart]
    let queryStart = beforeFragment.firstIndex(of: "?") ?? beforeFragment.endIndex

    // Everything between the scheme and the query/fragment (authority and
    // path) is encoded with the path character set, which leaves "/" intact.
    // This matches what this initializer has always produced for that part.
    let authorityAndPath = beforeFragment[..<queryStart]
    // Includes the leading "?", which is itself allowed in the query set.
    let query = beforeFragment[queryStart...]

    var encodedUrlString = scheme
    encodedUrlString += authorityAndPath.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? ""
    encodedUrlString += query.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? ""

    if fragmentStart != remainder.endIndex {
      // "#" is not part of the fragment character set, so add it back by hand.
      let fragment = remainder[remainder.index(after: fragmentStart)...]
      encodedUrlString += "#" + (fragment.addingPercentEncoding(withAllowedCharacters: .urlFragmentAllowed) ?? "")
    }

    self.init(string: encodedUrlString)
  }
}
|