diff --git a/IceCubesApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/IceCubesApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index f8cc1984..4b1a80f8 100644 --- a/IceCubesApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/IceCubesApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -9,15 +9,6 @@ "version" : "1.2.0" } }, - { - "identity" : "html2markdown", - "kind" : "remoteSourceControl", - "location" : "https://gitlab.com/mflint/HTML2Markdown", - "state" : { - "revision" : "00d7a9744bbd1e7762c587bbd248775e16345a65", - "version" : "1.0.0" - } - }, { "identity" : "keychain-swift", "kind" : "remoteSourceControl", diff --git a/Packages/DesignSystem/Sources/DesignSystem/Views/EmojiText.swift b/Packages/DesignSystem/Sources/DesignSystem/Views/EmojiText.swift index 92663a03..a43c5c91 100644 --- a/Packages/DesignSystem/Sources/DesignSystem/Views/EmojiText.swift +++ b/Packages/DesignSystem/Sources/DesignSystem/Views/EmojiText.swift @@ -1,6 +1,5 @@ import EmojiText import Foundation -import HTML2Markdown import Models import SwiftUI diff --git a/Packages/Models/Package.swift b/Packages/Models/Package.swift index 17156d6d..b7e4b5ab 100644 --- a/Packages/Models/Package.swift +++ b/Packages/Models/Package.swift @@ -16,14 +16,12 @@ let package = Package( ), ], dependencies: [ - .package(url: "https://gitlab.com/mflint/HTML2Markdown", exact: "1.0.0"), .package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.4.3"), ], targets: [ .target( name: "Models", - dependencies: ["HTML2Markdown", - "SwiftSoup"] + dependencies: ["SwiftSoup"] ), .testTarget( name: "ModelsTests", diff --git a/Packages/Models/Sources/Models/Alias/HTMLString.swift b/Packages/Models/Sources/Models/Alias/HTMLString.swift index 37254f96..b69e7cca 100644 --- a/Packages/Models/Sources/Models/Alias/HTMLString.swift +++ b/Packages/Models/Sources/Models/Alias/HTMLString.swift @@ -1,15 +1,15 @@ import Foundation 
-import HTML2Markdown import SwiftSoup import SwiftUI public struct HTMLString: Decodable, Equatable, Hashable { - public var htmlValue: String - public let asMarkdown: String - public let asRawText: String - public let statusesURLs: [URL] - public let asSafeMarkdownAttributedString: AttributedString - + public var htmlValue: String = "" + public var asMarkdown: String = "" + public var asRawText: String = "" + public var statusesURLs = [URL]() + public var asSafeMarkdownAttributedString: AttributedString = AttributedString() + private var regex: NSRegularExpression? + public init(from decoder: Decoder) { do { let container = try decoder.singleValueContainer() @@ -17,53 +17,23 @@ public struct HTMLString: Decodable, Equatable, Hashable { } catch { htmlValue = "" } - + // https://daringfireball.net/projects/markdown/syntax - // HTML2Markdown only auto escapes * on the way out - // so we pre-escape \ ` _ and [ as these are the only - // other characters the markdown parser used picks up + // Pre-escape \ ` _ * and [ as these are the only + // characters the markdown parser used picks up // when it renders to attributed text - if let regex = try? NSRegularExpression(pattern: "([\\_\\`\\[\\\\])", options: .caseInsensitive) { - htmlValue = regex.stringByReplacingMatches(in: htmlValue, options: [], range: NSRange(location: 0, length: htmlValue.count), withTemplate: "\\\\$1") - } - - // match intended mastodon presentation - // strip out blah - // append ellipsis to blah - if let regex = try? NSRegularExpression(pattern: "(.*?)", options: .caseInsensitive) { - htmlValue = regex.stringByReplacingMatches(in: htmlValue, options: [], range: NSRange(location: 0, length: htmlValue.count), withTemplate: "") - } - if let regex = try? 
NSRegularExpression(pattern: "((.*?))", options: .caseInsensitive) { - htmlValue = regex.stringByReplacingMatches(in: htmlValue, options: [], range: NSRange(location: 0, length: htmlValue.count), withTemplate: "$2…") - } - - do { - asMarkdown = try HTMLParser().parse(html: htmlValue) - .toMarkdown() - .replacingOccurrences(of: ")[", with: ") [") - } catch { - asMarkdown = htmlValue - } - - var statusesURLs: [URL] = [] + regex = try? NSRegularExpression(pattern: "([\\_\\*\\`\\[\\\\])", options: .caseInsensitive) + + asMarkdown = "" do { + let document: Document = try SwiftSoup.parse(htmlValue) - let links: Elements = try document.select("a") - for link in links { - let href = try link.attr("href") - if let url = URL(string: href), - let _ = Int(url.lastPathComponent) - { - statusesURLs.append(url) - } - } + handleNode(node: document) asRawText = try document.text() } catch { asRawText = htmlValue } - - self.statusesURLs = statusesURLs - + do { let options = AttributedString.MarkdownParsingOptions(allowsExtendedAttributes: true, interpretedSyntax: .inlineOnlyPreservingWhitespace) @@ -72,7 +42,7 @@ public struct HTMLString: Decodable, Equatable, Hashable { asSafeMarkdownAttributedString = AttributedString(stringLiteral: htmlValue) } } - + public init(stringValue: String) { htmlValue = stringValue asMarkdown = stringValue @@ -80,4 +50,92 @@ public struct HTMLString: Decodable, Equatable, Hashable { statusesURLs = [] asSafeMarkdownAttributedString = AttributedString(stringLiteral: htmlValue) } + + private mutating func handleNode(node: SwiftSoup.Node ) { + + + do { + if let className = try? node.attr("class") { + if className == "invisible" { + // don't display + return + } + + if className == "ellipsis" { + // descend into this one now and + // append the ellipsis + for nn in node.getChildNodes() { + handleNode(node: nn) + } + asMarkdown += "…" + return + } + } + + if node.nodeName() == "p" { + if asMarkdown.count > 0 { // ignore first opening
+ asMarkdown += "\n\n"
+ }
+ }
+ else if node.nodeName() == "br" {
+ if asMarkdown.count > 0 { // ignore first opening
+
+ // some code to try and stop double carriage returns where they aren't required
+ // not perfect but effective in almost all cases
+ if !asMarkdown.hasSuffix("\n") && !asMarkdown.hasSuffix("\u{2028}") {
+ if let next = node.nextSibling() {
+ if next.nodeName() == "#text" && (next.description.hasPrefix("\n") || next.description.hasPrefix("\u{2028}")) {
+ // do nothing
+ }
+ else {
+ asMarkdown += "\n"
+ }
+ }
+ }
+ }
+ }
+ else if node.nodeName() == "a" {
+ let href = try node.attr("href")
+ if href != "" {
+ if let url = URL(string: href),
+ let _ = Int(url.lastPathComponent)
+ {
+ statusesURLs.append(url)
+ }
+ }
+ asMarkdown += "["
+ // descend into this node now so we can wrap the
+ // inner part of the link in the right markup
+ for nn in node.getChildNodes() {
+ handleNode(node: nn)
+ }
+ asMarkdown += "]("
+ asMarkdown += href
+ asMarkdown += ")"
+ return
+ }
+ else if node.nodeName() == "#text" {
+
+ var txt = node.description
+
+ if let regex {
+ // This is the markdown escaper
+ txt = regex.stringByReplacingMatches(in: txt, options: [], range: NSRange(location: 0, length: txt.count), withTemplate: "\\\\$1")
+ }
+
+ asMarkdown += txt
+ }
+
+ for n in node.getChildNodes() {
+ handleNode(node: n)
+ }
+
+ }
+ catch {
+
+ }
+
+ }
+
+
}