From 7643293f6b41c2f5d17443f93eed32b4f012a4d4 Mon Sep 17 00:00:00 2001
From: Zed
Date: Fri, 12 Jun 2020 08:01:31 +0200
Subject: [PATCH] Fix tombstone parsing

Apparently they just got rid of the "epitaph", oh well
---
 src/formatters.nim  | 14 +-------------
 src/parserutils.nim | 18 ++----------------
 2 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/src/formatters.nim b/src/formatters.nim
index 5de8981..c5a1d80 100644
--- a/src/formatters.nim
+++ b/src/formatters.nim
@@ -1,28 +1,19 @@
-import strutils, strformat, times, uri, tables
-import xmltree, htmlparser
+import strutils, strformat, times, uri, tables, xmltree, htmlparser
 import regex
-
 import types, utils, query
 
-from unicode import Rune, `$`
-
 const
   ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
   twRegex = re"(www\.|mobile\.)?twitter\.com"
   igRegex = re"(www\.)?instagram.com"
   cards = "cards.twitter.com/cards"
   tco = "https://t.co"
-  nbsp = $Rune(0x000A0)
 
   wwwRegex = re"https?://(www[0-9]?\.)?"
   m3u8Regex = re"""url="(.+.m3u8)""""
   manifestRegex = re"(.+(.ts|.m3u8|.vmap))"
   userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
   extRegex = re"(\.[A-z]+)$"
-  tombstoneRegex = re"\n* *Learn more"
-
-proc stripText*(text: string): string =
-  text.replace(nbsp, " ").strip()
 
 proc stripHtml*(text: string): string =
   var html = parseHtml(text)
@@ -129,9 +120,6 @@ proc getLink*(tweet: Tweet; focus=true): string =
   result = &"/{username}/status/{tweet.id}"
   if focus: result &= "#m"
 
-proc getTombstone*(text: string): string =
-  text.replace(tombstoneRegex, "").stripText().strip(chars={' ', '\n'})
-
 proc getTwitterLink*(path: string; params: Table[string, string]): string =
   let
     twitter = parseUri("https://twitter.com")
diff --git a/src/parserutils.nim b/src/parserutils.nim
index ba1681f..d0e8bce 100644
--- a/src/parserutils.nim
+++ b/src/parserutils.nim
@@ -109,22 +109,8 @@ proc getBanner*(js: JsonNode): string =
   return "#161616"
 
 proc getTombstone*(js: JsonNode): string =
-  let epitaph = js{"epitaph"}.getStr
-  case epitaph
-  of "Suspended":
-    result = "This tweet is from a suspended account."
-  of "Protected":
-    result = "This account owner limits who can view their tweets."
-  of "Missing":
-    result = "This tweet is unavailable."
-  of "Deactivated":
-    result = "This tweet is from an account that no longer exists."
-  of "Bounced", "BounceDeleted":
-    result = "This tweet violated the Twitter rules."
-  else:
-    result = js{"tombstoneInfo", "richText", "text"}.getStr
-    if epitaph.len > 0 or result.len > 0:
-      echo "Unknown tombstone (", epitaph, "): ", result
+  result = js{"tombstoneInfo", "richText", "text"}.getStr
+  result.removeSuffix(" Learn more")
 
 template getSlice(text: string; slice: seq[int]): string =
   text.runeSubStr(slice[0], slice[1] - slice[0])