From aea884c48ef1a429790e8c401e98548d2d6de841 Mon Sep 17 00:00:00 2001
From: Zed <zedeus@pm.me>
Date: Fri, 3 Mar 2023 21:19:21 +0100
Subject: [PATCH] Deduplicate note tweet parsing

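---
Reviewer notes (this section sits after the `---` separator, so `git am`
does not include it in the commit message):

- The URL / media / hashtag / symbol / mention expansion that was
  duplicated in expandTweetEntities and expandNoteTweetEntities moves
  into one private helper, expandTextEntities. Both exported procs remain
  and still take (Tweet, JsonNode).
- The second parameter of expandNoteTweetEntities is renamed from
  `noteTweet` to `js`; this only matters to a caller that passes it by
  name.
- Note tweets now also pass through the helper's `media` branch, which the
  old expandNoteTweetEntities did not have. They call the helper with the
  default `replyTo=""` and `hasQuote=false`.
- Part of the helper's mention handling lies between the two
  parserutils.nim hunks and is not shown here, so whether note-tweet
  mentions are rendered exactly as before should be checked against the
  full file.
- parseTweet now calls expandTweetEntities before the pinned-thread fix
  instead of after it.
- tests/test_card.py: the expected description of the Reddit card is
  updated.
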
 src/parser.nim      |  4 +-
 src/parserutils.nim | 94 +++++++++++++++------------------------------
 tests/test_card.py  |  2 +-
 3 files changed, 35 insertions(+), 65 deletions(-)

diff --git a/src/parser.nim b/src/parser.nim
index 3858956..a491cea 100644
--- a/src/parser.nim
+++ b/src/parser.nim
@@ -204,12 +204,12 @@ proc parseTweet(js: JsonNode; jsCard: JsonNode = newJNull()): Tweet =
     )
   )
 
+  result.expandTweetEntities(js)
+
   # fix for pinned threads
   if result.hasThread and result.threadId == 0:
     result.threadId = js{"self_thread", "id_str"}.getId
 
-  result.expandTweetEntities(js)
-
   if js{"is_quote_status"}.getBool:
     result.quote = some Tweet(id: js{"quoted_status_id_str"}.getId)
 
diff --git a/src/parserutils.nim b/src/parserutils.nim
index 22c3d86..8ae9cd0 100644
--- a/src/parserutils.nim
+++ b/src/parserutils.nim
@@ -230,47 +230,37 @@ proc expandUserEntities*(user: var User; js: JsonNode) =
   user.bio = user.bio.replacef(unRegex, unReplace)
                      .replacef(htRegex, htReplace)
 
-proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
-  let
-    orig = tweet.text.toRunes
-    textRange = js{"display_text_range"}
-    textSlice = textRange{0}.getInt .. textRange{1}.getInt
-    hasQuote = js{"is_quote_status"}.getBool
-    hasCard = tweet.card.isSome
-
-  var replyTo = ""
-  if tweet.replyId != 0:
-    with reply, js{"in_reply_to_screen_name"}:
-      tweet.reply.add reply.getStr
-      replyTo = reply.getStr
-
-  let ent = ? js{"entities"}
+proc expandTextEntities(tweet: Tweet; entities: JsonNode; text: string; textSlice: Slice[int];
+                        replyTo=""; hasQuote=false) =
+  let hasCard = tweet.card.isSome
 
   var replacements = newSeq[ReplaceSlice]()
 
-  with urls, ent{"urls"}:
+  with urls, entities{"urls"}:
     for u in urls:
       let urlStr = u["url"].getStr
-      if urlStr.len == 0 or urlStr notin tweet.text:
+      if urlStr.len == 0 or urlStr notin text:
         continue
+
       replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
+
       if hasCard and u{"url"}.getStr == get(tweet.card).url:
         get(tweet.card).url = u{"expanded_url"}.getStr
 
-  with media, ent{"media"}:
+  with media, entities{"media"}:
     for m in media:
       replacements.extractUrls(m, textSlice.b, hideTwitter = true)
 
-  if "hashtags" in ent:
-    for hashtag in ent["hashtags"]:
+  if "hashtags" in entities:
+    for hashtag in entities["hashtags"]:
       replacements.extractHashtags(hashtag)
 
-  if "symbols" in ent:
-    for symbol in ent["symbols"]:
+  if "symbols" in entities:
+    for symbol in entities["symbols"]:
       replacements.extractHashtags(symbol)
 
-  if "user_mentions" in ent:
-    for mention in ent["user_mentions"]:
+  if "user_mentions" in entities:
+    for mention in entities["user_mentions"]:
       let
         name = mention{"screen_name"}.getStr
         slice = mention.extractSlice
@@ -287,47 +277,27 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
   replacements.deduplicate
   replacements.sort(cmp)
 
-  tweet.text = orig.replacedWith(replacements, textSlice)
-                   .strip(leading=false)
+  tweet.text = text.toRunes.replacedWith(replacements, textSlice).strip(leading=false)
 
-proc expandNoteTweetEntities*(tweet: Tweet; noteTweet: JsonNode) =
+proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
   let
-    text = noteTweet{"text"}.getStr
-    orig = text.toRunes
-    ent = ? noteTweet{"entity_set"}
-    hasCard = tweet.card.isSome
+    entities = ? js{"entities"}
+    hasQuote = js{"is_quote_status"}.getBool
+    textRange = js{"display_text_range"}
+    textSlice = textRange{0}.getInt .. textRange{1}.getInt
 
-  var replacements = newSeq[ReplaceSlice]()
+  var replyTo = ""
+  if tweet.replyId != 0:
+    with reply, js{"in_reply_to_screen_name"}:
+      replyTo = reply.getStr
+      tweet.reply.add replyTo
 
-  with urls, ent{"urls"}:
-    for u in urls:
-      let urlStr = u["url"].getStr
-      if urlStr.len == 0 or urlStr notin text:
-        continue
-      replacements.extractUrls(u, orig.len, hideTwitter = false)
-      if hasCard and u{"url"}.getStr == get(tweet.card).url:
-        get(tweet.card).url = u{"expanded_url"}.getStr
+  tweet.expandTextEntities(entities, tweet.text, textSlice, replyTo, hasQuote)
 
-  if "hashtags" in ent:
-    for hashtag in ent["hashtags"]:
-      replacements.extractHashtags(hashtag)
+proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) =
+  let
+    entities = ? js{"entity_set"}
+    text = js{"text"}.getStr
+    textSlice = 0..text.runeLen
 
-  if "symbols" in ent:
-    for symbol in ent["symbols"]:
-      replacements.extractHashtags(symbol)
-
-  if "user_mentions" in ent:
-    for mention in ent["user_mentions"]:
-      let
-        name = mention{"screen_name"}.getStr
-        slice = mention.extractSlice
-        idx = tweet.reply.find(name)
-
-      replacements.add ReplaceSlice(kind: rkMention, slice: slice,
-        url: "/" & name, display: mention["name"].getStr)
-
-  replacements.deduplicate
-  replacements.sort(cmp)
-
-  tweet.text = orig.replacedWith(replacements, 0..orig.len)
-                   .strip(leading=false)
+  tweet.expandTextEntities(entities, text, textSlice)
diff --git a/tests/test_card.py b/tests/test_card.py
index 77e44e1..51945d6 100644
--- a/tests/test_card.py
+++ b/tests/test_card.py
@@ -42,7 +42,7 @@ no_thumb = [
 
     ['nim_lang/status/1082989146040340480',
      'Nim in 2018: A short recap',
-     '36 votes and 46 comments so far on Reddit',
+     'Posted by u/miran1 - 36 votes and 46 comments',
      'reddit.com']
 ]