nitter/src/parser.nim

181 lines
5.6 KiB
Nim
Raw Normal View History

2019-06-24 03:14:14 +00:00
import xmltree, sequtils, strtabs, strutils, strformat, json
2019-06-20 14:16:20 +00:00
import types, parserutils, formatters
2019-06-20 14:16:20 +00:00
2019-06-21 00:15:46 +00:00
proc parsePopupProfile*(node: XmlNode): Profile =
  ## Builds a `Profile` from the `.profile-card` element selected under
  ## `node`.
  ##
  ## When the selector yields `nil`, the implicit zero-initialised `result`
  ## is returned unchanged. Otherwise the name, handle, bio, avatar,
  ## verified/protected flags and banner are extracted through the helper
  ## procs, and `getPopupStats` is then given the profile and the card.
  let card = node.select(".profile-card")
  if card.isNil:
    return

  result = Profile(
    fullname: getName(card, ".fullname"),
    username: getUsername(card, ".username"),
    bio: getBio(card, ".bio"),
    userpic: getAvatar(card, ".ProfileCard-avatarImage"),
    verified: card.isVerified,
    protected: card.isProtected,
    banner: card.getBanner
  )

  getPopupStats(result, card)
2019-06-20 14:16:20 +00:00
2019-06-21 00:15:46 +00:00
proc parseIntentProfile*(profile: XmlNode): Profile =
  ## Builds a `Profile` from `profile` using the intent-page selectors.
  ##
  ## The avatar is looked up inside the `.profile.summary` element, while
  ## the verified/protected flags are `true` exactly when a `li.verified` /
  ## `li.protected` element can be selected. `getIntentStats` is then given
  ## the profile and the source node.
  let summary = profile.select(".profile.summary")

  result = Profile(
    fullname: getName(profile, "a.fn.url.alternate-context"),
    username: getUsername(profile, ".nickname"),
    bio: getBio(profile, "p.note"),
    userpic: getAvatar(summary, "img.photo"),
    verified: not profile.select("li.verified").isNil,
    protected: not profile.select("li.protected").isNil,
    banner: profile.getBanner
  )

  getIntentStats(result, profile)
2019-06-21 00:15:46 +00:00
proc parseTweetProfile*(profile: XmlNode): Profile =
  ## Builds the author `Profile` for a tweet element.
  ##
  ## The display name comes from the `data-name` attribute (passed through
  ## `stripText`) and the handle from `data-screen-name`; the avatar and the
  ## verified flag are delegated to `getAvatar` and `isVerified`.
  let
    displayName = stripText(profile.attr("data-name"))
    handle = profile.attr("data-screen-name")

  result = Profile(
    fullname: displayName,
    username: handle,
    userpic: getAvatar(profile, ".avatar"),
    verified: profile.isVerified
  )
2019-06-24 06:07:36 +00:00
proc parseQuote*(quote: XmlNode): Quote =
  ## Builds a `Quote` from a quoted-tweet element.
  ##
  ## The id is read from `data-item-id`, `hasThread` is set when a
  ## `.self-thread-context` element can be selected, and `available` is
  ## always `true` for a quote built here. The author profile is filled in
  ## afterwards, and `getQuoteMedia` is finally given the quote and the
  ## source element.
  let inThread = not quote.select(".self-thread-context").isNil

  result = Quote(
    id: quote.attr("data-item-id"),
    text: quote.getQuoteText,
    reply: quote.parseTweetReply,
    hasThread: inThread,
    available: true
  )

  # The author is assigned separately from the constructor call above.
  result.profile = Profile(
    fullname: stripText(quote.selectText(".QuoteTweet-fullname")),
    username: quote.attr("data-screen-name"),
    verified: quote.isVerified
  )

  getQuoteMedia(result, quote)
2019-06-26 16:51:21 +00:00
proc parseTweet*(node: XmlNode): Tweet =
  ## Builds a `Tweet` from the `.tweet` element selected under `node`.
  ##
  ## Returns an empty `Tweet()` when no such element is found. Otherwise
  ## the core fields are read from attributes and helper procs, media and
  ## cards are attached, and the optional `retweet` and `quote` fields are
  ## filled in when the matching markup is present.
  let elem = node.select(".tweet")
  if elem.isNil:
    return Tweet()

  result = Tweet(
    id: elem.attr("data-item-id"),
    threadId: elem.attr("data-conversation-id"),
    text: elem.getTweetText,
    time: elem.getTimestamp,
    shortTime: elem.getShortTime,
    profile: elem.parseTweetProfile,
    stats: elem.parseTweetStats,
    reply: elem.parseTweetReply,
    hasThread: not elem.select(".content > .self-thread-context").isNil,
    pinned: elem.attr("class").contains("pinned"),
    available: true
  )

  getTweetMedia(result, elem)
  getTweetCards(result, elem)

  # A non-empty retweet label marks this entry as a retweet.
  let retweeter = elem.selectText(".js-retweet-text > a > b")
  if retweeter.len > 0:
    result.retweet = some(Retweet(
      by: retweeter.stripText,
      id: elem.attr("data-retweet-id")
    ))

  let quoted = elem.select(".QuoteTweet-innerContainer")
  if not quoted.isNil:
    result.quote = some(parseQuote(quoted))

  # A tombstone whose text contains "unavailable" yields an empty quote,
  # overriding any quote parsed just above.
  let marker = elem.select(".Tombstone")
  if not marker.isNil and "unavailable" in marker.innerText():
    result.quote = some(Quote())
2019-06-24 06:07:36 +00:00
2019-07-01 01:13:12 +00:00
proc parseThread*(nodes: XmlNode): Thread =
  ## Builds a `Thread` from the children of `nodes`.
  ##
  ## A `nil` input leaves the implicit `result` at its default value. Text
  ## nodes are skipped; every other child is classified by its lower-cased
  ## `class` attribute:
  ## * containing "tombstone" or "unavailable" - an empty `Tweet()` is added
  ## * containing "morereplies" - `more` is set from `getMoreReplies`
  ## * anything else - the child is parsed with `parseTweet`
  if nodes.isNil:
    return

  result = Thread()
  for child in nodes:
    if child.kind == xnText:
      continue

    let cls = toLower(child.attr("class"))
    if cls.contains("tombstone") or cls.contains("unavailable"):
      result.tweets.add Tweet()
    elif cls.contains("morereplies"):
      result.more = getMoreReplies(child)
    else:
      result.tweets.add parseTweet(child)
2019-06-20 14:16:20 +00:00
proc parseConversation*(node: XmlNode): Conversation =
  ## Builds a `Conversation` from a page node.
  ##
  ## The main tweet comes from `.permalink-tweet-container` and the
  ## preceding thread from `.in-reply-to .stream-items`. When a
  ## `.replies-to .stream-items` element exists, each of its non-text
  ## children is classified by its lower-cased `class` attribute:
  ## * the first child, if its class contains "self" - parsed into `after`
  ## * class contains "lone" - the child itself is parsed as a thread
  ## * otherwise - the child's nested `.stream-items` is parsed as a thread
  let
    mainTweet = parseTweet(node.select(".permalink-tweet-container"))
    earlier = parseThread(node.select(".in-reply-to .stream-items"))

  result = Conversation(tweet: mainTweet, before: earlier)

  let container = node.select(".replies-to .stream-items")
  if container.isNil:
    return

  # Indices below refer to positions after text nodes have been removed.
  let entries = container.filterIt(it.kind != xnText)
  for pos, entry in entries:
    let
      cls = toLower(entry.attr("class"))
      nested = entry.select(".stream-items")

    if pos == 0 and cls.contains("self"):
      result.after = parseThread(nested)
    elif cls.contains("lone"):
      result.replies.add parseThread(entry)
    else:
      result.replies.add parseThread(nested)
2019-06-24 03:14:14 +00:00
proc parseVideo*(node: JsonNode): Video =
  ## Builds a `Video` from a JSON node holding a `track` object and a
  ## `posterImage` entry.
  ##
  ## The value of `track.contentType` selects what is extracted:
  ## * `"media_entity"` - playback type (`mp4` when `track.playbackType`
  ##   contains "mp4", `m3u8` otherwise), content id, duration, view count,
  ##   playback URL and availability
  ## * `"vmap"` - duration and vmap URL, with playback type `vmap`
  ## * anything else, including a missing `track` or `contentType` - a
  ##   message is echoed and `result` keeps its default value
  ##
  ## `thumb` is assigned from `posterImage` in every case.
  ##
  ## Lookups that only steer the parsing (`track`, `contentType`,
  ## `playbackType`, `mediaAvailability.status`) go through the nil-safe
  ## `{}` accessor and `getStr`, so an absent key yields "" instead of
  ## indexing a nil node. Fields fetched with `[]` remain mandatory and
  ## raise `KeyError` when missing.
  let
    track = node{"track"}
    cType = track{"contentType"}.getStr
    pType = track{"playbackType"}.getStr
    # Absent `mediaAvailability` or `status` simply means "not available".
    status = track{"mediaAvailability", "status"}.getStr

  case cType
  of "media_entity":
    result = Video(
      playbackType: if "mp4" in pType: mp4 else: m3u8,
      contentId: track["contentId"].to(string),
      durationMs: track["durationMs"].to(int),
      views: track["viewCount"].to(string),
      url: track["playbackUrl"].to(string),
      available: status == "available")
  of "vmap":
    result = Video(
      playbackType: vmap,
      durationMs: track["durationMs"].to(int),
      url: track["vmapUrl"].to(string))
  else:
    echo "Can't parse video of type ", cType

  # NOTE(review): on the fallback branch this writes into the default
  # `result`; that assumes `Video` is a value object - confirm in types.
  result.thumb = node["posterImage"].to(string)
2019-06-29 12:11:23 +00:00
proc parsePoll*(node: XmlNode): Poll =
  ## Builds a `Poll` from a poll element.
  ##
  ## `votes` is the first space-separated token of the stripped
  ## `.PollXChoice-footer--total` text and `status` is the
  ## `.PollXChoice-footer--time` text. For every `.PollXChoice-choice`, the
  ## non-text children of its `.PollXChoice-choice--text` element are
  ## inspected: a child without a `class` contributes an option label, and
  ## a child whose class contains "progress" contributes a numeric value.
  ## `leader` ends up as the index of the first largest value above zero.
  let
    choices = node.selectAll(".PollXChoice-choice")
    total = node.selectText(".PollXChoice-footer--total")

  result.votes = split(strip(total), " ")[0]
  result.status = node.selectText(".PollXChoice-footer--time")

  for choice in choices:
    let label = choice.select(".PollXChoice-choice--text")
    for span in label.filterIt(it.kind != xnText):
      let cls = span.attr("class")
      if cls.len == 0:
        result.options.add span.innerText()
      elif cls.contains("progress"):
        # The final character is dropped before parsing - presumably a
        # trailing percent sign; confirm against the markup.
        let shown = span.innerText()
        result.values.add parseInt(shown[0 .. ^2])

  # Only a strictly greater value replaces the current leader, so ties
  # keep the earliest index.
  var best = 0
  for idx, share in result.values:
    if share > best:
      best = share
      result.leader = idx
2019-07-04 02:18:32 +00:00
proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] =
  ## Collects one `GalleryPhoto` per `.tweet-media-img-placeholder` element
  ## found under `node`, in selection order.
  ##
  ## The image URL and tweet id come from the `data-image-url` and
  ## `data-tweet-id` attributes.
  for placeholder in node.selectAll(".tweet-media-img-placeholder"):
    # NOTE(review): the colour is read from an attribute literally named
    # "background-color", and the substring "style" in its value is
    # rewritten to "background-color". Presumably the upstream markup has
    # the attribute name and CSS property swapped - confirm before changing.
    let rawColor = placeholder.attr("background-color")
    result.add GalleryPhoto(
      url: placeholder.attr("data-image-url"),
      tweetId: placeholder.attr("data-tweet-id"),
      color: replace(rawColor, "style", "background-color")
    )