From b31338dcc7ed89374e8f1e56f5a4aa6f4bca08c9 Mon Sep 17 00:00:00 2001 From: Zed Date: Sat, 29 Jun 2019 06:31:02 +0200 Subject: [PATCH] Improve thread/timeline parsing --- .gitignore | 3 ++- src/parser.nim | 26 +++++++++++++++++--------- src/parserutils.nim | 10 ++-------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 6938093..e1e4ff7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ nitter *.html *.db -tests/__pycache__ \ No newline at end of file +/tests/__pycache__ +/tests/geckodriver.log diff --git a/src/parser.nim b/src/parser.nim index 9887e26..d156761 100644 --- a/src/parser.nim +++ b/src/parser.nim @@ -81,26 +81,34 @@ proc parseTweet*(node: XmlNode): Tweet = if quote != nil: result.quote = some(parseQuote(quote)) -proc parseTweets*(node: XmlNode): Tweets = - if node == nil or node.kind == xnText: return - for n in node.selectAll(".stream-item"): - if "account" notin n.child("div").attr("class"): +proc parseTweets*(nodes: XmlNode): Tweets = + if nodes == nil: return + for n in nodes.filterIt(it.kind != xnText): + let class = n.attr("class").toLower() + if "tombstone" in class or "unavailable" in class: + result.add Tweet() + elif "morereplies" notin class: result.add parseTweet(n) proc parseConversation*(node: XmlNode): Conversation = result = Conversation( tweet: parseTweet(node.select(".permalink-tweet-container")), - before: parseTweets(node.select(".in-reply-to")) + before: parseTweets(node.select(".in-reply-to .stream-items")) ) - let replies = node.select(".replies-to", ".stream-items") + let replies = node.select(".replies-to .stream-items") if replies == nil: return for reply in replies.filterIt(it.kind != xnText): - if "selfThread" in reply.attr("class"): - result.after = parseTweets(reply.select(".stream-items")) - else: + let class = reply.attr("class").toLower() + let thread = reply.select(".stream-items") + + if "self" in class: + result.after = parseTweets(thread) + elif "lone" in class: result.replies.add parseTweets(reply) + else: + result.replies.add parseTweets(thread) proc parseVideo*(node: JsonNode): Video = let track = node{"track"} diff --git a/src/parserutils.nim b/src/parserutils.nim index 44ed652..7cff51b 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -18,11 +18,6 @@ proc select*(node: XmlNode; selector: string): XmlNode = let nodes = node.selectAll(selector) if nodes.len > 0: nodes[0] else: nil -proc select*(node: XmlNode; parent, child: string): XmlNode = - let parentNode = node.select(parent) - if parentNode == nil: return - result = parentNode.select(child) - proc selectAttr*(node: XmlNode; selector: string; attr: string): string = let res = node.select(selector) if res == nil: "" else: res.attr(attr) @@ -39,10 +34,10 @@ proc getHeader(profile: XmlNode): XmlNode = result = profile.select(".ProfileCard-userFields") proc isVerified*(profile: XmlNode): bool = - getHeader(profile).selectText(".Icon.Icon--verified").len > 0 + getHeader(profile).select(".Icon.Icon--verified") != nil proc isProtected*(profile: XmlNode): bool = - getHeader(profile).selectText(".Icon.Icon--protected").len > 0 + getHeader(profile).select(".Icon.Icon--protected") != nil proc getName*(profile: XmlNode; selector: string): string = profile.selectText(selector).stripText() @@ -92,7 +87,6 @@ proc getAvatar*(profile: XmlNode; selector: string): string = proc getBanner*(tweet: XmlNode): string = let url = tweet.selectAttr("svg > image", "xlink:href") - if url.len > 0: result = url.replace("600x200", "1500x500") else: