2019-06-20 14:16:20 +00:00
|
|
|
import httpclient, asyncdispatch, htmlparser, times
|
|
|
|
import sequtils, strutils, strformat, json, xmltree, uri
|
2019-06-26 16:51:21 +00:00
|
|
|
import regex
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-06-26 16:51:21 +00:00
|
|
|
import ./types, ./parser, ./parserutils
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-06-21 01:51:14 +00:00
|
|
|
const
  # Root URIs; the relative endpoint paths below are joined onto these.
  base = parseUri("https://twitter.com/")
  apiBase = parseUri("https://api.twitter.com/1.1/")

  # Values sent as the `User-Agent` and `Authorization` request headers.
  agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
  auth = "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"

  # Endpoint paths relative to `base`.
  profilePopupUrl = "i/profiles/popup"
  profileIntentUrl = "intent/user"
  tweetUrl = "status"

  # Endpoint paths relative to `apiBase`; `$1` in `videoUrl` is the tweet id.
  videoUrl = "videos/tweet/config/$1.json"
  tokenUrl = "guest/activate.json"

  # Timeline endpoint. `showUrl` keeps a `$1` placeholder in front of the
  # shared query string; `timelineUrl` fills it with "$1/timeline/tweets",
  # leaving a fresh `$1` that callers substitute with the username.
  timelineParams = "?include_available_features=1&include_entities=1&include_new_items_bar=false&reset_error_state=false"
  showUrl = "i/profiles/show/$1" & timelineParams
  timelineUrl = showUrl % "$1/timeline/tweets"
|
2019-06-21 01:51:14 +00:00
|
|
|
|
2019-06-24 03:29:47 +00:00
|
|
|
const
  # Number of successful uses after which the cached guest token is refreshed.
  tokenMaxUses = 230

let
  # Age after which the cached guest token is refreshed.
  tokenLifetime = initDuration(minutes=20)

var
  # Guest-token cache shared by the procs in this module.
  guestToken = ""      # last token obtained; empty until the first fetch
  tokenUses = 0        # successful uses counted against `tokenMaxUses`
  tokenUpdated: Time   # when the cache was last refreshed (zero value at start)
|
2019-06-24 03:29:47 +00:00
|
|
|
|
|
|
|
template newClient() {.dirty.} =
  ## Declares an async HTTP client named `client` at the call site, schedules
  ## it to be closed via `defer`, and gives it the request headers.
  ##
  ## The template is `dirty` on purpose: it injects `client` into the calling
  ## scope and reads a variable called `headers` that must already exist there.
  var client = newAsyncHttpClient()
  defer:
    client.close()
  client.headers = headers
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-06-24 03:29:47 +00:00
|
|
|
proc fetchHtml(url: Uri; headers: HttpHeaders; jsonKey = ""): Future[XmlNode] {.async.} =
  ## GETs `url` with `headers` and parses the response as HTML.
  ##
  ## When `jsonKey` is non-empty the response body is treated as a JSON
  ## object and the HTML is taken from the string stored under that key.
  ##
  ## Returns `nil` when the request fails, when the body is not valid JSON,
  ## or when `jsonKey` is missing or does not hold a string, so callers only
  ## need a single `isNil` check.
  newClient()  # uses the `headers` parameter, injects `client`

  var resp = ""
  try:
    resp = await client.getContent($url)
  except CatchableError:
    # Only recoverable failures are treated as "no result"; programming
    # errors (Defects) are allowed to propagate.
    return nil

  if jsonKey.len == 0:
    return parseHtml(resp)

  try:
    # `{}` yields nil instead of raising for a missing key or non-object.
    let node = parseJson(resp){jsonKey}
    if node.isNil or node.kind != JString:
      return nil
    return parseHtml(node.getStr)
  except CatchableError:
    # Body was not parseable as JSON.
    return nil
|
|
|
|
|
2019-06-24 03:14:14 +00:00
|
|
|
proc fetchJson(url: Uri; headers: HttpHeaders): Future[JsonNode] {.async.} =
  ## GETs `url` with `headers` and parses the response body as JSON.
  ##
  ## Returns `nil` when the request fails or the body is not valid JSON.
  newClient()  # uses the `headers` parameter, injects `client`

  try:
    let body = await client.getContent($url)
    result = parseJson(body)
  except CatchableError:
    # Recoverable failures (network, HTTP status, JSON syntax) become `nil`;
    # Defects are not swallowed.
    result = nil
|
|
|
|
|
2019-06-25 13:09:13 +00:00
|
|
|
proc getGuestToken(force=false): Future[string] {.async.} =
  ## Returns a guest token, reusing the cached one while it is younger than
  ## `tokenLifetime`, has fewer than `tokenMaxUses` uses and `force` is false.
  ## Otherwise a new token is requested from the `tokenUrl` endpoint.
  ##
  ## The cache (`guestToken`, `tokenUpdated`, `tokenUses`) is only updated
  ## after the request has succeeded, so a failed refresh does not mark a
  ## stale or empty token as fresh. Errors from the request or from decoding
  ## the response propagate to the caller.
  if getTime() - tokenUpdated < tokenLifetime and
     not force and tokenUses < tokenMaxUses:
    return guestToken

  # Must be named `headers`: the dirty `newClient` template reads it.
  let headers = newHttpHeaders({
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": $base,
    "User-Agent": agent,
    "Authorization": auth
  })

  newClient()

  let
    url = apiBase / tokenUrl
    json = parseJson(await client.postContent($url))

  result = json["guest_token"].to(string)

  # Commit the new token and reset the bookkeeping together.
  guestToken = result
  tokenUpdated = getTime()
  tokenUses = 0
|
2019-06-24 03:14:14 +00:00
|
|
|
|
|
|
|
proc getVideo*(tweet: Tweet; token: string) {.async.} =
  ## Fetches the video config for `tweet` and stores the parsed result in
  ## `tweet.video`. Does nothing when `tweet.video` is `none`.
  ##
  ## `token` is sent as the `x-guest-token` header. When the fetch yields no
  ## JSON and the token cache was last touched more than one second ago, a
  ## new guest token is forced and the fetch is retried with it.
  if tweet.video.isNone(): return

  let
    reqHeaders = newHttpHeaders({
      "Accept": "application/json, text/javascript, */*; q=0.01",
      "Referer": tweet.link,
      "User-Agent": agent,
      "Authorization": auth,
      "x-guest-token": token
    })
    endpoint = apiBase / (videoUrl % tweet.id)
    json = await fetchJson(endpoint, reqHeaders)

  if not json.isNil:
    tweet.video = some(parseVideo(json))
    tokenUses.inc
    return

  # No JSON came back. `tokenUpdated` is bumped before awaiting so that other
  # calls failing within the same second skip their own refresh.
  if getTime() - tokenUpdated > initDuration(seconds=1):
    tokenUpdated = getTime()
    guestToken = await getGuestToken(force=true)
    await getVideo(tweet, guestToken)
|
2019-06-24 03:14:14 +00:00
|
|
|
|
2019-06-25 13:09:13 +00:00
|
|
|
proc getVideos*(tweets: Tweets; token="") {.async.} =
  ## Fetches video data concurrently for every tweet in `tweets` whose
  ## `video` field is set, and completes once all fetches have finished.
  ##
  ## `token` is the guest token to use; when empty, one is obtained through
  ## `getGuestToken`.
  var gToken = token
  if gToken.len == 0:
    gToken = await getGuestToken()

  var videoFuts: seq[Future[void]]
  for tweet in tweets.filterIt(it.video.isSome):
    # Pass the resolved token, not the possibly empty `token` argument.
    videoFuts.add getVideo(tweet, gToken)

  await all(videoFuts)
|
|
|
|
|
|
|
|
proc getConversationVideos*(convo: Conversation) {.async.} =
  ## Fetches video data concurrently for the main tweet of `convo`, the tweets
  ## in `before` and `after`, and every reply thread in `replies`.
  ##
  ## One guest token is obtained up front and handed to every fetch, so all
  ## parts of the conversation use the same token.
  let token = await getGuestToken()
  var futs: seq[Future[void]]

  futs.add getVideo(convo.tweet, token)
  futs.add getVideos(convo.before, token)
  futs.add getVideos(convo.after, token)
  futs.add convo.replies.mapIt(getVideos(it, token))

  await all(futs)
|
|
|
|
|
2019-06-24 03:29:47 +00:00
|
|
|
proc getProfileFallback(username: string; headers: HttpHeaders): Future[Profile] {.async.} =
  ## Loads the profile of `username` from the `profileIntentUrl` page, using
  ## the given request `headers`.
  ##
  ## Returns an empty `Profile()` when the page could not be fetched.
  let query = {"screen_name": username}
  let page = await fetchHtml(base / profileIntentUrl ? query, headers)

  result =
    if page.isNil: Profile()
    else: parseIntentProfile(page)
|
|
|
|
|
|
|
|
proc getProfile*(username: string): Future[Profile] {.async.} =
  ## Loads the profile of `username` from the `profilePopupUrl` endpoint,
  ## whose JSON response carries the markup under the "html" key.
  ##
  ## Returns an empty `Profile()` when nothing could be fetched. When the
  ## markup contains a `.ProfileCard-sensitiveWarningContainer` element, the
  ## profile is loaded through `getProfileFallback` instead.
  let reqHeaders = newHttpHeaders({
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
    "Referer": $(base / username),
    "User-Agent": agent,
    "X-Twitter-Active-User": "yes",
    "X-Requested-With": "XMLHttpRequest",
    "Accept-Language": "en-US,en;q=0.9"
  })

  let query = {
    "screen_name": username,
    "wants_hovercard": "true",
    "_": $(epochTime().int)
  }

  let popup = await fetchHtml(base / profilePopupUrl ? query, reqHeaders,
                              jsonKey="html")

  if popup.isNil:
    result = Profile()
  elif popup.select(".ProfileCard-sensitiveWarningContainer").isNil:
    result = parsePopupProfile(popup)
  else:
    result = await getProfileFallback(username, reqHeaders)
|
|
|
|
|
2019-06-25 05:36:36 +00:00
|
|
|
proc getTimeline*(username: string; after=""): Future[Timeline] {.async.} =
  ## Fetches one page of the timeline of `username`.
  ##
  ## `after` is a pagination cursor. Only its ASCII digits are kept and sent
  ## as `max_position`; a cursor without any digits is ignored, so arbitrary
  ## text can never be appended to the request URL.
  ##
  ## Returns an empty `Timeline()` when the request fails. When the response
  ## reports a `new_latent_count` of 0, the result carries only the paging
  ## fields and no tweets. Otherwise the tweets are parsed from `items_html`
  ## and their video data is fetched before returning.
  let headers = newHttpHeaders({
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": $(base / username),
    "User-Agent": agent,
    "X-Twitter-Active-User": "yes",
    "X-Requested-With": "XMLHttpRequest",
    "Accept-Language": "en-US,en;q=0.9"
  })

  var url = timelineUrl % username

  # Strip everything but digits from the caller-supplied cursor.
  var cleanAfter = ""
  for ch in after:
    if ch in Digits:
      cleanAfter.add ch
  if cleanAfter.len > 0:
    url &= "&max_position=" & cleanAfter

  let json = await fetchJson(base / url, headers)
  if json.isNil: return Timeline()

  result = Timeline(
    hasMore: json["has_more_items"].to(bool),
    maxId: json.getOrDefault("max_position").getStr(""),
    minId: json.getOrDefault("min_position").getStr(""),
  )

  if json["new_latent_count"].to(int) == 0:
    return

  let html = parseHtml(json["items_html"].to(string))
  result.tweets = parseTweets(html)
  await getVideos(result.tweets)
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-06-27 18:13:46 +00:00
|
|
|
proc getTweet*(username: string; id: string): Future[Conversation] {.async.} =
  ## Fetches the status page of tweet `id` by `username`, parses it into a
  ## `Conversation` and then fetches the video data for its tweets.
  ##
  ## When the page cannot be fetched, the default (unset) result is returned.
  let reqHeaders = newHttpHeaders({
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": $base,
    "User-Agent": agent,
    "X-Twitter-Active-User": "yes",
    "X-Requested-With": "XMLHttpRequest",
    "Accept-Language": "en-US,en;q=0.9",
    "pragma": "no-cache",
    "x-previous-page-name": "profile"
  })

  let page = await fetchHtml(base / username / tweetUrl / id, reqHeaders)

  if not page.isNil:
    result = parseConversation(page)
    await getConversationVideos(result)
|