Add workaround for Twitter's layout A/B testing

Fixes #110
This commit is contained in:
Zed 2020-01-19 08:34:32 +01:00
parent 6b16ad2ce0
commit ba57511a01
10 changed files with 43 additions and 21 deletions

16
src/api/cookie.nim Normal file
View file

@ -0,0 +1,16 @@
import httpclient, strutils
func parseGuestId(cookie: string): string =
  ## Returns the value that follows `guest_id=` in `cookie` (a raw header
  ## value such as `guest_id=abc; Path=/`), or "" when the key is absent.
  const key = "guest_id="
  let keyPos = cookie.find(key)
  if keyPos >= 0:
    let first = keyPos + key.len
    # The value ends right before the next `;`, or at the end of the string
    # when no further cookie attributes follow.
    let stop = cookie.find(';', first)
    result =
      if stop < 0: cookie[first .. ^1]
      else: cookie[first ..< stop]

proc getGuestId*(): string =
  ## Requests https://twitter.com up to 11 times and returns the first
  ## `guest_id` cookie value found in the response headers, without the
  ## trailing `;` delimiter.
  ##
  ## Responses whose body contains "react-root" are skipped and the request
  ## is retried (presumably that marks the layout variant this workaround
  ## wants to avoid -- confirm against the linked issue).
  ##
  ## Returns "" when no attempt yields a guest id. Network and protocol
  ## errors are deliberately ignored and simply use up one attempt.
  let client = newHttpClient()
  for _ in 0 .. 10:
    try:
      let resp = client.get("https://twitter.com")
      if "react-root" in resp.body: continue
      for _, value in resp.headers:
        let id = parseGuestId(value)
        if id.len > 0: return id
    except CatchableError:
      # Best effort: a failed request just consumes one attempt. Defects
      # (programming errors) are no longer swallowed here.
      discard
    finally:
      # Drop the connection after every attempt, as the original did.
      try: client.close()
      except CatchableError: discard

View file

@ -54,7 +54,7 @@ proc getListMembers*(username, list, after, agent: string): Future[Result[Profil
let let
url = base / (listMembersUrl % [username, list]) url = base / (listMembersUrl % [username, list])
html = await fetchHtml(url, genHeaders(agent, url)) html = await fetchHtml(url, genHeaders(agent, url, guestId=true))
result = Result[Profile]( result = Result[Profile](
minId: html.selectAttr(".stream-container", "data-min-position"), minId: html.selectAttr(".stream-container", "data-min-position"),

View file

@ -34,7 +34,7 @@ proc getProfile*(username, agent: string): Future[Profile] {.async.} =
proc getProfileFull*(username, agent: string): Future[Profile] {.async.} = proc getProfileFull*(username, agent: string): Future[Profile] {.async.} =
let let
url = base / username url = base / username
headers = genHeaders(agent, url, auth=true) headers = genHeaders(agent, url, auth=true, guestId=true)
html = await fetchHtml(url, headers) html = await fetchHtml(url, headers)
if html == nil: return if html == nil: return

View file

@ -14,7 +14,8 @@ proc getResult*[T](json: JsonNode; query: Query; after: string): Result[T] =
beginning: after.len == 0 beginning: after.len == 0
) )
proc getSearch*[T](query: Query; after, agent: string; media=true): Future[Result[T]] {.async.} = proc getSearch*[T](query: Query; after, agent: string;
media=true): Future[Result[T]] {.async.} =
let let
kind = if query.kind == users: "users" else: "tweets" kind = if query.kind == users: "users" else: "tweets"
@ -22,7 +23,7 @@ proc getSearch*[T](query: Query; after, agent: string; media=true): Future[Resul
encoded = encodeUrl(param, usePlus=false) encoded = encodeUrl(param, usePlus=false)
referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded]) referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded])
headers = genHeaders(agent, referer, auth=true, xml=true) headers = genHeaders(agent, referer, auth=true, xml=true, guestId=true)
params = { params = {
"f": kind, "f": kind,

View file

@ -30,12 +30,12 @@ proc getProfileAndTimeline*(username, after, agent: string;
url = url ? {"max_position": after} url = url ? {"max_position": after}
let let
headers = genHeaders(agent, base / username, auth=true) headers = genHeaders(agent, base / username, auth=true, guestId=true)
html = await fetchHtml(url, headers) html = await fetchHtml(url, headers)
timeline = parseTimeline(html.select("#timeline > .stream-container"), after) timeline = parseTimeline(html.select("#timeline > .stream-container"), after)
profile = parseTimelineProfile(html) profile = parseTimelineProfile(html)
if media: await getMedia(timeline, agent) if media and profile.username.len > 0: await getMedia(timeline, agent)
result = (profile, timeline) result = (profile, timeline)
proc getTimeline*(username, after, agent: string; proc getTimeline*(username, after, agent: string;

View file

@ -8,7 +8,7 @@ proc getTweet*(username, id, after, agent: string): Future[Conversation] {.async
headers = genHeaders({ headers = genHeaders({
"pragma": "no-cache", "pragma": "no-cache",
"x-previous-page-name": "profile" "x-previous-page-name": "profile"
}, agent, base, xml=true) }, agent, base, xml=true, guestId=true)
url = base / username / tweetUrl / id ? {"max_position": after} url = base / username / tweetUrl / id ? {"max_position": after}
html = await fetchHtml(url, headers) html = await fetchHtml(url, headers)

View file

@ -2,11 +2,13 @@ import httpclient, asyncdispatch, htmlparser, options
import strutils, json, xmltree, uri import strutils, json, xmltree, uri
import ../types import ../types
import consts import consts, cookie
var guestIdCookie = "guest_id=" & getGuestId()
proc genHeaders*(headers: openArray[tuple[key: string, val: string]]; proc genHeaders*(headers: openArray[tuple[key: string, val: string]];
agent: string; referer: Uri; lang=true; agent: string; referer: Uri; lang=true;
auth=false; xml=false): HttpHeaders = auth=false; xml=false; guestId=false): HttpHeaders =
result = newHttpHeaders({ result = newHttpHeaders({
"referer": $referer, "referer": $referer,
"user-agent": agent, "user-agent": agent,
@ -16,13 +18,14 @@ proc genHeaders*(headers: openArray[tuple[key: string, val: string]];
if auth: result["authority"] = "twitter.com" if auth: result["authority"] = "twitter.com"
if lang: result["accept-language"] = consts.lang if lang: result["accept-language"] = consts.lang
if xml: result["x-requested-with"] = "XMLHttpRequest" if xml: result["x-requested-with"] = "XMLHttpRequest"
if guestId: result["cookie"] = guestIdCookie
for (key, val) in headers: for (key, val) in headers:
result[key] = val result[key] = val
proc genHeaders*(agent: string; referer: Uri; lang=true; proc genHeaders*(agent: string; referer: Uri; lang=true;
auth=false; xml=false): HttpHeaders = auth=false; xml=false; guestId=false): HttpHeaders =
genHeaders([], agent, referer, lang, auth, xml) genHeaders([], agent, referer, lang, auth, xml, guestId)
template newClient*() {.dirty.} = template newClient*() {.dirty.} =
var client = newAsyncHttpClient() var client = newAsyncHttpClient()

View file

@ -41,7 +41,8 @@ proc hasCachedProfile*(username: string): Option[Profile] =
except AssertionError, KeyError: except AssertionError, KeyError:
result = none Profile result = none Profile
proc getCachedProfile*(username, agent: string; force=false): Future[Profile] {.async.} = proc getCachedProfile*(username, agent: string;
force=false): Future[Profile] {.async.} =
withDb: withDb:
try: try:
result.getOne("lower(username) = ?", toLower(username)) result.getOne("lower(username) = ?", toLower(username))

View file

@ -20,7 +20,8 @@ proc showRss*(req: Request; hostname: string; query: Query): Future[(string, str
(profile, timeline) = (profile, timeline) =
await fetchSingleTimeline(names[0], after, getAgent(), query, media=false) await fetchSingleTimeline(names[0], after, getAgent(), query, media=false)
else: else:
timeline = await fetchMultiTimeline(names, after, getAgent(), query, media=false) let multiQuery = query.getMultiQuery(names)
timeline = await getSearch[Tweet](multiQuery, after, getAgent(), media=false)
# this is kinda dumb # this is kinda dumb
profile = Profile( profile = Profile(
username: name, username: name,

View file

@ -45,13 +45,11 @@ proc fetchSingleTimeline*(name, after, agent: string; query: Query;
if profile.username.len == 0: return if profile.username.len == 0: return
return (profile, timeline) return (profile, timeline)
proc fetchMultiTimeline*(names: seq[string]; after, agent: string; query: Query; proc getMultiQuery*(q: Query; names: seq[string]): Query =
media=true): Future[Timeline] {.async.} = result = q
var q = query result.fromUser = names
q.fromUser = names
if q.kind == posts and "replies" notin q.excludes: if q.kind == posts and "replies" notin q.excludes:
q.excludes.add "replies" result.excludes.add "replies"
return await getSearch[Tweet](q, after, agent, media)
proc get*(req: Request; key: string): string = proc get*(req: Request; key: string): string =
params(req).getOrDefault(key) params(req).getOrDefault(key)
@ -62,8 +60,10 @@ proc showTimeline*(request: Request; query: Query; cfg: Config; prefs: Prefs;
let names = getNames(request.get("name")) let names = getNames(request.get("name"))
if names.len != 1: if names.len != 1:
let timeline = await fetchMultiTimeline(names, after, agent, query) let
let html = renderTweetSearch(timeline, prefs, getPath()) multiQuery = query.getMultiQuery(names)
timeline = await getSearch[Tweet](multiQuery, after, agent)
html = renderTweetSearch(timeline, prefs, getPath())
return renderMain(html, request, cfg, "Multi", rss=rss) return renderMain(html, request, cfg, "Multi", rss=rss)
let let