mirror of
https://github.com/zedeus/nitter.git
synced 2025-01-20 21:58:08 +00:00
Add experimental user parser
This commit is contained in:
parent
fcfc1ef497
commit
cdf49dcddd
8 changed files with 270 additions and 29 deletions
|
@ -7,6 +7,7 @@
|
|||
# disable annoying warnings
|
||||
warning("GcUnsafe2", off)
|
||||
hint("XDeclaredButNotUsed", off)
|
||||
hint("XCannotRaiseY", off)
|
||||
hint("User", off)
|
||||
|
||||
const
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
import asyncdispatch, httpclient, uri, strutils
|
||||
import packedjson
|
||||
import types, query, formatters, consts, apiutils, parser
|
||||
import experimental/parser/user
|
||||
|
||||
proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
|
||||
let
|
||||
|
@ -32,14 +33,14 @@ proc getListMembers*(list: List; after=""): Future[Result[Profile]] {.async.} =
|
|||
proc getProfile*(username: string): Future[Profile] {.async.} =
|
||||
let
|
||||
ps = genParams({"screen_name": username})
|
||||
js = await fetch(userShow ? ps, Api.userShow)
|
||||
result = parseUserShow(js, username=username)
|
||||
json = await fetchRaw(userShow ? ps, Api.userShow)
|
||||
result = parseUser(json)
|
||||
|
||||
proc getProfileById*(userId: string): Future[Profile] {.async.} =
|
||||
let
|
||||
ps = genParams({"user_id": userId})
|
||||
js = await fetch(userShow ? ps, Api.userShow)
|
||||
result = parseUserShow(js, id=userId)
|
||||
json = await fetchRaw(userShow ? ps, Api.userShow)
|
||||
result = parseUser(json)
|
||||
|
||||
proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} =
|
||||
let
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
import httpclient, asyncdispatch, options, times, strutils, uri
|
||||
import packedjson, zippy
|
||||
import httpclient, asyncdispatch, options, sequtils, strutils, uri
|
||||
import jsony, packedjson, zippy
|
||||
import types, tokens, consts, parserutils, http_pool
|
||||
from experimental/types/common import Errors, ErrorObj
|
||||
|
||||
const
|
||||
rlRemaining = "x-rate-limit-remaining"
|
||||
|
@ -40,7 +41,14 @@ proc genHeaders*(token: Token = nil): HttpHeaders =
|
|||
"DNT": "1"
|
||||
})
|
||||
|
||||
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
||||
template updateToken() =
  ## Stores the rate-limit figures reported by the last response on the token
  ## that was used for the request.
  ##
  ## Expands in a scope where `api`, `resp` and `token` are visible. Nothing
  ## is recorded for `Api.search`, or when the response carries no
  ## `x-rate-limit-remaining` header.
  if api != Api.search and resp.headers.hasKey(rlRemaining):
    token.setRateLimit(
      api,
      parseInt(resp.headers[rlRemaining]),
      parseInt(resp.headers[rlReset])
    )
|
||||
|
||||
template fetchImpl(result, fetchBody) {.dirty.} =
|
||||
once:
|
||||
pool = HttpPool()
|
||||
|
||||
|
@ -48,37 +56,21 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
|||
if token.tok.len == 0:
|
||||
raise rateLimitError()
|
||||
|
||||
let headers = genHeaders(token)
|
||||
try:
|
||||
var resp: AsyncResponse
|
||||
var body = pool.use(headers):
|
||||
result = pool.use(genHeaders(token)):
|
||||
resp = await c.get($url)
|
||||
await resp.body
|
||||
|
||||
if body.len > 0:
|
||||
if result.len > 0:
|
||||
if resp.headers.getOrDefault("content-encoding") == "gzip":
|
||||
body = uncompress(body, dfGzip)
|
||||
result = uncompress(result, dfGzip)
|
||||
else:
|
||||
echo "non-gzip body, url: ", url, ", body: ", body
|
||||
echo "non-gzip body, url: ", url, ", body: ", result
|
||||
|
||||
if body.startsWith('{') or body.startsWith('['):
|
||||
result = parseJson(body)
|
||||
else:
|
||||
echo resp.status, ": ", body
|
||||
result = newJNull()
|
||||
fetchBody
|
||||
|
||||
if api != Api.search and resp.headers.hasKey(rlRemaining):
|
||||
let
|
||||
remaining = parseInt(resp.headers[rlRemaining])
|
||||
reset = parseInt(resp.headers[rlReset])
|
||||
token.setRateLimit(api, remaining, reset)
|
||||
|
||||
if result.getError notin {invalidToken, forbidden, badToken}:
|
||||
release(token, used=true)
|
||||
else:
|
||||
echo "fetch error: ", result.getError
|
||||
release(token, invalid=true)
|
||||
raise rateLimitError()
|
||||
release(token, used=true)
|
||||
|
||||
if resp.status == $Http400:
|
||||
raise newException(InternalError, $url)
|
||||
|
@ -89,3 +81,35 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
|
|||
if "length" notin e.msg and "descriptor" notin e.msg:
|
||||
release(token, invalid=true)
|
||||
raise rateLimitError()
|
||||
|
||||
proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
  ## Requests `url` through `fetchImpl` and returns the response as JSON.
  ##
  ## A body that does not start with `{` or `[` is echoed together with the
  ## response status, and a JSON null node is returned in its place. When the
  ## parsed response reports `invalidToken`, `forbidden` or `badToken`, the
  ## token is released as invalid and `rateLimitError()` is raised.
  var body: string
  fetchImpl body:
    if not (body.startsWith('{') or body.startsWith('[')):
      # Not JSON: log what came back and hand the caller a null node.
      echo resp.status, ": ", body
      result = newJNull()
    else:
      result = parseJson(body)

    updateToken()

    let error = result.getError
    if error in {invalidToken, forbidden, badToken}:
      echo "fetch error: ", error
      release(token, invalid=true)
      raise rateLimitError()
|
||||
|
||||
proc fetchRaw*(url: Uri; api: Api): Future[string] {.async.} =
  ## Requests `url` through `fetchImpl` and returns the response body as text,
  ## leaving JSON decoding to the caller.
  ##
  ## A body that does not start with `{` or `[` is echoed together with the
  ## response status and the result is emptied. When the body is an error
  ## document containing `invalidToken`, `forbidden` or `badToken`, the token
  ## is released as invalid and `rateLimitError()` is raised.
  fetchImpl result:
    if not result.startsWith('{') and not result.startsWith('['):
      echo resp.status, ": ", result
      result.setLen(0)

    updateToken()

    # Only decode the body here when it is an error document.
    if result.startsWith("{\"errors"):
      let errors = fromJson(result, Errors).errors
      if anyIt(errors, it.code in {invalidToken, forbidden, badToken}):
        echo "fetch error: ", errors
        release(token, invalid=true)
        raise rateLimitError()
|
||||
|
|
67
src/experimental/parser/slices.nim
Normal file
67
src/experimental/parser/slices.nim
Normal file
|
@ -0,0 +1,67 @@
|
|||
import std/[macros, htmlgen, unicode]
|
||||
import ../types/common
|
||||
import ".."/../[formatters, utils]
|
||||
|
||||
type
  # What `replacedWith` emits for a slice: nothing (rkRemove), a link built
  # from `url`/`display` (rkUrl), a search link for the hashtag text
  # (rkHashtag) or a link whose title is `display` (rkMention).
  ReplaceSliceKind = enum
    rkRemove, rkUrl, rkHashtag, rkMention

  # A range of rune indices in a text together with the replacement to apply.
  ReplaceSlice* = object
    # Inclusive range of rune indices that is replaced.
    slice: Slice[int]
    # Selects how `replacedWith` renders the slice.
    kind: ReplaceSliceKind
    # `url` is used as the link target; `display` is the link text for rkUrl
    # and the link title for rkMention.
    url, display: string
|
||||
|
||||
proc cmp*(x, y: ReplaceSlice): int =
  ## Orders two slices by the rune index at which they start.
  ##
  ## Returns a negative value when `x` starts before `y`, zero when both
  ## start at the same index and a positive value otherwise. Comparing start
  ## against start keeps the ordering consistent (`cmp(x, x) == 0`), which a
  ## sort comparator needs; for non-overlapping slices the resulting order is
  ## the same as comparing `x`'s start against `y`'s end.
  cmp(x.slice.a, y.slice.a)
|
||||
|
||||
proc dedupSlices*(s: var seq[ReplaceSlice]) =
  ## Leaves at most one slice per start index in `s`.
  ##
  ## For every position, any later element starting at the same index is
  ## removed. Removal uses `del`, which moves the last element into the freed
  ## position, so the relative order of the remaining elements may change.
  var keep = 0
  while keep < s.len:
    var probe = keep + 1
    while probe < s.len:
      if s[keep].slice.a == s[probe].slice.a:
        # `del` puts the former last element at `probe`; test it next.
        s.del probe
      else:
        inc probe
    inc keep
|
||||
|
||||
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
                  textLen: int; hideTwitter = false) =
  ## Appends the replacement for one URL entity to `result`.
  ##
  ## The slice covers the runes from `url.indices[0]` up to, but not
  ## including, `url.indices[1]`. Normally an `rkUrl` slice is added that
  ## links to the expanded URL and displays its `shortLink` form. With
  ## `hideTwitter` set, a Twitter URL whose slice reaches `textLen` is added
  ## as `rkRemove` instead, or skipped when it starts at or past `textLen`.
  let
    target = url.expandedUrl
    span = url.indices[0] ..< url.indices[1]
    hidden = hideTwitter and span.b.succ >= textLen and target.isTwitterUrl

  if not hidden:
    result.add ReplaceSlice(kind: rkUrl, url: target,
                            display: target.shortLink, slice: span)
  elif span.a < textLen:
    result.add ReplaceSlice(kind: rkRemove, slice: span)
|
||||
|
||||
proc replacedWith*(runes: seq[Rune]; repls: openArray[ReplaceSlice];
                   textSlice: Slice[int]): string =
  ## Renders `runes` as a string with every slice in `repls` substituted.
  ##
  ## Text between replacements is copied unchanged, starting at `textSlice.a`
  ## and stopping before `textSlice.b`. `repls` is walked in the order given,
  ## each slice is expected to start at or after the end of the previous one.
  result = newStringOfCap(runes.len)

  # Index of the first rune that has been neither copied nor replaced yet.
  var cursor = textSlice.a

  for rep in repls:
    # Plain text between the previous replacement and this one.
    result.add $runes[cursor ..< rep.slice.a]

    case rep.kind
    of rkHashtag:
      # The first rune is the symbol, the remainder is the tag name.
      let
        name = $runes[rep.slice.a.succ .. rep.slice.b]
        symbol = $runes[rep.slice.a]
      result.add a(symbol & name, href = "/search?q=%23" & name)
    of rkMention:
      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
    of rkUrl:
      result.add a(rep.display, href = rep.url)
    of rkRemove:
      discard

    cursor = rep.slice.b.succ

  # Whatever is left after the last replacement.
  let rest = cursor ..< textSlice.b
  if rest.a <= rest.b:
    result.add $runes[rest]
|
68
src/experimental/parser/user.nim
Normal file
68
src/experimental/parser/user.nim
Normal file
|
@ -0,0 +1,68 @@
|
|||
import std/[algorithm, unicode, re, strutils]
|
||||
import jsony
|
||||
import utils, slices
|
||||
import ../types/user as userType
|
||||
from ../../types import Profile, Error
|
||||
|
||||
let
  # Matches "@name" when it is at the start of the text or preceded by a
  # character that cannot be part of a URL or e-mail style token. Group 1 is
  # the preceding character (or empty), group 2 the user name.
  # The letter ranges are written as `A-Za-z`: the shorter `A-z` would also
  # cover the ASCII characters between 'Z' and 'a' ("[", "\", "]", "^", "_",
  # "`"), so "[@name]" would not be linked and "@name]" would link to "name]".
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  # Keeps the preceding character and wraps the mention in a profile link.
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Matches a tag symbol followed by word characters, under the same
  # "start of text or non-token character before it" condition. Group 2 is
  # the symbol, group 3 the tag text.
  htRegex = re"(^|[^\w-_./?])([##$])([\w_]+)"
  # Links the tag to a search for "#<tag text>" and displays symbol and text.
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
|
||||
|
||||
proc expandProfileEntities(profile: var Profile; user: User) =
  ## Fills `profile.website` and rewrites `profile.bio` as HTML.
  ##
  ## The website is the expanded form of the first URL entity, if there is
  ## one. In the bio, every URL entity of the description is replaced by a
  ## link, after which @mentions and hashtags are linked via `unRegex` and
  ## `htRegex`.
  let
    bioRunes = profile.bio.toRunes
    entities = user.entities

  if entities.url.urls.len > 0:
    profile.website = entities.url.urls[0].expandedUrl

  var links: seq[ReplaceSlice]
  for entity in entities.description.urls:
    extractUrls(links, entity, bioRunes.high)

  # `replacedWith` walks the slices in order, so they are de-duplicated by
  # start index and sorted first.
  dedupSlices(links)
  sort(links, cmp)

  let
    withLinks = bioRunes.replacedWith(links, 0 .. bioRunes.len)
    withMentions = withLinks.replacef(unRegex, unReplace)
  profile.bio = withMentions.replacef(htRegex, htReplace)
|
||||
|
||||
proc getBanner(user: User): string =
  ## Picks the banner value for a profile.
  ##
  ## Returns the banner URL with "/1500x500" appended when the user has one,
  ## otherwise the profile link color prefixed with '#', otherwise the fixed
  ## color "#161616".
  if user.profileBannerUrl.len > 0:
    user.profileBannerUrl & "/1500x500"
  elif user.profileLinkColor.len > 0:
    '#' & user.profileLinkColor
  else:
    "#161616"
|
||||
|
||||
proc parseUser*(json: string): Profile =
  ## Builds a `Profile` from the raw JSON text of a user response.
  ##
  ## When `json` is an error document, the first error code decides the
  ## outcome: `suspended` yields a profile with only `suspended` set,
  ## `userNotFound` yields a default profile, and any other code is echoed
  ## before parsing continues as for a regular response.
  handleErrors:
    case error
    of suspended: return Profile(suspended: true)
    of userNotFound: return
    else: echo "[error - parseUser]: ", error

  let
    user = json.fromJson(User)
    # Drop the "_normal" marker from the picture URL.
    picture = getImageUrl(user.profileImageUrlHttps).replace("_normal", "")

  result = Profile(
    id: user.idStr,
    username: user.screenName,
    fullname: user.name,
    location: user.location,
    bio: user.description,
    following: user.friendsCount,
    followers: user.followersCount,
    tweets: user.statusesCount,
    likes: user.favouritesCount,
    media: user.mediaCount,
    verified: user.verified,
    protected: user.protected,
    joinDate: parseTwitterDate(user.createdAt),
    banner: getBanner(user),
    userPic: picture
  )

  # Sets the website and turns the plain-text bio into linked HTML.
  expandProfileEntities(result, user)
|
22
src/experimental/parser/utils.nim
Normal file
22
src/experimental/parser/utils.nim
Normal file
|
@ -0,0 +1,22 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
import std/[sugar, strutils, times]
|
||||
import ../types/common
|
||||
import ../../utils as uutils
|
||||
|
||||
template parseTime(time: string; f: static string; flen: int): DateTime =
  ## Parses `time` as a UTC timestamp using the format string `f`.
  ##
  ## `flen` is the length a valid input must have. When `time` has a
  ## different length, the `return` leaves the proc this template is expanded
  ## in, so that proc yields its default result without attempting to parse.
  if len(time) != flen: return
  time.parse(f, utc())
|
||||
|
||||
proc parseIsoDate*(date: string): DateTime =
  ## Parses a 20-character timestamp of the form `yyyy-MM-ddTHH:mm:ssZ` as
  ## UTC. Input of any other length yields a default `DateTime`.
  parseTime(date, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
|
||||
|
||||
proc parseTwitterDate*(date: string): DateTime =
  ## Parses a 30-character timestamp of the form
  ## `ddd MMM dd hh:mm:ss +0000 yyyy` as UTC. Input of any other length
  ## yields a default `DateTime`.
  parseTime(date, "ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)
|
||||
|
||||
proc getImageUrl*(url: string): string =
  ## Returns a copy of `url` with the `twimg` prefix removed and then the
  ## `https` prefix removed, each only when present.
  result = url
  result.removePrefix(twimg)
  result.removePrefix(https)
|
||||
|
||||
template handleErrors*(body) =
  ## Runs `body` when the string `json` in the calling scope is an error
  ## document, i.e. starts with `{"errors`.
  ##
  ## Inside `body` the code of the first reported error is available as
  ## `error`. `json` and `fromJson` are taken from the scope the template is
  ## expanded in.
  if json.startsWith("{\"errors"):
    let error {.inject.} = fromJson(json, Errors).errors[0].code
    body
|
30
src/experimental/types/common.nim
Normal file
30
src/experimental/types/common.nim
Normal file
|
@ -0,0 +1,30 @@
|
|||
from ../../types import Error
|
||||
|
||||
type
  # A URL entity attached to a text.
  Url* = object
    url*: string
    # Used as link target and as the profile website by the parsers.
    expandedUrl*: string
    displayUrl*: string
    # Start index and exclusive end index of the URL within the text,
    # counted in runes by `extractUrls`.
    indices*: array[2, int]

  # Numeric error codes; presumably the codes the API reports in its
  # `errors` array (TODO confirm against the API documentation).
  ErrorCode* = enum
    null = 0
    noUserMatches = 17
    protectedUser = 22
    couldntAuth = 32
    doesntExist = 34
    userNotFound = 50
    suspended = 63
    rateLimited = 88
    invalidToken = 89
    listIdOrSlug = 112
    forbidden = 200
    badToken = 239
    noCsrf = 353

  # One entry of an error response.
  # NOTE(review): `code` uses `Error` imported from ../../types, not the
  # `ErrorCode` enum declared above.
  ErrorObj* = object
    code*: Error
    message*: string

  # Shape of an error response: an object with an `errors` list.
  Errors* = object
    errors*: seq[ErrorObj]
|
28
src/experimental/types/user.nim
Normal file
28
src/experimental/types/user.nim
Normal file
|
@ -0,0 +1,28 @@
|
|||
import common
|
||||
|
||||
type
  # User record that `parseUser` decodes from the raw response with
  # `fromJson`. Field names presumably correspond to the snake_case keys of
  # the response (TODO confirm the key mapping done by the JSON library).
  User* = object
    idStr*: string
    name*: string
    screenName*: string
    location*: string
    description*: string
    # URL entities for the profile website and for the description text.
    entities*: Entities
    # Creation timestamp as text; parsed with `parseTwitterDate`.
    createdAt*: string
    followersCount*: int
    friendsCount*: int
    favouritesCount*: int
    statusesCount*: int
    mediaCount*: int
    verified*: bool
    protected*: bool
    profileBannerUrl*: string
    profileImageUrlHttps*: string
    # Color value without the leading '#'; `getBanner` adds the '#'.
    profileLinkColor*: string

  # URL entities grouped by the profile field they belong to.
  Entities* = object
    url*: Urls
    description*: Urls

  # Wrapper around a list of URL entities.
  Urls* = object
    urls*: seq[Url]
|
Loading…
Reference in a new issue