mirror of
https://github.com/zedeus/nitter.git
synced 2024-12-04 23:26:31 +00:00
WIP tweets/timeline parser
This commit is contained in:
parent
266e0a0082
commit
c9b261a793
13 changed files with 359 additions and 50 deletions
|
@ -89,8 +89,8 @@ proc getSearch*[T](query: Query; after=""): Future[Result[T]] {.async.} =
|
|||
else:
|
||||
const
|
||||
searchMode = ("tweet_search_mode", "live")
|
||||
parse = parseTimeline
|
||||
fetchFunc = fetch
|
||||
parse = parseTweets
|
||||
fetchFunc = fetchRaw
|
||||
|
||||
let q = genQueryParam(query)
|
||||
if q.len == 0 or q == emptyQuery:
|
||||
|
|
44
src/experimental/parser/media.nim
Normal file
44
src/experimental/parser/media.nim
Normal file
|
@ -0,0 +1,44 @@
|
|||
import std/[json, strutils, times, math]
|
||||
import utils
|
||||
import ".."/types/[media, tweet]
|
||||
from ../../types import Poll, Gif, Video, VideoVariant, VideoType
|
||||
|
||||
proc parseVideo*(entity: Entity): Video =
  ## Builds a `Video` from a media entity.
  ##
  ## The title is `additionalMediaInfo.title` when that is non-empty and the
  ## entity's alt text otherwise.
  let
    info = entity.videoInfo
    extra = entity.additionalMediaInfo
    title =
      if extra.title.len > 0: extra.title
      else: entity.extAltText

  Video(
    thumb: entity.mediaUrlHttps.getImageUrl,
    views: entity.ext.mediaStats{"r", "ok", "viewCount"}.getStr,
    available: entity.extMediaAvailability.status == "available",
    title: title,
    durationMs: info.durationMillis,
    description: extra.description,
    variants: info.variants
    # playbackType: mp4
  )
|
||||
|
||||
proc parseGif*(entity: Entity): Gif =
  ## Builds a `Gif` from a media entity.
  ##
  ## `thumb` is the entity's image URL. `url` is taken from the first video
  ## variant; when the entity carries no variants it is left empty instead of
  ## raising an `IndexDefect`.
  result = Gif(thumb: entity.getImageUrl)
  if entity.videoInfo.variants.len > 0:
    result.url = entity.videoInfo.variants[0].url.getImageUrl
|
||||
|
||||
proc parsePoll*(card: Card): Poll =
  ## Builds a `Poll` from a poll card's binding values.
  ##
  ## The number of choices is read from the fifth character of `card.name`.
  ## A name that is too short, or whose fifth character is not a digit in
  ## `1..9`, yields a poll without options instead of crashing.
  ##
  ## `status` is the leading unit of the remaining time while the poll is
  ## open, and "Final results" once its end time has passed.
  ## `parseInt` still raises `ValueError` on a non-numeric vote count.
  let vals = card.bindingValues

  # name format is pollNchoice_*
  let lastChoice =
    if card.name.len > 4 and card.name[4] in {'1'..'9'}: card.name[4]
    else: '0' # below '1', so the loop body never runs

  for i in '1' .. lastChoice:
    let choice = "choice" & i
    result.values.add parseInt(vals{choice & "_count", "string_value"}.getStr("0"))
    result.options.add vals{choice & "_label", "string_value"}.getStr

  let
    time = vals{"end_datetime_utc", "string_value"}.getStr.parseIsoDate
    current = now() # sampled once so the comparison and the difference agree

  if time > current:
    let
      timeLeft = $(time - current)
      comma = timeLeft.find(",")
    # A duration made of a single unit has no comma; keep the whole string.
    result.status =
      if comma < 0: timeLeft
      else: timeLeft[0 ..< comma]
  else:
    result.status = "Final results"

  # `max` on an empty seq raises, so only look for a leader when votes exist.
  if result.values.len > 0:
    result.leader = result.values.find(max(result.values))
  result.votes = result.values.sum
|
|
@ -1,15 +1,14 @@
|
|||
import std/[macros, htmlgen, unicode]
|
||||
import ../types/common
|
||||
import ".."/../[formatters, utils]
|
||||
|
||||
type
|
||||
ReplaceSliceKind = enum
|
||||
ReplaceSliceKind* = enum
|
||||
rkRemove, rkUrl, rkHashtag, rkMention
|
||||
|
||||
ReplaceSlice* = object
|
||||
slice: Slice[int]
|
||||
kind: ReplaceSliceKind
|
||||
url, display: string
|
||||
slice*: Slice[int]
|
||||
kind*: ReplaceSliceKind
|
||||
url*, display*: string
|
||||
|
||||
proc cmp*(x, y: ReplaceSlice): int =
  ## Orders replace slices by their start index.
  ##
  ## Start is compared with start (not with the other slice's end) so that
  ## `cmp(x, x) == 0` and `cmp(x, y)` is the opposite of `cmp(y, x)`. For
  ## non-overlapping slices the resulting order is the same as before.
  cmp(x.slice.a, y.slice.a)
|
||||
|
||||
|
@ -27,11 +26,14 @@ proc dedupSlices*(s: var seq[ReplaceSlice]) =
|
|||
inc j
|
||||
inc i
|
||||
|
||||
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
|
||||
proc extractHashtags*(result: var seq[ReplaceSlice]; slice: Slice[int]) =
  ## Appends an `rkHashtag` replacement covering `slice` to `result`.
  let hashtag = ReplaceSlice(kind: rkHashtag, slice: slice)
  result.add hashtag
|
||||
|
||||
proc extractUrls*[T](result: var seq[ReplaceSlice]; entity: T;
|
||||
textLen: int; hideTwitter = false) =
|
||||
let
|
||||
link = url.expandedUrl
|
||||
slice = url.indices[0] ..< url.indices[1]
|
||||
link = entity.expandedUrl
|
||||
slice = entity.indices
|
||||
|
||||
if hideTwitter and slice.b.succ >= textLen and link.isTwitterUrl:
|
||||
if slice.a < textLen:
|
||||
|
|
|
@ -1,13 +1,50 @@
|
|||
import std/[strutils, tables]
|
||||
import std/[strutils, tables, options]
|
||||
import jsony
|
||||
import user, ../types/timeline
|
||||
from ../../types import Result, User
|
||||
import user, tweet, utils, ../types/timeline
|
||||
from ../../types import Result, User, Tweet
|
||||
|
||||
proc parseHook(s: string; i: var int; v: var Slice[int]) =
  ## JSON parse hook for `Slice[int]`.
  ##
  ## Reads a two-element integer array `[first, second]` through the
  ## `array[2, int]` overload of `parseHook` and stores it as the inclusive
  ## slice `first .. second - 1`.
  var bounds: array[2, int]
  parseHook(s, i, bounds)
  v = bounds[0] .. pred(bounds[1])
|
||||
|
||||
proc getId(id: string): string {.inline.} =
  ## Returns the part of `id` after its last `-`, or `id` unchanged when it
  ## contains no `-`.
  let dash = id.rfind('-')
  if dash >= 0: id[dash + 1 .. ^1]
  else: id
|
||||
|
||||
proc processTweet(id: string; objects: GlobalObjects;
                  userCache: var Table[string, User]): Tweet =
  ## Converts the raw tweet stored under `id` in `objects.tweets` and attaches
  ## its author, retweet and quote.
  ##
  ## Authors are converted once and kept in `userCache`. A retweeted or quoted
  ## tweet that is present in `objects.tweets` is processed recursively; one
  ## that is missing is represented by a `Tweet` carrying only its id.
  ## Raises `KeyError` when `id` is not in `objects.tweets`.
  let rawTweet = objects.tweets[id]
  result = rawTweet.toTweet

  let authorId = result.user.id
  if authorId.len > 0 and authorId in objects.users:
    if authorId notin userCache:
      userCache[authorId] = objects.users[authorId].toUser
    result.user = userCache[authorId]

  let retweetId = rawTweet.retweetedStatusIdStr
  if retweetId.len > 0:
    let retweeted =
      if retweetId in objects.tweets:
        processTweet(retweetId, objects, userCache)
      else:
        Tweet(id: retweetId.toId)
    result.retweet = some retweeted

  let quoteId = rawTweet.quotedStatusIdStr
  if quoteId.len > 0:
    let quoted =
      if quoteId in objects.tweets:
        processTweet(quoteId, objects, userCache)
      else:
        Tweet(id: quoteId.toId)
    result.quote = some quoted
|
||||
|
||||
proc parseCursor[T](e: Entry; result: var Result[T]) =
  ## Copies the cursor value of entry `e` into `result.top` or
  ## `result.bottom`, depending on the cursor type. Other cursor types are
  ## ignored.
  let cursor = e.content.operation.cursor
  case cursor.cursorType
  of "Top": result.top = cursor.value
  of "Bottom": result.bottom = cursor.value
  else: discard
|
||||
|
||||
proc parseUsers*(json: string; after=""): Result[User] =
|
||||
result = Result[User](beginning: after.len == 0)
|
||||
|
||||
|
@ -16,13 +53,32 @@ proc parseUsers*(json: string; after=""): Result[User] =
|
|||
return
|
||||
|
||||
for e in raw.timeline.instructions[0].addEntries.entries:
|
||||
let id = e.entryId.getId
|
||||
if e.entryId.startsWith("user"):
|
||||
let
|
||||
eId = e.entryId
|
||||
id = eId.getId
|
||||
|
||||
if eId.startsWith("user") or eId.startsWith("sq-U"):
|
||||
if id in raw.globalObjects.users:
|
||||
result.content.add toUser raw.globalObjects.users[id]
|
||||
elif e.entryId.startsWith("cursor"):
|
||||
let cursor = e.content.operation.cursor
|
||||
if cursor.cursorType == "Top":
|
||||
result.top = cursor.value
|
||||
elif cursor.cursorType == "Bottom":
|
||||
result.bottom = cursor.value
|
||||
elif eId.startsWith("cursor") or eId.startsWith("sq-C"):
|
||||
parseCursor(e, result)
|
||||
|
||||
proc parseTweets*(json: string; after=""): Result[Tweet] =
  ## Parses a search response into a page of tweets.
  ##
  ## `beginning` is set when `after` is empty. Only the entries of the first
  ## timeline instruction are read: tweet entries are converted with
  ## `processTweet`, cursor entries fill the page's top/bottom cursors.
  result = Result[Tweet](beginning: after.len == 0)

  let search = json.fromJson(Search)
  if search.timeline.instructions.len > 0:
    # Shared across entries so each author is converted only once.
    var userCache: Table[string, User]

    for entry in search.timeline.instructions[0].addEntries.entries:
      let entryId = entry.entryId

      if entryId.startsWith("tweet") or entryId.startsWith("sq-I-t"):
        let tweetId = entryId.getId
        if tweetId in search.globalObjects.tweets:
          result.content.add processTweet(tweetId, search.globalObjects, userCache)
      elif entryId.startsWith("cursor") or entryId.startsWith("sq-C"):
        parseCursor(entry, result)
|
||||
|
|
97
src/experimental/parser/tweet.nim
Normal file
97
src/experimental/parser/tweet.nim
Normal file
|
@ -0,0 +1,97 @@
|
|||
import std/[strutils, options, algorithm, json]
|
||||
import std/unicode except strip
|
||||
import utils, slices, media, user
|
||||
import ../types/tweet
|
||||
from ../types/media as mediaTypes import MediaType
|
||||
from ../../types import Tweet, User, TweetStats
|
||||
|
||||
proc expandTweetEntities(tweet: var Tweet; raw: RawTweet) =
  ## Fills `tweet.text` and `tweet.reply` from the raw tweet's entities.
  ##
  ## URLs, media links, hashtags, symbols and mentions are collected as
  ## replace slices, deduplicated, sorted and applied to the runes of
  ## `raw.fullText` within the display text range. Trailing whitespace is
  ## stripped from the resulting text.
  let
    orig = raw.fullText.toRunes
    textRange = raw.displayTextRange
    textSlice = textRange[0] .. textRange[1]

  var replyTo = ""
  if tweet.replyId > 0:
    replyTo = raw.inReplyToScreenName
    tweet.reply.add replyTo

  var replacements: seq[ReplaceSlice]

  for link in raw.entities.urls:
    # Skip entities whose short URL does not actually occur in the text.
    if link.url.len > 0 and link.url in raw.fullText:
      replacements.extractUrls(link, textSlice.b, hideTwitter=raw.isQuoteStatus)
      # if raw.card.isSome and link.url == get(tweet.card).url:
      #   get(tweet.card).url = link.expandedUrl

  for mediaEntity in raw.entities.media:
    replacements.extractUrls(mediaEntity, textSlice.b, hideTwitter=true)

  for hashtag in raw.entities.hashtags:
    replacements.extractHashtags(hashtag.indices)

  for symbol in raw.entities.symbols:
    replacements.extractHashtags(symbol.indices)

  for mention in raw.entities.userMentions:
    let
      name = mention.screenName
      idx = tweet.reply.find(name)

    if mention.indices.a >= textSlice.a:
      # Mention inside the displayed text: render it as a link and drop it
      # from the reply list unless it is the account being replied to.
      replacements.add ReplaceSlice(kind: rkMention, slice: mention.indices,
                                    url: "/" & name, display: mention.name)
      if idx > -1 and name != replyTo:
        tweet.reply.delete idx
    elif idx == -1 and tweet.replyId != 0:
      # Mention before the displayed text of a reply: an additional recipient.
      tweet.reply.add name

  replacements.dedupSlices
  replacements.sort(cmp)

  tweet.text = orig.replacedWith(replacements, textSlice)
                   .strip(leading=false)
|
||||
|
||||
proc toTweet*(raw: RawTweet): Tweet =
  ## Converts a raw API tweet into a `Tweet`.
  ##
  ## Ids, timestamp, thread flag and stats are copied over, the author is a
  ## placeholder `User` holding only the id, and the text and reply list come
  ## from `expandTweetEntities`. Poll cards and extended media entities
  ## (photos, video, gif) are attached afterwards.
  let stats = TweetStats(
    replies: raw.replyCount,
    retweets: raw.retweetCount,
    likes: raw.favoriteCount,
    quotes: raw.quoteCount
  )

  result = Tweet(
    id: raw.idStr.toId,
    threadId: raw.conversationIdStr.toId,
    replyId: raw.inReplyToStatusIdStr.toId,
    time: parseTwitterDate(raw.createdAt),
    hasThread: raw.selfThread.idStr.len > 0,
    available: true,
    user: User(id: raw.userIdStr),
    stats: stats
  )

  result.expandTweetEntities(raw)

  if raw.card.isSome:
    let card = raw.card.get
    if "poll" in card.name:
      result.poll = some parsePoll(card)
      if "image" in card.name:
        let image = card.bindingValues{"image_large", "image_value", "url"}.getStr
        result.photos.add image.getImageUrl
    # elif card.name == "amplify":
    #   discard
    #   # result.video = some(parsePromoVideo(jsCard{"binding_values"}))
    # else:
    #   result.card = some parseCard(card, raw.entities.urls)

  for m in raw.extendedEntities.media:
    case m.kind
    of photo:
      result.photos.add m.getImageUrl
    of video:
      result.video = some parseVideo(m)
      let source = m.additionalMediaInfo.sourceUser
      if source.isSome:
        result.attribution = some toUser get(source)
    of animatedGif:
      result.gif = some parseGif(m)
|
|
@ -1,11 +1,8 @@
|
|||
import std/[options, tables, strutils, strformat, sugar]
|
||||
import std/[options, tables, strformat]
|
||||
import jsony
|
||||
import ../types/unifiedcard
|
||||
import utils
|
||||
import ".."/types/[unifiedcard, media]
|
||||
from ../../types import Card, CardKind, Video
|
||||
from ../../utils import twimg, https
|
||||
|
||||
proc getImageUrl(entity: MediaEntity): string =
|
||||
entity.mediaUrlHttps.dup(removePrefix(twimg), removePrefix(https))
|
||||
|
||||
proc parseDestination(id: string; card: UnifiedCard; result: var Card) =
|
||||
let destination = card.destinationObjects[id].data
|
||||
|
@ -66,6 +63,7 @@ proc parseMedia(component: Component; card: UnifiedCard; result: var Card) =
|
|||
durationMs: videoInfo.durationMillis,
|
||||
variants: videoInfo.variants
|
||||
)
|
||||
of animatedGif: discard
|
||||
|
||||
proc parseUnifiedCard*(json: string): Card =
|
||||
let card = json.fromJson(UnifiedCard)
|
||||
|
@ -78,7 +76,7 @@ proc parseUnifiedCard*(json: string): Card =
|
|||
component.data.parseAppDetails(card, result)
|
||||
of mediaWithDetailsHorizontal:
|
||||
component.data.parseMediaDetails(card, result)
|
||||
of media, swipeableMedia:
|
||||
of ComponentType.media, swipeableMedia:
|
||||
component.parseMedia(card, result)
|
||||
of buttonGroup:
|
||||
discard
|
||||
|
|
|
@ -1,12 +1,16 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
import std/[sugar, strutils, times]
|
||||
import ../types/common
|
||||
import ".."/types/[common, media, tweet]
|
||||
import ../../utils as uutils
|
||||
|
||||
template parseTime(time: string; f: static string; flen: int): DateTime =
  ## Parses `time` with format `f` in UTC.
  ##
  ## When `time` is not exactly `flen` characters long this returns from the
  ## calling proc, leaving that proc's result at its default value.
  if len(time) != flen:
    return
  parse(time, f, utc())
|
||||
|
||||
proc toId*(id: string): int64 =
  ## Converts a numeric id string to `int64`; an empty string yields 0.
  ## Raises `ValueError` when a non-empty `id` is not a valid integer.
  if id.len > 0:
    result = parseBiggestInt(id)
|
||||
|
||||
proc parseIsoDate*(date: string): DateTime =
  ## Parses a 20-character timestamp of the form `yyyy-MM-ddTHH:mm:ssZ`.
  ## Input of any other length yields a default `DateTime`.
  parseTime(date, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
|
||||
|
||||
|
@ -16,6 +20,9 @@ proc parseTwitterDate*(date: string): DateTime =
|
|||
proc getImageUrl*(url: string): string =
  ## Returns `url` with the `twimg` prefix, and then the `https` prefix,
  ## removed when present.
  result = url
  result.removePrefix(twimg)
  result.removePrefix(https)
|
||||
|
||||
proc getImageUrl*(entity: MediaEntity | Entity): string =
  ## Shortened image URL of a media entity, derived from its `mediaUrlHttps`.
  getImageUrl(entity.mediaUrlHttps)
|
||||
|
||||
template handleErrors*(body) =
|
||||
if json.startsWith("{\"errors"):
|
||||
for error {.inject.} in json.fromJson(Errors).errors:
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import jsony
|
||||
from ../../types import Error
|
||||
|
||||
type
|
||||
|
@ -5,7 +6,7 @@ type
|
|||
url*: string
|
||||
expandedUrl*: string
|
||||
displayUrl*: string
|
||||
indices*: array[2, int]
|
||||
indices*: Slice[int]
|
||||
|
||||
ErrorObj* = object
|
||||
code*: Error
|
||||
|
@ -18,3 +19,8 @@ proc contains*(codes: set[Error]; errors: Errors): bool =
|
|||
for e in errors.errors:
|
||||
if e.code in codes:
|
||||
return true
|
||||
|
||||
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## JSON parse hook for `Slice[int]` fields.
  ##
  ## Parses a two-element integer array via the `array[2, int]` overload and
  ## converts `[first, second]` into the inclusive slice `first .. second - 1`.
  var pair: array[2, int]
  parseHook(s, i, pair)
  v.a = pair[0]
  v.b = pred(pair[1])
|
||||
|
|
15
src/experimental/types/media.nim
Normal file
15
src/experimental/types/media.nim
Normal file
|
@ -0,0 +1,15 @@
|
|||
import options
|
||||
from ../../types import VideoType, VideoVariant
|
||||
|
||||
type
  MediaType* = enum
    ## Kinds of media an entity can hold.
    photo
    video
    animatedGif

  MediaEntity* = object
    ## A media item as it appears in a parsed response.
    kind*: MediaType
    mediaUrlHttps*: string
    videoInfo*: Option[VideoInfo] ## only present for some entities

  VideoInfo* = object
    ## Duration and available encodings of a video.
    durationMillis*: int
    variants*: seq[VideoVariant]
|
|
@ -1,13 +1,14 @@
|
|||
import std/tables
|
||||
import user
|
||||
import user, tweet
|
||||
|
||||
type
|
||||
Search* = object
|
||||
globalObjects*: GlobalObjects
|
||||
timeline*: Timeline
|
||||
|
||||
GlobalObjects = object
|
||||
GlobalObjects* = object
|
||||
users*: Table[string, RawUser]
|
||||
tweets*: Table[string, RawTweet]
|
||||
|
||||
Timeline = object
|
||||
instructions*: seq[Instructions]
|
||||
|
@ -15,9 +16,13 @@ type
|
|||
Instructions = object
|
||||
addEntries*: tuple[entries: seq[Entry]]
|
||||
|
||||
Entry = object
|
||||
Entry* = object
|
||||
entryId*: string
|
||||
content*: tuple[operation: Operation]
|
||||
|
||||
Operation = object
|
||||
cursor*: tuple[value, cursorType: string]
|
||||
|
||||
proc renameHook*(v: var Entity; fieldName: var string) =
  ## Maps the JSON key `type` onto the `kind` field of `Entity`
  ## (`type` is a Nim keyword). All other keys are left untouched.
  case fieldName
  of "type": fieldName = "kind"
  else: discard
|
||||
|
|
85
src/experimental/types/tweet.nim
Normal file
85
src/experimental/types/tweet.nim
Normal file
|
@ -0,0 +1,85 @@
|
|||
import options
|
||||
import jsony
|
||||
from json import JsonNode
|
||||
import user, media, common
|
||||
|
||||
type
  RawTweet* = object
    ## A tweet as parsed from the JSON response, before conversion to `Tweet`.
    createdAt*, idStr*, fullText*: string
    displayTextRange*: array[2, int]
    entities*: Entities
    extendedEntities*: ExtendedEntities
    inReplyToStatusIdStr*, inReplyToScreenName*, userIdStr*: string
    isQuoteStatus*: bool
    replyCount*, retweetCount*, favoriteCount*, quoteCount*: int
    conversationIdStr*: string
    favorited*, retweeted*: bool
    selfThread*: tuple[idStr: string]
    card*: Option[Card]
    quotedStatusIdStr*, retweetedStatusIdStr*: string

  Card* = object
    ## A card attached to a tweet; `bindingValues` is kept as raw JSON.
    name*, url*: string
    bindingValues*: JsonNode

  Entities* = object
    ## Text entities of a tweet.
    hashtags*, symbols*: seq[Hashtag]
    userMentions*: seq[UserMention]
    urls*: seq[Url]
    media*: seq[Entity]

  Hashtag* = object
    ## Position of a hashtag or symbol in the tweet text.
    indices*: Slice[int]

  UserMention* = object
    ## A mentioned user and the position of the mention in the tweet text.
    screenName*, name*: string
    indices*: Slice[int]

  ExtendedEntities* = object
    ## Media entities from the `extendedEntities` field of a tweet.
    media*: seq[Entity]

  Entity* = object
    ## A media entity attached to a tweet.
    kind*: MediaType
    indices*: Slice[int]
    mediaUrlHttps*, url*, expandedUrl*: string
    videoInfo*: VideoInfo
    ext*: Ext
    extMediaAvailability*: tuple[status: string]
    extAltText*: string
    additionalMediaInfo*: AdditionalMediaInfo
    sourceStatusIdStr*, sourceUserIdStr*: string

  AdditionalMediaInfo* = object
    ## Optional extra metadata of a media entity.
    sourceUser*: Option[RawUser]
    title*, description*: string

  Ext* = object
    ## Extension data of a media entity; `mediaStats` is kept as raw JSON.
    mediaStats*: JsonNode

  MediaStats* = object
    ## Typed shape of media statistics.
    ok*: tuple[viewCount: string]
|
||||
|
||||
proc renameHook*(v: var Entity; fieldName: var string) =
  ## Redirects the JSON key `type` to the `kind` field of `Entity`, since
  ## `type` is a Nim keyword. Every other key keeps its name.
  if fieldName != "type":
    return
  fieldName = "kind"
|
||||
|
||||
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## JSON parse hook for the `indices` fields declared as `Slice[int]`.
  ##
  ## The JSON value is read as a two-element integer array and stored as the
  ## inclusive slice from its first element up to its second element minus
  ## one.
  var raw: array[2, int]
  parseHook(s, i, raw)
  let
    first = raw[0]
    last = pred(raw[1])
  v = first .. last
|
|
@ -1,5 +1,5 @@
|
|||
import options, tables
|
||||
from ../../types import VideoType, VideoVariant
|
||||
import media as mediaTypes
|
||||
|
||||
type
|
||||
UnifiedCard* = object
|
||||
|
@ -38,25 +38,13 @@ type
|
|||
id*: string
|
||||
destination*: string
|
||||
|
||||
Destination* = object
|
||||
kind*: string
|
||||
data*: tuple[urlData: UrlData]
|
||||
|
||||
UrlData* = object
|
||||
url*: string
|
||||
vanity*: string
|
||||
|
||||
MediaType* = enum
|
||||
photo, video
|
||||
|
||||
MediaEntity* = object
|
||||
kind*: MediaType
|
||||
mediaUrlHttps*: string
|
||||
videoInfo*: Option[VideoInfo]
|
||||
|
||||
VideoInfo* = object
|
||||
durationMillis*: int
|
||||
variants*: seq[VideoVariant]
|
||||
Destination* = object
|
||||
kind*: string
|
||||
data*: tuple[urlData: UrlData]
|
||||
|
||||
AppType* = enum
|
||||
androidApp, iPhoneApp, iPadApp
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import options
|
||||
import jsony
|
||||
import common
|
||||
|
||||
type
|
||||
|
@ -41,3 +42,8 @@ type
|
|||
|
||||
Color* = object
|
||||
red*, green*, blue*: int
|
||||
|
||||
proc parseHook*(s: string; i: var int; v: var Slice[int]) =
  ## JSON parse hook turning a two-element integer array `[first, second]`
  ## into the inclusive slice `first .. second - 1`. The array itself is read
  ## by the `array[2, int]` overload of `parseHook`.
  var ends: array[2, int]
  parseHook(s, i, ends)
  v = Slice[int](a: ends[0], b: pred(ends[1]))
|
||||
|
|
Loading…
Reference in a new issue