mirror of
https://github.com/zedeus/nitter.git
synced 2025-01-07 15:45:24 +00:00
Remove nim-regex dependency, improve performance
This commit is contained in:
parent
0e5da8c305
commit
6c6386913e
4 changed files with 33 additions and 32 deletions
|
@ -14,7 +14,6 @@ requires "nim >= 1.4.8"
|
||||||
requires "jester >= 0.5.0"
|
requires "jester >= 0.5.0"
|
||||||
requires "karax#c71bc92"
|
requires "karax#c71bc92"
|
||||||
requires "sass#e683aa1"
|
requires "sass#e683aa1"
|
||||||
requires "regex#eeefb4f"
|
|
||||||
requires "nimcrypto#a5742a9"
|
requires "nimcrypto#a5742a9"
|
||||||
requires "markdown#abdbe5e"
|
requires "markdown#abdbe5e"
|
||||||
requires "packedjson#d11d167"
|
requires "packedjson#d11d167"
|
||||||
|
|
|
@ -1,10 +1,17 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
import strutils, strformat, times, uri, tables, xmltree, htmlparser, htmlgen
|
import strutils, strformat, times, uri, tables, xmltree, htmlparser, htmlgen
|
||||||
import std/enumerate
|
import std/[enumerate, re]
|
||||||
import regex
|
|
||||||
import types, utils, query
|
import types, utils, query
|
||||||
|
|
||||||
const
|
const
|
||||||
|
cards = "cards.twitter.com/cards"
|
||||||
|
tco = "https://t.co"
|
||||||
|
twitter = parseUri("https://twitter.com")
|
||||||
|
|
||||||
|
let
|
||||||
|
twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com"
|
||||||
|
twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>"""
|
||||||
|
|
||||||
ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
|
ytRegex = re"([A-z.]+\.)?youtu(be\.com|\.be)"
|
||||||
igRegex = re"(www\.)?instagram\.com"
|
igRegex = re"(www\.)?instagram\.com"
|
||||||
|
|
||||||
|
@ -15,20 +22,11 @@ const
|
||||||
# Images aren't supported due to errors from Teddit when the image
|
# Images aren't supported due to errors from Teddit when the image
|
||||||
# wasn't first displayed via a post on the Teddit instance.
|
# wasn't first displayed via a post on the Teddit instance.
|
||||||
|
|
||||||
twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com"
|
|
||||||
twLinkRegex = re"""<a href="https:\/\/twitter.com([^"]+)">twitter\.com(\S+)</a>"""
|
|
||||||
|
|
||||||
cards = "cards.twitter.com/cards"
|
|
||||||
tco = "https://t.co"
|
|
||||||
|
|
||||||
wwwRegex = re"https?://(www[0-9]?\.)?"
|
wwwRegex = re"https?://(www[0-9]?\.)?"
|
||||||
m3u8Regex = re"""url="(.+.m3u8)""""
|
m3u8Regex = re"""url="(.+.m3u8)""""
|
||||||
manifestRegex = re"\/(.+(.ts|.m4s|.m3u8|.vmap|.mp4))"
|
|
||||||
userPicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
|
userPicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$"
|
||||||
extRegex = re"(\.[A-z]+)$"
|
extRegex = re"(\.[A-z]+)$"
|
||||||
illegalXmlRegex = re"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u10000-\u10FFFF]"
|
illegalXmlRegex = re"(*UTF8)[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]"
|
||||||
|
|
||||||
twitter = parseUri("https://twitter.com")
|
|
||||||
|
|
||||||
proc getUrlPrefix*(cfg: Config): string =
|
proc getUrlPrefix*(cfg: Config): string =
|
||||||
if cfg.useHttps: https & cfg.hostname
|
if cfg.useHttps: https & cfg.hostname
|
||||||
|
@ -54,45 +52,48 @@ proc shortLink*(text: string; length=28): string =
|
||||||
proc replaceUrls*(body: string; prefs: Prefs; absolute=""): string =
|
proc replaceUrls*(body: string; prefs: Prefs; absolute=""): string =
|
||||||
result = body
|
result = body
|
||||||
|
|
||||||
if prefs.replaceYouTube.len > 0 and ytRegex in result:
|
if prefs.replaceYouTube.len > 0 and "youtu" in result:
|
||||||
result = result.replace(ytRegex, prefs.replaceYouTube)
|
result = result.replace(ytRegex, prefs.replaceYouTube)
|
||||||
if prefs.replaceYouTube in result:
|
if prefs.replaceYouTube in result:
|
||||||
result = result.replace("/c/", "/")
|
result = result.replace("/c/", "/")
|
||||||
|
|
||||||
if prefs.replaceTwitter.len > 0 and
|
if prefs.replaceTwitter.len > 0 and ("twitter.com" in body or tco in body):
|
||||||
(twRegex in result or twLinkRegex in result or tco in result):
|
|
||||||
result = result.replace(tco, https & prefs.replaceTwitter & "/t.co")
|
result = result.replace(tco, https & prefs.replaceTwitter & "/t.co")
|
||||||
result = result.replace(cards, prefs.replaceTwitter & "/cards")
|
result = result.replace(cards, prefs.replaceTwitter & "/cards")
|
||||||
result = result.replace(twRegex, prefs.replaceTwitter)
|
result = result.replace(twRegex, prefs.replaceTwitter)
|
||||||
result = result.replace(twLinkRegex, a(
|
result = result.replace(twLinkRegex, a(
|
||||||
prefs.replaceTwitter & "$2", href = https & prefs.replaceTwitter & "$1"))
|
prefs.replaceTwitter & "$2", href = https & prefs.replaceTwitter & "$1"))
|
||||||
|
|
||||||
if prefs.replaceReddit.len > 0 and (rdRegex in result or "redd.it" in result):
|
if prefs.replaceReddit.len > 0 and ("reddit.com" in result or "redd.it" in result):
|
||||||
result = result.replace(rdShortRegex, prefs.replaceReddit & "/comments/")
|
result = result.replace(rdShortRegex, prefs.replaceReddit & "/comments/")
|
||||||
result = result.replace(rdRegex, prefs.replaceReddit)
|
result = result.replace(rdRegex, prefs.replaceReddit)
|
||||||
if prefs.replaceReddit in result and "/gallery/" in result:
|
if prefs.replaceReddit in result and "/gallery/" in result:
|
||||||
result = result.replace("/gallery/", "/comments/")
|
result = result.replace("/gallery/", "/comments/")
|
||||||
|
|
||||||
if prefs.replaceInstagram.len > 0 and igRegex in result:
|
if prefs.replaceInstagram.len > 0 and "instagram.com" in result:
|
||||||
result = result.replace(igRegex, prefs.replaceInstagram)
|
result = result.replace(igRegex, prefs.replaceInstagram)
|
||||||
|
|
||||||
if absolute.len > 0 and "href" in result:
|
if absolute.len > 0 and "href" in result:
|
||||||
result = result.replace("href=\"/", "href=\"" & absolute & "/")
|
result = result.replace("href=\"/", "href=\"" & absolute & "/")
|
||||||
|
|
||||||
proc getM3u8Url*(content: string): string =
|
proc getM3u8Url*(content: string): string =
|
||||||
var m: RegexMatch
|
var matches: array[1, string]
|
||||||
if content.find(m3u8Regex, m):
|
if re.find(content, m3u8Regex, matches) != -1:
|
||||||
result = content[m.group(0)[0]]
|
result = matches[0]
|
||||||
|
|
||||||
proc proxifyVideo*(manifest: string; proxy: bool): string =
|
proc proxifyVideo*(manifest: string; proxy: bool): string =
|
||||||
proc cb(m: RegexMatch; s: string): string =
|
var replacements: seq[(string, string)]
|
||||||
result = "https://video.twimg.com/" & s[m.group(0)[0]]
|
for line in manifest.splitLines:
|
||||||
if proxy: result = getVidUrl(result)
|
let url =
|
||||||
result = manifest.replace(manifestRegex, cb)
|
if line.startsWith("#EXT-X-MAP:URI"): line[16 .. ^2]
|
||||||
|
else: line
|
||||||
|
if url[0] == '/':
|
||||||
|
let path = "https://video.twimg.com" & url
|
||||||
|
replacements.add (url, if proxy: path.getVidUrl else: path)
|
||||||
|
return manifest.multiReplace(replacements)
|
||||||
|
|
||||||
proc getUserPic*(userPic: string; style=""): string =
|
proc getUserPic*(userPic: string; style=""): string =
|
||||||
let pic = userPic.replace(userPicRegex, "$2")
|
userPic.replacef(userPicRegex, "$2").replacef(extRegex, style & "$1")
|
||||||
pic.replace(extRegex, style & "$1")
|
|
||||||
|
|
||||||
proc getUserPic*(profile: Profile; style=""): string =
|
proc getUserPic*(profile: Profile; style=""): string =
|
||||||
getUserPic(profile.userPic, style)
|
getUserPic(profile.userPic, style)
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
import strutils, times, macros, htmlgen, unicode, options, algorithm
|
import strutils, times, macros, htmlgen, unicode, options, algorithm
|
||||||
import regex, packedjson
|
import std/re
|
||||||
|
import packedjson
|
||||||
import types, utils, formatters
|
import types, utils, formatters
|
||||||
|
|
||||||
const
|
let
|
||||||
unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
|
unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
|
||||||
unReplace = "$1<a href=\"/$2\">@$2</a>"
|
unReplace = "$1<a href=\"/$2\">@$2</a>"
|
||||||
|
|
||||||
|
@ -213,8 +214,8 @@ proc expandProfileEntities*(profile: var Profile; js: JsonNode) =
|
||||||
replacements.sort(cmp)
|
replacements.sort(cmp)
|
||||||
|
|
||||||
profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
|
profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
|
||||||
profile.bio = profile.bio.replace(unRegex, unReplace)
|
profile.bio = profile.bio.replacef(unRegex, unReplace)
|
||||||
.replace(htRegex, htReplace)
|
.replacef(htRegex, htReplace)
|
||||||
|
|
||||||
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
|
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
|
||||||
let
|
let
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
import strutils, strformat, uri, tables, base64
|
import strutils, strformat, uri, tables, base64
|
||||||
import nimcrypto, regex
|
import nimcrypto
|
||||||
|
|
||||||
var
|
var
|
||||||
hmacKey: string
|
hmacKey: string
|
||||||
|
|
Loading…
Reference in a new issue