Merge branch 'master' into dsgvo

This commit is contained in:
cy8aer 2018-08-09 15:17:43 +02:00 committed by GitHub
commit d9bf508f31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 20 deletions

View file

@ -0,0 +1,14 @@
{
"ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
"versions": [
"61.0.1",
"61.0",
"60.0.2",
"60.0.1",
"60.0"
],
"os": [
"Windows NT 10; WOW64",
"X11; Linux x86_64"
]
}

View file

@ -10,8 +10,10 @@ from codecs import getincrementalencoder
from imp import load_source
from numbers import Number
from os.path import splitext, join
from io import open
from random import choice
import sys
import json
from searx import settings
from searx.version import VERSION_STRING
@ -39,29 +41,11 @@ else:
logger = logger.getChild('utils')
# Firefox version strings; gen_useragent() picks one of them at random.
ua_versions = ('52.8.1',
'53.0',
'54.0',
'55.0',
'56.0',
'57.0',
'58.0',
'59.0',
'60.0.2')
# Platform tokens; gen_useragent() picks one at random when the caller
# does not supply its own.
ua_os = ('Windows NT 6.3; WOW64',
'X11; Linux x86_64',
'X11; Linux x86')
# User-Agent template; {os} and {version} are filled in by gen_useragent().
ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
# NOTE(review): presumably the HTML tags whose content is skipped when
# extracting text -- the code using this tuple is not visible here; confirm.
blocked_tags = ('script',
'style')
def gen_useragent(os=None):
    """Return a Firefox User-Agent string built from the ``ua`` template.

    ``os`` is the platform token to embed; when it is falsy, a random entry
    of ``ua_os`` is used instead. The Firefox version is always a random
    entry of ``ua_versions``.
    """
    # TODO
    platform = os or choice(ua_os)
    firefox_version = choice(ua_versions)
    return ua.format(os=platform, version=firefox_version)
# Load the user-agent description (the 'ua' template plus the 'versions' and
# 'os' lists) from data/useragents.json next to this module. The ``with``
# block closes the file handle as soon as the content has been read instead
# of leaving it open until the garbage collector gets to it.
with open(os.path.dirname(os.path.realpath(__file__))
          + "/data/useragents.json", 'r', encoding='utf-8') as useragents_file:
    useragents = json.loads(useragents_file.read())
def searx_useragent():
@ -70,6 +54,10 @@ def searx_useragent():
suffix=settings['outgoing'].get('useragent_suffix', ''))
def gen_useragent(os=None):
    """Return a Firefox User-Agent string built from the ``useragents`` data.

    ``os`` is the platform token to embed; when it is falsy, a random entry
    of ``useragents['os']`` is used instead. The Firefox version is always a
    random entry of ``useragents['versions']``. The result is passed through
    ``str()`` before being returned.
    """
    platform = os or choice(useragents['os'])
    firefox_version = choice(useragents['versions'])
    user_agent = useragents['ua'].format(os=platform, version=firefox_version)
    return str(user_agent)
def highlight_content(content, query):
if not content:

73
utils/fetch_firefox_version.py Executable file
View file

@ -0,0 +1,73 @@
#!/usr/bin/env python
# set path
from sys import path
from os.path import realpath, dirname, join
path.append(realpath(dirname(realpath(__file__)) + '/../'))
#
import json
import requests
import re
from distutils.version import LooseVersion, StrictVersion
from lxml import html
from searx.url_utils import urlparse, urljoin
from searx import searx_dir
# Index page that is downloaded to discover the Firefox release directories.
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
# Path prefix a link must have to be considered a release directory.
RELEASE_PATH = '/pub/firefox/releases/'

# Matches plain release numbers such as "61.0" or "61.0.1" (the second and
# third components are a single digit each); beta ("61.0b3") and ESR
# ("60.1.0esr") names do not match. Written as a raw string so that "\." is
# a regex escape rather than an invalid Python string escape.
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
# Currently unused patterns, kept for reference:
# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')

#
# Skeleton of the data written to useragents.json; "versions" is filled in
# later by the script with the result of fetch_firefox_last_versions().
useragents = {
    "versions": (),
    "os": ('Windows NT 10; WOW64',
           'X11; Linux x86_64'),
    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
}
def fetch_firefox_versions():
    """Return the Firefox release versions found on the Mozilla index page.

    Downloads the page at ``URL``, keeps every link whose resolved path
    starts with ``RELEASE_PATH`` and whose remaining name matches
    ``NORMAL_REGEX``, and returns the matches as ``LooseVersion`` objects
    sorted from highest to lowest.

    Raises:
        Exception: if the server does not answer with HTTP status 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # status_code is an integer; convert it explicitly so that building
        # the message does not itself fail with a TypeError.
        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []
    for link in dom.xpath('//a/@href'):
        # Resolve the href against the index URL so relative links work too.
        link_path = urlparse(urljoin(URL, link)).path
        if not link_path.startswith(RELEASE_PATH):
            continue
        # Drop the common prefix and the last character (presumably the
        # trailing '/' of a directory link).
        version = link_path[len(RELEASE_PATH):-1]
        if NORMAL_REGEX.match(version):
            versions.append(LooseVersion(version))

    versions.sort(reverse=True)
    return versions
def fetch_firefox_last_versions():
    """Return the version strings of the two most recent Firefox majors.

    Takes the list from ``fetch_firefox_versions()`` (highest version
    first), reads the major number of its first entry, and keeps every
    version whose major number equals that value or that value minus one.
    The original ordering is preserved and the entries are returned as
    plain strings.
    """
    all_versions = fetch_firefox_versions()
    newest_major = all_versions[0].version[0]
    wanted_majors = (newest_major, newest_major - 1)
    return [
        candidate.vstring
        for candidate in all_versions
        if candidate.version[0] in wanted_majors
    ]
def get_useragents_filename():
    """Return the path of ``useragents.json`` inside ``searx_dir``'s data directory."""
    return join(searx_dir, "data", "useragents.json")
# Fill in the fetched version list, then overwrite the JSON data file with
# the complete user-agent description.
useragents["versions"] = fetch_firefox_last_versions()
output_filename = get_useragents_filename()
with open(output_filename, "w") as output_file:
    json.dump(useragents, output_file, indent=4, ensure_ascii=False)