launcher: Add support for running tests inside rr

Allowing us to easily run the tests forever and then replay the
failures!

https://rr-project.org/

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-devtools/-/merge_requests/197>
This commit is contained in:
Thibault Saunier 2020-03-10 11:52:35 -03:00
parent f78997ec2e
commit 766f339fb8
4 changed files with 68 additions and 6 deletions

View file

@ -290,7 +290,7 @@ class GstCheckTestsManager(MesonTestsManager):
else:
child_env['CK_TIMEOUT_MULTIPLIER'] = str(self.options.timeout_factor)
if self.options.gdb:
if self.options.gdb or self.options.rr:
child_env['CK_FORK'] = "no"
if self.options.gst_check_leak_trace_testnames:

View file

@ -63,6 +63,7 @@ from .utils import mkdir, Result, Colors, printc, DEFAULT_TIMEOUT, GST_SECOND, \
# The factor by which we increase the hard timeout when running inside
# Valgrind
GDB_TIMEOUT_FACTOR = VALGRIND_TIMEOUT_FACTOR = 20
RR_TIMEOUT_FACTOR = 2
TIMEOUT_FACTOR = float(os.environ.get("TIMEOUT_FACTOR", 1))
# The error reported by valgrind when detecting errors
VALGRIND_ERROR_CODE = 20
@ -127,6 +128,7 @@ class Test(Loggable):
self.workdir = workdir
self.allow_flakiness = False
self.html_log = None
self.rr_logdir = None
self.clean()
@ -237,6 +239,11 @@ class Test(Loggable):
self.out.write('\n\n## %s:\n\n```\n%s\n```\n' % (
os.path.basename(logfile), self.get_extra_log_content(logfile))
)
if self.rr_logdir:
self.out.write('\n\n## rr trace:\n\n```\nrr replay %s/latest-trace\n```\n' % (
self.rr_logdir))
self.out.flush()
self.out.close()
@ -293,6 +300,9 @@ class Test(Loggable):
def add_stack_trace_to_logfile(self):
self.debug("Adding stack trace")
if self.options.rr:
return
trace_gatherer = BackTraceGenerator.get_default()
stack_trace = trace_gatherer.get_trace(self)
@ -364,7 +374,9 @@ class Test(Loggable):
return
self.debug("%s returncode: %s", self, self.process.returncode)
if self.process.returncode == 0:
if self.options.rr and self.process.returncode == -signal.SIGPIPE:
self.set_result(Result.SKIPPED, "SIGPIPE received under `rr`, known issue.")
elif self.process.returncode == 0:
self.set_result(Result.PASSED)
elif self.process.returncode in EXITING_SIGNALS:
self.add_stack_trace_to_logfile()
@ -443,7 +455,19 @@ class Test(Loggable):
return os.environ.copy()
def kill_subprocess(self):
utils.kill_subprocess(self, self.process, DEFAULT_TIMEOUT)
subprocs_id = None
if self.options.rr and self.process and self.process.returncode is None:
cmd = ["ps", "-o", "pid", "--ppid", str(self.process.pid), "--noheaders"]
try:
subprocs_id = [int(pid.strip('\n')) for
pid in subprocess.check_output(cmd).decode().split(' ') if pid]
except FileNotFoundError:
self.error("Ps not found, will probably not be able to get rr "
"working properly after we kill the process")
except subprocess.CalledProcessError as e:
self.error("Couldn't get rr subprocess pid: %s" % (e))
utils.kill_subprocess(self, self.process, DEFAULT_TIMEOUT, subprocs_id)
def run_external_checks(self):
pass
@ -496,6 +520,20 @@ class Test(Loggable):
args += ["--args"] + command
return args
def use_rr(self, command, subenv):
command = ["rr", 'record', '-h'] + command
self.timeout *= RR_TIMEOUT_FACTOR
self.rr_logdir = os.path.join(self.options.logsdir, self.classname.replace(".", os.sep), 'rr-logs')
subenv['_RR_TRACE_DIR'] = self.rr_logdir
try:
shutil.rmtree(self.rr_logdir, ignore_errors=False, onerror=None)
except FileNotFoundError:
pass
self.add_env_variable('_RR_TRACE_DIR', self.rr_logdir)
return command
def use_valgrind(self, command, subenv):
vglogsfile = os.path.splitext(self.logfile)[0] + '.valgrind'
self.extra_logfiles.add(vglogsfile)
@ -607,6 +645,9 @@ class Test(Loggable):
if self.options.valgrind:
self.command = self.use_valgrind(self.command, self.proc_env)
if self.options.rr:
self.command = self.use_rr(self.command, self.proc_env)
if not self.options.redirect_logs:
self.out.write("# `%s`\n\n"
"## Command\n\n``` bash\n%s\n```\n\n" % (
@ -762,6 +803,7 @@ class GstValidateTest(Test):
""" A class representing a particular test. """
HARD_TIMEOUT_FACTOR = 5
fault_sig_regex = re.compile("<Caught SIGNAL: .*>")
needs_gst_inspect = set()
def __init__(self, application_name, classname,
options, reporter, duration=0,
@ -1009,9 +1051,13 @@ class GstValidateTest(Test):
if self.result in [Result.FAILED, Result.PASSED, Result.SKIPPED]:
return
self.debug("%s returncode: %s", self, self.process.returncode)
expected_issues = copy.deepcopy(self.expected_issues)
if self.options.rr:
# signal.SIGPPIPE is 13 but it sometimes isn't present in python for some reason.
expected_issues.append({"returncode": -13, "sometimes": True})
self.criticals, not_found_expected_issues, expected_returncode = self.check_reported_issues(expected_issues)
expected_timeout = None
expected_signal = None
@ -2029,7 +2075,7 @@ class _TestsLauncher(Loggable):
current_test_num += 1
res = test.test_end(retry_on_failure=retry_on_failures)
to_report = True
if res != Result.PASSED:
if res not in [Result.PASSED, Result.SKIPPED]:
if self.options.forever or self.options.fatal_error:
self.print_result(current_test_num - 1, test, retry_on_failure=retry_on_failures)
self.reporter.after_test(test)

View file

@ -18,6 +18,7 @@
# Boston, MA 02110-1301, USA.
import os
import sys
import shutil
from . import utils
import urllib.parse
from . import loggable
@ -202,6 +203,7 @@ class LauncherConfig(Loggable):
self.valgrind = False
self.gdb = False
self.no_display = False
self.rr = False
self.xunit_file = None
self.main_dir = utils.DEFAULT_MAIN_DIR
self.output_dir = None
@ -332,6 +334,12 @@ class LauncherConfig(Loggable):
Colors.FAIL)
return False
if self.rr:
if not shutil.which('rr'):
printc("Want to use rr, but not available on the system",
Colors.FAIL)
return False
if self.html:
try:
import commonmark
@ -478,6 +486,9 @@ class LauncherConfig(Loggable):
parser.add_argument("-vg", "--valgrind", dest="valgrind",
action="store_true",
help="Run the tests inside Valgrind")
parser.add_argument("-rr", "--rr", dest="rr",
action="store_true",
help="Run the tests inside rr record")
parser.add_argument("--gdb", dest="gdb",
action="store_true",
help="Run the tests inside gdb (implies"

View file

@ -594,7 +594,7 @@ def check_bugs_resolution(bugs_definitions):
return res
def kill_subprocess(owner, process, timeout):
def kill_subprocess(owner, process, timeout, subprocess_ids=None):
if process is None:
return
@ -611,11 +611,16 @@ def kill_subprocess(owner, process, timeout):
subprocess.call(
['taskkill', '/F', '/T', '/PID', str(process.pid)])
else:
process.send_signal(killsig)
if subprocess_ids:
for subprocess_id in subprocess_ids:
os.kill(subprocess_id, killsig)
else:
process.send_signal(killsig)
time.sleep(waittime)
waittime *= 2
except OSError:
pass
if not is_windows() and time.time() - stime > timeout / 4:
killsig = signal.SIGKILL
if time.time() - stime > timeout: