tests: Retry failed cases automatically in parallel VM run

parallel-vm.py is now retrying failed cases once at the end of the run.
If all the failed test cases passed on the second attempt, that is noted
in the summary output. Results are also indicated as the exit value from
the run: 0 = all cases passed on the first run, 1 = some cases failed once,
but everything passed after one retry, 2 = some cases failed and did not
succeed even on the retry.

Signed-off-by: Jouni Malinen <j@w1.fi>
This commit is contained in:
Jouni Malinen 2014-12-23 22:25:29 +02:00
parent a628eeb42b
commit 3eb1db0377

View file

@ -33,6 +33,7 @@ def show_progress(scr):
global dir global dir
global timestamp global timestamp
global tests global tests
global first_run_failures
total_tests = len(tests) total_tests = len(tests)
@ -45,10 +46,73 @@ def show_progress(scr):
scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests)) scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
scr.refresh() scr.refresh()
completed_first_pass = False
rerun_tests = []
while True: while True:
running = False running = False
first_running = False
updated = False updated = False
for i in range(0, num_servers): for i in range(0, num_servers):
if completed_first_pass:
continue
if vm[i]['first_run_done']:
continue
if not vm[i]['proc']:
continue
if vm[i]['proc'].poll() is not None:
vm[i]['proc'] = None
scr.move(i + 1, 10)
scr.clrtoeol()
log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
with open(log, 'r') as f:
if "Kernel panic" in f.read():
scr.addstr("kernel panic")
else:
scr.addstr("unexpected exit")
updated = True
continue
running = True
first_running = True
try:
err = vm[i]['proc'].stderr.read()
vm[i]['err'] += err
except:
pass
try:
out = vm[i]['proc'].stdout.read()
vm[i]['out'] += out
if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
scr.move(i + 1, 10)
scr.clrtoeol()
updated = True
if not tests:
vm[i]['first_run_done'] = True
scr.addstr("completed first round")
continue
else:
name = tests.pop(0)
vm[i]['proc'].stdin.write(name + '\n')
scr.addstr(name)
except:
pass
if not first_running and not completed_first_pass:
if tests:
raise Exception("Unexpected test cases remaining from first round")
completed_first_pass = True
(started, passed, failed, skipped) = get_results()
for f in failed:
name = f.split(' ')[1]
rerun_tests.append(name)
first_run_failures.append(name)
for i in range(num_servers):
if not completed_first_pass:
continue
if not vm[i]['proc']: if not vm[i]['proc']:
continue continue
if vm[i]['proc'].poll() is not None: if vm[i]['proc'].poll() is not None:
@ -72,28 +136,28 @@ def show_progress(scr):
pass pass
try: try:
out = vm[i]['proc'].stdout.read() ready = False
if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out: if vm[i]['first_run_done']:
if not tests: vm[i]['first_run_done'] = False
vm[i]['proc'].stdin.write('\n') ready = True
else: else:
name = tests.pop(0) out = vm[i]['proc'].stdout.read()
vm[i]['proc'].stdin.write(name + '\n') vm[i]['out'] += out
except: if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
continue ready = True
#print("VM {}: '{}'".format(i, out)) if ready:
vm[i]['out'] += out
lines = vm[i]['out'].splitlines()
last = [ l for l in lines if l.startswith('START ') ]
if len(last) > 0:
try:
info = last[-1].split(' ')
scr.move(i + 1, 10) scr.move(i + 1, 10)
scr.clrtoeol() scr.clrtoeol()
scr.addstr(info[1])
updated = True updated = True
except: if not rerun_tests:
pass vm[i]['proc'].stdin.write('\n')
scr.addstr("shutting down")
else:
name = rerun_tests.pop(0)
vm[i]['proc'].stdin.write(name + '\n')
scr.addstr(name + "(*)")
except:
pass
if not running: if not running:
break break
@ -111,9 +175,17 @@ def show_progress(scr):
for f in failed: for f in failed:
scr.addstr(f.split(' ')[1]) scr.addstr(f.split(' ')[1])
scr.addstr(' ') scr.addstr(' ')
scr.move(0, 35)
scr.clrtoeol()
if rerun_tests:
scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
elif first_run_failures:
scr.addstr("(RETRY FAILED)")
scr.refresh() scr.refresh()
time.sleep(0.5) time.sleep(0.25)
scr.refresh() scr.refresh()
time.sleep(0.3) time.sleep(0.3)
@ -124,6 +196,7 @@ def main():
global dir global dir
global timestamp global timestamp
global tests global tests
global first_run_failures
if len(sys.argv) < 2: if len(sys.argv) < 2:
sys.exit("Usage: %s <number of VMs> [--codecov] [params..]" % sys.argv[0]) sys.exit("Usage: %s <number of VMs> [--codecov] [params..]" % sys.argv[0])
@ -146,6 +219,7 @@ def main():
codecov_args = [] codecov_args = []
codecov = False codecov = False
first_run_failures = []
tests = [] tests = []
cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:] cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
lst = subprocess.Popen(cmd, stdout=subprocess.PIPE) lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
@ -214,6 +288,7 @@ def main():
'--ext', 'srv.%d' % (i + 1), '--ext', 'srv.%d' % (i + 1),
'-i'] + codecov_args + extra_args '-i'] + codecov_args + extra_args
vm[i] = {} vm[i] = {}
vm[i]['first_run_done'] = False
vm[i]['proc'] = subprocess.Popen(cmd, vm[i]['proc'] = subprocess.Popen(cmd,
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
@ -234,10 +309,23 @@ def main():
(started, passed, failed, skipped) = get_results() (started, passed, failed, skipped) = get_results()
if len(failed) > 0: if first_run_failures:
print "Failed test cases:" print "Failed test cases:"
for f in failed: for f in first_run_failures:
print f.split(' ')[1], print f,
print
double_failed = []
for f in failed:
name = f.split(' ')[1]
double_failed.append(name)
for test in first_run_failures:
double_failed.remove(test)
if failed and not double_failed:
print "All failed cases passed on retry"
elif double_failed:
print "Failed even on retry:"
for f in double_failed:
print f,
print print
print("TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped))) print("TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped)))
print "Logs: " + dir + '/' + str(timestamp) print "Logs: " + dir + '/' + str(timestamp)
@ -257,5 +345,11 @@ def main():
subprocess.check_call(['./combine-codecov.sh', logdir]) subprocess.check_call(['./combine-codecov.sh', logdir])
print "file://%s/index.html" % logdir print "file://%s/index.html" % logdir
if double_failed:
sys.exit(2)
if failed:
sys.exit(1)
sys.exit(0)
if __name__ == "__main__": if __name__ == "__main__":
main() main()