hostap/tests/hwsim/vm/parallel-vm.py
Jouni Malinen d3fb9c1403 tests: Add debug logging for parallel-vm.py
parallel-vm.log is now written with details of test execution steps and
results. This makes it easier to debug if something goes wrong in VM
monitoring. The --debug option can be used to enable verbose debugging.

Signed-off-by: Jouni Malinen <j@w1.fi>
2014-12-25 16:36:51 +02:00

400 lines
14 KiB
Python
Executable file

#!/usr/bin/env python2
#
# Parallel VM test case executor
# Copyright (c) 2014, Jouni Malinen <j@w1.fi>
#
# This software may be distributed under the terms of the BSD license.
# See README for more details.
import curses
import fcntl
import logging
import os
import subprocess
import sys
import time
logger = logging.getLogger()
def get_results():
global vm
started = []
passed = []
failed = []
skipped = []
for i in range(0, num_servers):
lines = vm[i]['out'].splitlines()
started += [ l for l in lines if l.startswith('START ') ]
passed += [ l for l in lines if l.startswith('PASS ') ]
failed += [ l for l in lines if l.startswith('FAIL ') ]
skipped += [ l for l in lines if l.startswith('SKIP ') ]
return (started, passed, failed, skipped)
def show_progress(scr):
global num_servers
global vm
global dir
global timestamp
global tests
global first_run_failures
total_tests = len(tests)
logger.info("Total tests: %d" % total_tests)
scr.leaveok(1)
scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
for i in range(0, num_servers):
scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
scr.addstr(i + 1, 10, "starting VM")
scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
scr.refresh()
completed_first_pass = False
rerun_tests = []
while True:
running = False
first_running = False
updated = False
for i in range(0, num_servers):
if completed_first_pass:
continue
if vm[i]['first_run_done']:
continue
if not vm[i]['proc']:
continue
if vm[i]['proc'].poll() is not None:
vm[i]['proc'] = None
scr.move(i + 1, 10)
scr.clrtoeol()
log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
with open(log, 'r') as f:
if "Kernel panic" in f.read():
scr.addstr("kernel panic")
logger.info("VM[%d] kernel panic" % i)
else:
scr.addstr("unexpected exit")
logger.info("VM[%d] unexpected exit" % i)
updated = True
continue
running = True
first_running = True
try:
err = vm[i]['proc'].stderr.read()
vm[i]['err'] += err
logger.debug("VM[%d] stderr.read[%s]" % (i, err))
except:
pass
try:
out = vm[i]['proc'].stdout.read()
vm[i]['out'] += out
logger.debug("VM[%d] stdout.read[%s]" % (i, out))
if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
scr.move(i + 1, 10)
scr.clrtoeol()
updated = True
if not tests:
vm[i]['first_run_done'] = True
scr.addstr("completed first round")
logger.info("VM[%d] completed first round" % i)
continue
else:
name = tests.pop(0)
vm[i]['proc'].stdin.write(name + '\n')
scr.addstr(name)
logger.debug("VM[%d] start test %s" % (i, name))
except:
pass
if not first_running and not completed_first_pass:
logger.info("First round of testing completed")
if tests:
logger.info("Unexpected test cases remaining from first round: " + str(tests))
raise Exception("Unexpected test cases remaining from first round")
completed_first_pass = True
(started, passed, failed, skipped) = get_results()
for f in failed:
name = f.split(' ')[1]
rerun_tests.append(name)
first_run_failures.append(name)
for i in range(num_servers):
if not completed_first_pass:
continue
if not vm[i]['proc']:
continue
if vm[i]['proc'].poll() is not None:
vm[i]['proc'] = None
scr.move(i + 1, 10)
scr.clrtoeol()
log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
with open(log, 'r') as f:
if "Kernel panic" in f.read():
scr.addstr("kernel panic")
logger.info("VM[%d] kernel panic" % i)
else:
scr.addstr("completed run")
logger.info("VM[%d] completed run" % i)
updated = True
continue
running = True
try:
err = vm[i]['proc'].stderr.read()
vm[i]['err'] += err
logger.debug("VM[%d] stderr.read[%s]" % (i, err))
except:
pass
try:
ready = False
if vm[i]['first_run_done']:
vm[i]['first_run_done'] = False
ready = True
else:
out = vm[i]['proc'].stdout.read()
vm[i]['out'] += out
logger.debug("VM[%d] stdout.read[%s]" % (i, out))
if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
ready = True
if ready:
scr.move(i + 1, 10)
scr.clrtoeol()
updated = True
if not rerun_tests:
vm[i]['proc'].stdin.write('\n')
scr.addstr("shutting down")
logger.info("VM[%d] shutting down" % i)
else:
name = rerun_tests.pop(0)
vm[i]['proc'].stdin.write(name + '\n')
scr.addstr(name + "(*)")
logger.debug("VM[%d] start test %s (*)" % (i, name))
except:
pass
if not running:
break
if updated:
(started, passed, failed, skipped) = get_results()
scr.move(num_servers + 1, 10)
scr.clrtoeol()
scr.addstr("{} %".format(int(100.0 * (len(passed) + len(failed) + len(skipped)) / total_tests)))
scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, len(started), len(passed), len(failed), len(skipped)))
if len(failed) > 0:
scr.move(num_servers + 2, 0)
scr.clrtoeol()
scr.addstr("Failed test cases: ")
for f in failed:
scr.addstr(f.split(' ')[1])
scr.addstr(' ')
scr.move(0, 35)
scr.clrtoeol()
if rerun_tests:
scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
elif first_run_failures:
scr.addstr("(RETRY FAILED)")
scr.refresh()
time.sleep(0.25)
scr.refresh()
time.sleep(0.3)
def main():
global num_servers
global vm
global dir
global timestamp
global tests
global first_run_failures
debug_level = logging.INFO
if len(sys.argv) < 2:
sys.exit("Usage: %s <number of VMs> [--debug] [--codecov] [params..]" % sys.argv[0])
num_servers = int(sys.argv[1])
if num_servers < 1:
sys.exit("Too small number of VMs")
timestamp = int(time.time())
idx = 2
if len(sys.argv) > idx and sys.argv[idx] == "--debug":
idx += 1
debug_level = logging.DEBUG
if len(sys.argv) > idx and sys.argv[idx] == "--codecov":
idx += 1
print "Code coverage - build separate binaries"
logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
os.makedirs(logdir)
subprocess.check_call(['./build-codecov.sh', logdir])
codecov_args = ['--codecov_dir', logdir]
codecov = True
else:
codecov_args = []
codecov = False
first_run_failures = []
tests = []
cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
for l in lst.stdout.readlines():
name = l.split(' ')[0]
tests.append(name)
if len(tests) == 0:
sys.exit("No test cases selected")
if '-f' in sys.argv[idx:]:
extra_args = sys.argv[idx:]
else:
extra_args = [x for x in sys.argv[idx:] if x not in tests]
dir = '/tmp/hwsim-test-logs'
try:
os.mkdir(dir)
except:
pass
if num_servers > 2 and len(tests) > 100:
# Move test cases with long duration to the beginning as an
# optimization to avoid last part of the test execution running a long
# duration test case on a single VM while all other VMs have already
# completed their work.
long = [ "ap_roam_open",
"ap_hs20_fetch_osu_stop",
"ap_roam_wpa2_psk",
"ibss_wpa_none_ccmp",
"nfc_wps_er_handover_pk_hash_mismatch_sta",
"go_neg_peers_force_diff_freq",
"p2p_cli_invite",
"sta_ap_scan_2b",
"ap_pmf_sta_unprot_deauth_burst",
"ap_bss_add_remove_during_ht_scan",
"wext_scan_hidden",
"autoscan_exponential",
"nfc_p2p_client",
"wnm_bss_keep_alive",
"ap_inactivity_disconnect",
"scan_bss_expiration_age",
"autoscan_periodic",
"discovery_group_client",
"concurrent_p2pcli",
"ap_bss_add_remove",
"wpas_ap_wps",
"wext_pmksa_cache",
"ibss_wpa_none",
"ap_ht_40mhz_intolerant_ap",
"ibss_rsn",
"discovery_pd_retries",
"ap_wps_setup_locked_timeout",
"ap_vht160",
"dfs_radar",
"dfs",
"grpform_cred_ready_timeout",
"ap_wps_pbc_timeout" ]
for l in long:
if l in tests:
tests.remove(l)
tests.insert(0, l)
logger.setLevel(debug_level)
log_handler = logging.FileHandler('parallel-vm.log')
log_handler.setLevel(debug_level)
fmt = "%(asctime)s %(levelname)s %(message)s"
log_formatter = logging.Formatter(fmt)
log_handler.setFormatter(log_formatter)
logger.addHandler(log_handler)
vm = {}
for i in range(0, num_servers):
print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
cmd = ['./vm-run.sh', '--delay', str(i), '--timestamp', str(timestamp),
'--ext', 'srv.%d' % (i + 1),
'-i'] + codecov_args + extra_args
vm[i] = {}
vm[i]['first_run_done'] = False
vm[i]['proc'] = subprocess.Popen(cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
vm[i]['out'] = ""
vm[i]['err'] = ""
for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
fd = stream.fileno()
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
print
curses.wrapper(show_progress)
with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
for i in range(0, num_servers):
f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
(started, passed, failed, skipped) = get_results()
if first_run_failures:
print "Failed test cases:"
for f in first_run_failures:
print f,
logger.info("Failed: " + f)
print
double_failed = []
for f in failed:
name = f.split(' ')[1]
double_failed.append(name)
for test in first_run_failures:
double_failed.remove(test)
if failed and not double_failed:
print "All failed cases passed on retry"
logger.info("All failed cases passed on retry")
elif double_failed:
print "Failed even on retry:"
for f in double_failed:
print f,
logger.info("Failed on retry: " + f)
print
res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped))
print(res)
logger.info(res)
print "Logs: " + dir + '/' + str(timestamp)
logger.info("Logs: " + dir + '/' + str(timestamp))
for i in range(0, num_servers):
log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
with open(log, 'r') as f:
if "Kernel panic" in f.read():
print "Kernel panic in " + log
logger.info("Kernel panic in " + log)
if codecov:
print "Code coverage - preparing report"
for i in range(num_servers):
subprocess.check_call(['./process-codecov.sh',
logdir + ".srv.%d" % (i + 1),
str(i)])
subprocess.check_call(['./combine-codecov.sh', logdir])
print "file://%s/index.html" % logdir
logger.info("Code coverage report: file://%s/index.html" % logdir)
if double_failed:
logger.info("Test run complete - failures found")
sys.exit(2)
if failed:
logger.info("Test run complete - failures found on first run; passed on retry")
sys.exit(1)
logger.info("Test run complete - no failures")
sys.exit(0)
if __name__ == "__main__":
main()