#!/usr/bin/python2 -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""

import datetime
import contextlib
import getpass
import logging
import os
import re
import shutil
import signal
import socket
import sys
import traceback
import time
import six
from six.moves import urllib

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import autotest_enum
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import seven
from autotest_lib.server.cros.dynamic_suite import suite

try:
    from chromite.lib import metrics
    from chromite.lib import cloud_trace
except ImportError:
    from autotest_lib.client.common_lib import utils as common_utils
    metrics = common_utils.metrics_mock
    import mock
    cloud_trace = mock.MagicMock()

_CONFIG = global_config.global_config

# Number of seconds to wait before returning if testing mode is enabled.
TESTING_MODE_SLEEP_SECS = 1


from autotest_lib.server import frontend
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
from autotest_lib.server import site_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.site_utils import job_directories
from autotest_lib.site_utils import lxc
from autotest_lib.site_utils.lxc import utils as lxc_utils
from autotest_lib.client.common_lib import pidfile, logging_manager


# Control segment to stage the server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start and stop servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
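
# For illustration only (board and port are hypothetical):
#   START_SERVOD_CMD % ('link', 9999) -> 'sudo start servod BOARD=link PORT=9999'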

_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'

_LXC_JOB_FOLDER = 'lxc_job_folder'


def log_alarm(signum, frame):
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)


def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    @param parser: Parser for the command line arguments.

    @return: A list of machine names from the command line arg -m or the
            machines file specified in the command line arg -M.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        with open(machines_file, 'r') as f:
            for m in f.readlines():
                # Remove comments and surrounding whitespace.
                m = re.sub('#.*', '', m).strip()
                if m:
                    machines.append(m)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        machines = list(set(machines))
        machines.sort()
    return machines
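
# For illustration (hostnames are hypothetical): passing '-m host1,host2 host3'
# yields ['host1', 'host2', 'host3'], and a file passed via -M lists one
# hostname per line, with '#' starting a comment.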


def _stage_ssp(parser, resultsdir):
    """Stage the server-side package.

    This function calls a control segment to stage the server-side package
    based on the job and the autoserv command line options. The detailed
    implementation may differ for each host type. Currently, only CrosHost
    defines a stage_server_side_package function.

    The script returns None if no server-side package is available. However,
    it may raise an exception if it fails for reasons other than the artifact
    (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            if the autoserv run requires no logging.

    @return: URL to the autotest server-side package. None in case of errors.
    """
    machines_list = _get_machines(parser)
    machines_list = server_job.get_machine_dicts(
            machine_names=machines_list,
            store_dir=os.path.join(resultsdir, parser.options.host_info_subdir),
            in_lab=parser.options.lab,
            use_shadow_store=not parser.options.local_only_host_info,
            host_attributes=parser.options.host_attributes,
    )

    namespace = {'machines': machines_list,
                 'isolate_hash': parser.options.isolate,
                 'image': parser.options.test_source_build}
    script_locals = {}

    seven.exec_file(
            STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE,
            globals_=namespace,
            locals_=script_locals,
    )
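    # The staged control segment is expected to populate script_locals with
    # 'ssp_url' (and 'error_msg' when staging fails); those keys are read
    # back below.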
    ssp_url = script_locals['ssp_url']
    if not ssp_url:
        logging.error('Failed to stage SSP package: %s',
                      script_locals['error_msg'])
        logging.error('This job will fail later, when attempting to run with'
                      ' SSP')
    return ssp_url


def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  machines):
    """Run the server job with server-side packaging.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
            parser.options.results:
            parser.options.results can be set to None for results to be
            stored in a temp folder.
            results can be None if the autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: URL of the staged server-side package.
    @param machines: A list of machines to run the test.
    """
    if not ssp_url:
        job.record('FAIL', None, None,
                   'Failed to stage server-side package')
        raise error.AutoservError('Failed to stage server-side package')

    bucket = lxc.ContainerBucket(
            base_name=_ssp_base_image_name_or_default(parser.options))
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=_LXC_JOB_FOLDER,
                                           dut_name=dut_name,
                                           isolate_hash=parser.options.isolate)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to set up container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful to autoserv running on the host, not in
    # the container. Including this argument causes tests to fail for builds
    # before CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id from the command line args.
        del args[index]

    # A dictionary of paths to replace in the command line. Each key is the
    # path to be replaced with the corresponding value.
    paths_to_replace = {}
    # Replace the control file path with the one in the container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update the result directory with the one in the container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]
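    # For illustration: each command-line token that exactly matches a
    # host-side path in paths_to_replace is swapped for its in-container
    # equivalent; all other tokens pass through unchanged.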

    # Pass --use-existing-results: the results directory is already created
    # and mounted in the container, so this flag avoids an exception being
    # raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in the container uses a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
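    # e.g. ['autoserv', '--args', 'a b'] becomes "autoserv --args 'a b'":
    # only tokens containing spaces get single-quoted.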
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside the container fails without generating any
        # log, write a message to status.log to help troubleshooting.
        debug_files = os.listdir(os.path.join(results, 'debug'))
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                        fields={'success': success})
        test_container.destroy()


def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder is owned
    by root. Ownership must be changed to the user running the autoserv
    process, so the parsing job can access the results folder.

    TODO(dshi): crbug.com/459344 Remove this function when the test container
    can be an unprivileged container.

    @param results: Path to the results folder.
    """
    if not results:
        return

    utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
    utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
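    # Note: sudo's -n flag makes these commands fail immediately instead of
    # prompting for a password when passwordless sudo is unavailable.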


def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or is
    running with a different board or port.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s '
                            'servo_host attribute is not set to localhost.')
            return
    except (urllib.error.HTTPError, urllib.error.URLError):
        # Ignore the error if the RPC failed to get the board.
        logging.error('Failed to get board name from AFE. Starting servod is '
                      'aborted.')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
                '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with the given board and'
                          ' port. There is no need to restart servod.')
            return
        logging.debug('Servod is running with a different board or port. '
                      'Stopping the existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started.')
    except error.CmdError as e:
        logging.error('Servod failed to start, error: %s', e)


def _control_path_on_disk(control_name):
    """Find the control file corresponding to the given control name, on disk.

    @param control_name: NAME attribute of the control file to fetch.
    @return: Path to the control file.
    """
    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    control_name_predicate = suite.test_name_matches_pattern_predicate(
            '^%s$' % control_name)
    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
    if not tests:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(tests) > 1:
        logging.error('Found more than one control file with NAME %s: %s',
                      control_name, [t.path for t in tests])
        raise error.AutoservError(
                'Found more than one control file with NAME %s' % control_name)
    return tests[0].path


def _stage_control_file(control_name, results_dir):
    """Stage the control file to execute from the local autotest checkout.

    @param control_name: Name of the control file to stage.
    @param results_dir: Results directory to stage the control file into.
    @return: Absolute path to the staged control file.
    """
    control_path = _control_path_on_disk(control_name)
    new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    shutil.copy2(control_path, new_control)
    return new_control


def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run the server job with the given options.

    @param pid_file_manager: PidFileManager used to monitor the autoserv
            process.
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: URL to the server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    # Send stdin to /dev/null.
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create a separate process group if the process is not a process group
    # leader. This allows the autoserv process to keep running after the
    # caller process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # The container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
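    # The container id embeds the job/task id, the creation time and this
    # process's pid, so the SIGTERM handler below can locate and destroy the
    # same container.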

    # Implement the SIGTERM handler.
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update the results folder's file permissions. This needs to be done
        # ASAP, before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket(
                        base_name=_ssp_base_image_name_or_default(
                                parser.options))
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                    logging.debug("Container %s destroyed.", container_id)
                else:
                    logging.debug('Container %s is not found.', container_id)
                    bucket.scrub_container_location(container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permissions again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set the signal handler.
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed for debugging in the lab and is not
    # available for import in the chroot as part of VMTest, so try/except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        # exc_clear() doesn't exist (nor is it needed) in python3.
        if six.PY2:
            sys.exc_clear()

    # Ignore SIGTTOUs generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we receive a SIGALRM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server-side tests that call shell scripts often depend on $USER being
    # set, but depending on how you launch your autotest scheduler it may not
    # be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # A test can't be both a client- and a server-side test.
    if client and server:
        parser.parser.error("Cannot specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    use_client_trampoline = False
    if parser.options.control_name:
        if use_ssp:
            # When use_ssp is True, autoserv will be re-executed inside a
            # container, preserving the --control-name argument. The control
            # file will be staged inside the re-executed autoserv.
            control = None
        else:
            try:
                control = _stage_control_file(parser.options.control_name,
                                              results)
            except error.AutoservError as e:
                logging.info("Using client trampoline because of: %s", e)
                control = parser.options.control_name
                use_client_trampoline = True

    elif parser.args:
        control = parser.args[0]
    else:
        if not is_special_task:
            parser.parser.error("Missing argument: control file")
        control = None

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive.
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
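        # e.g. ssh_verbosity == 2 yields '-vv'.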
    else:
        ssh_verbosity_flag = ''

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    logging.debug("Parser.args is %r", parser.args)
    try:
        logging.debug("Parser.options.args is %r", parser.options.args)
    except AttributeError:
        logging.debug("No Parser.options.args.")

    try:
        logging.debug("Parser.options is %r", parser.options)
    except AttributeError:
        logging.debug("No Parser.options.")
    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
            'use_client_trampoline': use_client_trampoline,
            'sync_offload_dir': parser.options.sync_offload_dir,
    }
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # Perform checks.
    job.precheck()

    # Run the job.
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    if not utils.is_in_container():
        # crbug.com/1054522 -- ts_mon setup is broken inside the SSP container
        # due to a problem in the installed python packages.
        # Trying to clean up an incorrectly initialized ts_mon state adds a 5
        # second overhead in process teardown, so avoid setting up ts_mon
        # entirely inside the SSP container.
        site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                         short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, machines)
                    finally:
                        # Update the ownership of files in the result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in the result folder.
                        # If the job to collect crashinfo was running inside a
                        # container (SSP) and crashed before correcting folder
                        # permissions, the result folder might have the wrong
                        # permission settings.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error, as the user may not have root
                            # permission to run the sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        c['success'] = True

        finally:
            job.close()
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)


# Job breakdown statuses
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
        _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING,
        _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING,
        _qs.RUNNING, _qs.GATHERING, _qs.PARSING]
_JOB_OVERHEAD_STATUS = autotest_enum.AutotestEnum(*_status_list,
                                                  string_values=True)


def get_job_status(options):
    """Returns the HQE Status for this run.

    @param options: parser options.
    """
    s = _JOB_OVERHEAD_STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    match = [task for task in task_mapping if getattr(options, task, False)]
    return task_mapping[match[0]] if match else s.RUNNING
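
# For example, an autoserv invocation with --provision maps to PROVISIONING;
# with no special-task flag set, the status defaults to RUNNING.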


def _require_ssp_from_control(control_name):
    """Read the value of REQUIRE_SSP from the test control file.

    This reads the control file from the prod checkout of autotest and uses
    that to determine whether to even stage the SSP package on a devserver.

    This means:
    [1] Any change to the REQUIRE_SSP directive in a test requires a prod-push
    to go live.
    [2] This function may find that the control file does not exist but the
    SSP package may contain the test file. This function conservatively
    returns True in that case.

    This function is called very early in autoserv, before logging is set up.
    """
    if not control_name:
        return True
    try:
        path = _control_path_on_disk(control_name)
    except error.AutoservError as e:
        sys.stderr.write("autoserv: Could not determine control file path,"
                         " assuming we need SSP: %s\n" % e)
        sys.stderr.flush()
        return True
    if not os.path.isfile(path):
        return True
    control = control_data.parse_control(path)
    # There must be an explicit directive in the control file to disable SSP.
    if not control or control.require_ssp is None:
        return True
    return control.require_ssp


def _ssp_base_image_name_or_default(options):
    """Extract base image name from autoserv options or the global config."""
    if options.ssp_base_image_name:
        return options.ssp_base_image_name
    return global_config.global_config.get_config_value('AUTOSERV',
                                                        'container_base_name')


def main():
    start_time = datetime.datetime.now()
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
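        # e.g. <cwd>/results.2020-05-01-13.00.00 when no results dir is given
        # (timestamp is illustrative).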
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we have verified there is no leftover results dir from
        # previous jobs, let's create the results dir, since the logging
        # system needs to create the log file there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name
        # is used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv environment: %r', os.environ)
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s", results)

    # Wait until now to perform this check, so it gets properly logged.
    if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
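    # cloud_trace_context_enabled arrives as a string, hence the comparison
    # against 'True' rather than a boolean truthiness check.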
    try:
        try:
            with trace.Span(get_job_status(parser.options)):
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
    except Exception:
        # If we don't know what happened, we'll classify it as an 'abort' and
        # return 1.
        logging.exception('Uncaught Exception, exit_code = 1.')
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)


if __name__ == '__main__':
    main()