406 lines
16 KiB
Python
406 lines
16 KiB
Python
# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
import shutil
|
|
from autotest_lib.client.common_lib import utils as client_utils
|
|
from autotest_lib.client.common_lib.cros import dev_server
|
|
from autotest_lib.client.common_lib.cros import retry
|
|
from autotest_lib.client.cros import constants
|
|
from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
|
|
from autotest_lib.server.crashcollect import collect_log_file
|
|
from autotest_lib.server import utils
|
|
|
|
try:
|
|
from chromite.lib import metrics
|
|
except ImportError:
|
|
metrics = client_utils.metrics_mock
|
|
|
|
|
|
def generate_minidump_stacktrace(minidump_path):
    """
    Symbolicate a minidump locally by shelling out to minidump_stackwalk.

    The debug symbols are looked up in lib/debug, three levels above the
    directory returned by utils.get_server_dir(). This is expected to be:
      /build/<board>/usr/lib/debug

    The tool's output is redirected to "<minidump_path>.txt".

    @param minidump_path: absolute path to the minidump to be symbolicated.
    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
    """
    server_dir = utils.get_server_dir()
    symbol_dir = '%s/../../../lib/debug' % server_dir
    logging.info('symbol_dir: %s', symbol_dir)

    stackwalk_cmd = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % (
            minidump_path, symbol_dir, minidump_path)
    client_utils.run(stackwalk_cmd)
|
|
|
|
|
|
def _resolve_crashserver():
    """
    Pick the least loaded crash server that can symbolicate a crashdump.

    A counter metric is incremented for either outcome: 'resolved' (tagged
    with the chosen server) or 'could_not_resolve'.

    @raises DevServerException if no server with capacity could be found.
    @returns Hostname of resolved server, if found.
    """
    resolved = dev_server.get_least_loaded_devserver(
            devserver_type=dev_server.CrashServer)

    if resolved:
        metrics.Counter(
                'chromeos/autotest/crashcollect/resolved'
                ).increment(fields={'crash_server': resolved})
        return resolved

    metrics.Counter(
            'chromeos/autotest/crashcollect/could_not_resolve'
            ).increment()
    raise dev_server.DevServerException(
            'No crash server has the capacity to symbolicate the dump.')
|
|
|
|
|
|
def _symbolicate_minidump_with_devserver(minidump_path, resultdir,
                                         crashserver_name):
    """
    Ask a crash server to symbolicate a minidump and save the stack trace.

    The debug symbols are assumed to have been staged on the devserver
    already. On success the trace is written to "<minidump_path>.txt".

    @param minidump_path: absolute path to the minidump to be symbolicated.
    @param resultdir: server job's result directory.
    @param crashserver_name: Name of crashserver to attempt to symbolicate with.
    @raise DevServerException upon failure, HTTP or otherwise.
    """
    # The build under test selects the debug symbols. Without it there is
    # nothing sensible to ask the crash server for, so bail out early.
    job_keyvals = client_utils.read_keyval(resultdir)
    if JOB_BUILD_KEY not in job_keyvals:
        raise dev_server.DevServerException(
                'Cannot determine build being tested.')

    crashserver = dev_server.CrashServer(crashserver_name)
    duration_timer = metrics.SecondsTimer(
            'chromeos/autotest/crashcollect/symbolicate_duration',
            fields={'crash_server': crashserver_name})
    with duration_timer:
        stack_trace = crashserver.symbolicate_dump(
                minidump_path, job_keyvals[JOB_BUILD_KEY])

    # An empty reply is treated as a failure.
    if not stack_trace:
        raise dev_server.DevServerException('Unknown error!!')

    trace_path = minidump_path + '.txt'
    with open(trace_path, 'w') as trace_out:
        trace_out.write(stack_trace)
|
|
|
|
def generate_stacktrace_for_file(minidump, host_resultdir):
    """
    Best-effort generation of a stack trace for the dump at |minidump|.

    Local symbolication is attempted first. If that fails, a crash server
    is resolved and asked to symbolicate, with a 600 second timeout.
    Failures are logged, never raised to the caller.

    @param minidump: path to minidump file to generate the stacktrace for.
    @param host_resultdir: server job's result directory.
    """
    # Attempt 1: symbolicate on this machine.
    logging.info('Trying to generate stack trace locally for %s', minidump)
    try:
        generate_minidump_stacktrace(minidump)
    except client_utils.error.CmdError as err:
        logging.info('Failed to generate stack trace locally for '
                     'dump %s (rc=%d):\n%r',
                     minidump, err.result_obj.exit_status, err)
    else:
        logging.info('Generated stack trace for dump %s', minidump)
        return

    # Attempt 2: hand the dump to a crash server.
    try:
        logging.info('Generating stack trace using devserver for %s', minidump)
        crashserver_name = _resolve_crashserver()
        timed_out, _ = retry.timeout(
                _symbolicate_minidump_with_devserver,
                args=(minidump, host_resultdir, crashserver_name),
                timeout_sec=600)
        if not timed_out:
            logging.info('Generated stack trace for dump %s', minidump)
        else:
            logging.info('Generating stack trace timed out for dump %s',
                         minidump)
            metrics.Counter(
                    'chromeos/autotest/crashcollect/symbolicate_timed_out'
                    ).increment(fields={'crash_server': crashserver_name})
        # A timeout is not reported through the generic failure warning.
        return
    except dev_server.DevServerException as e:
        logging.info('Failed to generate stack trace on devserver for dump '
                     '%s:\n%r', minidump, e)

    # Both attempts failed.
    logging.warning('Failed to generate stack trace for %s (see info logs)',
                    minidump)
|
|
|
|
def find_and_generate_minidump_stacktraces(host_resultdir):
    """
    Symbolicate every minidump found under a results directory.

    The directory is walked recursively. Each file with a .dmp extension
    is treated as a minidump and handed to generate_stacktrace_for_file(),
    which names the stack trace file by appending .txt to the dump's name.

    @param host_resultdir: Directory to walk looking for dmp files.

    @returns The list of all found minidump files. Each dump may or may not have
             been symbolized.
    """
    found_dumps = []
    for dump_path in _find_crashdumps(host_resultdir):
        generate_stacktrace_for_file(dump_path, host_resultdir)
        found_dumps.append(dump_path)
    return found_dumps
|
|
|
|
|
|
def _find_crashdumps(host_resultdir):
    """Yield the path of every .dmp file under host_resultdir, recursively.

    @param host_resultdir The result directory for this host for this test run.
    """
    for dirpath, _, filenames in os.walk(host_resultdir):
        for name in filenames:
            if not name.endswith('.dmp'):
                continue
            yield os.path.join(dirpath, name)
|
|
|
|
|
|
def _find_orphaned_crashdumps(host):
    """List every entry in the crash directory on the host.

    @param host A host object of the device.
    @return Whatever host.list_files_glob() returns for CRASH_DIR/*.
    """
    crash_glob = os.path.join(constants.CRASH_DIR, '*')
    return host.list_files_glob(crash_glob)
|
|
|
|
|
|
def report_crashdumps(host):
    """Log and record every crashdump present, without processing any.

    This is run when no tests failed. We don't process crashdumps in this
    case because of devserver load, but they should still be reported.
    Crashdumps still on the host are reported first, then .dmp files
    already in the local results directory.

    @param host A host object of the device we're to pull crashes from.
    """
    def _announce(template, crashfile):
        # Emit the same message to the log and to the job's status record.
        logging.warning(template, crashfile)
        host.job.record('INFO', None, None, template % (crashfile,))

    for remote_dump in _find_orphaned_crashdumps(host):
        _announce('Host crashdump exists: %s', remote_dump)

    for local_dump in _find_crashdumps(_get_host_resultdir(host)):
        _announce('Local crashdump exists: %s', local_dump)
|
|
|
|
|
|
def fetch_orphaned_crashdumps(host, infodir):
    """
    Copy all of the crashes in the crash directory over to the results folder.

    This is best effort: a collection failure is logged, and whatever was
    fetched before the failure is still returned. infodir is created on
    demand and removed again if nothing was collected.

    @param host A host object of the device we're to pull crashes from.
    @param infodir The directory to fetch crashdumps into.
    @return The list of minidumps that we pulled back from the host.
    """
    orphans = []

    # Check reachability before touching the filesystem, so an unreachable
    # host does not leave an empty infodir behind.
    if not host.check_cached_up_status():
        logging.warning('Host %s did not answer to ping, skip fetching '
                        'orphaned crashdumps.', host.hostname)
        return orphans

    if not os.path.exists(infodir):
        os.mkdir(infodir)

    try:
        for crashfile in _find_orphaned_crashdumps(host):
            logging.info('Collecting %s...', crashfile)
            collect_log_file(host, crashfile, infodir, clean=True)
            orphans.append(crashfile)
    except Exception as e:
        logging.warning('Collection of orphaned crash dumps failed %s', e)
    finally:
        # Delete infodir if we have no orphans
        if not orphans:
            logging.info('There are no orphaned crashes; deleting %s', infodir)
            try:
                os.rmdir(infodir)
            except OSError as e:
                # infodir may be non-empty (it already existed, or a failed
                # collection left partial data). Keep it rather than crash.
                logging.warning('Could not delete %s: %s', infodir, e)
    return orphans
|
|
|
|
|
|
def _copy_to_debug_dir(host_resultdir, filename):
    """
    Copy a file into <host_resultdir>/debug, keeping its base name.

    A failed copy (IOError) is logged as a warning and otherwise ignored.

    @param host_resultdir The result directory for this host for this test run.
    @param filename The full path of the file to copy to the debug folder.
    """
    target = os.path.join(host_resultdir, 'debug', os.path.basename(filename))
    try:
        shutil.copyfile(filename, target)
    except IOError:
        logging.warning('Failed to copy %s to %s', filename, target)
    else:
        logging.info('Copied %s to %s', filename, target)
|
|
|
|
|
|
def _get_host_resultdir(host):
    """Return host.job.resultdir, or None if either attribute is missing.

    @param host A host object of the device we're to pull crashes from.
    """
    job = getattr(host, 'job', None)
    return getattr(job, 'resultdir', None)
|
|
|
|
|
|
def get_host_infodir(host):
    """Return the path of the crashinfo.<hostname> directory for host.

    The directory lives directly under the host's job result directory.

    @param host A host object of the device we're to pull crashes from.
    """
    resultdir = _get_host_resultdir(host)
    crashinfo_name = 'crashinfo.%s' % host.hostname
    return os.path.join(resultdir, crashinfo_name)
|
|
|
|
|
|
def get_site_crashdumps(host, test_start_time):
    """
    Copy all of the crashdumps from a host to the results directory.

    Orphaned crashdumps are fetched from the host, every minidump under the
    results directory is symbolicated (best effort), all dumps are recorded
    in the job's status log, and the stack trace and log of each new
    minidump are copied into the debug directory.

    @param host The host object from which to pull crashes
    @param test_start_time When the test we just ran started. Not used by
                           this function.
    @return A list of all the minidumps: the orphans fetched from the host
            followed by the minidumps found in the results directory.
    """
    host_resultdir = _get_host_resultdir(host)
    infodir = get_host_infodir(host)

    orphans = fetch_orphaned_crashdumps(host, infodir)
    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

    # Record all crashdumps in status.log of the job:
    # - If one server job runs several client jobs we will only record
    #   crashdumps in the status.log of the high level server job.
    # - We will record these crashdumps whether or not we successfully
    #   symbolicate them.
    # The parentheses matter: "and" binds tighter than "or", so without
    # them the host.job guard is skipped whenever orphans is non-empty.
    if host.job and (minidumps or orphans):
        host.job.record('INFO', None, None, 'Start crashcollection record')
        for minidump in minidumps:
            host.job.record('INFO', None, 'New Crash Dump', minidump)
        for orphan in orphans:
            host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
        host.job.record('INFO', None, None, 'End crashcollection record')

    orphans.extend(minidumps)

    for minidump in orphans:
        report_bug_from_crash(host, minidump)

    # We copy Chrome crash information to the debug dir to assist debugging.
    # Since orphans occurred on a previous run, they are most likely not
    # relevant to the current failure, so we don't copy them.
    for minidump in minidumps:
        minidump_no_ext = os.path.splitext(minidump)[0]
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

    return orphans
|
|
|
|
|
|
def find_package_of(host, exec_name):
    """
    Find the package that an executable came from.

    @param host A host object that has the executable.
    @param exec_name Name of or path to executable.
    @return The name of the package that installed the executable, or ''
            if no package or more than one candidate package was found.
    """
    # Imported locally so no new top-level dependency is needed.
    # shlex.quote is the Python 3 home of Python 2's pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Run "portageq owners" on "host" to determine which package owns
    # "exec_name." Portageq output consists of package names followed by
    # tab-prefixed path names. For example, owners of "python:"
    #
    # sys-devel/gdb-7.7.1-r2
    #         /usr/share/gdb/python
    # chromeos-base/dev-install-0.0.1-r711
    #         /usr/bin/python
    # dev-lang/python-2.7.3-r7
    #         /etc/env.d/python
    #
    # This gets piped into "xargs stat" to annotate each line with
    # information about the path, so we later can consider only packages
    # with executable files. After annotation the above looks like:
    #
    # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
    # stat: cannot stat '/usr/share/gdb/python': ...
    # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
    # 755 -rwxr-xr-x /usr/bin/python
    # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
    # 755 drwxr-xr-x /etc/env.d/python
    #
    # Package names are surrounded by "@@@" to facilitate parsing. Lines
    # starting with an octal number were successfully annotated, because
    # the path existed on "host."
    # The above is then parsed to find packages which contain executable files
    # (not directories), in this case "chromeos-base/dev-install-0.0.1-r711."
    #
    # TODO(milleral): portageq can show scary looking error messages
    # in the debug logs via stderr. We only look at stdout, so those
    # get filtered, but it would be good to silence them.
    #
    # exec_name is quoted so that whitespace or shell metacharacters in it
    # cannot break or alter the pipeline.
    cmd = ('portageq owners / ' + quote(exec_name) +
           r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
           r'| tr \\n \\0'
           ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
    portageq = host.run(cmd, ignore_status=True)

    # Parse into a set of names of packages containing an executable file.
    packages = set()
    pkg = ''
    pkg_re = re.compile('@@@ (.*) @@@')
    path_re = re.compile('^([0-7]{3,}) (.)')
    for line in portageq.stdout.splitlines():
        match = pkg_re.search(line)
        if match:
            # A package header: the paths that follow belong to it.
            pkg = match.group(1)
            continue
        match = path_re.match(line)
        if match:
            # Any execute bit set in the octal mode counts as executable.
            isexec = int(match.group(1), 8) & 0o111
            # First character of the symbolic mode: '-' is a regular file.
            isfile = match.group(2) == '-'
            if pkg and isexec and isfile:
                packages.add(pkg)

    # If exactly one package found it must be the one we want, return it.
    if len(packages) == 1:
        return packages.pop()

    # TODO(milleral): Decide if it really is an error if not exactly one
    # package is found.
    # It is highly questionable as to if this should be left in the
    # production version of this code or not.
    if not packages:
        logging.warning('find_package_of() found no packages for "%s"',
                        exec_name)
    else:
        # Sorted so the log line is deterministic.
        logging.warning('find_package_of() found multiple packages for "%s": '
                        '%s', exec_name, ', '.join(sorted(packages)))
    return ''
|
|
|
|
|
|
def report_bug_from_crash(host, minidump_path):
    """
    Given a host to query and a minidump, file a bug about the crash.

    Currently this only logs which package a bug would be reported
    against. The package is found by reading the exec_name entry from the
    .meta file next to the minidump and asking the host which package
    owns that executable.

    @param host A host object that is where the dump came from
    @param minidump_path The path to the dump file that should be reported.
    """
    # TODO(milleral): Once this has actually been tested, remove the
    # try/except. In the meantime, let's make sure nothing dies because of
    # the fact that this code isn't very heavily tested.
    try:
        meta_path = os.path.splitext(minidump_path)[0] + '.meta'
        with open(meta_path, 'r') as f:
            for line in f:
                # Split on the first '=' only, so a value that itself
                # contains '=' is kept whole.
                key, sep, value = line.partition('=')
                if key != 'exec_name' or not sep:
                    continue
                package = find_package_of(host, value.strip())
                if not package:
                    package = '<unknown package>'
                logging.info('Would report crash on %s.', package)
                break
    except Exception as e:
        logging.warning('Crash detection failed with: %s', e)
|