From 6aa551ec416bac9ec96a6103e25be78221cf36a4 Mon Sep 17 00:00:00 2001 From: Peter Qiu Date: Fri, 6 Mar 2015 11:42:04 -0800 Subject: [PATCH] crash-reporter: add support for device coredump Copy the device coredump file to /var/spool/crash directory when udev event "devcoredump" is detected. Also include the last 50 lines of /var/log/messages and /var/log/net.log in the coredump file for debugging purpose. Only perform the collection if feedback is allowed (metrics are enabled) or the device is running a developer image. By default, crash-reporter will not upload device coredumps to the crash server. The user can enable device coredumps upload via a to-be-added crosh command, which should only be done at the direction of CrOS engineers. BUG=chromium:464872 TEST=USE="asan clang" FEATURES=test emerge-$BOARD crash-reporter Manual Test: 1. Trigger wifi firmware error on a Clapper running developer image by running following command: "echo 1 > /sys/kernel/debug/iwlwifi/0000\:01\:00.0/iwlmvm/fw_restart" 2. Verify there is a device coredump file in "/var/spool/crash/" "devcoredump_iwlwifi*.devcore", as well as the additional log file ".log" and the meta file ".meta". Change-Id: Ic4cf67d4b5715a6f422505f409276d1261b7d51f Reviewed-on: https://chromium-review.googlesource.com/257091 Reviewed-by: Zeping Qiu Commit-Queue: Zeping Qiu Tested-by: Zeping Qiu Reviewed-by: Mike Frysinger --- crash_reporter/99-crash-reporter.rules | 1 + crash_reporter/crash_reporter_logs.conf | 9 ++ crash_reporter/crash_sender | 19 ++- crash_reporter/udev_collector.cc | 182 +++++++++++++++++++++--- crash_reporter/udev_collector.h | 24 ++++ crash_reporter/udev_collector_test.cc | 73 +++++++++- 6 files changed, 278 insertions(+), 30 deletions(-) diff --git a/crash_reporter/99-crash-reporter.rules b/crash_reporter/99-crash-reporter.rules index 57a71cac5..aea5b1c36 100644 --- a/crash_reporter/99-crash-reporter.rules +++ b/crash_reporter/99-crash-reporter.rules @@ -3,3 +3,4 @@ ACTION=="change", SUBSYSTEM=="drm", KERNEL=="card0", ENV{ERROR}=="1", RUN+="/sbi ACTION=="change", SUBSYSTEM=="i2c", DRIVER=="cyapa", ENV{ERROR}=="1", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=i2c-cyapa:ACTION=change" # For detecting Atmel trackpad/touchscreen issue. Passing into crash_reporter SUBSYSTEM=i2c-atmel_mxt_ts since crash_reporter does not handle DRIVER string. ACTION=="change", SUBSYSTEM=="i2c", DRIVER=="atmel_mxt_ts", ENV{ERROR}=="1", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=i2c-atmel_mxt_ts:ACTION=change" +ACTION=="add", SUBSYSTEM=="devcoredump", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=devcoredump:ACTION=add:KERNEL_NUMBER=%n" diff --git a/crash_reporter/crash_reporter_logs.conf b/crash_reporter/crash_reporter_logs.conf index 1a69032fb..f5ca80c91 100644 --- a/crash_reporter/crash_reporter_logs.conf +++ b/crash_reporter/crash_reporter_logs.conf @@ -74,6 +74,15 @@ crash_reporter-udev-collection---TouchNoise=cat /var/log/touch_noise.log # Periodically collect touch event log for debugging (crosbug.com/p/17244) crash_reporter-udev-collection---TouchEvent=cat /var/log/touch_event.log +# Collect the last 50 lines of /var/log/messages and /var/log/net.log for +# intel wifi driver (iwlwifi) for debugging purpose. +crash_reporter-udev-collection-devcoredump-iwlwifi=\ + echo "===/var/log/messages==="; \ + tail -n 50 /var/log/messages; \ + echo "===/var/log/net.log==="; \ + tail -n 50 /var/log/net.log; \ + echo EOF + # Dump the last 50 lines of the last two powerd log files -- if the job has # already restarted, we want to see the end of the previous instance's logs. powerd=\ diff --git a/crash_reporter/crash_sender b/crash_reporter/crash_sender index c83a12ae2..2ad1e4635 100755 --- a/crash_reporter/crash_sender +++ b/crash_reporter/crash_sender @@ -69,6 +69,10 @@ RUN_FILE="/var/run/crash_sender.pid" # Maximum time to sleep between sends. SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600} +# Set this to 1 to allow uploading of device coredumps. +DEVCOREDUMP_UPLOAD_FLAG_FILE=\ +"/var/lib/crash_reporter/device_coredump_upload_allowed" + # The syslog tag for all logging we emit. TAG="$(basename $0)[$$]" @@ -156,6 +160,12 @@ is_developer_mode() { crossystem "devsw_boot?1" # exit status will be accurate } +# Return 0 if the uploading of device coredumps is allowed. +is_device_coredump_upload_allowed() { + [ -f "${DEVCOREDUMP_UPLOAD_FLAG_FILE}" ] && return 0 + return 1 +} + # Generate a uniform random number in 0..max-1. generate_uniform_random() { local max=$1 @@ -508,7 +518,8 @@ send_crashes() { local kind=$(get_kind "${meta_path}") if [ "${kind}" != "minidump" ] && \ [ "${kind}" != "kcrash" ] && \ - [ "${kind}" != "log" ]; then + [ "${kind}" != "log" ] && + [ "${kind}" != "devcore" ]; then lecho "Unknown report kind ${kind}. Removing report." remove_report "${meta_path}" continue @@ -527,6 +538,12 @@ send_crashes() { continue fi + # Ignore device coredump if device coredump uploading is not allowed. + if [ "${kind}" = "devcore" ] && ! is_device_coredump_upload_allowed; then + lecho "Ignoring device coredump. Device coredump upload not allowed." + continue + fi + if ! is_mock && ! is_official_image; then lecho "Not an official OS version. Removing crash." remove_report "${meta_path}" diff --git a/crash_reporter/udev_collector.cc b/crash_reporter/udev_collector.cc index 64ec3392c..908bbc9c4 100644 --- a/crash_reporter/udev_collector.cc +++ b/crash_reporter/udev_collector.cc @@ -8,26 +8,41 @@ #include #include +#include #include #include +#include #include #include +#include #include -static const char kCollectUdevSignature[] = "crash_reporter-udev-collection"; -static const char kGzipPath[] = "/bin/gzip"; -static const char kUdevExecName[] = "udev"; -static const char kUdevSignatureKey[] = "sig"; - using base::FilePath; -UdevCollector::UdevCollector() {} +namespace { + +const char kCollectUdevSignature[] = "crash_reporter-udev-collection"; +const char kGzipPath[] = "/bin/gzip"; +const char kUdevExecName[] = "udev"; +const char kUdevSignatureKey[] = "sig"; +const char kUdevSubsystemDevCoredump[] = "devcoredump"; +const char kDefaultDevCoredumpDirectory[] = "/sys/class/devcoredump"; +const char kDevCoredumpFilePrefixFormat[] = "devcoredump_%s"; + +} // namespace + +UdevCollector::UdevCollector() + : dev_coredump_directory_(kDefaultDevCoredumpDirectory) {} UdevCollector::~UdevCollector() {} bool UdevCollector::HandleCrash(const std::string &udev_event) { - if (!is_feedback_allowed_function_()) { - LOG(ERROR) << "No consent given to collect crash info."; + if (IsDeveloperImage()) { + LOG(INFO) << "developer image - collect udev crash info."; + } else if (is_feedback_allowed_function_()) { + LOG(INFO) << "Consent given - collect udev crash info."; + } else { + LOG(INFO) << "Ignoring - Non-developer image and no consent given."; return false; } @@ -43,24 +58,43 @@ bool UdevCollector::HandleCrash(const std::string &udev_event) { udev_event_map[iter->first] = iter->second; } - // Construct the basename string for crash_reporter_logs.conf: - // "crash_reporter-udev-collection-[action]-[name]-[subsystem]" - // If a udev field is not provided, "" is used in its place, e.g.: - // "crash_reporter-udev-collection-[action]--[subsystem]" - // Hence, "" is used as a wildcard name string. - // TODO(sque, crosbug.com/32238): Implement wildcard checking. - std::string basename = udev_event_map["ACTION"] + "-" + - udev_event_map["KERNEL"] + "-" + - udev_event_map["SUBSYSTEM"]; - std::string udev_log_name = std::string(kCollectUdevSignature) + '-' + - basename; - // Make sure the crash directory exists, or create it if it doesn't. FilePath crash_directory; if (!GetCreatedCrashDirectoryByEuid(0, &crash_directory, nullptr)) { LOG(ERROR) << "Could not get crash directory."; return false; } + + if (udev_event_map["SUBSYSTEM"] == kUdevSubsystemDevCoredump) { + int instance_number; + if (!base::StringToInt(udev_event_map["KERNEL_NUMBER"], &instance_number)) { + LOG(ERROR) << "Invalid kernel number: " + << udev_event_map["KERNEL_NUMBER"]; + return false; + } + return ProcessDevCoredump(crash_directory, instance_number); + } + + return ProcessUdevCrashLogs(crash_directory, + udev_event_map["ACTION"], + udev_event_map["KERNEL"], + udev_event_map["SUBSYSTEM"]); +} + +bool UdevCollector::ProcessUdevCrashLogs(const FilePath& crash_directory, + const std::string& action, + const std::string& kernel, + const std::string& subsystem) { + // Construct the basename string for crash_reporter_logs.conf: + // "crash_reporter-udev-collection-[action]-[name]-[subsystem]" + // If a udev field is not provided, "" is used in its place, e.g.: + // "crash_reporter-udev-collection-[action]--[subsystem]" + // Hence, "" is used as a wildcard name string. + // TODO(sque, crosbug.com/32238): Implement wildcard checking. + std::string basename = action + "-" + kernel + "-" + subsystem; + std::string udev_log_name = std::string(kCollectUdevSignature) + '-' + + basename; + // Create the destination path. std::string log_file_name = FormatDumpBasename(basename, time(nullptr), 0); @@ -85,10 +119,114 @@ bool UdevCollector::HandleCrash(const std::string &udev_event) { else crash_path = crash_path_zipped; - std::string exec_name = std::string(kUdevExecName) + "-" + - udev_event_map["SUBSYSTEM"]; + std::string exec_name = std::string(kUdevExecName) + "-" + subsystem; AddCrashMetaData(kUdevSignatureKey, udev_log_name); WriteCrashMetaData(GetCrashPath(crash_directory, log_file_name, "meta"), exec_name, crash_path.value()); return true; } + +bool UdevCollector::ProcessDevCoredump(const FilePath& crash_directory, + int instance_number) { + FilePath coredump_path = + FilePath(base::StringPrintf("%s/devcd%d/data", + dev_coredump_directory_.c_str(), + instance_number)); + if (!base::PathExists(coredump_path)) { + LOG(ERROR) << "Device coredump file " << coredump_path.value() + << " does not exist"; + return false; + } + + // Add coredump file to the crash directory. + if (!AppendDevCoredump(crash_directory, coredump_path, instance_number)) { + ClearDevCoredump(coredump_path); + return false; + } + + // Clear the coredump data to allow generation of future device coredumps + // without having to wait for the 5-minutes timeout. + return ClearDevCoredump(coredump_path); +} + +bool UdevCollector::AppendDevCoredump(const FilePath& crash_directory, + const FilePath& coredump_path, + int instance_number) { + // Retrieve the driver name of the failing device. + std::string driver_name = GetFailingDeviceDriverName(instance_number); + if (driver_name.empty()) { + LOG(ERROR) << "Failed to obtain driver name for instance: " + << instance_number; + return false; + } + + std::string coredump_prefix = + base::StringPrintf(kDevCoredumpFilePrefixFormat, driver_name.c_str()); + + std::string dump_basename = FormatDumpBasename(coredump_prefix, + time(nullptr), + instance_number); + FilePath core_path = GetCrashPath(crash_directory, dump_basename, "devcore"); + FilePath log_path = GetCrashPath(crash_directory, dump_basename, "log"); + FilePath meta_path = GetCrashPath(crash_directory, dump_basename, "meta"); + + // Collect coredump data. + if (!base::CopyFile(coredump_path, core_path)) { + LOG(ERROR) << "Failed to copy device coredumpm file from " + << coredump_path.value() << " to " << core_path.value(); + return false; + } + + // Collect additional logs if one is specified in the config file. + std::string udev_log_name = std::string(kCollectUdevSignature) + '-' + + kUdevSubsystemDevCoredump + '-' + driver_name; + bool result = GetLogContents(log_config_path_, udev_log_name, log_path); + if (result) { + AddCrashMetaUploadFile("logs", log_path.value()); + } + + WriteCrashMetaData(meta_path, coredump_prefix, core_path.value()); + + return true; +} + +bool UdevCollector::ClearDevCoredump(const FilePath& coredump_path) { + if (!base::WriteFile(coredump_path, "0", 1)) { + LOG(ERROR) << "Failed to delete the coredump data file " + << coredump_path.value(); + return false; + } + return true; +} + +std::string UdevCollector::GetFailingDeviceDriverName(int instance_number) { + FilePath failing_uevent_path = + FilePath(base::StringPrintf("%s/devcd%d/failing_device/uevent", + dev_coredump_directory_.c_str(), + instance_number)); + if (!base::PathExists(failing_uevent_path)) { + LOG(ERROR) << "Failing uevent path " << failing_uevent_path.value() + << " does not exist"; + return ""; + } + + std::string uevent_content; + if (!base::ReadFileToString(failing_uevent_path, &uevent_content)) { + LOG(ERROR) << "Failed to read uevent file " << failing_uevent_path.value(); + return ""; + } + + // Parse uevent file contents as key-value pairs. + std::vector> uevent_keyval; + base::SplitStringIntoKeyValuePairs(uevent_content, '=', '\n', &uevent_keyval); + std::vector>::const_iterator iter; + for (iter = uevent_keyval.begin(); + iter != uevent_keyval.end(); + ++iter) { + if (iter->first == "DRIVER") { + return iter->second; + } + } + + return ""; +} diff --git a/crash_reporter/udev_collector.h b/crash_reporter/udev_collector.h index 68eb85a4f..1689dd395 100644 --- a/crash_reporter/udev_collector.h +++ b/crash_reporter/udev_collector.h @@ -26,9 +26,33 @@ class UdevCollector : public CrashCollector { // could be omitted, in which case it would be treated as a wildcard (*). bool HandleCrash(const std::string& udev_event); + protected: + std::string dev_coredump_directory_; + private: friend class UdevCollectorTest; + // Process udev crash logs, collecting log files according to the config + // file (crash_reporter_logs.conf). + bool ProcessUdevCrashLogs(const base::FilePath& crash_directory, + const std::string& action, + const std::string& kernel, + const std::string& subsystem); + // Process device coredump, collecting device coredump file. + // |instance_number| is the kernel number of the virtual device for the device + // coredump instance. + bool ProcessDevCoredump(const base::FilePath& crash_directory, + int instance_number); + // Copy device coredump file to crash directory, and perform necessary + // coredump file management. + bool AppendDevCoredump(const base::FilePath& crash_directory, + const base::FilePath& coredump_path, + int instance_number); + // Clear the device coredump file by performing a dummy write to it. + bool ClearDevCoredump(const base::FilePath& coredump_path); + // Return the driver name of the device that generates the coredump. + std::string GetFailingDeviceDriverName(int instance_number); + // Mutator for unit testing. void set_log_config_path(const std::string& path) { log_config_path_ = base::FilePath(path); diff --git a/crash_reporter/udev_collector_test.cc b/crash_reporter/udev_collector_test.cc index 1705a6d93..08d9b2c2f 100644 --- a/crash_reporter/udev_collector_test.cc +++ b/crash_reporter/udev_collector_test.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -18,12 +19,25 @@ namespace { // Dummy log config file name. const char kLogConfigFileName[] = "log_config_file"; +// Dummy directory for storing device coredumps. +const char kDevCoredumpDirectory[] = "devcoredump"; + // A bunch of random rules to put into the dummy log config file. const char kLogConfigFileContents[] = "crash_reporter-udev-collection-change-card0-drm=echo change card0 drm\n" "crash_reporter-udev-collection-add-state0-cpu=echo change state0 cpu\n" + "crash_reporter-udev-collection-devcoredump-iwlwifi=echo devcoredump\n" "cros_installer=echo not for udev"; +const char kCrashLogFilePattern[] = "*.log.gz"; +const char kDevCoredumpFilePattern[] = "*.devcore"; + +// Dummy content for device coredump data file. +const char kDevCoredumpDataContents[] = "coredump"; + +// Content for failing device's uevent file. +const char kFailingDeviceUeventContents[] = "DRIVER=iwlwifi\n"; + void CountCrash() {} bool s_consent_given = true; @@ -32,10 +46,11 @@ bool IsMetrics() { return s_consent_given; } -// Returns the number of compressed crash log files found in the given path. -int GetNumLogFiles(const FilePath& path) { +// Returns the number of files found in the given path that matches the +// specified file name pattern. +int GetNumFiles(const FilePath& path, const std::string& file_pattern) { base::FileEnumerator enumerator(path, false, base::FileEnumerator::FILES, - "*.log.gz"); + file_pattern); int num_files = 0; for (FilePath file_path = enumerator.Next(); !file_path.value().empty(); @@ -60,6 +75,35 @@ class UdevCollectorTest : public ::testing::Test { collector_.HandleCrash(udev_event); } + void GenerateDevCoredump(const std::string& device_name) { + // Generate coredump data file. + ASSERT_TRUE(CreateDirectory( + FilePath(base::StringPrintf("%s/%s", + collector_.dev_coredump_directory_.c_str(), + device_name.c_str())))); + FilePath data_path = + FilePath(base::StringPrintf("%s/%s/data", + collector_.dev_coredump_directory_.c_str(), + device_name.c_str())); + ASSERT_EQ(strlen(kDevCoredumpDataContents), + base::WriteFile(data_path, + kDevCoredumpDataContents, + strlen(kDevCoredumpDataContents))); + // Generate uevent file for failing device. + ASSERT_TRUE(CreateDirectory( + FilePath(base::StringPrintf("%s/%s/failing_device", + collector_.dev_coredump_directory_.c_str(), + device_name.c_str())))); + FilePath uevent_path = + FilePath(base::StringPrintf("%s/%s/failing_device/uevent", + collector_.dev_coredump_directory_.c_str(), + device_name.c_str())); + ASSERT_EQ(strlen(kFailingDeviceUeventContents), + base::WriteFile(uevent_path, + kFailingDeviceUeventContents, + strlen(kFailingDeviceUeventContents))); + } + private: void SetUp() override { s_consent_given = true; @@ -75,6 +119,10 @@ class UdevCollectorTest : public ::testing::Test { collector_.log_config_path_ = log_config_path; collector_.ForceCrashDirectory(temp_dir_generator_.path()); + FilePath dev_coredump_path = + temp_dir_generator_.path().Append(kDevCoredumpDirectory); + collector_.dev_coredump_directory_ = dev_coredump_path.value(); + // Write to a dummy log config file. ASSERT_EQ(strlen(kLogConfigFileContents), base::WriteFile(log_config_path, @@ -90,22 +138,33 @@ class UdevCollectorTest : public ::testing::Test { TEST_F(UdevCollectorTest, TestNoConsent) { s_consent_given = false; HandleCrash("ACTION=change:KERNEL=card0:SUBSYSTEM=drm"); - EXPECT_EQ(0, GetNumLogFiles(temp_dir_generator_.path())); + EXPECT_EQ(0, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern)); } TEST_F(UdevCollectorTest, TestNoMatch) { // No rule should match this. HandleCrash("ACTION=change:KERNEL=foo:SUBSYSTEM=bar"); - EXPECT_EQ(0, GetNumLogFiles(temp_dir_generator_.path())); + EXPECT_EQ(0, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern)); } TEST_F(UdevCollectorTest, TestMatches) { // Try multiple udev events in sequence. The number of log files generated // should increase. HandleCrash("ACTION=change:KERNEL=card0:SUBSYSTEM=drm"); - EXPECT_EQ(1, GetNumLogFiles(temp_dir_generator_.path())); + EXPECT_EQ(1, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern)); HandleCrash("ACTION=add:KERNEL=state0:SUBSYSTEM=cpu"); - EXPECT_EQ(2, GetNumLogFiles(temp_dir_generator_.path())); + EXPECT_EQ(2, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern)); +} + +TEST_F(UdevCollectorTest, TestDevCoredump) { + GenerateDevCoredump("devcd0"); + HandleCrash("ACTION=add:KERNEL_NUMBER=0:SUBSYSTEM=devcoredump"); + EXPECT_EQ(1, GetNumFiles(temp_dir_generator_.path(), + kDevCoredumpFilePattern)); + GenerateDevCoredump("devcd1"); + HandleCrash("ACTION=add:KERNEL_NUMBER=1:SUBSYSTEM=devcoredump"); + EXPECT_EQ(2, GetNumFiles(temp_dir_generator_.path(), + kDevCoredumpFilePattern)); } // TODO(sque, crosbug.com/32238) - test wildcard cases, multiple identical udev