diff --git a/bootstat/boot_reason_test.sh b/bootstat/boot_reason_test.sh index 1043df16f..b194bbe7b 100755 --- a/bootstat/boot_reason_test.sh +++ b/bootstat/boot_reason_test.sh @@ -808,6 +808,63 @@ test_kernel_panic() { exitPstore } +[ "USAGE: test_kernel_panic_subreason + +kernel_panic_subreason test: +- echo SysRq : Trigger a crash : 'test' | adb shell su root tee /dev/kmsg +- echo c | adb shell su root tee /proc/sysrq-trigger +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report kernel_panic,sysrq,test" ] +test_kernel_panic_subreason() { + checkDebugBuild || return + duration_test ">90" + panic_msg="kernel_panic,sysrq,test" + enterPstore + if [ ${?} != 0 ]; then + echo " or functional bootloader" >&2 + panic_msg="\(kernel_panic,sysrq,test\|kernel_panic\)" + pstore_ok=true + fi + echo "SysRq : Trigger a crash : 'test'" | adb shell su root tee /dev/kmsg + echo c | adb shell su root tee /proc/sysrq-trigger >/dev/null + wait_for_screen + EXPECT_PROPERTY sys.boot.reason ${panic_msg} + EXPECT_PROPERTY persist.sys.boot.reason ${panic_msg} + report_bootstat_logs kernel_panic,sysrq,test \ + "-bootstat: Unknown boot reason: kernel_panic,sysrq,test" + exitPstore +} + +[ "USAGE: test_kernel_panic_hung + +kernel_panic_hung test: +- echo Kernel panic - not synching: hung_task: blocked tasks | + adb shell su root tee /dev/kmsg +- adb reboot warm +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report kernel_panic,hung" ] +test_kernel_panic_hung() { + checkDebugBuild || return + duration_test + panic_msg="kernel_panic,hung" + enterPstore + if [ ${?} != 0 ]; then + echo " or functional bootloader" >&2 + panic_msg="\(kernel_panic,hung\|reboot,hung\)" + pstore_ok=true + fi + echo "Kernel panic - not syncing: hung_task: blocked tasks" | + adb shell su root tee /dev/kmsg + adb reboot warm + wait_for_screen + EXPECT_PROPERTY sys.boot.reason ${panic_msg} + EXPECT_PROPERTY persist.sys.boot.reason ${panic_msg} + report_bootstat_logs kernel_panic,hung + exitPstore +} + [ "USAGE: test_warm warm test @@ -1067,12 +1124,12 @@ if [ -z "$*" ]; then if [ -z "${2}" ]; then # Hard coded should shell fail to find them above (search/permission issues) eval set properties ota cold factory_reset hard battery unknown \ - kernel_panic warm thermal_shutdown userrequested_shutdown \ - shell_reboot adb_reboot Its_Just_So_Hard_reboot \ - bootloader_normal bootloader_watchdog bootloader_kernel_panic \ - bootloader_oem_powerkey bootloader_wdog_reset \ - bootloader_wdog_reset bootloader_wdog_reset bootloader_hard \ - bootloader_recovery + kernel_panic kernel_panic_subreason kernel_panic_hung warm \ + thermal_shutdown userrequested_shutdown shell_reboot adb_reboot \ + Its_Just_So_Hard_reboot bootloader_normal bootloader_watchdog \ + bootloader_kernel_panic bootloader_oem_powerkey \ + bootloader_wdog_reset bootloader_wdog_reset bootloader_wdog_reset \ + bootloader_hard bootloader_recovery fi if [ X"nothing" = X"${1}" ]; then shift 1 diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp index 29988c837..e60e6be25 100644 --- a/bootstat/bootstat.cpp +++ b/bootstat/bootstat.cpp @@ -295,6 +295,13 @@ const std::map kBootReasonMap = { {"reboot,tool", 151}, {"reboot,wdt", 152}, {"reboot,unknown", 153}, + {"kernel_panic,audit", 154}, + {"kernel_panic,atomic", 155}, + {"kernel_panic,hung", 156}, + {"kernel_panic,hung,rcu", 157}, + {"kernel_panic,init", 158}, + {"kernel_panic,oom", 159}, + {"kernel_panic,stack", 160}, }; // Converts a string value representing the reason the system booted to an @@ -519,9 +526,100 @@ void transformReason(std::string& reason) { [](char c) { return ::isprint(c) ? c : '?'; }); } -// Pull out and correct quoted (') subreason, pos just beyond first quote. -// Check subreasons for reboot, and kernel_panic,sysrq, -std::string getSubreason(const std::string& content, size_t pos) { +// Check subreasons for reboot, kernel_panic,sysrq, or +// kernel_panic,. +// +// If quoted flag is set, pull out and correct single quoted ('), newline (\n) +// or unprintable character terminated subreason, pos is supplied just beyond +// first quote. if quoted false, pull out and correct newline (\n) or +// unprintable character terminated subreason. +// +// Heuristics to find termination is painted into a corner: + +// single bit error for quote ' that we can block. It is acceptable for +// the others 7, g in reason. 2/9 chance will miss the terminating quote, +// but there is always the terminating newline that usually immediately +// follows to fortify our chances. +bool likely_single_quote(char c) { + switch (static_cast(c)) { + case '\'': // '\'' + case '\'' ^ 0x01: // '&' + case '\'' ^ 0x02: // '%' + case '\'' ^ 0x04: // '#' + case '\'' ^ 0x08: // '/' + return true; + case '\'' ^ 0x10: // '7' + break; + case '\'' ^ 0x20: // '\a' (unprintable) + return true; + case '\'' ^ 0x40: // 'g' + break; + case '\'' ^ 0x80: // 0xA7 (unprintable) + return true; + } + return false; +} + +// ::isprint(c) and likely_space() will prevent us from being called for +// fundamentally printable entries, except for '\r' and '\b'. +// +// Except for * and J, single bit errors for \n, all others are non- +// printable so easy catch. It is _acceptable_ for *, J or j to exist in +// the reason string, so 2/9 chance we will miss the terminating newline. +// +// NB: J might not be acceptable, except if at the beginning or preceded +// with a space, '(' or any of the quotes and their BER aliases. +// NB: * might not be acceptable, except if at the beginning or preceded +// with a space, another *, or any of the quotes or their BER aliases. +// +// To reduce the chances to closer to 1/9 is too complicated for the gain. +bool likely_newline(char c) { + switch (static_cast(c)) { + case '\n': // '\n' (unprintable) + case '\n' ^ 0x01: // '\r' (unprintable) + case '\n' ^ 0x02: // '\b' (unprintable) + case '\n' ^ 0x04: // 0x0E (unprintable) + case '\n' ^ 0x08: // 0x02 (unprintable) + case '\n' ^ 0x10: // 0x1A (unprintable) + return true; + case '\n' ^ 0x20: // '*' + case '\n' ^ 0x40: // 'J' + break; + case '\n' ^ 0x80: // 0x8A (unprintable) + return true; + } + return false; +} + +// ::isprint(c) will prevent us from being called for all the printable +// matches below. If we let unprintables through because of this, they +// get converted to underscore (_) by the validation phase. +bool likely_space(char c) { + switch (static_cast(c)) { + case ' ': // ' ' + case ' ' ^ 0x01: // '!' + case ' ' ^ 0x02: // '"' + case ' ' ^ 0x04: // '$' + case ' ' ^ 0x08: // '(' + case ' ' ^ 0x10: // '0' + case ' ' ^ 0x20: // '\0' (unprintable) + case ' ' ^ 0x40: // 'P' + case ' ' ^ 0x80: // 0xA0 (unprintable) + case '\t': // '\t' + case '\t' ^ 0x01: // '\b' (unprintable) (likely_newline counters) + case '\t' ^ 0x02: // '\v' (unprintable) + case '\t' ^ 0x04: // '\r' (unprintable) (likely_newline counters) + case '\t' ^ 0x08: // 0x01 (unprintable) + case '\t' ^ 0x10: // 0x19 (unprintable) + case '\t' ^ 0x20: // ')' + case '\t' ^ 0x40: // '1' + case '\t' ^ 0x80: // 0x89 (unprintable) + return true; + } + return false; +} + +std::string getSubreason(const std::string& content, size_t pos, bool quoted) { static constexpr size_t max_reason_length = 256; std::string subReason(content.substr(pos, max_reason_length)); @@ -529,20 +627,24 @@ std::string getSubreason(const std::string& content, size_t pos) { for (const auto& s : knownReasons) { correctForBitErrorOrUnderline(subReason, s); } + std::string terminator(quoted ? "'" : ""); for (const auto& m : kBootReasonMap) { if (m.first.length() <= strlen("cold")) continue; // too short? - if (correctForBitErrorOrUnderline(subReason, m.first + "'")) continue; + if (correctForBitErrorOrUnderline(subReason, m.first + terminator)) continue; if (m.first.length() <= strlen("reboot,cold")) continue; // short? if (android::base::StartsWith(m.first, "reboot,")) { - correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("reboot,")) + "'"); + correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("reboot,")) + terminator); } else if (android::base::StartsWith(m.first, "kernel_panic,sysrq,")) { - correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("kernel_panic,sysrq,")) + "'"); + correctForBitErrorOrUnderline(subReason, + m.first.substr(strlen("kernel_panic,sysrq,")) + terminator); + } else if (android::base::StartsWith(m.first, "kernel_panic,")) { + correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("kernel_panic,")) + terminator); } } for (pos = 0; pos < subReason.length(); ++pos) { char c = subReason[pos]; - // #, &, %, / are common single bit error for ' that we can block - if (!::isprint(c) || (c == '\'') || (c == '#') || (c == '&') || (c == '%') || (c == '/')) { + if (!(::isprint(c) || likely_space(c)) || likely_newline(c) || + (quoted && likely_single_quote(c))) { subReason.erase(pos); break; } @@ -561,7 +663,7 @@ bool addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) { static constexpr char sysrqSubreason[] = "SysRq : Trigger a crash : '"; auto pos = console.rfind(sysrqSubreason); if (pos != std::string::npos) { - ret += "," + getSubreason(console, pos + strlen(sysrqSubreason)); + ret += "," + getSubreason(console, pos + strlen(sysrqSubreason), /* quoted */ true); } return true; } @@ -574,6 +676,43 @@ bool addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) { ret = "kernel_panic,bug"; return true; } + + std::string panic("Kernel panic - not syncing: "); + auto pos = console.rfind(panic); + if (pos != std::string::npos) { + static const std::vector> panicReasons = { + {"Out of memory", "oom"}, + {"out of memory", "oom"}, + {"Oh boy, that early out of memory", "oom"}, // omg + {"BUG!", "bug"}, + {"hung_task: blocked tasks", "hung"}, + {"audit: ", "audit"}, + {"scheduling while atomic", "atomic"}, + {"Attempted to kill init!", "init"}, + {"Requested init", "init"}, + {"No working init", "init"}, + {"Could not decompress init", "init"}, + {"RCU Stall", "hung,rcu"}, + {"stack-protector", "stack"}, + {"kernel stack overflow", "stack"}, + {"Corrupt kernel stack", "stack"}, + {"low stack detected", "stack"}, + {"corrupted stack end", "stack"}, + }; + + ret = "kernel_panic"; + for (auto& s : panicReasons) { + if (console.find(panic + s.first, pos) != std::string::npos) { + ret += "," + s.second; + return true; + } + } + auto reason = getSubreason(console, pos + panic.length(), /* newline */ false); + if (reason.length() > 3) { + ret += "," + reason; + } + return true; + } return false; } @@ -689,7 +828,7 @@ std::string BootReasonStrToReason(const std::string& boot_reason) { static const char cmd[] = "reboot: Restarting system with command '"; size_t pos = console.rfind(cmd); if (pos != std::string::npos) { - std::string subReason(getSubreason(content, pos + strlen(cmd))); + std::string subReason(getSubreason(content, pos + strlen(cmd), /* quoted */ true)); if (subReason != "") { // Will not land "reboot" as that is too blunt. if (isKernelRebootReason(subReason)) { ret = "reboot," + subReason; // User space can't talk kernel reasons.