diff --git a/bootstat/Android.bp b/bootstat/Android.bp index dd357edd7..6734f4d91 100644 --- a/bootstat/Android.bp +++ b/bootstat/Android.bp @@ -63,6 +63,7 @@ cc_binary { name: "bootstat", defaults: ["bootstat_defaults"], static_libs: ["libbootstat"], + shared_libs: ["liblogcat"], init_rc: ["bootstat.rc"], srcs: ["bootstat.cpp"], } diff --git a/bootstat/boot_reason_test.sh b/bootstat/boot_reason_test.sh new file mode 100755 index 000000000..7b0b0daeb --- /dev/null +++ b/bootstat/boot_reason_test.sh @@ -0,0 +1,689 @@ +#! /bin/bash +# +# Bootstat boot reason tests +# +# throughout testing: +# - manual tests can only run on eng/userdebug builds +# - watch adb logcat -b all -d -s bootstat +# - watch adb logcat -b all -d | audit2allow +# - wait until screen is up, boot has completed, can mean wait for +# sys.boot_completed=1 and sys.logbootcomplete=1 to be true +# +# All test frames, and nothing else, must be function names prefixed and +# specifiged with the pattern 'test_() {' as this is also how the +# script discovers the full list of tests by inspecting its own code. +# + +# Helper variables + +SPACE=" " +ESCAPE="" +TAB=" " +GREEN="${ESCAPE}[38;5;40m" +RED="${ESCAPE}[38;5;196m" +NORMAL="${ESCAPE}[0m" + +# Helper functions + +[ "USAGE: inFastboot + +Returns: true if device is in fastboot mode" ] +inFastboot() { + fastboot devices | grep "^${ANDROID_SERIAL}[${SPACE}${TAB}]" > /dev/null +} + +[ "USAGE: format_duration + +human readable output whole seconds, whole minutes or mm:ss" ] +format_duration() { + if [ -z "${1}" ]; then + echo unknown + return + fi + seconds=`expr ${1} % 60` + minutes=`expr ${1} / 60` + if [ 0 -eq ${minutes} ]; then + if [ 1 -eq ${1} ]; then + echo 1 second + return + fi + echo ${1} seconds + return + elif [ 60 -eq ${1} ]; then + echo 1 minute + return + elif [ 0 -eq ${seconds} ]; then + echo ${minutes} minutes + return + fi + echo ${minutes}:`expr ${seconds} / 10``expr ${seconds} % 10` +} + +wait_for_screen_timeout=900 +[ "USAGE: wait_for_screen [-n] [TIMEOUT] + +-n - echo newline at exit +TIMEOUT - default `format_duration ${wait_for_screen_timeout}`" ] +wait_for_screen() { + exit_function=true + if [ X"-n" = X"${1}" ]; then + exit_function=echo + shift + fi + timeout=${wait_for_screen_timeout} + if [ ${#} -gt 0 ]; then + timeout=${1} + shift + fi + counter=0 + while true; do + [ 0 = ${counter} ] || + adb wait-for-device /dev/null 2>/dev/null + vals=`adb shell getprop /dev/null | + sed -n 's/[[]sys[.]\(boot_completed\|logbootcomplete\)[]]: [[]\([01]\)[]]$/\1=\2/p'` + [ 0 = ${counter} ] || + sleep 1 + if [ "${vals}" = "`echo boot_completed=1 ; echo logbootcomplete=1`" ]; then + break + fi + if [ "${vals}" = "`echo logbootcomplete=1 ; echo boot_completed=1`" ]; then + break + fi + counter=`expr ${counter} + 1` + if [ ${counter} -gt ${timeout} ]; then + ${exit_function} + echo "ERROR: wait_for_screen() timed out (`format_duration ${timeout}`)" >&2 + return 1 + fi + done + ${exit_function} +} + +[ "USAGE: EXPECT_EQ [message] + +Returns true if (regex) lval matches rval" ] +EXPECT_EQ() { + lval="${1}" + rval="${2}" + shift 2 + if ! ( echo X"${rval}" | grep '^X'"${lval}"'$' >/dev/null 2>/dev/null ); then + echo "ERROR: expected \"${lval}\" got \"${rval}\"" >&2 + if [ -n "${*}" ] ; then + echo " ${*}" >&2 + fi + return 1 + fi + if [ -n "${*}" ] ; then + if [ X"${lval}" != X"${rval}" ]; then + echo "INFO: ok \"${lval}\"(=\"${rval}\") ${*}" >&2 + else + echo "INFO: ok \"${lval}\" ${*}" >&2 + fi + fi + return 0 +} + +[ "USAGE: EXPECT_PROPERTY + +Returns true if current return (regex) value is true and the result matches" ] +EXPECT_PROPERTY() { + save_ret=${?} + property="${1}" + value="${2}" + shift 2 + val=`adb shell getprop ${property} 2>&1` + EXPECT_EQ "${value}" "${val}" for Android property ${property} || + save_ret=${?} + return ${save_ret} +} + +[ "USAGE: report_bootstat_logs ... + +if not prefixed with a minus (-), will become a series of expected +matches: + + bootstat: Canonical boot reason: + +If prefixed with a minus, will look for an exact match after +removing the minux prefix. All expected content is _dropped_ from the output +and in essence forms a known blacklist, unexpected content will show. + +Report any logs, minus a known blacklist, preserve the current exit status" ] +report_bootstat_logs() { + save_ret=${?} + match= + for i in ${*}; do + if [ X"${i}" != X"${i#-}" ] ; then + match="${match} +${i#-}" + else + match="${match} +bootstat: Canonical boot reason: ${i}" + fi + done + adb logcat -b all -d | + grep bootstat | + grep -v -F "bootstat: Service started: /system/bin/bootstat --record_boot_complete${match} +bootstat: Failed to read /data/misc/bootstat/post_decrypt_time_elapsed: No such file or directory +bootstat: Failed to parse boot time record: /data/misc/bootstat/post_decrypt_time_elapsed +bootstat: Service started: /system/bin/bootstat --record_boot_reason +bootstat: Service started: /system/bin/bootstat --record_time_since_factory_reset +bootstat: Service started: /system/bin/bootstat -l +bootstat: Battery level at shutdown 100% +bootstat: Battery level at startup 100% +init : Parsing file /system/etc/init/bootstat.rc... +init : processing action (post-fs-data) from (/system/etc/init/bootstat.rc +init : processing action (boot) from (/system/etc/init/bootstat.rc +init : processing action (ro.boot.bootreason=*) from (/system/etc/init/bootstat.rc +init : processing action (sys.boot_completed=1 && sys.logbootcomplete=1) from (/system/etc/init/bootstat.rc +init : Command 'exec - system log -- /system/bin/bootstat --record_boot_complete' action=sys.boot_completed=1 && sys.logbootcomplete=1 (/system/etc/init/bootstat.rc: +init : Command 'exec - system log -- /system/bin/bootstat --record_boot_reason' action=sys.boot_completed=1 && sys.logbootcomplete=1 (/system/etc/init/bootstat.rc: +init : Command 'exec - system log -- /system/bin/bootstat --record_time_since_factory_reset' action=sys.boot_completed=1 && sys.logbootcomplete=1 (/system/etc/init/bootstat.rc: + (/system/bin/bootstat --record_boot_complete)'... + (/system/bin/bootstat --record_boot_complete)' (pid${SPACE} + (/system/bin/bootstat --record_boot_reason)'... + (/system/bin/bootstat --record_boot_reason)' (pid${SPACE} + (/system/bin/bootstat --record_time_since_factory_reset)'... + (/system/bin/bootstat --record_time_since_factory_reset)' (pid${SPACE} + (/system/bin/bootstat -l)'... + (/system/bin/bootstat -l)' (pid " | + grep -v 'bootstat: Unknown boot reason: $' # Hikey Special + return ${save_ret} +} + +[ "USAGE: start_test [message] + +Record start of test, preserve exit status" ] +start_test() { + save_ret=${?} + START=`date +%s` + echo "${GREEN}[ RUN ]${NORMAL} ${TEST} ${*}" + return ${save_ret} +} + +[ "USAGE: end_test [message] + +Document duration and success of test, preserve exit status" ] +end_test() { + save_ret=${?} + END=`date +%s` + duration=`expr ${END} - ${START} 2>/dev/null` + [ 0 = ${duration} ] || + echo INFO: ${TEST} test duration `format_duration ${duration}` >&2 + if [ ${save_ret} = 0 ]; then + echo "${GREEN}[ OK ]${NORMAL} ${TEST} ${*}" + else + echo "${RED}[ FAILED ]${NORMAL} ${TEST} ${*}" + fi + return ${save_ret} +} + +[ "USAGE: wrap_test [message] + +All tests below are wrapped with this helper" ] +wrap_test() { + if [ -z "${1}" -o X"nothing" = X"${1}" ]; then + return + fi + TEST=${1} + shift + start_test ${1} + eval test_${TEST} + end_test ${2} +} + +[ "USAGE: validate_property + +Check property for CTS compliance with our expectations. Return a cleansed +string representing what is acceptable. + +NB: must roughly match heuristics in system/core/bootstat/bootstat.cpp" ] +validate_property() { + var=`adb shell getprop ${1} 2>&1` + var=`echo -n ${var} | + tr '[A-Z]' '[a-z]' | + tr ' \f\t\r\n' '_____'` + case ${var} in + watchdog) ;; + watchdog,?*) ;; + kernel_panic) ;; + kernel_panic,?*) ;; + recovery) ;; + recovery,?*) ;; + bootloader) ;; + bootloader,?*) ;; + cold) ;; + cold,?*) ;; + hard) ;; + hard,?*) ;; + warm) ;; + warm,?*) ;; + shutdown) ;; + shutdown,?*) ;; + reboot) ;; + reboot,?*) ;; + # Aliases + *wdog* | *watchdog* ) var="watchdog" ;; + *powerkey* ) var="cold,powerkey" ;; + *panic* | *kernel_panic*) var="kernel_panic" ;; + *thermal*) var="shutdown,thermal" ;; + *s3_wakeup*) var="warm,s3_wakeup" ;; + *hw_reset*) var="hard,hw_reset" ;; + *bootloader*) var="bootloader" ;; + ?*) var="reboot,${var}" ;; + *) var="reboot" ;; + esac + echo ${var} +} + +# +# Actual test frames +# + +[ "USAGE: test_properties + +properties test +- (wait until screen is up, boot has completed) +- adb shell getprop ro.boot.bootreason (bootloader reason) +- adb shell getprop persist.sys.boot.reason (last reason) +- adb shell getprop sys.boot.reason (system reason) +- NB: all should have a value that is compliant with our known set." ] +test_properties() { + wait_for_screen + retval=0 + check_set="ro.boot.bootreason persist.sys.boot.reason sys.boot.reason" + bootloader="" + # NB: this test could fail if performed _after_ optional_factory_reset test + # and will report + # ERROR: expected "reboot" got "" + # for Android property persist.sys.boot.reason + # following is mitigation for the persist.sys.boot.reason, skip it + if [ "reboot,factory_reset" = `validate_property ro.boot_bootreason` ]; then + check_set="ro.boot.bootreason sys.boot.reason" + bootloader="bootloader" + fi + for prop in ${check_set}; do + reason=`validate_property ${prop}` + EXPECT_PROPERTY ${prop} ${reason} || retval=${?} + done + # sys.boot.reason is last for a reason + report_bootstat_logs ${reason} ${bootloader} + return ${retval} +} + +[ "USAGE: test_ota + +ota test +- rm out/.kati_stamp-* out/build_date.txt out/build_number.txt +- rm out/target/product/*/*/*.prop +- rm -r out/target/product/*/obj/ETC/system_build_prop_intermediates +- m +- NB: ro.build.date.utc should update +- fastboot flashall +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report ota + +Decision to change the build itself rather than trick bootstat by +rummaging through its data files was made." ] +test_ota() { + echo "INFO: expected duration of ${TEST} test about 5 minutes or more" >&2 + echo " extended by build and flashing times" >&2 + if [ -z "${TARGET_PRODUCT}" -o \ + -z "${ANDROID_PRODUCT_OUT}" -o \ + -z "${ANDROID_BUILD_TOP}" -o \ + -z "${TARGET_BUILD_VARIANT}" ]; then + echo "ERROR: Missing envsetup.sh and lunch" >&2 + return 1 + fi + rm ${ANDROID_PRODUCT_OUT%/out/*}/out/.kati_stamp-* || + true + rm ${ANDROID_PRODUCT_OUT%/out/*}/out/build_date.txt || + true + rm ${ANDROID_PRODUCT_OUT%/out/*}/out/build_number.txt || + true + rm ${ANDROID_PRODUCT_OUT}/*/*.prop || + true + rm -r ${ANDROID_PRODUCT_OUT}/obj/ETC/system_build_prop_intermediates || + true + pushd ${ANDROID_BUILD_TOP} >&2 + make -j50 >&2 + if [ ${?} != 0 ]; then + popd >&2 + return 1 + fi + if ! inFastboot; then + adb reboot-bootloader >&2 + fi + fastboot flashall >&2 + popd >&2 + wait_for_screen + EXPECT_PROPERTY sys.boot.reason "\(reboot,ota\|bootloader\)" + EXPECT_PROPERTY persist.sys.boot.reason reboot,bootloader + report_bootstat_logs reboot,ota bootloader +} + +[ "USAGE: test_optional_ota + +fast and fake (touch build_date on device to make it different)" ] +test_optional_ota() { + echo "INFO: expected duration of ${TEST} test about 45 seconds" >&2 + adb shell su root touch /data/misc/bootstat/build_date >&2 + adb reboot ota + wait_for_screen + EXPECT_PROPERTY sys.boot.reason reboot,ota + EXPECT_PROPERTY persist.sys.boot.reason reboot,ota + report_bootstat_logs reboot,ota +} + +[ "USAGE: [TEST=] blind_reboot_test [] + +Simple tests helper +- adb reboot +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report , or overriden + +We interleave the simple reboot tests between the hard/complex ones +as a means of checking sanity and any persistent side effect of the +other tests." ] +blind_reboot_test() { + if [ -z "${1}" ]; then + set ${TEST} + fi + echo "INFO: expected duration of ${TEST} test roughly 45 seconds" >&2 + adb reboot ${TEST} + wait_for_screen + EXPECT_PROPERTY sys.boot.reason ${1} + EXPECT_PROPERTY persist.sys.boot.reason reboot,${TEST} + report_bootstat_logs ${1} +} + +[ "USAGE: test_cold + +cold test +- adb reboot cold +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report cold" ] +test_cold() { + blind_reboot_test +} + +[ "USAGE: test_factory_reset + +factory_reset test +- adb shell su root rm /data/misc/bootstat/build_date +- adb reboot +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report factory_reset + +Decision to rummage through bootstat data files was made as +a _real_ factory_reset is too destructive to the device." ] +test_factory_reset() { + echo "INFO: expected duration of ${TEST} test roughly 45 seconds" >&2 + adb shell su root rm /data/misc/bootstat/build_date >&2 + adb reboot >&2 + wait_for_screen + EXPECT_PROPERTY sys.boot.reason reboot,factory_reset + EXPECT_PROPERTY persist.sys.boot.reason reboot, + report_bootstat_logs reboot,factory_reset reboot, \ + "-bootstat: Failed to read /data/misc/bootstat/build_date: No such file or directory" \ + "-bootstat: Failed to parse boot time record: /data/misc/bootstat/build_date" +} + +[ "USAGE: test_optional_factory_reset + +factory_reset test +- adb reboot-bootloader +- fastboot format userdata +- fastboot reboot +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report factory_reset + +For realz, and disruptive" ] +test_optional_factory_reset() { + echo "INFO: expected duration of ${TEST} test roughly a minute" >&2 + if ! inFastboot; then + adb reboot-bootloader + fi + fastboot format userdata >&2 + fastboot reboot >&2 + wait_for_screen + EXPECT_PROPERTY sys.boot.reason reboot,factory_reset + EXPECT_PROPERTY persist.sys.boot.reason "" + report_bootstat_logs reboot,factory_reset bootloader \ + "-bootstat: Failed to read /data/misc/bootstat/last_boot_time_utc: No such file or directory" \ + "-bootstat: Failed to parse boot time record: /data/misc/bootstat/last_boot_time_utc" \ + "-bootstat: Failed to read /data/misc/bootstat/build_date: No such file or directory" \ + "-bootstat: Failed to parse boot time record: /data/misc/bootstat/build_date" \ + "-bootstat: Failed to read /data/misc/bootstat/factory_reset: No such file or directory" \ + "-bootstat: Failed to parse boot time record: /data/misc/bootstat/factory_reset" +} + +[ "USAGE: test_hard + +hard test: +- adb reboot hard +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report hard" ] +test_hard() { + blind_reboot_test +} + +[ "USAGE: test_battery + +battery test (trick): +- echo healthd: battery l=2 | adb shell su root tee /dev/kmsg ; adb reboot warm +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report reboot,battery, unless healthd managed to log + before reboot in above trick. + +- Bonus points (manual extras) +- Make sure the following is added to the /init.rc file in post-fs + section before logd is started: + + setprop logd.kernel false + + rm /sys/fs/pstore/console-ramoops + + rm /sys/fs/pstore/console-ramoops-0 + + write /dev/kmsg \"healthd: battery l=2 + +\" +- adb reboot fs +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report reboot,battery +- (replace set logd.kernel true to the above, and retry test)" ] +test_battery() { + echo "INFO: expected duration of ${TEST} test roughly two minutes" >&2 + echo healthd: battery l=2 | adb shell su root tee /dev/kmsg >/dev/null + adb reboot warm >&2 + wait_for_screen + adb shell su root \ + cat /proc/fs/pstore/console-ramoops \ + /proc/fs/pstore/console-ramoops-0 2>/dev/null | + grep 'healthd: battery l=' | + tail -1 | + grep 'healthd: battery l=2' >/dev/null || ( + if ! EXPECT_PROPERTY sys.boot.reason reboot,battery >/dev/null 2>/dev/null; then + # retry + echo healthd: battery l=2 | adb shell su root tee /dev/kmsg >/dev/null + adb reboot warm >&2 + wait_for_screen + fi + ) + + EXPECT_PROPERTY sys.boot.reason shutdown,battery + EXPECT_PROPERTY persist.sys.boot.reason reboot,warm + report_bootstat_logs shutdown,battery "-bootstat: Battery level at shutdown 2%" +} + +[ "USAGE: test_unknown + +unknown test +- adb reboot unknown +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report reboot,unknown +- NB: expect log \"... I bootstat: Unknown boot reason: reboot,unknown\"" ] +test_unknown() { + blind_reboot_test reboot,unknown +} + +[ "USAGE: test_kernel_panic + +kernel_panic test: +- echo c | adb shell su root tee /proc/sysrq-trigger +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report kernel_panic,sysrq" ] +test_kernel_panic() { + echo "INFO: expected duration of ${TEST} test > 2 minutes" >&2 + echo c | adb shell su root tee /proc/sysrq-trigger >/dev/null + wait_for_screen + EXPECT_PROPERTY sys.boot.reason kernel_panic,sysrq + EXPECT_PROPERTY persist.sys.boot.reason kernel_panic,sysrq + report_bootstat_logs kernel_panic,sysrq +} + +[ "USAGE: test_warm + +warm test +- adb reboot warm +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report warm" ] +test_warm() { + blind_reboot_test +} + +[ "USAGE: test_thermal_shutdown + +thermal shutdown test: +- adb shell setprop sys.powerctl shutdown,thermal +- (power up the device) +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report shutdown,thermal" ] +test_thermal_shutdown() { + echo "INFO: expected duration of ${TEST} test roughly a minute plus" >&2 + echo " power on request" >&2 + adb shell setprop sys.powerctl shutdown,thermal + sleep 5 + echo -n "WARNING: Please power device back up, waiting ... " >&2 + wait_for_screen -n >&2 + EXPECT_PROPERTY sys.boot.reason shutdown,thermal + EXPECT_PROPERTY persist.sys.boot.reason shutdown,thermal + report_bootstat_logs shutdown,thermal +} + +[ "USAGE: test_userrequested_shutdown + +userrequested shutdown test: +- adb shell setprop sys.powerctl shutdown,userrequested +- (power up the device) +- (wait until screen is up, boot has completed) +- adb shell getprop sys.boot.reason +- NB: should report shutdown,userrequested" ] +test_userrequested_shutdown() { + echo "INFO: expected duration of ${TEST} test roughly a minute plus" >&2 + echo " power on request" >&2 + adb shell setprop sys.powerctl shutdown,userrequested + sleep 5 + echo -n "WARNING: Please power device back up, waiting ... " >&2 + wait_for_screen -n >&2 + EXPECT_PROPERTY sys.boot.reason shutdown,userrequested + EXPECT_PROPERTY persist.sys.boot.reason shutdown,userrequested + report_bootstat_logs shutdown,userrequested +} + +[ "USAGE: ${0##*/} [-s SERIAL] [tests] + +Mainline executive to run the above tests" ] + +# Rudimentary argument parsing + +if [ ${#} -ge 2 -a X"-s" = X"${1}" ]; then + export ANDROID_SERIAL="${2}" + shift 2 +fi + +if [ X"--help" = X"${1}" -o X"-h" = X"${1}" -o X"-?" = X"${1}" ]; then + echo "USAGE: ${0##*/} [-s SERIAL] [tests]" + echo tests - `sed -n 's/^test_\([^ ()]*\)() {/\1/p' $0 &2 + echo "${RED}[ FAILED ]${NORMAL}" + exit 1 + fi + echo "WARNING: no target device specified" >&2 +fi + +ret=0 + +# Test Series +if [ X"all" = X"${*}" ]; then + # automagically pick up all test_s. + eval set nothing `sed -n 's/^test_\([^ ()]*\)() {/\1/p' $0 , except test_optional_. + eval set nothing `sed -n 's/^test_\([^ ()]*\)() {/\1/p' $0 &2 +echo +failures= +successes= +for t in "${@}"; do + wrap_test ${t} + retval=${?} + if [ 0 = ${retval} ]; then + if [ -z "${successes}" ]; then + successes=${t} + else + successes="${successes} ${t}" + fi + else + ret=${retval} + if [ -z "${failures}" ]; then + failures=${t} + else + failures="${failures} ${t}" + fi + fi + echo +done + +if [ -n "${successes}" ]; then + echo "${GREEN}[ PASSED ]${NORMAL} ${successes}" +fi +if [ -n "${failures}" ]; then + echo "${RED}[ FAILED ]${NORMAL} ${failures}" +fi +exit ${ret} diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp index bd611f0b9..418c71165 100644 --- a/bootstat/bootstat.cpp +++ b/bootstat/bootstat.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -32,11 +33,14 @@ #include #include +#include #include #include #include #include +#include #include +#include #include #include "boot_event_record_store.h" @@ -118,6 +122,14 @@ std::string GetProperty(const char* key) { return std::string(&temp[0], len); } +void SetProperty(const char* key, const std::string& val) { + property_set(key, val.c_str()); +} + +void SetProperty(const char* key, const char* val) { + property_set(key, val); +} + constexpr int32_t kUnknownBootReason = 1; // A mapping from boot reason string, as read from the ro.boot.bootreason @@ -175,7 +187,21 @@ const std::map kBootReasonMap = { {"reboot,cold", 48}, {"reboot,recovery", 49}, {"thermal_shutdown", 50}, - {"s3_wakeup", 51} + {"s3_wakeup", 51}, + {"kernel_panic,sysrq", 52}, + {"kernel_panic,NULL", 53}, + {"kernel_panic,BUG", 54}, + {"bootloader", 55}, + {"cold", 56}, + {"hard", 57}, + {"warm", 58}, + {"recovery", 59}, + {"thermal-shutdown", 60}, + {"shutdown,thermal", 61}, + {"shutdown,battery", 62}, + {"reboot,ota", 63}, + {"reboot,factory_reset", 64}, + {"reboot,", 65}, }; // Converts a string value representing the reason the system booted to an @@ -191,6 +217,323 @@ int32_t BootReasonStrToEnum(const std::string& boot_reason) { return kUnknownBootReason; } +// Canonical list of supported primary reboot reasons. +const std::vector knownReasons = { + // kernel + "watchdog", + "kernel_panic", + // strong + "recovery", // Should not happen from ro.boot.bootreason + "bootloader", // Should not happen from ro.boot.bootreason + // blunt + "cold", + "hard", + "warm", + "shutdown", // Can not happen from ro.boot.bootreason + "reboot", // Default catch-all for anything unknown +}; + +// Returns true if the supplied reason prefix is considered detailed enough. +bool isStrongRebootReason(const std::string& r) { + for (auto &s : knownReasons) { + if (s == "cold") break; + // Prefix defined as terminated by a nul or comma (,). + if (android::base::StartsWith(r, s.c_str()) && + ((r.length() == s.length()) || (r[s.length()] == ','))) { + return true; + } + } + return false; +} + +// Returns true if the supplied reason prefix is associated with the kernel. +bool isKernelRebootReason(const std::string& r) { + for (auto &s : knownReasons) { + if (s == "recovery") break; + // Prefix defined as terminated by a nul or comma (,). + if (android::base::StartsWith(r, s.c_str()) && + ((r.length() == s.length()) || (r[s.length()] == ','))) { + return true; + } + } + return false; +} + +// Returns true if the supplied reason prefix is considered known. +bool isKnownRebootReason(const std::string& r) { + for (auto &s : knownReasons) { + // Prefix defined as terminated by a nul or comma (,). + if (android::base::StartsWith(r, s.c_str()) && + ((r.length() == s.length()) || (r[s.length()] == ','))) { + return true; + } + } + return false; +} + +// If the reboot reason should be improved, report true if is too blunt. +bool isBluntRebootReason(const std::string& r) { + if (isStrongRebootReason(r)) return false; + + if (!isKnownRebootReason(r)) return true; // Can not support unknown as detail + + size_t pos = 0; + while ((pos = r.find(',', pos)) != std::string::npos) { + ++pos; + std::string next(r.substr(pos)); + if (next.length() == 0) break; + if (next[0] == ',') continue; + if (!isKnownRebootReason(next)) return false; // Unknown subreason is good. + if (isStrongRebootReason(next)) return false; // eg: reboot,reboot + } + return true; +} + +// std::transform Helper callback functions: +// Converts a string value representing the reason the system booted to a +// string complying with Android system standard reason. +char tounderline(char c) { return ::isblank(c) ? '_' : c; } +char toprintable(char c) { return ::isprint(c) ? c : '?'; } + +const char system_reboot_reason_property[] = "sys.boot.reason"; +const char last_reboot_reason_property[] = LAST_REBOOT_REASON_PROPERTY; +const char bootloader_reboot_reason_property[] = "ro.boot.bootreason"; + +// Scrub, Sanitize, Standardize and Enhance the boot reason string supplied. +std::string BootReasonStrToReason(const std::string& boot_reason) { + std::string ret(GetProperty(system_reboot_reason_property)); + std::string reason(boot_reason); + // If sys.boot.reason == ro.boot.bootreason, let's re-evaluate + if (reason == ret) ret = ""; + + // Cleanup boot_reason regarding acceptable character set + std::transform(reason.begin(), reason.end(), reason.begin(), ::tolower); + std::transform(reason.begin(), reason.end(), reason.begin(), tounderline); + std::transform(reason.begin(), reason.end(), reason.begin(), toprintable); + + // Is the current system boot reason sys.boot.reason valid? + if (!isKnownRebootReason(ret)) ret = ""; + + if (ret == "") { + // Is the bootloader boot reason ro.boot.bootreason known? + std::vector words(android::base::Split(reason, ",_-")); + for (auto &s : knownReasons) { + std::string blunt; + for (auto &r : words) { + if (r == s) { + if (isBluntRebootReason(s)) { + blunt = s; + } else { + ret = s; + break; + } + } + } + if (ret == "") ret = blunt; + if (ret != "") break; + } + } + + if (ret == "") { + // A series of checks to take some officially unsupported reasons + // reported by the bootloader and find some logical and canonical + // sense. In an ideal world, we would require those bootloaders + // to behave and follow our standards. + static const std::vector> aliasReasons = { + {"watchdog", "wdog"}, + {"cold,powerkey", "powerkey"}, + {"kernel_panic", "panic"}, + {"shutdown,thermal", "thermal"}, + {"warm,s3_wakeup", "s3_wakeup"}, + {"hard,hw_reset", "hw_reset"}, + {"bootloader", ""}, + }; + + // Either the primary or alias is found _somewhere_ in the reason string. + for (auto &s : aliasReasons) { + if (reason.find(s.first) != std::string::npos) { + ret = s.first; + break; + } + if (s.second.size() && (reason.find(s.second) != std::string::npos)) { + ret = s.first; + break; + } + } + } + + // If watchdog is the reason, see if there is a security angle? + if (ret == "watchdog") { + if (reason.find("sec") != std::string::npos) { + ret += ",security"; + } + } + + // Check the other reason resources if the reason is still blunt. + if (isBluntRebootReason(ret)) { + // Check to see if last klog has some refinement hints. + std::string content; + if (!android::base::ReadFileToString("/sys/fs/pstore/console-ramoops-0", + &content)) { + android::base::ReadFileToString("/sys/fs/pstore/console-ramoops", + &content); + } + + // The toybox reboot command used directly (unlikely)? But also + // catches init's response to the Android's more controlled reboot command. + if (content.rfind("reboot: Power down") != std::string::npos) { + ret = "shutdown"; // Still too blunt, but more accurate. + // ToDo: init should record the shutdown reason to kernel messages ala: + // init: shutdown system with command 'last_reboot_reason' + // so that if pstore has persistence we can get some details + // that could be missing in last_reboot_reason_property. + } + + static const char cmd[] = "reboot: Restarting system with command '"; + size_t pos = content.rfind(cmd); + if (pos != std::string::npos) { + pos += strlen(cmd); + std::string subReason(content.substr(pos)); + pos = subReason.find('\''); + if (pos != std::string::npos) subReason.erase(pos); + if (subReason != "") { // Will not land "reboot" as that is too blunt. + if (isKernelRebootReason(subReason)) { + ret = "reboot," + subReason; // User space can't talk kernel reasons. + } else { + ret = subReason; + } + } + } + + // Check for kernel panics, but allowed to override reboot command. + if (content.rfind("sysrq: SysRq : Trigger a crash") != std::string::npos) { + // Can not happen, except on userdebug, during testing/debugging. + ret = "kernel_panic,sysrq"; + } else if (content.rfind( + "Unable to handle kernel NULL pointer dereference at virtual address") + != std::string::npos) { + ret = "kernel_panic,NULL"; + } else if (content.rfind("Kernel BUG at ") != std::string::npos) { + ret = "kernel_panic,BUG"; + } else if ((content.rfind("Power held for ") != std::string::npos) || + (content.rfind("charger: [") != std::string::npos)) { + ret = "cold"; + } + + // The following battery test should migrate to a default system health HAL + + // Let us not worry if the reboot command was issued, for the cases of + // reboot -p, reboot , reboot cold, reboot warm and reboot hard. + // Same for bootloader and ro.boot.bootreasons of this set, but a dead + // battery could conceivably lead to these, so worthy of override. + if (isBluntRebootReason(ret)) { + // Heuristic to determine if shutdown possibly because of a dead battery? + // Really a hail-mary pass to find it in last klog content ... + static const int battery_dead_threshold = 2; // percent + static const char battery[] = "healthd: battery l="; + pos = content.rfind(battery); // last one + if (pos != std::string::npos) { + int level = atoi(content.substr(pos + strlen(battery)).c_str()); + LOG(INFO) << "Battery level at shutdown " << level << "%"; + if (level <= battery_dead_threshold) { + ret = "shutdown,battery"; + } + } else { // Most likely + // Content buffer no longer will have console data. Beware if more + // checks added below, that depend on parsing console content. + content = ""; + + LOG(DEBUG) << "Can not find last low battery in last console messages"; + android_logcat_context ctx = create_android_logcat(); + FILE *fp = android_logcat_popen(&ctx, "logcat -b kernel -v brief -d"); + if (fp != nullptr) { + android::base::ReadFdToString(fileno(fp), &content); + } + android_logcat_pclose(&ctx, fp); + android_logcat_destroy(&ctx); + static const char logcat_battery[] = "W/healthd ( 0): battery l="; + const char* match = logcat_battery; + + if (content == "") { + // Service logd.klog not running, go to smaller buffer in the kernel. + int rc = klogctl(KLOG_SIZE_BUFFER, nullptr, 0); + if (rc > 0) { + ssize_t len = rc + 1024; // 1K Margin should it grow between calls. + std::unique_ptr buf(new char[len]); + rc = klogctl(KLOG_READ_ALL, buf.get(), len); + if (rc < len) { + len = rc + 1; + } + buf[--len] = '\0'; + content = buf.get(); + } + match = battery; + } + + pos = content.find(match); // The first one it finds. + if (pos != std::string::npos) { + pos += strlen(match); + int level = atoi(content.substr(pos).c_str()); + LOG(INFO) << "Battery level at startup " << level << "%"; + if (level <= battery_dead_threshold) { + ret = "shutdown,battery"; + } + } else { + LOG(DEBUG) << "Can not find first battery level in dmesg or logcat"; + } + } + } + + // Is there a controlled shutdown hint in last_reboot_reason_property? + if (isBluntRebootReason(ret)) { + // Content buffer no longer will have console data. Beware if more + // checks added below, that depend on parsing console content. + content = GetProperty(last_reboot_reason_property); + + // String is either "reboot," or "shutdown,". + // We will set if default reasons, only override with detail if thermal. + if (!isBluntRebootReason(content)) { + // Ok, we want it, let's squash it if secondReason is known. + pos = content.find(','); + if (pos != std::string::npos) { + ++pos; + std::string secondReason(content.substr(pos)); + ret = isKnownRebootReason(secondReason) ? secondReason : content; + } else { + ret = content; + } + } + } + + // Other System Health HAL reasons? + + // ToDo: /proc/sys/kernel/boot_reason needs a HAL interface to + // possibly offer hardware-specific clues from the PMIC. + } + + // If unknown left over from above, make it "reboot," + if (ret == "") { + ret = "reboot"; + if (android::base::StartsWith(reason, "reboot")) { + reason = reason.substr(strlen("reboot")); + while (reason[0] == ',') { + reason = reason.substr(1); + } + } + if (reason != "") { + ret += ","; + ret += reason; + } + } + + LOG(INFO) << "Canonical boot reason: " << ret; + if (isKernelRebootReason(ret) && (GetProperty(last_reboot_reason_property) != "")) { + // Rewrite as it must be old news, kernel reasons trump user space. + SetProperty(last_reboot_reason_property, ret); + } + return ret; +} + // Returns the appropriate metric key prefix for the boot_complete metric such // that boot metrics after a system update are labeled as ota_boot_complete; // otherwise, they are labeled as boot_complete. This method encapsulates the @@ -212,9 +555,19 @@ std::string CalculateBootCompletePrefix() { if (!boot_event_store.GetBootEvent(kBuildDateKey, &record)) { boot_complete_prefix = "factory_reset_" + boot_complete_prefix; boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date); + LOG(INFO) << "Canonical boot reason: " << "reboot,factory_reset"; + SetProperty(system_reboot_reason_property, "reboot,factory_reset"); + if (GetProperty(bootloader_reboot_reason_property) == "") { + SetProperty(bootloader_reboot_reason_property, "reboot,factory_reset"); + } } else if (build_date != record.second) { boot_complete_prefix = "ota_" + boot_complete_prefix; boot_event_store.AddBootEventWithValue(kBuildDateKey, build_date); + LOG(INFO) << "Canonical boot reason: " << "reboot,ota"; + SetProperty(system_reboot_reason_property, "reboot,ota"); + if (GetProperty(bootloader_reboot_reason_property) == "") { + SetProperty(bootloader_reboot_reason_property, "reboot,ota"); + } } return boot_complete_prefix; @@ -358,9 +711,23 @@ void RecordBootComplete() { // Records the boot_reason metric by querying the ro.boot.bootreason system // property. void RecordBootReason() { - int32_t boot_reason = BootReasonStrToEnum(GetProperty("ro.boot.bootreason")); + const std::string reason(GetProperty(bootloader_reboot_reason_property)); + + // Log the raw bootloader_boot_reason property value. + int32_t boot_reason = BootReasonStrToEnum(reason); BootEventRecordStore boot_event_store; boot_event_store.AddBootEventWithValue("boot_reason", boot_reason); + + // Log the scrubbed system_boot_reason. + const std::string system_reason(BootReasonStrToReason(reason)); + int32_t system_boot_reason = BootReasonStrToEnum(system_reason); + boot_event_store.AddBootEventWithValue("system_boot_reason", system_boot_reason); + + // Record the scrubbed system_boot_reason to the property + SetProperty(system_reboot_reason_property, system_reason); + if (reason == "") { + SetProperty(bootloader_reboot_reason_property, system_reason); + } } // Records two metrics related to the user resetting a device: the time at diff --git a/bootstat/bootstat.rc b/bootstat/bootstat.rc index d697efba0..410b85483 100644 --- a/bootstat/bootstat.rc +++ b/bootstat/bootstat.rc @@ -1,5 +1,9 @@ # This file is the LOCAL_INIT_RC file for the bootstat command. +# mirror bootloader boot reason to system boot reason +on property:ro.boot.bootreason=* + setprop sys.boot.reason ${ro.boot.bootreason} + on post-fs-data mkdir /data/misc/bootstat 0700 system log # To deal with ota transition resulting from a change in DAC from diff --git a/libcutils/fs_config.cpp b/libcutils/fs_config.cpp index a1dbd7881..118504051 100644 --- a/libcutils/fs_config.cpp +++ b/libcutils/fs_config.cpp @@ -175,6 +175,8 @@ static const struct fs_path_config android_files[] = { CAP_MASK_LONG(CAP_AUDIT_CONTROL) | CAP_MASK_LONG(CAP_SETGID), "system/bin/logd" }, + { 00550, AID_SYSTEM, AID_LOG, CAP_MASK_LONG(CAP_SYSLOG), + "system/bin/bootstat" }, { 00750, AID_ROOT, AID_SHELL, CAP_MASK_LONG(CAP_SETUID) | CAP_MASK_LONG(CAP_SETGID), "system/bin/run-as" },