init: Reboot after timeout passes during reboot

There is currently a timeout for reboot; however, if the system gets
stuck, particularly during file system operations, there is no safety
mechanism that guarantees the system will still reboot.

This change does all of the optional reboot steps in a separate thread
and waits for this thread with a timeout, such that if the reboot
steps get hung, the system is guaranteed to still reboot.

This is specific to 'reboot'.  Shutdown continues to run without a
time bound so that it can run fsck.

Bug: 72781711
Test: Reboot devices hitting and not hitting this timeout
Change-Id: Id5e1b3693bab00602177e28b9b662e1499c32961
This commit is contained in:
Tom Cherry 2018-02-15 14:26:58 -08:00
parent cdf778f5d9
commit c9fec9d2be
1 changed files with 77 additions and 36 deletions

View File

@ -20,6 +20,7 @@
#include <fcntl.h>
#include <linux/fs.h>
#include <mntent.h>
#include <semaphore.h>
#include <sys/capability.h>
#include <sys/cdefs.h>
#include <sys/ioctl.h>
@ -89,12 +90,13 @@ class MountEntry {
mnt_opts_(entry.mnt_opts) {}
bool Umount(bool force) {
LOG(INFO) << "Unmounting " << mnt_fsname_ << ":" << mnt_dir_ << " opts " << mnt_opts_;
int r = umount2(mnt_dir_.c_str(), force ? MNT_FORCE : 0);
if (r == 0) {
LOG(INFO) << "umounted " << mnt_fsname_ << ":" << mnt_dir_ << " opts " << mnt_opts_;
LOG(INFO) << "Umounted " << mnt_fsname_ << ":" << mnt_dir_ << " opts " << mnt_opts_;
return true;
} else {
PLOG(WARNING) << "cannot umount " << mnt_fsname_ << ":" << mnt_dir_ << " opts "
PLOG(WARNING) << "Cannot umount " << mnt_fsname_ << ":" << mnt_dir_ << " opts "
<< mnt_opts_;
return false;
}
@ -328,39 +330,9 @@ static UmountStat TryUmountAndFsck(bool runFsck, std::chrono::milliseconds timeo
return stat;
}
void DoReboot(unsigned int cmd, const std::string& reason, const std::string& rebootTarget,
bool runFsck) {
void RebootThread(unsigned int cmd, std::chrono::milliseconds shutdown_timeout, bool runFsck,
sem_t* reboot_semaphore) {
Timer t;
LOG(INFO) << "Reboot start, reason: " << reason << ", rebootTarget: " << rebootTarget;
// Ensure last reboot reason is reduced to canonical
// alias reported in bootloader or system boot reason.
size_t skip = 0;
std::vector<std::string> reasons = Split(reason, ",");
if (reasons.size() >= 2 && reasons[0] == "reboot" &&
(reasons[1] == "recovery" || reasons[1] == "bootloader" || reasons[1] == "cold" ||
reasons[1] == "hard" || reasons[1] == "warm")) {
skip = strlen("reboot,");
}
property_set(LAST_REBOOT_REASON_PROPERTY, reason.c_str() + skip);
sync();
bool is_thermal_shutdown = cmd == ANDROID_RB_THERMOFF;
auto shutdown_timeout = 0ms;
if (!SHUTDOWN_ZERO_TIMEOUT) {
if (is_thermal_shutdown) {
constexpr unsigned int thermal_shutdown_timeout = 1;
shutdown_timeout = std::chrono::seconds(thermal_shutdown_timeout);
} else {
constexpr unsigned int shutdown_timeout_default = 6;
auto shutdown_timeout_property = android::base::GetUintProperty(
"ro.build.shutdown_timeout", shutdown_timeout_default);
shutdown_timeout = std::chrono::seconds(shutdown_timeout_property);
}
}
LOG(INFO) << "Shutdown timeout: " << shutdown_timeout.count() << " ms";
// keep debugging tools until non critical ones are all gone.
const std::set<std::string> kill_after_apps{"tombstoned", "logd", "adbd"};
// watchdogd is a vendor specific component but should be alive to complete shutdown safely.
@ -384,7 +356,7 @@ void DoReboot(unsigned int cmd, const std::string& reason, const std::string& re
}
// remaining operations (specifically fsck) may take a substantial duration
if (cmd == ANDROID_RB_POWEROFF || is_thermal_shutdown) {
if (cmd == ANDROID_RB_POWEROFF || cmd == ANDROID_RB_THERMOFF) {
TurnOffBacklight();
}
@ -461,8 +433,77 @@ void DoReboot(unsigned int cmd, const std::string& reason, const std::string& re
UmountStat stat = TryUmountAndFsck(runFsck, shutdown_timeout - t.duration());
// Follow what linux shutdown is doing: one more sync with little bit delay
sync();
if (!is_thermal_shutdown) std::this_thread::sleep_for(100ms);
if (cmd != ANDROID_RB_THERMOFF) std::this_thread::sleep_for(100ms);
LogShutdownTime(stat, &t);
if (reboot_semaphore != nullptr) {
sem_post(reboot_semaphore);
}
}
// Runs the optional reboot steps on a detached worker thread and waits for them with a timeout.
//
// RebootThread() is started with `cmd` and `shutdown_timeout` (fsck disabled) and is given a
// semaphore to post when it finishes.  This function blocks until that post arrives or until
// `shutdown_timeout` plus one second has elapsed, whichever comes first, and then returns so the
// caller can proceed even if the worker is hung.  If the semaphore or the clock cannot be set
// up, it returns immediately without starting the worker.
void RunRebootThread(unsigned int cmd, std::chrono::milliseconds shutdown_timeout) {
    // The worker is detached and may still be running (and may still post) after this function
    // has given up waiting, so the semaphore must not live in this stack frame.  Static storage
    // keeps the pointer handed to the worker valid for the rest of the process lifetime.
    // NOTE(review): this assumes the function is called at most once per process, since
    // re-initializing a live semaphore is undefined — confirm against callers.
    static sem_t reboot_semaphore;
    if (sem_init(&reboot_semaphore, false, 0) == -1) {
        // This should never fail, but if it does, skip the graceful reboot steps and return.
        return;
    }

    timespec shutdown_timeout_timespec;
    if (clock_gettime(CLOCK_REALTIME, &shutdown_timeout_timespec) == -1) {
        // Should never fail either.  No worker has been started yet, so the semaphore can be
        // released safely before skipping the graceful reboot steps.
        sem_destroy(&reboot_semaphore);
        return;
    }

    std::thread reboot_thread(&RebootThread, cmd, shutdown_timeout, false, &reboot_semaphore);
    reboot_thread.detach();

    // One extra second than the timeout passed to the thread as there is a final Umount pass
    // after the timeout is reached.
    shutdown_timeout_timespec.tv_sec += 1 + shutdown_timeout.count() / 1000;

    // sem_timedwait() takes an absolute CLOCK_REALTIME deadline, so retrying after a signal
    // interruption does not extend the total wait.
    int sem_return = 0;
    while ((sem_return = sem_timedwait(&reboot_semaphore, &shutdown_timeout_timespec)) == -1 &&
           errno == EINTR) {
    }

    if (sem_return == -1) {
        LOG(ERROR) << "Reboot thread timed out";
    }
}
void DoReboot(unsigned int cmd, const std::string& reason, const std::string& rebootTarget,
bool runFsck) {
LOG(INFO) << "Reboot start, reason: " << reason << ", rebootTarget: " << rebootTarget;
// Ensure last reboot reason is reduced to canonical
// alias reported in bootloader or system boot reason.
size_t skip = 0;
std::vector<std::string> reasons = Split(reason, ",");
if (reasons.size() >= 2 && reasons[0] == "reboot" &&
(reasons[1] == "recovery" || reasons[1] == "bootloader" || reasons[1] == "cold" ||
reasons[1] == "hard" || reasons[1] == "warm")) {
skip = strlen("reboot,");
}
property_set(LAST_REBOOT_REASON_PROPERTY, reason.c_str() + skip);
sync();
auto shutdown_timeout = 0ms;
if (!SHUTDOWN_ZERO_TIMEOUT) {
if (cmd == ANDROID_RB_THERMOFF) {
constexpr auto kThermalShutdownTimeout = 1s;
shutdown_timeout = kThermalShutdownTimeout;
} else {
constexpr unsigned int kShutdownTimeoutDefault = 6;
auto shutdown_timeout_property = android::base::GetUintProperty(
"ro.build.shutdown_timeout", kShutdownTimeoutDefault);
shutdown_timeout = std::chrono::seconds(shutdown_timeout_property);
}
}
LOG(INFO) << "Shutdown timeout: " << shutdown_timeout.count() << " ms";
if (runFsck) {
RebootThread(cmd, shutdown_timeout, true, nullptr);
} else {
RunRebootThread(cmd, shutdown_timeout);
}
// Reboot regardless of umount status. If umount fails, fsck after reboot will fix it.
RebootSystem(cmd, rebootTarget);
abort();