poll umount completion from /proc/mounts

- umount operation is asynchronous except for root partition.
  Returning from umount does not guarantee completion of
  umount. Poll /proc/mounts to confirm completion of umount.
- Treat all devices mounting to /data as emulated devices. This is
  future proof when fs other than sdcardfs is used.
- Drop quota sync from sync step. There is no differences in
  frequencies of quota error.
- Run umount in reverse order from mounting order so that any
  hidden dependency can be auto-resolved.
- Add dump of lsof and /proc/mounts when umount fails. lsof only runs
  when selinux is toggled into permissive mode. The dump is enabled
  only for non-user build.
- Keep logcat until vold shutdown in case vold has any error to report.

bug: 36551218
Test: python packages/services/Car/tools/bootanalyze/bootanalyze.py -r -c packages/services/Car/tools/bootanalyze/config.yaml -n 1000 -f -e 20 -w 30

Change-Id: I87b17b966d7004c205452d81460b02c6acf50d45
This commit is contained in:
Keun-young Park 2017-03-29 12:54:40 -07:00
parent 72ca48e5cb
commit 2ba5c8103d
2 changed files with 124 additions and 145 deletions

View File

@ -9,13 +9,15 @@ init_options += \
-DALLOW_LOCAL_PROP_OVERRIDE=1 \
-DALLOW_PERMISSIVE_SELINUX=1 \
-DREBOOT_BOOTLOADER_ON_PANIC=1 \
-DWORLD_WRITABLE_KMSG=1
-DWORLD_WRITABLE_KMSG=1 \
-DDUMP_ON_UMOUNT_FAILURE=1
else
init_options += \
-DALLOW_LOCAL_PROP_OVERRIDE=0 \
-DALLOW_PERMISSIVE_SELINUX=0 \
-DREBOOT_BOOTLOADER_ON_PANIC=0 \
-DWORLD_WRITABLE_KMSG=0
-DWORLD_WRITABLE_KMSG=0 \
-DDUMP_ON_UMOUNT_FAILURE=0
endif
ifneq (,$(filter eng,$(TARGET_BUILD_VARIANT)))

View File

@ -18,10 +18,12 @@
#include <dirent.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <mntent.h>
#include <selinux/selinux.h>
#include <sys/cdefs.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/quota.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@ -39,6 +41,7 @@
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <bootloader_message/bootloader_message.h>
#include <cutils/android_reboot.h>
#include <fs_mgr.h>
@ -67,39 +70,58 @@ enum UmountStat {
// Utility for struct mntent
class MountEntry {
public:
explicit MountEntry(const mntent& entry, bool isMounted = true)
explicit MountEntry(const mntent& entry)
: mnt_fsname_(entry.mnt_fsname),
mnt_dir_(entry.mnt_dir),
mnt_type_(entry.mnt_type),
is_mounted_(isMounted) {}
mnt_opts_(entry.mnt_opts) {}
bool IsF2Fs() const { return mnt_type_ == "f2fs"; }
bool Umount() {
int r = umount2(mnt_dir_.c_str(), 0);
if (r == 0) {
LOG(INFO) << "umounted " << mnt_fsname_ << ":" << mnt_dir_ << " opts " << mnt_opts_;
return true;
} else {
PLOG(WARNING) << "cannot umount " << mnt_fsname_ << ":" << mnt_dir_ << " opts "
<< mnt_opts_;
return false;
}
}
bool IsExt4() const { return mnt_type_ == "ext4"; }
bool is_mounted() const { return is_mounted_; }
void set_is_mounted() { is_mounted_ = false; }
const std::string& mnt_fsname() const { return mnt_fsname_; }
const std::string& mnt_dir() const { return mnt_dir_; }
void DoFsck() {
int st;
if (IsF2Fs()) {
const char* f2fs_argv[] = {
"/system/bin/fsck.f2fs", "-f", mnt_fsname_.c_str(),
};
android_fork_execvp_ext(arraysize(f2fs_argv), (char**)f2fs_argv, &st, true, LOG_KLOG,
true, nullptr, nullptr, 0);
} else if (IsExt4()) {
const char* ext4_argv[] = {
"/system/bin/e2fsck", "-f", "-y", mnt_fsname_.c_str(),
};
android_fork_execvp_ext(arraysize(ext4_argv), (char**)ext4_argv, &st, true, LOG_KLOG,
true, nullptr, nullptr, 0);
}
}
static bool IsBlockDevice(const struct mntent& mntent) {
return android::base::StartsWith(mntent.mnt_fsname, "/dev/block");
}
static bool IsEmulatedDevice(const struct mntent& mntent) {
static const std::string SDCARDFS_NAME = "sdcardfs";
return android::base::StartsWith(mntent.mnt_fsname, "/data/") &&
SDCARDFS_NAME == mntent.mnt_type;
return android::base::StartsWith(mntent.mnt_fsname, "/data/");
}
private:
bool IsF2Fs() const { return mnt_type_ == "f2fs"; }
bool IsExt4() const { return mnt_type_ == "ext4"; }
std::string mnt_fsname_;
std::string mnt_dir_;
std::string mnt_type_;
bool is_mounted_;
std::string mnt_opts_;
};
// Turn off backlight while we are performing power down cleanup activities.
@ -125,50 +147,6 @@ static void TurnOffBacklight() {
}
}
static void DoFsck(const MountEntry& entry) {
static constexpr int UNMOUNT_CHECK_TIMES = 10;
if (!entry.IsF2Fs() && !entry.IsExt4()) return;
int count = 0;
while (count++ < UNMOUNT_CHECK_TIMES) {
int fd = TEMP_FAILURE_RETRY(open(entry.mnt_fsname().c_str(), O_RDONLY | O_EXCL));
if (fd >= 0) {
/* |entry->mnt_dir| has sucessfully been unmounted. */
close(fd);
break;
} else if (errno == EBUSY) {
// Some processes using |entry->mnt_dir| are still alive. Wait for a
// while then retry.
std::this_thread::sleep_for(5000ms / UNMOUNT_CHECK_TIMES);
continue;
} else {
/* Cannot open the device. Give up. */
return;
}
}
// NB: With watchdog still running, there is no cap on the time it takes
// to complete the fsck, from the users perspective the device graphics
// and responses are locked-up and they may choose to hold the power
// button in frustration if it drags out.
int st;
if (entry.IsF2Fs()) {
const char* f2fs_argv[] = {
"/system/bin/fsck.f2fs", "-f", entry.mnt_fsname().c_str(),
};
android_fork_execvp_ext(arraysize(f2fs_argv), (char**)f2fs_argv, &st, true, LOG_KLOG, true,
nullptr, nullptr, 0);
} else if (entry.IsExt4()) {
const char* ext4_argv[] = {
"/system/bin/e2fsck", "-f", "-y", entry.mnt_fsname().c_str(),
};
android_fork_execvp_ext(arraysize(ext4_argv), (char**)ext4_argv, &st, true, LOG_KLOG, true,
nullptr, nullptr, 0);
}
}
static void ShutdownVold() {
const char* vdc_argv[] = {"/system/bin/vdc", "volume", "shutdown"};
int status;
@ -202,21 +180,11 @@ RebootSystem(unsigned int cmd, const std::string& rebootTarget) {
abort();
}
static void DoSync() {
// quota sync is not done by sync call, so should be done separately.
// quota sync is in VFS level, so do it before sync, which goes down to fs level.
int r = quotactl(QCMD(Q_SYNC, 0), nullptr, 0 /* do not care */, 0 /* do not care */);
if (r < 0) {
PLOG(ERROR) << "quotactl failed";
}
sync();
}
/* Find all read+write block devices and emulated devices in /proc/mounts
* and add them to correpsponding list.
*/
static bool FindPartitionsToUmount(std::vector<MountEntry>* blockDevPartitions,
std::vector<MountEntry>* emulatedPartitions) {
std::vector<MountEntry>* emulatedPartitions, bool dump) {
std::unique_ptr<std::FILE, int (*)(std::FILE*)> fp(setmntent("/proc/mounts", "r"), endmntent);
if (fp == nullptr) {
PLOG(ERROR) << "Failed to open /proc/mounts";
@ -224,44 +192,63 @@ static bool FindPartitionsToUmount(std::vector<MountEntry>* blockDevPartitions,
}
mntent* mentry;
while ((mentry = getmntent(fp.get())) != nullptr) {
if (MountEntry::IsBlockDevice(*mentry) && hasmntopt(mentry, "rw")) {
blockDevPartitions->emplace_back(*mentry);
if (dump) {
LOG(INFO) << "mount entry " << mentry->mnt_fsname << ":" << mentry->mnt_dir << " opts "
<< mentry->mnt_opts << " type " << mentry->mnt_type;
} else if (MountEntry::IsBlockDevice(*mentry) && hasmntopt(mentry, "rw")) {
blockDevPartitions->emplace(blockDevPartitions->begin(), *mentry);
} else if (MountEntry::IsEmulatedDevice(*mentry)) {
emulatedPartitions->emplace_back(*mentry);
emulatedPartitions->emplace(emulatedPartitions->begin(), *mentry);
}
}
return true;
}
static bool UmountPartitions(std::vector<MountEntry>* partitions, int maxRetry, int flags) {
static constexpr int SLEEP_AFTER_RETRY_US = 100000;
bool umountDone;
int retryCounter = 0;
while (true) {
umountDone = true;
for (auto& entry : *partitions) {
if (entry.is_mounted()) {
int r = umount2(entry.mnt_dir().c_str(), flags);
if (r == 0) {
entry.set_is_mounted();
LOG(INFO) << StringPrintf("umounted %s, flags:0x%x", entry.mnt_fsname().c_str(),
flags);
} else {
umountDone = false;
PLOG(WARNING) << StringPrintf("cannot umount %s, mnt_dir %s, flags:0x%x",
entry.mnt_fsname().c_str(),
entry.mnt_dir().c_str(), flags);
}
}
}
if (umountDone) break;
retryCounter++;
if (retryCounter >= maxRetry) break;
usleep(SLEEP_AFTER_RETRY_US);
static void DumpUmountDebuggingInfo() {
int status;
if (!security_getenforce()) {
LOG(INFO) << "Run lsof";
const char* lsof_argv[] = {"/system/bin/lsof"};
android_fork_execvp_ext(arraysize(lsof_argv), (char**)lsof_argv, &status, true, LOG_KLOG,
true, nullptr, nullptr, 0);
}
return umountDone;
FindPartitionsToUmount(nullptr, nullptr, true);
}
static UmountStat UmountPartitions(int timeoutMs) {
Timer t;
UmountStat stat = UMOUNT_STAT_TIMEOUT;
int retry = 0;
/* data partition needs all pending writes to be completed and all emulated partitions
* umounted.If the current waiting is not good enough, give
* up and leave it to e2fsck after reboot to fix it.
*/
while (true) {
std::vector<MountEntry> block_devices;
std::vector<MountEntry> emulated_devices;
if (!FindPartitionsToUmount(&block_devices, &emulated_devices, false)) {
return UMOUNT_STAT_ERROR;
}
if (block_devices.size() == 0) {
stat = UMOUNT_STAT_SUCCESS;
break;
}
if ((timeoutMs < t.duration_ms()) && retry > 0) { // try umount at least once
stat = UMOUNT_STAT_TIMEOUT;
break;
}
if (emulated_devices.size() > 0 &&
std::all_of(emulated_devices.begin(), emulated_devices.end(),
[](auto& entry) { return entry.Umount(); })) {
sync();
}
for (auto& entry : block_devices) {
entry.Umount();
}
retry++;
std::this_thread::sleep_for(100ms);
}
return stat;
}
static void KillAllProcesses() { android::base::WriteStringToFile("i", "/proc/sysrq-trigger"); }
@ -277,56 +264,38 @@ static void KillAllProcesses() { android::base::WriteStringToFile("i", "/proc/sy
*/
static UmountStat TryUmountAndFsck(bool runFsck, int timeoutMs) {
Timer t;
std::vector<MountEntry> emulatedPartitions;
std::vector<MountEntry> blockDevRwPartitions;
std::vector<MountEntry> block_devices;
std::vector<MountEntry> emulated_devices;
TurnOffBacklight(); // this part can take time. save power.
if (!FindPartitionsToUmount(&blockDevRwPartitions, &emulatedPartitions)) {
if (runFsck && !FindPartitionsToUmount(&block_devices, &emulated_devices, false)) {
return UMOUNT_STAT_ERROR;
}
if (emulatedPartitions.size() > 0) {
LOG(WARNING) << "emulated partitions still exist, will umount";
/* Pending writes in emulated partitions can fail umount. After a few trials, detach
* it so that it can be umounted when all writes are done.
*/
if (!UmountPartitions(&emulatedPartitions, 1, 0)) {
UmountPartitions(&emulatedPartitions, 1, MNT_DETACH);
}
}
DoSync(); // emulated partition change can lead to update
UmountStat stat = UMOUNT_STAT_SUCCESS;
/* data partition needs all pending writes to be completed and all emulated partitions
* umounted. If umount failed in the above step, it DETACH is requested, so umount can
* still happen while waiting for /data. If the current waiting is not good enough, give
* up and leave it to e2fsck after reboot to fix it.
*/
int remainingTimeMs = timeoutMs - t.duration_ms();
// each retry takes 100ms, and run at least once.
int retry = std::max(remainingTimeMs / 100, 1);
if (!UmountPartitions(&blockDevRwPartitions, retry, 0)) {
/* Last resort, kill all and try again */
LOG(WARNING) << "umount still failing, trying kill all";
UmountStat stat = UmountPartitions(timeoutMs - t.duration_ms());
if (stat != UMOUNT_STAT_SUCCESS) {
LOG(INFO) << "umount timeout, last resort, kill all and try";
if (DUMP_ON_UMOUNT_FAILURE) DumpUmountDebuggingInfo();
KillAllProcesses();
DoSync();
if (!UmountPartitions(&blockDevRwPartitions, 1, 0)) {
stat = UMOUNT_STAT_TIMEOUT;
}
}
// fsck part is excluded from timeout check. It only runs for user initiated shutdown
// and should not affect reboot time.
if (stat == UMOUNT_STAT_SUCCESS && runFsck) {
for (auto& entry : blockDevRwPartitions) {
DoFsck(entry);
}
// even if it succeeds, still it is timeout and do not run fsck with all processes killed
UmountPartitions(0);
if (DUMP_ON_UMOUNT_FAILURE) DumpUmountDebuggingInfo();
}
if (stat == UMOUNT_STAT_SUCCESS && runFsck) {
// fsck part is excluded from timeout check. It only runs for user initiated shutdown
// and should not affect reboot time.
for (auto& entry : block_devices) {
entry.DoFsck();
}
}
return stat;
}
static void __attribute__((noreturn)) DoThermalOff() {
LOG(WARNING) << "Thermal system shutdown";
DoSync();
sync();
RebootSystem(ANDROID_RB_THERMOFF, "");
abort();
}
@ -426,8 +395,8 @@ void DoReboot(unsigned int cmd, const std::string& reason, const std::string& re
// minimum safety steps before restarting
// 2. kill all services except ones that are necessary for the shutdown sequence.
ServiceManager::GetInstance().ForEachService([&kill_after_apps](Service* s) {
if (!s->IsShutdownCritical() || kill_after_apps.count(s->name())) s->Stop();
ServiceManager::GetInstance().ForEachService([](Service* s) {
if (!s->IsShutdownCritical()) s->Stop();
});
ServiceManager::GetInstance().ReapAnyOutstandingChildren();
@ -435,12 +404,20 @@ void DoReboot(unsigned int cmd, const std::string& reason, const std::string& re
Service* voldService = ServiceManager::GetInstance().FindServiceByName("vold");
if (voldService != nullptr && voldService->IsRunning()) {
ShutdownVold();
voldService->Stop();
} else {
LOG(INFO) << "vold not running, skipping vold shutdown";
}
// logcat stopped here
ServiceManager::GetInstance().ForEachService([&kill_after_apps](Service* s) {
if (kill_after_apps.count(s->name())) s->Stop();
});
// 4. sync, try umount, and optionally run fsck for user shutdown
DoSync();
sync();
UmountStat stat = TryUmountAndFsck(runFsck, shutdownTimeout * 1000 - t.duration_ms());
// Follow what linux shutdown is doing: one more sync with little bit delay
sync();
std::this_thread::sleep_for(100ms);
LogShutdownTime(stat, &t);
// Reboot regardless of umount status. If umount fails, fsck after reboot will fix it.
RebootSystem(cmd, rebootTarget);