init start time tracking.

With this change, init sets a property "init.start" to show the
CLOCK_BOOTTIME time at which init itself started, and for each service
an "init.svc.<name>.start" property to show the CLOCK_BOOTTIME time at
which that service was most recently started.

These times can be used by tools like bootstat to track boot time.

As part of this change, move init over to std::chrono. Also, rather than
make the command-line argument handling more complex, I've switched to
using an environment variable for communication between first- and
second-stage init, and added another environment variable to pass the
start time of the first stage through to the second stage.

Bug: http://b/32780225
Test: manual
Change-Id: Ia65a623e1866ea688b9a5433d6507926ce301dfe
This commit is contained in:
Elliott Hughes 2016-11-10 17:43:47 -08:00
parent 8cbc89527c
commit 9605a945f7
8 changed files with 121 additions and 97 deletions

View File

@ -68,7 +68,7 @@
#define UNMOUNT_CHECK_MS 5000
#define UNMOUNT_CHECK_TIMES 10
static const int kTerminateServiceDelayMicroSeconds = 50000;
static constexpr std::chrono::nanoseconds kCommandRetryTimeout = 5s;
static int insmod(const char *filename, const char *options, int flags) {
int fd = open(filename, O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
@ -444,7 +444,7 @@ static int do_mount(const std::vector<std::string>& args) {
return -1;
} else {
if (wait)
wait_for_file(source, COMMAND_RETRY_TIMEOUT);
wait_for_file(source, kCommandRetryTimeout);
if (mount(source, target, system, flags, options) < 0) {
return -1;
}
@ -740,7 +740,7 @@ static int do_powerctl(const std::vector<std::string>& args) {
}
// Wait a bit before recounting the number or running services.
usleep(kTerminateServiceDelayMicroSeconds);
usleep(50000 /*us*/);
}
LOG(VERBOSE) << "Terminating running services took " << t.duration() << " seconds";
}
@ -947,11 +947,11 @@ static int do_load_system_props(const std::vector<std::string>& args) {
static int do_wait(const std::vector<std::string>& args) {
if (args.size() == 2) {
return wait_for_file(args[1].c_str(), COMMAND_RETRY_TIMEOUT);
return wait_for_file(args[1].c_str(), kCommandRetryTimeout);
} else if (args.size() == 3) {
int timeout;
if (android::base::ParseInt(args[2], &timeout)) {
return wait_for_file(args[1].c_str(), timeout);
return wait_for_file(args[1].c_str(), std::chrono::seconds(timeout));
}
}
return -1;

View File

@ -18,6 +18,7 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
#include <paths.h>
#include <signal.h>
@ -67,6 +68,8 @@
#include "util.h"
#include "watchdogd.h"
using android::base::StringPrintf;
struct selabel_handle *sehandle;
struct selabel_handle *sehandle_prop;
@ -75,7 +78,7 @@ static int property_triggers_enabled = 0;
static char qemu[32];
std::string default_console = "/dev/console";
static time_t process_needs_restart;
static time_t process_needs_restart_at;
const char *ENV[32];
@ -132,11 +135,10 @@ void property_changed(const char *name, const char *value)
static void restart_processes()
{
process_needs_restart = 0;
ServiceManager::GetInstance().
ForEachServiceWithFlags(SVC_RESTARTING, [] (Service* s) {
s->RestartIfNeeded(process_needs_restart);
});
process_needs_restart_at = 0;
ServiceManager::GetInstance().ForEachServiceWithFlags(SVC_RESTARTING, [](Service* s) {
s->RestartIfNeeded(&process_needs_restart_at);
});
}
void handle_control_message(const std::string& msg, const std::string& name) {
@ -164,7 +166,7 @@ static int wait_for_coldboot_done_action(const std::vector<std::string>& args) {
// Any longer than 1s is an unreasonable length of time to delay booting.
// If you're hitting this timeout, check that you didn't make your
// sepolicy regular expressions too expensive (http://b/19899875).
if (wait_for_file(COLDBOOT_DONE, 1)) {
if (wait_for_file(COLDBOOT_DONE, 1s)) {
LOG(ERROR) << "Timed out waiting for " COLDBOOT_DONE;
}
@ -268,15 +270,14 @@ static void import_kernel_nv(const std::string& key, const std::string& value, b
if (for_emulator) {
// In the emulator, export any kernel option with the "ro.kernel." prefix.
property_set(android::base::StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
property_set(StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
return;
}
if (key == "qemu") {
strlcpy(qemu, value.c_str(), sizeof(qemu));
} else if (android::base::StartsWith(key, "androidboot.")) {
property_set(android::base::StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(),
value.c_str());
property_set(StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(), value.c_str());
}
}
@ -314,7 +315,7 @@ static void export_kernel_boot_props() {
static void process_kernel_dt() {
static const char android_dir[] = "/proc/device-tree/firmware/android";
std::string file_name = android::base::StringPrintf("%s/compatible", android_dir);
std::string file_name = StringPrintf("%s/compatible", android_dir);
std::string dt_file;
android::base::ReadFileToString(file_name, &dt_file);
@ -332,12 +333,12 @@ static void process_kernel_dt() {
continue;
}
file_name = android::base::StringPrintf("%s/%s", android_dir, dp->d_name);
file_name = StringPrintf("%s/%s", android_dir, dp->d_name);
android::base::ReadFileToString(file_name, &dt_file);
std::replace(dt_file.begin(), dt_file.end(), ',', '.');
std::string property_name = android::base::StringPrintf("ro.boot.%s", dp->d_name);
std::string property_name = StringPrintf("ro.boot.%s", dp->d_name);
property_set(property_name.c_str(), dt_file.c_str());
}
}
@ -566,12 +567,14 @@ int main(int argc, char** argv) {
return watchdogd_main(argc, argv);
}
boot_clock::time_point start_time = boot_clock::now();
// Clear the umask.
umask(0);
add_environment("PATH", _PATH_DEFPATH);
bool is_first_stage = (argc == 1) || (strcmp(argv[1], "--second-stage") != 0);
bool is_first_stage = (getenv("INIT_SECOND_STAGE") == nullptr);
// Don't expose the raw commandline to unprivileged processes.
chmod("/proc/cmdline", 0440);
@ -596,32 +599,34 @@ int main(int argc, char** argv) {
// talk to the outside world...
InitKernelLogging(argv);
if (is_first_stage) {
LOG(INFO) << "init first stage started!";
LOG(INFO) << "init " << (is_first_stage ? "first" : "second") << " stage started!";
if (is_first_stage) {
// Mount devices defined in android.early.* kernel commandline
early_mount();
// Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
// Set up SELinux, loading the SELinux policy.
selinux_initialize(true);
// If we're in the kernel domain, re-exec init to transition to the init domain now
// We're in the kernel domain, so re-exec init to transition to the init domain now
// that the SELinux policy has been loaded.
if (restorecon("/init") == -1) {
PLOG(ERROR) << "restorecon failed";
security_failure();
}
setenv("INIT_SECOND_STAGE", "true", 1);
uint64_t start_ns = start_time.time_since_epoch().count();
setenv("INIT_STARTED_AT", StringPrintf("%" PRIu64, start_ns).c_str(), 1);
char* path = argv[0];
char* args[] = { path, const_cast<char*>("--second-stage"), nullptr };
char* args[] = { path, nullptr };
if (execv(path, args) == -1) {
PLOG(ERROR) << "execv(\"" << path << "\") failed";
security_failure();
}
} else {
LOG(INFO) << "init second stage started!";
// Indicate that booting is in progress to background fw loaders, etc.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
@ -636,7 +641,10 @@ int main(int argc, char** argv) {
// used by init as well as the current required properties.
export_kernel_boot_props();
// Now set up SELinux for second stage
// Make the time that init started available for bootstat to log.
property_set("init.start", getenv("INIT_STARTED_AT"));
// Now set up SELinux for second stage.
selinux_initialize(false);
}
@ -710,21 +718,22 @@ int main(int argc, char** argv) {
restart_processes();
}
int timeout = -1;
if (process_needs_restart) {
timeout = (process_needs_restart - gettime()) * 1000;
if (timeout < 0)
timeout = 0;
// By default, sleep until something happens.
int epoll_timeout_ms = -1;
// If there's more work to do, wake up again immediately.
if (am.HasMoreCommands()) epoll_timeout_ms = 0;
// If there's a process that needs restarting, wake up in time for that.
if (process_needs_restart_at != 0) {
epoll_timeout_ms = (process_needs_restart_at - time(nullptr)) * 1000;
if (epoll_timeout_ms < 0) epoll_timeout_ms = 0;
}
if (am.HasMoreCommands()) {
timeout = 0;
}
bootchart_sample(&timeout);
bootchart_sample(&epoll_timeout_ms);
epoll_event ev;
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, timeout));
int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, epoll_timeout_ms));
if (nr == -1) {
PLOG(ERROR) << "epoll_wait failed";
} else if (nr == 1) {

View File

@ -22,8 +22,6 @@
class Action;
class Service;
#define COMMAND_RETRY_TIMEOUT 5
extern const char *ENV[32];
extern bool waiting_for_exec;
extern std::string default_console;

View File

@ -440,8 +440,16 @@ Properties
Init provides information about the services that it is responsible
for via the below properties.
init.start
Time after boot in ns (via the CLOCK_BOOTTIME clock) at which the first
stage of init started.
init.svc.<name>
State of a named service ("stopped", "stopping", "running", "restarting")
State of a named service ("stopped", "stopping", "running", "restarting")
init.svc.<name>.start
Time after boot in ns (via the CLOCK_BOOTTIME clock) that the service was
most recently started.
Bootcharting
@ -537,10 +545,10 @@ service akmd /system/bin/logwrapper /sbin/akmd
For quicker turnaround when working on init itself, use:
mm -j
m ramdisk-nodeps
m bootimage-nodeps
adb reboot bootloader
mm -j &&
m ramdisk-nodeps &&
m bootimage-nodeps &&
adb reboot bootloader &&
fastboot boot $ANDROID_PRODUCT_OUT/boot.img
Alternatively, use the emulator:

View File

@ -17,6 +17,7 @@
#include "service.h"
#include <fcntl.h>
#include <inttypes.h>
#include <linux/securebits.h>
#include <sched.h>
#include <sys/mount.h>
@ -51,9 +52,6 @@ using android::base::ParseInt;
using android::base::StringPrintf;
using android::base::WriteStringToFile;
#define CRITICAL_CRASH_THRESHOLD 4 // if we crash >4 times ...
#define CRITICAL_CRASH_WINDOW (4*60) // ... in 4 minutes, goto recovery
static std::string ComputeContextFromExecutable(std::string& service_name,
const std::string& service_path) {
std::string computed_context;
@ -154,8 +152,8 @@ ServiceEnvironmentInfo::ServiceEnvironmentInfo(const std::string& name,
Service::Service(const std::string& name, const std::string& classname,
const std::vector<std::string>& args)
: name_(name), classname_(classname), flags_(0), pid_(0), time_started_(0),
time_crashed_(0), nr_crashed_(0), uid_(0), gid_(0), namespace_flags_(0),
: name_(name), classname_(classname), flags_(0), pid_(0),
crash_count_(0), uid_(0), gid_(0), namespace_flags_(0),
seclabel_(""), ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0),
priority_(0), oom_score_adjust_(-1000), args_(args) {
onrestart_.InitSingleTrigger("onrestart");
@ -168,7 +166,7 @@ Service::Service(const std::string& name, const std::string& classname,
const std::string& seclabel,
const std::vector<std::string>& args)
: name_(name), classname_(classname), flags_(flags), pid_(0),
time_started_(0), time_crashed_(0), nr_crashed_(0), uid_(uid), gid_(gid),
crash_count_(0), uid_(uid), gid_(gid),
supp_gids_(supp_gids), capabilities_(capabilities),
namespace_flags_(namespace_flags), seclabel_(seclabel),
ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0), priority_(0),
@ -190,6 +188,12 @@ void Service::NotifyStateChange(const std::string& new_state) const {
}
property_set(prop_name.c_str(), new_state.c_str());
if (new_state == "running") {
prop_name += ".start";
uint64_t start_ns = time_started_.time_since_epoch().count();
property_set(prop_name.c_str(), StringPrintf("%" PRIu64, start_ns).c_str());
}
}
void Service::KillProcessGroup(int signal) {
@ -274,20 +278,19 @@ bool Service::Reap() {
return false;
}
time_t now = gettime();
// If we crash > 4 times in 4 minutes, reboot into recovery.
boot_clock::time_point now = boot_clock::now();
if ((flags_ & SVC_CRITICAL) && !(flags_ & SVC_RESTART)) {
if (time_crashed_ + CRITICAL_CRASH_WINDOW >= now) {
if (++nr_crashed_ > CRITICAL_CRASH_THRESHOLD) {
LOG(ERROR) << "critical process '" << name_ << "' exited "
<< CRITICAL_CRASH_THRESHOLD << " times in "
<< (CRITICAL_CRASH_WINDOW / 60) << " minutes; "
if (now < time_crashed_ + 4min) {
if (++crash_count_ > 4) {
LOG(ERROR) << "critical process '" << name_ << "' exited 4 times in 4 minutes; "
<< "rebooting into recovery mode";
android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
return false;
}
} else {
time_crashed_ = now;
nr_crashed_ = 1;
crash_count_ = 1;
}
}
@ -553,7 +556,6 @@ bool Service::Start() {
// Starting a service removes it from the disabled or reset state and
// immediately takes it out of the restarting state if it was in there.
flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
time_started_ = 0;
// Running processes require no additional work --- if they're in the
// process of exiting, we've ensured that they will immediately restart
@ -667,7 +669,7 @@ bool Service::Start() {
}
}
time_started_ = gettime();
time_started_ = boot_clock::now();
pid_ = pid;
flags_ |= SVC_RUNNING;
@ -731,18 +733,19 @@ void Service::Restart() {
} /* else: Service is restarting anyways. */
}
void Service::RestartIfNeeded(time_t& process_needs_restart) {
time_t next_start_time = time_started_ + 5;
if (next_start_time <= gettime()) {
void Service::RestartIfNeeded(time_t* process_needs_restart_at) {
boot_clock::time_point now = boot_clock::now();
boot_clock::time_point next_start = time_started_ + 5s;
if (now > next_start) {
flags_ &= (~SVC_RESTARTING);
Start();
return;
}
if ((next_start_time < process_needs_restart) ||
(process_needs_restart == 0)) {
process_needs_restart = next_start_time;
time_t next_start_time_t = time(nullptr) +
time_t(std::chrono::duration_cast<std::chrono::seconds>(next_start - now).count());
if (next_start_time_t < *process_needs_restart_at || *process_needs_restart_at == 0) {
*process_needs_restart_at = next_start_time_t;
}
}

View File

@ -30,6 +30,7 @@
#include "descriptors.h"
#include "init_parser.h"
#include "keyword_map.h"
#include "util.h"
#define SVC_DISABLED 0x001 // do not autostart with class
#define SVC_ONESHOT 0x002 // do not restart on exit
@ -75,7 +76,7 @@ public:
void Stop();
void Terminate();
void Restart();
void RestartIfNeeded(time_t& process_needs_restart);
void RestartIfNeeded(time_t* process_needs_restart_at);
bool Reap();
void DumpState() const;
@ -134,9 +135,9 @@ private:
unsigned flags_;
pid_t pid_;
time_t time_started_; // time of last start
time_t time_crashed_; // first crash within inspection window
int nr_crashed_; // number of times crashed within window
boot_clock::time_point time_started_; // time of last start
boot_clock::time_point time_crashed_; // first crash within inspection window
int crash_count_; // number of times crashed within window
uid_t uid_;
gid_t gid_;

View File

@ -258,16 +258,11 @@ int write_file(const char* path, const char* content) {
return result;
}
time_t gettime() {
timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return now.tv_sec;
}
uint64_t gettime_ns() {
timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
boot_clock::time_point boot_clock::now() {
timespec ts;
clock_gettime(CLOCK_BOOTTIME, &ts);
return boot_clock::time_point(std::chrono::seconds(ts.tv_sec) +
std::chrono::nanoseconds(ts.tv_nsec));
}
int mkdir_recursive(const char *pathname, mode_t mode)
@ -325,16 +320,15 @@ void sanitize(char *s)
}
}
int wait_for_file(const char *filename, int timeout)
{
struct stat info;
uint64_t timeout_time_ns = gettime_ns() + timeout * UINT64_C(1000000000);
int ret = -1;
int wait_for_file(const char* filename, std::chrono::nanoseconds timeout) {
boot_clock::time_point timeout_time = boot_clock::now() + timeout;
while (boot_clock::now() < timeout_time) {
struct stat sb;
if (stat(filename, &sb) != -1) return 0;
while (gettime_ns() < timeout_time_ns && ((ret = stat(filename, &info)) < 0))
usleep(10000);
return ret;
}
return -1;
}
void import_kernel_cmdline(bool in_qemu,

View File

@ -20,11 +20,14 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <chrono>
#include <string>
#include <functional>
#define COLDBOOT_DONE "/dev/.coldboot_done"
using namespace std::chrono_literals;
int create_socket(const char *name, int type, mode_t perm,
uid_t uid, gid_t gid, const char *socketcon);
int create_file(const char *path, int mode, mode_t perm,
@ -33,27 +36,35 @@ int create_file(const char *path, int mode, mode_t perm,
bool read_file(const char* path, std::string* content);
int write_file(const char* path, const char* content);
time_t gettime();
uint64_t gettime_ns();
// A std::chrono clock based on CLOCK_BOOTTIME.
class boot_clock {
public:
typedef std::chrono::nanoseconds duration;
typedef std::chrono::time_point<boot_clock, duration> time_point;
static constexpr bool is_steady = true;
static time_point now();
};
class Timer {
public:
Timer() : t0(gettime_ns()) {
Timer() : start_(boot_clock::now()) {
}
double duration() {
return static_cast<double>(gettime_ns() - t0) / 1000000000.0;
typedef std::chrono::duration<double> double_duration;
return std::chrono::duration_cast<double_duration>(boot_clock::now() - start_).count();
}
private:
uint64_t t0;
boot_clock::time_point start_;
};
unsigned int decode_uid(const char *s);
int mkdir_recursive(const char *pathname, mode_t mode);
void sanitize(char *p);
int wait_for_file(const char *filename, int timeout);
int wait_for_file(const char *filename, std::chrono::nanoseconds timeout);
void import_kernel_cmdline(bool in_qemu,
const std::function<void(const std::string&, const std::string&, bool)>&);
int make_dir(const char *path, mode_t mode);