Log active use time between kernel crashes.

Also, initialize the network state from flimflam, just in case -- now
that the metrics daemon process starts a bit late,

BUG=none
TEST=unit tests, ran on the device, emerged arm-generic

Review URL: http://codereview.chromium.org/2864009
This commit is contained in:
Darin Petkov 2010-06-24 12:10:26 -07:00
parent cd8c317453
commit 38d5cb09ef
4 changed files with 174 additions and 23 deletions

View File

@ -35,7 +35,6 @@ LIB_OBJS = \
c_metrics_library.o \
metrics_library.o
TESTLIB_OBJS = \
metrics_library.o \
metrics_library_test.o
TESTCOUNTER_LIBS = -lgmock -lgtest -lbase -lrt -lpthread
@ -71,17 +70,6 @@ $(LIB_TEST): $(TESTLIB_OBJS) $(SHAREDLIB)
%.o: %.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
# dependencies in addition to those defined by the rules
metrics_daemon.o: \
metrics_daemon.h \
network_states.h \
power_states.h
metrics_daemon_test.o: \
metrics_daemon.h \
network_states.h \
power_states.h
clean:
rm -f $(CLIENT) $(DAEMON) $(LIB) $(SHAREDLIB) *.o
rm -f $(COUNTER_TEST) $(DAEMON_TEST) $(LIB_TEST)

View File

@ -6,6 +6,7 @@
#include <dbus/dbus-glib-lowlevel.h>
#include <base/file_util.h>
#include <base/logging.h>
#include "counter.h"
@ -13,6 +14,7 @@
using base::Time;
using base::TimeDelta;
using base::TimeTicks;
using std::string;
#define SAFE_MESSAGE(e) (e.message ? e.message : "unknown error")
#define DBUS_IFACE_CRASH_REPORTER "org.chromium.CrashReporter"
@ -20,13 +22,6 @@ using base::TimeTicks;
#define DBUS_IFACE_POWER_MANAGER "org.chromium.PowerManager"
#define DBUS_IFACE_SESSION_MANAGER "org.chromium.SessionManagerInterface"
// File to aggregate daily usage before sending to UMA.
// TODO(petkov): This file should probably live in a user-specific stateful
// location, e.g., /home/chronos/user.
static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
static const char kUserCrashIntervalRecordFile[] =
"/var/log/metrics/user-crash-interval";
static const int kSecondsPerMinute = 60;
static const int kMinutesPerHour = 60;
static const int kHoursPerDay = 24;
@ -51,6 +46,12 @@ const int MetricsDaemon::kMetricDailyUseTimeMin = 1;
const int MetricsDaemon::kMetricDailyUseTimeMax = kMinutesPerDay;
const int MetricsDaemon::kMetricDailyUseTimeBuckets = 50;
const char MetricsDaemon::kMetricKernelCrashIntervalName[] =
"Logging.KernelCrashInterval";
const int MetricsDaemon::kMetricKernelCrashIntervalMin = 1;
const int MetricsDaemon::kMetricKernelCrashIntervalMax = 4 * kSecondsPerWeek;
const int MetricsDaemon::kMetricKernelCrashIntervalBuckets = 50;
const char MetricsDaemon::kMetricTimeToNetworkDropName[] =
"Network.TimeToDrop";
const int MetricsDaemon::kMetricTimeToNetworkDropMin = 1;
@ -106,6 +107,53 @@ const char* MetricsDaemon::kSessionStates_[] = {
#include "session_states.h"
};
// Invokes a remote method over D-Bus that takes no input arguments
// and returns a string result. The method call is issued with a 2
// second blocking timeout. Returns an empty string on failure or
// timeout.
static string DBusGetString(DBusConnection* connection,
const string& destination,
const string& path,
const string& interface,
const string& method) {
DBusMessage* message =
dbus_message_new_method_call(destination.c_str(),
path.c_str(),
interface.c_str(),
method.c_str());
if (!message) {
DLOG(WARNING) << "DBusGetString: unable to allocate a message";
return "";
}
DBusError error;
dbus_error_init(&error);
const int kTimeout = 2000; // ms
DLOG(INFO) << "DBusGetString: dest=" << destination << " path=" << path
<< " iface=" << interface << " method=" << method;
DBusMessage* reply =
dbus_connection_send_with_reply_and_block(connection, message, kTimeout,
&error);
dbus_message_unref(message);
if (dbus_error_is_set(&error) || !reply) {
DLOG(WARNING) << "DBusGetString: call failed";
return "";
}
DBusMessageIter iter;
dbus_message_iter_init(reply, &iter);
if (dbus_message_iter_get_arg_type(&iter) != DBUS_TYPE_STRING) {
NOTREACHED();
dbus_message_unref(reply);
return "";
}
const char* c_result = "";
dbus_message_iter_get_basic(&iter, &c_result);
string result = c_result;
DLOG(INFO) << "DBusGetString: result=" << result;
dbus_message_unref(reply);
return result;
}
MetricsDaemon::MetricsDaemon()
: network_state_(kUnknownNetworkState),
power_state_(kUnknownPowerState),
@ -117,21 +165,35 @@ MetricsDaemon::MetricsDaemon()
MetricsDaemon::~MetricsDaemon() {}
void MetricsDaemon::Run(bool run_as_daemon) {
if (!run_as_daemon || daemon(0, 0) == 0) {
Loop();
}
if (run_as_daemon && daemon(0, 0) != 0)
return;
static const char kKernelCrashDetectedFile[] = "/tmp/kernel-crash-detected";
CheckKernelCrash(kKernelCrashDetectedFile);
Loop();
}
void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib) {
testing_ = testing;
DCHECK(metrics_lib != NULL);
metrics_lib_ = metrics_lib;
static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
daily_use_.reset(new chromeos_metrics::TaggedCounter());
daily_use_->Init(kDailyUseRecordFile, &DailyUseReporter, this);
static const char kUserCrashIntervalRecordFile[] =
"/var/log/metrics/user-crash-interval";
user_crash_interval_.reset(new chromeos_metrics::TaggedCounter());
user_crash_interval_->Init(kUserCrashIntervalRecordFile,
&UserCrashIntervalReporter, this);
static const char kKernelCrashIntervalRecordFile[] =
"/var/log/metrics/kernel-crash-interval";
kernel_crash_interval_.reset(new chromeos_metrics::TaggedCounter());
kernel_crash_interval_->Init(kKernelCrashIntervalRecordFile,
&KernelCrashIntervalReporter, this);
// Don't setup D-Bus and GLib in test mode.
if (testing)
return;
@ -162,6 +224,11 @@ void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib) {
// the registered D-Bus matches is successful. The daemon is not
// activated for D-Bus messages that don't match.
CHECK(dbus_connection_add_filter(connection, MessageFilter, this, NULL));
// Initializes the current network state by retrieving it from flimflam.
string state_name = DBusGetString(connection, "org.chromium.flimflam", "/",
DBUS_IFACE_FLIMFLAM_MANAGER, "GetState");
NetStateChanged(state_name.c_str(), TimeTicks::Now());
}
void MetricsDaemon::Loop() {
@ -320,6 +387,7 @@ void MetricsDaemon::SetUserActiveState(bool active, Time now) {
int day = since_epoch.InDays();
daily_use_->Update(day, seconds);
user_crash_interval_->Update(0, seconds);
kernel_crash_interval_->Update(0, seconds);
// Schedules a use monitor on inactive->active transitions and
// unschedules it on active->inactive transitions.
@ -342,6 +410,27 @@ void MetricsDaemon::ProcessUserCrash() {
user_crash_interval_->Flush();
}
void MetricsDaemon::ProcessKernelCrash() {
// Counts the active use time up to now.
SetUserActiveState(user_active_, Time::Now());
// Reports the active use time since the last crash and resets it.
kernel_crash_interval_->Flush();
}
void MetricsDaemon::CheckKernelCrash(const std::string& crash_file) {
FilePath crash_detected(crash_file);
if (!file_util::PathExists(crash_detected))
return;
ProcessKernelCrash();
// Deletes the crash-detected file so that the daemon doesn't report
// another kernel crash in case it's restarted.
file_util::Delete(crash_detected,
false); // recursive
}
// static
gboolean MetricsDaemon::UseMonitorStatic(gpointer data) {
return static_cast<MetricsDaemon*>(data)->UseMonitor() ? TRUE : FALSE;
@ -425,7 +514,17 @@ void MetricsDaemon::UserCrashIntervalReporter(void* handle,
kMetricUserCrashIntervalBuckets);
}
void MetricsDaemon::SendMetric(const std::string& name, int sample,
// static
void MetricsDaemon::KernelCrashIntervalReporter(void* handle,
int tag, int count) {
MetricsDaemon* daemon = static_cast<MetricsDaemon*>(handle);
daemon->SendMetric(kMetricKernelCrashIntervalName, count,
kMetricKernelCrashIntervalMin,
kMetricKernelCrashIntervalMax,
kMetricKernelCrashIntervalBuckets);
}
void MetricsDaemon::SendMetric(const string& name, int sample,
int min, int max, int nbuckets) {
DLOG(INFO) << "received metric: " << name << " " << sample << " "
<< min << " " << max << " " << nbuckets;

View File

@ -31,7 +31,9 @@ class MetricsDaemon {
private:
friend class MetricsDaemonTest;
FRIEND_TEST(MetricsDaemonTest, CheckKernelCrash);
FRIEND_TEST(MetricsDaemonTest, DailyUseReporter);
FRIEND_TEST(MetricsDaemonTest, KernelCrashIntervalReporter);
FRIEND_TEST(MetricsDaemonTest, LookupNetworkState);
FRIEND_TEST(MetricsDaemonTest, LookupPowerState);
FRIEND_TEST(MetricsDaemonTest, LookupScreenSaverState);
@ -40,6 +42,7 @@ class MetricsDaemon {
FRIEND_TEST(MetricsDaemonTest, NetStateChangedSimpleDrop);
FRIEND_TEST(MetricsDaemonTest, NetStateChangedSuspend);
FRIEND_TEST(MetricsDaemonTest, PowerStateChanged);
FRIEND_TEST(MetricsDaemonTest, ProcessKernelCrash);
FRIEND_TEST(MetricsDaemonTest, ProcessUserCrash);
FRIEND_TEST(MetricsDaemonTest, ScreenSaverStateChanged);
FRIEND_TEST(MetricsDaemonTest, SendMetric);
@ -85,6 +88,10 @@ class MetricsDaemon {
static const int kMetricDailyUseTimeMin;
static const int kMetricDailyUseTimeMax;
static const int kMetricDailyUseTimeBuckets;
static const char kMetricKernelCrashIntervalName[];
static const int kMetricKernelCrashIntervalMin;
static const int kMetricKernelCrashIntervalMax;
static const int kMetricKernelCrashIntervalBuckets;
static const char kMetricTimeToNetworkDropName[];
static const int kMetricTimeToNetworkDropMin;
static const int kMetricTimeToNetworkDropMax;
@ -152,6 +159,14 @@ class MetricsDaemon {
// process crashes.
void ProcessUserCrash();
// Updates the active use time and logs time between kernel crashes.
void ProcessKernelCrash();
// Checks if a kernel crash has been detected and processes if so.
// The method assumes that a kernel crash has happened if
// |crash_file| exists.
void CheckKernelCrash(const std::string& crash_file);
// Callbacks for the daily use monitor. The daily use monitor uses
// LogDailyUseRecord to aggregate current usage data and send it to
// UMA, if necessary. It also reschedules itself using an
@ -187,6 +202,10 @@ class MetricsDaemon {
// crashes and send to UMA.
static void UserCrashIntervalReporter(void* data, int tag, int count);
// TaggedCounter callback to process time between kernel crashes and
// send to UMA.
static void KernelCrashIntervalReporter(void* data, int tag, int count);
// Test mode.
bool testing_;
@ -222,6 +241,9 @@ class MetricsDaemon {
// Active use time between user-space process crashes.
scoped_ptr<chromeos_metrics::TaggedCounterInterface> user_crash_interval_;
// Active use time between kernel crashes.
scoped_ptr<chromeos_metrics::TaggedCounterInterface> kernel_crash_interval_;
// Sleep period until the next daily usage aggregation performed by
// the daily use monitor (see ScheduleUseMonitor).
int usemon_interval_;

View File

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <base/file_util.h>
#include <gtest/gtest.h>
#include "counter_mock.h"
@ -42,16 +43,20 @@ class MetricsDaemonTest : public testing::Test {
protected:
virtual void SetUp() {
EXPECT_EQ(NULL, daemon_.daily_use_.get());
EXPECT_EQ(NULL, daemon_.kernel_crash_interval_.get());
EXPECT_EQ(NULL, daemon_.user_crash_interval_.get());
daemon_.Init(true, &metrics_lib_);
// Tests constructor initialization. Switches to mock counters.
EXPECT_TRUE(NULL != daemon_.daily_use_.get());
EXPECT_TRUE(NULL != daemon_.kernel_crash_interval_.get());
EXPECT_TRUE(NULL != daemon_.user_crash_interval_.get());
// Allocates mock counter and transfers ownership.
daily_use_ = new StrictMock<TaggedCounterMock>();
daemon_.daily_use_.reset(daily_use_);
kernel_crash_interval_ = new StrictMock<TaggedCounterMock>();
daemon_.kernel_crash_interval_.reset(kernel_crash_interval_);
user_crash_interval_ = new StrictMock<TaggedCounterMock>();
daemon_.user_crash_interval_.reset(user_crash_interval_);
@ -71,6 +76,9 @@ class MetricsDaemonTest : public testing::Test {
EXPECT_CALL(*daily_use_, Update(daily_tag, count))
.Times(1)
.RetiresOnSaturation();
EXPECT_CALL(*kernel_crash_interval_, Update(0, count))
.Times(1)
.RetiresOnSaturation();
EXPECT_CALL(*user_crash_interval_, Update(0, count))
.Times(1)
.RetiresOnSaturation();
@ -82,6 +90,9 @@ class MetricsDaemonTest : public testing::Test {
EXPECT_CALL(*daily_use_, Update(_, _))
.Times(1)
.RetiresOnSaturation();
EXPECT_CALL(*kernel_crash_interval_, Update(_, _))
.Times(1)
.RetiresOnSaturation();
EXPECT_CALL(*user_crash_interval_, Update(_, _))
.Times(1)
.RetiresOnSaturation();
@ -159,9 +170,24 @@ class MetricsDaemonTest : public testing::Test {
// update calls are marked as failures. They are pointers so that
// they can replace the scoped_ptr's allocated by the daemon.
StrictMock<TaggedCounterMock>* daily_use_;
StrictMock<TaggedCounterMock>* kernel_crash_interval_;
StrictMock<TaggedCounterMock>* user_crash_interval_;
};
TEST_F(MetricsDaemonTest, CheckKernelCrash) {
static const char kKernelCrashDetected[] = "test-kernel-crash-detected";
daemon_.CheckKernelCrash(kKernelCrashDetected);
FilePath crash_detected(kKernelCrashDetected);
file_util::WriteFile(crash_detected, "", 0);
IgnoreActiveUseUpdate();
EXPECT_CALL(*kernel_crash_interval_, Flush())
.Times(1)
.RetiresOnSaturation();
daemon_.CheckKernelCrash(kKernelCrashDetected);
file_util::Delete(crash_detected, false);
}
TEST_F(MetricsDaemonTest, DailyUseReporter) {
ExpectDailyUseTimeMetric(/* sample */ 2);
MetricsDaemon::DailyUseReporter(&daemon_, /* tag */ 20, /* count */ 90);
@ -174,6 +200,14 @@ TEST_F(MetricsDaemonTest, DailyUseReporter) {
MetricsDaemon::DailyUseReporter(&daemon_, /* tag */ 60, /* count */ -5);
}
TEST_F(MetricsDaemonTest, KernelCrashIntervalReporter) {
ExpectMetric(MetricsDaemon::kMetricKernelCrashIntervalName, 50,
MetricsDaemon::kMetricKernelCrashIntervalMin,
MetricsDaemon::kMetricKernelCrashIntervalMax,
MetricsDaemon::kMetricKernelCrashIntervalBuckets);
MetricsDaemon::KernelCrashIntervalReporter(&daemon_, 0, 50);
}
TEST_F(MetricsDaemonTest, LookupNetworkState) {
EXPECT_EQ(MetricsDaemon::kNetworkStateOnline,
daemon_.LookupNetworkState("online"));
@ -340,6 +374,14 @@ TEST_F(MetricsDaemonTest, PowerStateChanged) {
EXPECT_EQ(TestTime(7 * kSecondsPerDay + 185), daemon_.user_active_last_);
}
TEST_F(MetricsDaemonTest, ProcessKernelCrash) {
IgnoreActiveUseUpdate();
EXPECT_CALL(*kernel_crash_interval_, Flush())
.Times(1)
.RetiresOnSaturation();
daemon_.ProcessKernelCrash();
}
TEST_F(MetricsDaemonTest, ProcessUserCrash) {
IgnoreActiveUseUpdate();
EXPECT_CALL(*user_crash_interval_, Flush())