// Copyright (c) 2011 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // For PRIu64 in inttypes.h, used by scanf. TODO(semenzato): replace // with libchromeos methods. #define __STDC_FORMAT_MACROS #include "metrics/metrics_daemon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "uploader/upload_service.h" using base::FilePath; using base::StringPrintf; using base::Time; using base::TimeDelta; using base::TimeTicks; using chromeos_metrics::PersistentInteger; using std::map; using std::string; using std::vector; namespace { #define SAFE_MESSAGE(e) (e.message ? e.message : "unknown error") const char kCrashReporterInterface[] = "org.chromium.CrashReporter"; const char kCrashReporterUserCrashSignal[] = "UserCrash"; const int kSecondsPerMinute = 60; const int kMinutesPerHour = 60; const int kHoursPerDay = 24; const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour; const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay; const int kDaysPerWeek = 7; const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek; // Interval between calls to UpdateStats(). const guint kUpdateStatsIntervalMs = 300000; const char kKernelCrashDetectedFile[] = "/var/run/kernel-crash-detected"; const char kUncleanShutdownDetectedFile[] = "/var/run/unclean-shutdown-detected"; } // namespace // disk stats metrics // The {Read,Write}Sectors numbers are in sectors/second. // A sector is usually 512 bytes. 
const char MetricsDaemon::kMetricReadSectorsLongName[] =
    "Platform.ReadSectorsLong";
const char MetricsDaemon::kMetricWriteSectorsLongName[] =
    "Platform.WriteSectorsLong";
const char MetricsDaemon::kMetricReadSectorsShortName[] =
    "Platform.ReadSectorsShort";
const char MetricsDaemon::kMetricWriteSectorsShortName[] =
    "Platform.WriteSectorsShort";

// Sampling periods, all expressed in seconds.
const int MetricsDaemon::kMetricStatsShortInterval = 1;
const int MetricsDaemon::kMetricStatsLongInterval = 30;
const int MetricsDaemon::kMetricMeminfoInterval = 30;

// Histogram upper bound for sector rates, in sectors/second.  With 512-byte
// sectors this corresponds to roughly 250 MB/s, which is assumed to be the
// maximum read rate (writes are expected to be slower).
const int MetricsDaemon::kMetricSectorsIOMax = 500000;
// Number of histogram buckets used for sector rates.
const int MetricsDaemon::kMetricSectorsBuckets = 50;

// A 4k page is eight 0.5k sectors, so the largest page fault rate worth
// recording is the one that would saturate the disk at the rate above.
const int MetricsDaemon::kMetricPageFaultsMax = kMetricSectorsIOMax / 8;
const int MetricsDaemon::kMetricPageFaultsBuckets = 50;

// Major page faults, i.e. the ones that require data to be read from disk.
const char MetricsDaemon::kMetricPageFaultsLongName[] =
    "Platform.PageFaultsLong";
const char MetricsDaemon::kMetricPageFaultsShortName[] =
    "Platform.PageFaultsShort";

// Swap in and swap out.
const char MetricsDaemon::kMetricSwapInLongName[] = "Platform.SwapInLong";
const char MetricsDaemon::kMetricSwapInShortName[] = "Platform.SwapInShort";
const char MetricsDaemon::kMetricSwapOutLongName[] = "Platform.SwapOutLong";
const char MetricsDaemon::kMetricSwapOutShortName[] = "Platform.SwapOutShort";

// Source of CPU tick counters and the number of whitespace-separated items
// expected on its first line.
const char MetricsDaemon::kMetricsProcStatFileName[] = "/proc/stat";
const int MetricsDaemon::kMetricsProcStatFirstLineItemsCount = 11;

// Thermal CPU throttling.
const char MetricsDaemon::kMetricScaledCpuFrequencyName[] =
    "Platform.CpuFrequencyThermalScaling";

// Zram sysfs entries.
const char MetricsDaemon::kComprDataSizeName[] = "compr_data_size"; const char MetricsDaemon::kOrigDataSizeName[] = "orig_data_size"; const char MetricsDaemon::kZeroPagesName[] = "zero_pages"; // Memory use stats collection intervals. We collect some memory use interval // at these intervals after boot, and we stop collecting after the last one, // with the assumption that in most cases the memory use won't change much // after that. static const int kMemuseIntervals[] = { 1 * kSecondsPerMinute, // 1 minute mark 4 * kSecondsPerMinute, // 5 minute mark 25 * kSecondsPerMinute, // 0.5 hour mark 120 * kSecondsPerMinute, // 2.5 hour mark 600 * kSecondsPerMinute, // 12.5 hour mark }; MetricsDaemon::MetricsDaemon() : update_stats_timeout_id_(-1), memuse_final_time_(0), memuse_interval_index_(0), read_sectors_(0), write_sectors_(0), vmstats_(), stats_state_(kStatsShort), stats_initial_time_(0), ticks_per_second_(0), latest_cpu_use_ticks_(0) {} MetricsDaemon::~MetricsDaemon() { if (update_stats_timeout_id_ > -1) g_source_remove(update_stats_timeout_id_); } double MetricsDaemon::GetActiveTime() { struct timespec ts; int r = clock_gettime(CLOCK_MONOTONIC, &ts); if (r < 0) { PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed"; return 0; } else { return ts.tv_sec + static_cast(ts.tv_nsec) / (1000 * 1000 * 1000); } } void MetricsDaemon::Run(bool run_as_daemon) { if (run_as_daemon && daemon(0, 0) != 0) return; if (CheckSystemCrash(kKernelCrashDetectedFile)) { ProcessKernelCrash(); } if (CheckSystemCrash(kUncleanShutdownDetectedFile)) { ProcessUncleanShutdown(); } // On OS version change, clear version stats (which are reported daily). 
int32_t version = GetOsVersionHash(); if (version_cycle_->Get() != version) { version_cycle_->Set(version); kernel_crashes_version_count_->Set(0); version_cumulative_active_use_->Set(0); version_cumulative_cpu_use_->Set(0); } Loop(); } void MetricsDaemon::RunUploaderTest() { upload_service_->UploadEvent(); } uint32_t MetricsDaemon::GetOsVersionHash() { static uint32_t cached_version_hash = 0; static bool version_hash_is_cached = false; if (version_hash_is_cached) return cached_version_hash; version_hash_is_cached = true; std::string version; if (base::SysInfo::GetLsbReleaseValue("CHROMEOS_RELEASE_VERSION", &version)) { cached_version_hash = base::Hash(version); } else if (testing_) { cached_version_hash = 42; // return any plausible value for the hash } else { LOG(FATAL) << "could not find CHROMEOS_RELEASE_VERSION"; } return cached_version_hash; } void MetricsDaemon::Init(bool testing, bool uploader_active, MetricsLibraryInterface* metrics_lib, const string& diskstats_path, const string& vmstats_path, const string& scaling_max_freq_path, const string& cpuinfo_max_freq_path) { testing_ = testing; DCHECK(metrics_lib != nullptr); metrics_lib_ = metrics_lib; // Get ticks per second (HZ) on this system. // Sysconf cannot fail, so no sanity checks are needed. 
ticks_per_second_ = sysconf(_SC_CLK_TCK); daily_active_use_.reset( new PersistentInteger("Logging.DailyUseTime")); version_cumulative_active_use_.reset( new PersistentInteger("Logging.CumulativeDailyUseTime")); version_cumulative_cpu_use_.reset( new PersistentInteger("Logging.CumulativeCpuTime")); kernel_crash_interval_.reset( new PersistentInteger("Logging.KernelCrashInterval")); unclean_shutdown_interval_.reset( new PersistentInteger("Logging.UncleanShutdownInterval")); user_crash_interval_.reset( new PersistentInteger("Logging.UserCrashInterval")); any_crashes_daily_count_.reset( new PersistentInteger("Logging.AnyCrashesDaily")); any_crashes_weekly_count_.reset( new PersistentInteger("Logging.AnyCrashesWeekly")); user_crashes_daily_count_.reset( new PersistentInteger("Logging.UserCrashesDaily")); user_crashes_weekly_count_.reset( new PersistentInteger("Logging.UserCrashesWeekly")); kernel_crashes_daily_count_.reset( new PersistentInteger("Logging.KernelCrashesDaily")); kernel_crashes_weekly_count_.reset( new PersistentInteger("Logging.KernelCrashesWeekly")); kernel_crashes_version_count_.reset( new PersistentInteger("Logging.KernelCrashesSinceUpdate")); unclean_shutdowns_daily_count_.reset( new PersistentInteger("Logging.UncleanShutdownsDaily")); unclean_shutdowns_weekly_count_.reset( new PersistentInteger("Logging.UncleanShutdownsWeekly")); daily_cycle_.reset(new PersistentInteger("daily.cycle")); weekly_cycle_.reset(new PersistentInteger("weekly.cycle")); version_cycle_.reset(new PersistentInteger("version.cycle")); diskstats_path_ = diskstats_path; vmstats_path_ = vmstats_path; scaling_max_freq_path_ = scaling_max_freq_path; cpuinfo_max_freq_path_ = cpuinfo_max_freq_path; StatsReporterInit(); // Start collecting meminfo stats. ScheduleMeminfoCallback(kMetricMeminfoInterval); memuse_final_time_ = GetActiveTime() + kMemuseIntervals[0]; ScheduleMemuseCallback(kMemuseIntervals[0]); // Don't setup D-Bus and GLib in test mode. 
if (testing) return; g_type_init(); dbus_threads_init_default(); DBusError error; dbus_error_init(&error); DBusConnection* connection = dbus_bus_get(DBUS_BUS_SYSTEM, &error); LOG_IF(FATAL, dbus_error_is_set(&error)) << "No D-Bus connection: " << SAFE_MESSAGE(error); dbus_connection_setup_with_g_main(connection, nullptr); vector matches; matches.push_back( base::StringPrintf("type='signal',interface='%s',path='/',member='%s'", kCrashReporterInterface, kCrashReporterUserCrashSignal)); // Registers D-Bus matches for the signals we would like to catch. for (vector::const_iterator it = matches.begin(); it != matches.end(); ++it) { const char* match = it->c_str(); DLOG(INFO) << "adding dbus match: " << match; dbus_bus_add_match(connection, match, &error); LOG_IF(FATAL, dbus_error_is_set(&error)) << "unable to add a match: " << SAFE_MESSAGE(error); } // Adds the D-Bus filter routine to be called back whenever one of // the registered D-Bus matches is successful. The daemon is not // activated for D-Bus messages that don't match. CHECK(dbus_connection_add_filter(connection, MessageFilter, this, nullptr)); update_stats_timeout_id_ = g_timeout_add(kUpdateStatsIntervalMs, &HandleUpdateStatsTimeout, this); if (uploader_active) { upload_service_.reset(new UploadService()); upload_service_->Init(); } } void MetricsDaemon::Loop() { GMainLoop* loop = g_main_loop_new(nullptr, false); g_main_loop_run(loop); } // static DBusHandlerResult MetricsDaemon::MessageFilter(DBusConnection* connection, DBusMessage* message, void* user_data) { int message_type = dbus_message_get_type(message); if (message_type != DBUS_MESSAGE_TYPE_SIGNAL) { DLOG(WARNING) << "unexpected message type " << message_type; return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; } // Signal messages always have interfaces. const std::string interface(dbus_message_get_interface(message)); const std::string member(dbus_message_get_member(message)); DLOG(INFO) << "Got " << interface << "." 
<< member << " D-Bus signal"; MetricsDaemon* daemon = static_cast(user_data); DBusMessageIter iter; dbus_message_iter_init(message, &iter); if (interface == kCrashReporterInterface) { CHECK_EQ(member, kCrashReporterUserCrashSignal); daemon->ProcessUserCrash(); } else { // Ignore messages from the bus itself. return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; } return DBUS_HANDLER_RESULT_HANDLED; } // One might argue that parts of this should go into // chromium/src/base/sys_info_chromeos.c instead, but put it here for now. TimeDelta MetricsDaemon::GetIncrementalCpuUse() { FilePath proc_stat_path = FilePath(kMetricsProcStatFileName); std::string proc_stat_string; if (!base::ReadFileToString(proc_stat_path, &proc_stat_string)) { LOG(WARNING) << "cannot open " << kMetricsProcStatFileName; return TimeDelta(); } std::vector proc_stat_lines; base::SplitString(proc_stat_string, '\n', &proc_stat_lines); if (proc_stat_lines.empty()) { LOG(WARNING) << "cannot parse " << kMetricsProcStatFileName << ": " << proc_stat_string; return TimeDelta(); } std::vector proc_stat_totals; base::SplitStringAlongWhitespace(proc_stat_lines[0], &proc_stat_totals); uint64_t user_ticks, user_nice_ticks, system_ticks; if (proc_stat_totals.size() != kMetricsProcStatFirstLineItemsCount || proc_stat_totals[0] != "cpu" || !base::StringToUint64(proc_stat_totals[1], &user_ticks) || !base::StringToUint64(proc_stat_totals[2], &user_nice_ticks) || !base::StringToUint64(proc_stat_totals[3], &system_ticks)) { LOG(WARNING) << "cannot parse first line: " << proc_stat_lines[0]; return TimeDelta(base::TimeDelta::FromSeconds(0)); } uint64_t total_cpu_use_ticks = user_ticks + user_nice_ticks + system_ticks; // Sanity check. 
if (total_cpu_use_ticks < latest_cpu_use_ticks_) { LOG(WARNING) << "CPU time decreasing from " << latest_cpu_use_ticks_ << " to " << total_cpu_use_ticks; return TimeDelta(); } uint64_t diff = total_cpu_use_ticks - latest_cpu_use_ticks_; latest_cpu_use_ticks_ = total_cpu_use_ticks; // Use microseconds to avoid significant truncations. return base::TimeDelta::FromMicroseconds( diff * 1000 * 1000 / ticks_per_second_); } void MetricsDaemon::ProcessUserCrash() { // Counts the active time up to now. UpdateStats(TimeTicks::Now(), Time::Now()); // Reports the active use time since the last crash and resets it. SendCrashIntervalSample(user_crash_interval_); any_crashes_daily_count_->Add(1); any_crashes_weekly_count_->Add(1); user_crashes_daily_count_->Add(1); user_crashes_weekly_count_->Add(1); } void MetricsDaemon::ProcessKernelCrash() { // Counts the active time up to now. UpdateStats(TimeTicks::Now(), Time::Now()); // Reports the active use time since the last crash and resets it. SendCrashIntervalSample(kernel_crash_interval_); any_crashes_daily_count_->Add(1); any_crashes_weekly_count_->Add(1); kernel_crashes_daily_count_->Add(1); kernel_crashes_weekly_count_->Add(1); kernel_crashes_version_count_->Add(1); } void MetricsDaemon::ProcessUncleanShutdown() { // Counts the active time up to now. UpdateStats(TimeTicks::Now(), Time::Now()); // Reports the active use time since the last crash and resets it. SendCrashIntervalSample(unclean_shutdown_interval_); unclean_shutdowns_daily_count_->Add(1); unclean_shutdowns_weekly_count_->Add(1); any_crashes_daily_count_->Add(1); any_crashes_weekly_count_->Add(1); } bool MetricsDaemon::CheckSystemCrash(const string& crash_file) { FilePath crash_detected(crash_file); if (!base::PathExists(crash_detected)) return false; // Deletes the crash-detected file so that the daemon doesn't report // another kernel crash in case it's restarted. 
base::DeleteFile(crash_detected, false); // not recursive return true; } void MetricsDaemon::StatsReporterInit() { DiskStatsReadStats(&read_sectors_, &write_sectors_); VmStatsReadStats(&vmstats_); // The first time around just run the long stat, so we don't delay boot. stats_state_ = kStatsLong; stats_initial_time_ = GetActiveTime(); if (stats_initial_time_ < 0) { LOG(WARNING) << "not collecting disk stats"; } else { ScheduleStatsCallback(kMetricStatsLongInterval); } } void MetricsDaemon::ScheduleStatsCallback(int wait) { if (testing_) { return; } g_timeout_add_seconds(wait, StatsCallbackStatic, this); } bool MetricsDaemon::DiskStatsReadStats(uint64_t* read_sectors, uint64_t* write_sectors) { int nchars; int nitems; bool success = false; char line[200]; if (diskstats_path_.empty()) { return false; } int file = HANDLE_EINTR(open(diskstats_path_.c_str(), O_RDONLY)); if (file < 0) { PLOG(WARNING) << "cannot open " << diskstats_path_; return false; } nchars = HANDLE_EINTR(read(file, line, sizeof(line))); if (nchars < 0) { PLOG(WARNING) << "cannot read from " << diskstats_path_; return false; } else { LOG_IF(WARNING, nchars == sizeof(line)) << "line too long in " << diskstats_path_; line[nchars] = '\0'; nitems = sscanf(line, "%*d %*d %" PRIu64 "d %*d %*d %*d %" PRIu64 "d", read_sectors, write_sectors); if (nitems == 2) { success = true; } else { LOG(WARNING) << "found " << nitems << " items in " << diskstats_path_ << ", expected 2"; } } IGNORE_EINTR(close(file)); return success; } bool MetricsDaemon::VmStatsParseStats(const char* stats, struct VmstatRecord* record) { // a mapping of string name to field in VmstatRecord and whether we found it struct mapping { const string name; uint64_t* value_p; bool found; } map[] = { { .name = "pgmajfault", .value_p = &record->page_faults_, .found = false }, { .name = "pswpin", .value_p = &record->swap_in_, .found = false }, { .name = "pswpout", .value_p = &record->swap_out_, .found = false }, }; // Each line in the file has the form 
// // for instance: // nr_free_pages 213427 vector lines; Tokenize(stats, "\n", &lines); for (vector::iterator it = lines.begin(); it != lines.end(); ++it) { vector tokens; base::SplitString(*it, ' ', &tokens); if (tokens.size() == 2) { for (unsigned int i = 0; i < sizeof(map)/sizeof(struct mapping); i++) { if (!tokens[0].compare(map[i].name)) { if (!base::StringToUint64(tokens[1], map[i].value_p)) return false; map[i].found = true; } } } else { LOG(WARNING) << "unexpected vmstat format"; } } // make sure we got all the stats for (unsigned i = 0; i < sizeof(map)/sizeof(struct mapping); i++) { if (map[i].found == false) { LOG(WARNING) << "vmstat missing " << map[i].name; return false; } } return true; } bool MetricsDaemon::VmStatsReadStats(struct VmstatRecord* stats) { string value_string; FilePath* path = new FilePath(vmstats_path_); if (!base::ReadFileToString(*path, &value_string)) { delete path; LOG(WARNING) << "cannot read " << vmstats_path_; return false; } delete path; return VmStatsParseStats(value_string.c_str(), stats); } bool MetricsDaemon::ReadFreqToInt(const string& sysfs_file_name, int* value) { const FilePath sysfs_path(sysfs_file_name); string value_string; if (!base::ReadFileToString(sysfs_path, &value_string)) { LOG(WARNING) << "cannot read " << sysfs_path.value().c_str(); return false; } if (!base::RemoveChars(value_string, "\n", &value_string)) { LOG(WARNING) << "no newline in " << value_string; // Continue even though the lack of newline is suspicious. } if (!base::StringToInt(value_string, value)) { LOG(WARNING) << "cannot convert " << value_string << " to int"; return false; } return true; } void MetricsDaemon::SendCpuThrottleMetrics() { // |max_freq| is 0 only the first time through. static int max_freq = 0; if (max_freq == -1) // Give up, as sysfs did not report max_freq correctly. return; if (max_freq == 0 || testing_) { // One-time initialization of max_freq. (Every time when testing.) 
if (!ReadFreqToInt(cpuinfo_max_freq_path_, &max_freq)) { max_freq = -1; return; } if (max_freq == 0) { LOG(WARNING) << "sysfs reports 0 max CPU frequency\n"; max_freq = -1; return; } if (max_freq % 10000 == 1000) { // Special case: system has turbo mode, and max non-turbo frequency is // max_freq - 1000. This relies on "normal" (non-turbo) frequencies // being multiples of (at least) 10 MHz. Although there is no guarantee // of this, it seems a fairly reasonable assumption. Otherwise we should // read scaling_available_frequencies, sort the frequencies, compare the // two highest ones, and check if they differ by 1000 (kHz) (and that's a // hack too, no telling when it will change). max_freq -= 1000; } } int scaled_freq = 0; if (!ReadFreqToInt(scaling_max_freq_path_, &scaled_freq)) return; // Frequencies are in kHz. If scaled_freq > max_freq, turbo is on, but // scaled_freq is not the actual turbo frequency. We indicate this situation // with a 101% value. int percent = scaled_freq > max_freq ? 101 : scaled_freq / (max_freq / 100); SendLinearSample(kMetricScaledCpuFrequencyName, percent, 101, 102); } // static gboolean MetricsDaemon::StatsCallbackStatic(void* handle) { (static_cast(handle))->StatsCallback(); return false; // one-time callback } // Collects disk and vm stats alternating over a short and a long interval. void MetricsDaemon::StatsCallback() { uint64_t read_sectors_now, write_sectors_now; struct VmstatRecord vmstats_now; double time_now = GetActiveTime(); double delta_time = time_now - stats_initial_time_; if (testing_) { // Fake the time when testing. delta_time = stats_state_ == kStatsShort ? 
kMetricStatsShortInterval : kMetricStatsLongInterval; } bool diskstats_success = DiskStatsReadStats(&read_sectors_now, &write_sectors_now); int delta_read = read_sectors_now - read_sectors_; int delta_write = write_sectors_now - write_sectors_; int read_sectors_per_second = delta_read / delta_time; int write_sectors_per_second = delta_write / delta_time; bool vmstats_success = VmStatsReadStats(&vmstats_now); uint64_t delta_faults = vmstats_now.page_faults_ - vmstats_.page_faults_; uint64_t delta_swap_in = vmstats_now.swap_in_ - vmstats_.swap_in_; uint64_t delta_swap_out = vmstats_now.swap_out_ - vmstats_.swap_out_; uint64_t page_faults_per_second = delta_faults / delta_time; uint64_t swap_in_per_second = delta_swap_in / delta_time; uint64_t swap_out_per_second = delta_swap_out / delta_time; switch (stats_state_) { case kStatsShort: if (diskstats_success) { SendSample(kMetricReadSectorsShortName, read_sectors_per_second, 1, kMetricSectorsIOMax, kMetricSectorsBuckets); SendSample(kMetricWriteSectorsShortName, write_sectors_per_second, 1, kMetricSectorsIOMax, kMetricSectorsBuckets); } if (vmstats_success) { SendSample(kMetricPageFaultsShortName, page_faults_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); SendSample(kMetricSwapInShortName, swap_in_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); SendSample(kMetricSwapOutShortName, swap_out_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); } // Schedule long callback. stats_state_ = kStatsLong; ScheduleStatsCallback(kMetricStatsLongInterval - kMetricStatsShortInterval); break; case kStatsLong: if (diskstats_success) { SendSample(kMetricReadSectorsLongName, read_sectors_per_second, 1, kMetricSectorsIOMax, kMetricSectorsBuckets); SendSample(kMetricWriteSectorsLongName, write_sectors_per_second, 1, kMetricSectorsIOMax, kMetricSectorsBuckets); // Reset sector counters. 
read_sectors_ = read_sectors_now; write_sectors_ = write_sectors_now; } if (vmstats_success) { SendSample(kMetricPageFaultsLongName, page_faults_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); SendSample(kMetricSwapInLongName, swap_in_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); SendSample(kMetricSwapOutLongName, swap_out_per_second, 1, kMetricPageFaultsMax, kMetricPageFaultsBuckets); vmstats_ = vmstats_now; } SendCpuThrottleMetrics(); // Set start time for new cycle. stats_initial_time_ = time_now; // Schedule short callback. stats_state_ = kStatsShort; ScheduleStatsCallback(kMetricStatsShortInterval); break; default: LOG(FATAL) << "Invalid stats state"; } } void MetricsDaemon::ScheduleMeminfoCallback(int wait) { if (testing_) { return; } g_timeout_add_seconds(wait, MeminfoCallbackStatic, this); } // static gboolean MetricsDaemon::MeminfoCallbackStatic(void* handle) { return (static_cast(handle))->MeminfoCallback(); } bool MetricsDaemon::MeminfoCallback() { string meminfo_raw; const FilePath meminfo_path("/proc/meminfo"); if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) { LOG(WARNING) << "cannot read " << meminfo_path.value().c_str(); return false; } // Make both calls even if the first one fails. bool success = ProcessMeminfo(meminfo_raw); return ReportZram(base::FilePath(FILE_PATH_LITERAL("/sys/block/zram0"))) && success; } // static bool MetricsDaemon::ReadFileToUint64(const base::FilePath& path, uint64_t* value) { std::string content; if (!base::ReadFileToString(path, &content)) { PLOG(WARNING) << "cannot read " << path.MaybeAsASCII(); return false; } // Remove final newline. base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content); if (!base::StringToUint64(content, value)) { LOG(WARNING) << "invalid integer: " << content; return false; } return true; } bool MetricsDaemon::ReportZram(const base::FilePath& zram_dir) { // Data sizes are in bytes. |zero_pages| is in number of pages. 
uint64_t compr_data_size, orig_data_size, zero_pages; const size_t page_size = 4096; if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName), &compr_data_size) || !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) || !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) { return false; } // |orig_data_size| does not include zero-filled pages. orig_data_size += zero_pages * page_size; const int compr_data_size_mb = compr_data_size >> 20; const int savings_mb = (orig_data_size - compr_data_size) >> 20; const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size; // Report compressed size in megabytes. 100 MB or less has little impact. SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50); SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50); // The compression ratio is multiplied by 100 for better resolution. The // ratios of interest are between 1 and 6 (100% and 600% as reported). We // don't want samples when very little memory is being compressed. if (compr_data_size_mb >= 1) { SendSample("Platform.ZramCompressionRatioPercent", orig_data_size * 100 / compr_data_size, 100, 600, 50); } // The values of interest for zero_pages are between 1MB and 1GB. The units // are number of pages. 
SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50); SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50); return true; } bool MetricsDaemon::ProcessMeminfo(const string& meminfo_raw) { static const MeminfoRecord fields_array[] = { { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory { "MemFree", "MemFree" }, { "Buffers", "Buffers" }, { "Cached", "Cached" }, // { "SwapCached", "SwapCached" }, { "Active", "Active" }, { "Inactive", "Inactive" }, { "ActiveAnon", "Active(anon)" }, { "InactiveAnon", "Inactive(anon)" }, { "ActiveFile" , "Active(file)" }, { "InactiveFile", "Inactive(file)" }, { "Unevictable", "Unevictable", kMeminfoOp_HistLog }, // { "Mlocked", "Mlocked" }, { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal }, { "SwapFree", "SwapFree", kMeminfoOp_SwapFree }, // { "Dirty", "Dirty" }, // { "Writeback", "Writeback" }, { "AnonPages", "AnonPages" }, { "Mapped", "Mapped" }, { "Shmem", "Shmem", kMeminfoOp_HistLog }, { "Slab", "Slab", kMeminfoOp_HistLog }, // { "SReclaimable", "SReclaimable" }, // { "SUnreclaim", "SUnreclaim" }, }; vector fields(fields_array, fields_array + arraysize(fields_array)); if (!FillMeminfo(meminfo_raw, &fields)) { return false; } int total_memory = fields[0].value; if (total_memory == 0) { // this "cannot happen" LOG(WARNING) << "borked meminfo parser"; return false; } int swap_total = 0; int swap_free = 0; // Send all fields retrieved, except total memory. 
for (unsigned int i = 1; i < fields.size(); i++) { string metrics_name = base::StringPrintf("Platform.Meminfo%s", fields[i].name); int percent; switch (fields[i].op) { case kMeminfoOp_HistPercent: // report value as percent of total memory percent = fields[i].value * 100 / total_memory; SendLinearSample(metrics_name, percent, 100, 101); break; case kMeminfoOp_HistLog: // report value in kbytes, log scale, 4Gb max SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100); break; case kMeminfoOp_SwapTotal: swap_total = fields[i].value; case kMeminfoOp_SwapFree: swap_free = fields[i].value; break; } } if (swap_total > 0) { int swap_used = swap_total - swap_free; int swap_used_percent = swap_used * 100 / swap_total; SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100); SendLinearSample("Platform.MeminfoSwapUsedPercent", swap_used_percent, 100, 101); } return true; } bool MetricsDaemon::FillMeminfo(const string& meminfo_raw, vector* fields) { vector lines; unsigned int nlines = Tokenize(meminfo_raw, "\n", &lines); // Scan meminfo output and collect field values. Each field name has to // match a meminfo entry (case insensitive) after removing non-alpha // characters from the entry. unsigned int ifield = 0; for (unsigned int iline = 0; iline < nlines && ifield < fields->size(); iline++) { vector tokens; Tokenize(lines[iline], ": ", &tokens); if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) { // Name matches. Parse value and save. char* rest; (*fields)[ifield].value = static_cast(strtol(tokens[1].c_str(), &rest, 10)); if (*rest != '\0') { LOG(WARNING) << "missing meminfo value"; return false; } ifield++; } } if (ifield < fields->size()) { // End of input reached while scanning. 
LOG(WARNING) << "cannot find field " << (*fields)[ifield].match << " and following"; return false; } return true; } void MetricsDaemon::ScheduleMemuseCallback(double interval) { if (testing_) { return; } g_timeout_add_seconds(interval, MemuseCallbackStatic, this); } // static gboolean MetricsDaemon::MemuseCallbackStatic(void* handle) { MetricsDaemon* daemon = static_cast(handle); daemon->MemuseCallback(); return false; } void MetricsDaemon::MemuseCallback() { // Since we only care about active time (i.e. uptime minus sleep time) but // the callbacks are driven by real time (uptime), we check if we should // reschedule this callback due to intervening sleep periods. double now = GetActiveTime(); // Avoid intervals of less than one second. double remaining_time = ceil(memuse_final_time_ - now); if (remaining_time > 0) { ScheduleMemuseCallback(remaining_time); } else { // Report stats and advance the measurement interval unless there are // errors or we've completed the last interval. if (MemuseCallbackWork() && memuse_interval_index_ < arraysize(kMemuseIntervals)) { double interval = kMemuseIntervals[memuse_interval_index_++]; memuse_final_time_ = now + interval; ScheduleMemuseCallback(interval); } } } bool MetricsDaemon::MemuseCallbackWork() { string meminfo_raw; const FilePath meminfo_path("/proc/meminfo"); if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) { LOG(WARNING) << "cannot read " << meminfo_path.value().c_str(); return false; } return ProcessMemuse(meminfo_raw); } bool MetricsDaemon::ProcessMemuse(const string& meminfo_raw) { static const MeminfoRecord fields_array[] = { { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory { "ActiveAnon", "Active(anon)" }, { "InactiveAnon", "Inactive(anon)" }, }; vector fields(fields_array, fields_array + arraysize(fields_array)); if (!FillMeminfo(meminfo_raw, &fields)) { return false; } int total = fields[0].value; int active_anon = fields[1].value; int inactive_anon = fields[2].value; if (total == 0) { 
// this "cannot happen" LOG(WARNING) << "borked meminfo parser"; return false; } string metrics_name = base::StringPrintf("Platform.MemuseAnon%d", memuse_interval_index_); SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total, 100, 101); return true; } void MetricsDaemon::ReportDailyUse(int use_seconds) { if (use_seconds <= 0) return; int minutes = (use_seconds + kSecondsPerMinute / 2) / kSecondsPerMinute; SendSample("Logging.DailyUseTime", minutes, 1, kMinutesPerDay * 30 * 2, // cumulative---two months worth 50); } void MetricsDaemon::SendSample(const string& name, int sample, int min, int max, int nbuckets) { metrics_lib_->SendToUMA(name, sample, min, max, nbuckets); } void MetricsDaemon::SendKernelCrashesCumulativeCountStats() { // Report the number of crashes for this OS version, but don't clear the // counter. It is cleared elsewhere on version change. int64_t crashes_count = kernel_crashes_version_count_->Get(); SendSample(kernel_crashes_version_count_->Name(), crashes_count, 1, // value of first bucket 500, // value of last bucket 100); // number of buckets int64_t cpu_use_ms = version_cumulative_cpu_use_->Get(); SendSample(version_cumulative_cpu_use_->Name(), cpu_use_ms / 1000, // stat is in seconds 1, // device may be used very little... 8 * 1000 * 1000, // ... or a lot (a little over 90 days) 100); // On the first run after an autoupdate, cpu_use_ms and active_use_seconds // can be zero. Avoid division by zero. if (cpu_use_ms > 0) { // Send the crash frequency since update in number of crashes per CPU year. SendSample("Logging.KernelCrashesPerCpuYear", crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms, 1, 1000 * 1000, // about one crash every 30s of CPU time 100); } int64_t active_use_seconds = version_cumulative_active_use_->Get(); if (active_use_seconds > 0) { SendSample(version_cumulative_active_use_->Name(), active_use_seconds / 1000, // stat is in seconds 1, // device may be used very little... 8 * 1000 * 1000, // ... 
or a lot (about 90 days) 100); // Same as above, but per year of active time. SendSample("Logging.KernelCrashesPerActiveYear", crashes_count * kSecondsPerDay * 365 / active_use_seconds, 1, 1000 * 1000, // about one crash every 30s of active time 100); } } void MetricsDaemon::SendDailyUseSample( const scoped_ptr& use) { SendSample(use->Name(), use->GetAndClear(), 1, // value of first bucket kSecondsPerDay, // value of last bucket 50); // number of buckets } void MetricsDaemon::SendCrashIntervalSample( const scoped_ptr& interval) { SendSample(interval->Name(), interval->GetAndClear(), 1, // value of first bucket 4 * kSecondsPerWeek, // value of last bucket 50); // number of buckets } void MetricsDaemon::SendCrashFrequencySample( const scoped_ptr& frequency) { SendSample(frequency->Name(), frequency->GetAndClear(), 1, // value of first bucket 100, // value of last bucket 50); // number of buckets } void MetricsDaemon::SendLinearSample(const string& name, int sample, int max, int nbuckets) { // TODO(semenzato): add a proper linear histogram to the Chrome external // metrics API. 
LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale"; metrics_lib_->SendEnumToUMA(name, sample, max); } void MetricsDaemon::UpdateStats(TimeTicks now_ticks, Time now_wall_time) { const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds(); daily_active_use_->Add(elapsed_seconds); version_cumulative_active_use_->Add(elapsed_seconds); user_crash_interval_->Add(elapsed_seconds); kernel_crash_interval_->Add(elapsed_seconds); version_cumulative_cpu_use_->Add(GetIncrementalCpuUse().InMilliseconds()); last_update_stats_time_ = now_ticks; const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch(); const int day = since_epoch.InDays(); const int week = day / 7; if (daily_cycle_->Get() != day) { daily_cycle_->Set(day); SendDailyUseSample(daily_active_use_); SendDailyUseSample(version_cumulative_active_use_); SendCrashFrequencySample(any_crashes_daily_count_); SendCrashFrequencySample(user_crashes_daily_count_); SendCrashFrequencySample(kernel_crashes_daily_count_); SendCrashFrequencySample(unclean_shutdowns_daily_count_); SendKernelCrashesCumulativeCountStats(); } if (weekly_cycle_->Get() != week) { weekly_cycle_->Set(week); SendCrashFrequencySample(any_crashes_weekly_count_); SendCrashFrequencySample(user_crashes_weekly_count_); SendCrashFrequencySample(kernel_crashes_weekly_count_); SendCrashFrequencySample(unclean_shutdowns_weekly_count_); } } // static gboolean MetricsDaemon::HandleUpdateStatsTimeout(gpointer data) { static_cast(data)->UpdateStats(TimeTicks::Now(), Time::Now()); return TRUE; }