245 lines
9.3 KiB
C
245 lines
9.3 KiB
C
|
/*
|
||
|
* time_in_state eBPF program
|
||
|
*
|
||
|
* Copyright (C) 2018 Google
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or
|
||
|
* modify it under the terms of the GNU General Public License version
|
||
|
* 2 as published by the Free Software Foundation.
|
||
|
*
|
||
|
* This program is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#include <bpf_helpers.h>
|
||
|
#include <bpf_timeinstate.h>
|
||
|
|
||
|
DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
|
||
|
AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
|
||
|
|
||
|
DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
|
||
|
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
|
||
|
AID_SYSTEM)
|
||
|
|
||
|
struct switch_args {
|
||
|
unsigned long long ignore;
|
||
|
char prev_comm[16];
|
||
|
int prev_pid;
|
||
|
int prev_prio;
|
||
|
long long prev_state;
|
||
|
char next_comm[16];
|
||
|
int next_pid;
|
||
|
int next_prio;
|
||
|
};
|
||
|
|
||
|
DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM, tp_sched_switch)
|
||
|
(struct switch_args* args) {
|
||
|
const int ALLOW = 1; // return 1 to avoid blocking simpleperf from receiving events.
|
||
|
uint32_t zero = 0;
|
||
|
uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
|
||
|
if (!last) return ALLOW;
|
||
|
uint64_t old_last = *last;
|
||
|
uint64_t time = bpf_ktime_get_ns();
|
||
|
*last = time;
|
||
|
|
||
|
uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
|
||
|
if (!active) return ALLOW;
|
||
|
|
||
|
uint32_t cpu = bpf_get_smp_processor_id();
|
||
|
uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
|
||
|
if (!policyp) return ALLOW;
|
||
|
uint32_t policy = *policyp;
|
||
|
|
||
|
uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
|
||
|
if (!policy_active) return ALLOW;
|
||
|
|
||
|
uint32_t nactive = *active - 1;
|
||
|
uint32_t policy_nactive = *policy_active - 1;
|
||
|
|
||
|
if (!args->prev_pid || (!old_last && args->next_pid)) {
|
||
|
__sync_fetch_and_add(active, 1);
|
||
|
__sync_fetch_and_add(policy_active, 1);
|
||
|
}
|
||
|
|
||
|
// Return here in 2 scenarios:
|
||
|
// 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs can't be
|
||
|
// decreasing.
|
||
|
// 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be invalid,
|
||
|
// and our active CPU counts don't include this CPU yet so we shouldn't decrement them even
|
||
|
// if we're going idle.
|
||
|
if (!args->prev_pid || !old_last) return ALLOW;
|
||
|
|
||
|
if (!args->next_pid) {
|
||
|
__sync_fetch_and_add(active, -1);
|
||
|
__sync_fetch_and_add(policy_active, -1);
|
||
|
}
|
||
|
|
||
|
uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
|
||
|
if (!freq_idxp || !*freq_idxp) return ALLOW;
|
||
|
// freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
|
||
|
uint8_t freq_idx = *freq_idxp - 1;
|
||
|
|
||
|
uint32_t uid = bpf_get_current_uid_gid();
|
||
|
time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
|
||
|
tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
|
||
|
if (!val) {
|
||
|
tis_val_t zero_val = {.ar = {0}};
|
||
|
bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
|
||
|
val = bpf_uid_time_in_state_map_lookup_elem(&key);
|
||
|
}
|
||
|
uint64_t delta = time - old_last;
|
||
|
if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
|
||
|
|
||
|
// Add delta to total.
|
||
|
const uint32_t total_freq_idx = freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx :
|
||
|
MAX_FREQS_FOR_TOTAL - 1;
|
||
|
uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
|
||
|
if (total) *total += delta;
|
||
|
|
||
|
const int pid = args->prev_pid;
|
||
|
const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
|
||
|
bool is_tgid_tracked = false;
|
||
|
|
||
|
// eBPF verifier does not currently allow loops.
|
||
|
// Instruct the C compiler to unroll the loop into a series of steps.
|
||
|
#pragma unroll
|
||
|
for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
|
||
|
const uint32_t key = index;
|
||
|
tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
|
||
|
if (!tracked_pid) continue;
|
||
|
if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
|
||
|
// Reached the end of the list
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
|
||
|
is_tgid_tracked = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (is_tgid_tracked) {
|
||
|
// If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
|
||
|
// with other threads sharing the same TGID and aggregation key.
|
||
|
uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
|
||
|
aggregated_task_tis_key_t task_key = {
|
||
|
.tgid = tgid,
|
||
|
.aggregation_key = aggregation_key ? *aggregation_key : 0,
|
||
|
.bucket = freq_idx / FREQS_PER_ENTRY};
|
||
|
tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
|
||
|
if (!task_val) {
|
||
|
tis_val_t zero_val = {.ar = {0}};
|
||
|
bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
|
||
|
task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
|
||
|
}
|
||
|
if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
|
||
|
}
|
||
|
key.bucket = nactive / CPUS_PER_ENTRY;
|
||
|
concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
|
||
|
if (!ct) {
|
||
|
concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
|
||
|
bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
|
||
|
ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
|
||
|
}
|
||
|
if (ct) ct->active[nactive % CPUS_PER_ENTRY] += delta;
|
||
|
|
||
|
if (policy_nactive / CPUS_PER_ENTRY != key.bucket) {
|
||
|
key.bucket = policy_nactive / CPUS_PER_ENTRY;
|
||
|
ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
|
||
|
if (!ct) {
|
||
|
concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
|
||
|
bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
|
||
|
ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
|
||
|
}
|
||
|
}
|
||
|
if (ct) ct->policy[policy_nactive % CPUS_PER_ENTRY] += delta;
|
||
|
uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
|
||
|
if (uid_last_update) {
|
||
|
*uid_last_update = time;
|
||
|
} else {
|
||
|
bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
|
||
|
}
|
||
|
return ALLOW;
|
||
|
}
|
||
|
|
||
|
struct cpufreq_args {
|
||
|
unsigned long long ignore;
|
||
|
unsigned int state;
|
||
|
unsigned int cpu_id;
|
||
|
};
|
||
|
|
||
|
DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM, tp_cpufreq)
|
||
|
(struct cpufreq_args* args) {
|
||
|
uint32_t cpu = args->cpu_id;
|
||
|
unsigned int new = args->state;
|
||
|
uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
|
||
|
if (!policyp) return 0;
|
||
|
uint32_t policy = *policyp;
|
||
|
freq_idx_key_t key = {.policy = policy, .freq = new};
|
||
|
uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
|
||
|
if (!idxp) return 0;
|
||
|
uint8_t idx = *idxp;
|
||
|
bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// The format of the sched/sched_process_free event is described in
|
||
|
// adb shell cat /d/tracing/events/sched/sched_process_free/format
|
||
|
struct sched_process_free_args {
|
||
|
unsigned long long ignore;
|
||
|
char comm[16];
|
||
|
pid_t pid;
|
||
|
int prio;
|
||
|
};
|
||
|
|
||
|
DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM, tp_sched_process_free)
|
||
|
(struct sched_process_free_args* args) {
|
||
|
const int ALLOW = 1;
|
||
|
|
||
|
int pid = args->pid;
|
||
|
bool is_last = true;
|
||
|
|
||
|
// eBPF verifier does not currently allow loops.
|
||
|
// Instruct the C compiler to unroll the loop into a series of steps.
|
||
|
#pragma unroll
|
||
|
for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
|
||
|
const uint32_t key = MAX_TRACKED_PIDS - index - 1;
|
||
|
tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
|
||
|
if (!tracked_pid) continue;
|
||
|
if (tracked_pid->pid == pid) {
|
||
|
tracked_pid->pid = 0;
|
||
|
tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
|
||
|
bpf_pid_tracked_hash_map_delete_elem(&key);
|
||
|
break;
|
||
|
}
|
||
|
if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
|
||
|
is_last = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bpf_pid_task_aggregation_map_delete_elem(&pid);
|
||
|
return ALLOW;
|
||
|
}
|
||
|
|
||
|
LICENSE("GPL");
|