samples/bpf: offwaketime example

This is a simplified version of Brendan Gregg's offwaketime:
This program shows kernel stack traces and task names that were blocked and
"off-CPU", along with the stack traces and task names for the threads that woke
them, and the total elapsed time from when they blocked to when they were woken
up. The combined stacks, task names, and total time are summarized in kernel
context for efficiency.

Example:
$ sudo ./offwaketime | flamegraph.pl > demo.svg
Open demo.svg in a browser to view the FlameGraph visualization.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Alexei Starovoitov 2016-02-17 19:58:59 -08:00 committed by David S. Miller
parent d5a3b1f691
commit a6ffe7b9df
4 changed files with 322 additions and 0 deletions

View File

@ -16,6 +16,7 @@ hostprogs-y += tracex5
hostprogs-y += tracex6
hostprogs-y += trace_output
hostprogs-y += lathist
hostprogs-y += offwaketime
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@ -32,6 +33,7 @@ tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@ -47,6 +49,7 @@ always += tracex6_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@ -63,6 +66,7 @@ HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
HOSTLOADLIBES_offwaketime += -lelf
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc

View File

@ -39,6 +39,8 @@ static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
(void *) BPF_FUNC_perf_event_output;
/* Stub for the BPF_FUNC_get_stackid helper: takes the program context, a map
 * and a flags word, and returns an int.
 */
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -0,0 +1,131 @@
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
/* Copy P through bpf_probe_read() into a zero-initialized temporary of the
 * same type and yield that temporary's value.
 */
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
/* Blocked intervals shorter than this many microseconds are not recorded
 * (see the check in oncpu()).
 */
#define MINBLOCK_US 1
/* Key of the "counts" map: one (blocked task, waking task) pairing. */
struct key_t {
/* comm of the waker, copied from the wokeby entry when one exists */
char waker[TASK_COMM_LEN];
/* comm of the current task at the time update_counts() runs */
char target[TASK_COMM_LEN];
/* bpf_get_stackid() result recorded for the waker */
u32 wret;
/* bpf_get_stackid() result recorded for the target */
u32 tret;
};
/* Hash map keyed by struct key_t; update_counts() adds each blocked interval
 * (in microseconds) to the u64 value.
 */
struct bpf_map_def SEC("maps") counts = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(struct key_t),
.value_size = sizeof(u64),
.max_entries = 10000,
};
/* Hash map from pid to the bpf_ktime_get_ns() timestamp that oncpu() stored
 * for the task passed as the first argument of finish_task_switch().
 */
struct bpf_map_def SEC("maps") start = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(u64),
.max_entries = 10000,
};
/* Value of the "wokeby" map: who issued the most recent wakeup for a pid. */
struct wokeby_t {
/* comm of the task that called try_to_wake_up() */
char name[TASK_COMM_LEN];
/* bpf_get_stackid() result captured in the waker's context */
u32 ret;
};
/* Hash map from the pid being woken to a struct wokeby_t; written by waker()
 * and consumed (looked up, then deleted) by update_counts().
 */
struct bpf_map_def SEC("maps") wokeby = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(struct wokeby_t),
.max_entries = 10000,
};
/* Stack-trace map passed to bpf_get_stackid(); each value has room for
 * PERF_MAX_STACK_DEPTH u64 entries.
 */
struct bpf_map_def SEC("maps") stackmap = {
.type = BPF_MAP_TYPE_STACK_TRACE,
.key_size = sizeof(u32),
.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
.max_entries = 10000,
};
/* Flags used for every bpf_get_stackid() call in this file. */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
/* kprobe on try_to_wake_up(): remember which context is waking the task
 * given as the first argument, keyed by that task's pid.
 */
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *wakee = (void *) PT_REGS_PARM1(ctx);
	struct wokeby_t info = {};
	u32 wakee_pid = _(wakee->pid);

	/* describe the current (waking) context: comm plus stack id */
	bpf_get_current_comm(&info.name, sizeof(info.name));
	info.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	/* BPF_ANY: a later wakeup for the same pid overwrites the earlier one */
	bpf_map_update_elem(&wokeby, &wakee_pid, &info, BPF_ANY);
	return 0;
}
/* Charge @delta microseconds of blocked time to the (target, waker) pair.
 *
 * The target side of the key is the current task's comm and stack id; the
 * waker side comes from the wokeby entry for @pid, which is removed once it
 * has been consumed. When no wokeby entry exists the waker fields stay zero.
 * Always returns 0.
 *
 * NOTE(review): the final accumulation is a plain read-modify-write on the
 * map value.
 */
static inline int update_counts(struct pt_regs *ctx, u32 pid, u64 delta)
{
	struct key_t k = {};
	struct wokeby_t *w;
	u64 initial = 0;
	u64 *total;

	bpf_get_current_comm(&k.target, sizeof(k.target));
	k.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	w = bpf_map_lookup_elem(&wokeby, &pid);
	if (w) {
		__builtin_memcpy(&k.waker, w->name, TASK_COMM_LEN);
		k.wret = w->ret;
		bpf_map_delete_elem(&wokeby, &pid);
	}

	total = bpf_map_lookup_elem(&counts, &k);
	if (!total) {
		/* first sighting of this key: create it, then fetch the slot */
		bpf_map_update_elem(&counts, &k, &initial, BPF_NOEXIST);
		total = bpf_map_lookup_elem(&counts, &k);
	}
	if (total)
		*total += delta;
	return 0;
}
/* kprobe on finish_task_switch(): timestamp the task passed as the first
 * argument (the previous thread), then, if the current task has a recorded
 * timestamp, account the elapsed time to it via update_counts().
 */
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *prev = (void *) PT_REGS_PARM1(ctx);
	u64 now, blocked_us, *since;
	u32 prev_pid, cur_pid;

	/* the previous thread starts sleeping now */
	prev_pid = _(prev->pid);
	now = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &prev_pid, &now, BPF_ANY);

	/* how long was the current thread off-CPU? */
	cur_pid = bpf_get_current_pid_tgid();
	since = bpf_map_lookup_elem(&start, &cur_pid);
	if (!since)
		return 0;	/* missed start or filtered */

	/* ns -> us; the timestamp is read before its entry is dropped */
	blocked_us = (bpf_ktime_get_ns() - *since) / 1000;
	bpf_map_delete_elem(&start, &cur_pid);
	if (blocked_us < MINBLOCK_US)
		return 0;

	return update_counts(ctx, cur_pid, blocked_us);
}
/* Placed in the "license" and "version" ELF sections of the object file. */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;

View File

@ -0,0 +1,185 @@
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
#include <assert.h>
#include <stdbool.h>
#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
/* Capacity of the syms[] table. */
#define MAX_SYMS 300000
/* When non-zero, print_ksym() appends the raw address to each symbol name. */
#define PRINT_RAW_ADDR 0

/* One /proc/kallsyms entry: symbol address and a strdup()'ed name. */
static struct ksym {
	long addr;
	char *name;
} syms[MAX_SYMS];

/* Number of valid entries at the front of syms[]. */
static int sym_cnt;

/* qsort() comparator ordering struct ksym entries by ascending addr.
 *
 * Compares the two addresses directly instead of returning their difference:
 * the difference is a long, and narrowing it to the int return type flips or
 * zeroes the result once two addresses are 2^31 or more apart.
 *
 * Returns a negative, zero or positive value as usual for qsort().
 */
static int ksym_cmp(const void *p1, const void *p2)
{
	long a = ((const struct ksym *)p1)->addr;
	long b = ((const struct ksym *)p2)->addr;

	return (a > b) - (a < b);
}
static int load_kallsyms(void)
{
FILE *f = fopen("/proc/kallsyms", "r");
char func[256], buf[256];
char symbol;
void *addr;
int i = 0;
if (!f)
return -ENOENT;
while (!feof(f)) {
if (!fgets(buf, sizeof(buf), f))
break;
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
if (!addr)
continue;
syms[i].addr = (long) addr;
syms[i].name = strdup(func);
i++;
}
sym_cnt = i;
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
return 0;
}
/* Find the symbol that contains address @key in the sorted syms[] table.
 *
 * Returns a pointer to the struct ksym whose addr equals @key, or to the
 * closest preceding symbol when @key falls strictly between two table
 * entries. Addresses below the first or above the last entry are treated as
 * out of range and resolve to syms[0].
 *
 * Addresses are compared directly: subtracting them and storing the long
 * result in an int misorders keys that are 2^31 or more away from a symbol.
 *
 * NOTE(review): with an empty table this still returns &syms[0], whose name
 * is NULL; callers print it unchecked.
 */
static void *search(long key)
{
	int start = 0, end = sym_cnt;

	while (start < end) {
		int mid = start + (end - start) / 2;
		long addr = syms[mid].addr;

		if (key < addr)
			end = mid;
		else if (key > addr)
			start = mid + 1;
		else
			return &syms[mid];
	}

	/* start < sym_cnt keeps syms[start] inside the populated entries */
	if (start >= 1 && start < sym_cnt &&
	    syms[start - 1].addr < key && key < syms[start].addr)
		/* valid ksym */
		return &syms[start - 1];

	/* out of range. return _stext */
	return &syms[0];
}
/* Print the symbol name for @addr followed by ';'. A zero @addr prints
 * nothing. With PRINT_RAW_ADDR set the raw address is appended to the name.
 */
static void print_ksym(__u64 addr)
{
	struct ksym *hit;

	if (addr == 0)
		return;

	hit = search(addr);
	if (!PRINT_RAW_ADDR) {
		printf("%s;", hit->name);
		return;
	}
	printf("%s/%llx;", hit->name, addr);
}
/* Length of the task-name buffers in struct key_t below. */
#define TASK_COMM_LEN 16
/* User-space view of the keys read from map_fd[0]; the field order and sizes
 * match struct key_t in the kernel-side program.
 */
struct key_t {
/* name of the task that issued the wakeup */
char waker[TASK_COMM_LEN];
/* name of the task that was blocked */
char target[TASK_COMM_LEN];
/* stack id of the waker; looked up in map_fd[3] by print_stack() */
__u32 wret;
/* stack id of the target; looked up in map_fd[3] by print_stack() */
__u32 tret;
};
/* Print one folded-stack line for @key with @count as its weight:
 *
 *   target;<target stack, last entry first>;-;<waker stack>;;waker count
 *
 * Either stack is replaced by "---;" when its id cannot be looked up in
 * map_fd[3] (presumably the stack-trace map - confirm against the loader's
 * map ordering). After the line, stack-id errors are reported: a one-time
 * hint for -EEXIST, an "err stackid" line for any other negative id (and for
 * -EEXIST once the hint has already been shown).
 */
static void print_stack(struct key_t *key, __u64 count)
{
	__u64 frames[PERF_MAX_STACK_DEPTH] = {};
	static bool warned;
	bool collided, failed;
	int idx;

	printf("%s;", key->target);
	if (bpf_lookup_elem(map_fd[3], &key->tret, frames) == 0) {
		/* walk the target stack from the last slot down to slot 0 */
		idx = PERF_MAX_STACK_DEPTH;
		while (idx-- > 0)
			print_ksym(frames[idx]);
	} else {
		printf("---;");
	}

	printf("-;");
	if (bpf_lookup_elem(map_fd[3], &key->wret, frames) == 0) {
		/* waker stack goes out in storage order */
		for (idx = 0; idx != PERF_MAX_STACK_DEPTH; idx++)
			print_ksym(frames[idx]);
	} else {
		printf("---;");
	}
	printf(";%s %lld\n", key->waker, count);

	collided = key->tret == -EEXIST || key->wret == -EEXIST;
	failed = (int)(key->tret) < 0 || (int)(key->wret) < 0;
	if (collided && !warned) {
		printf("stackmap collisions seen. Consider increasing size\n");
		warned = true;
	} else if (failed) {
		printf("err stackid %d %d\n", key->tret, key->wret);
	}
}
/* Walk every key of the map behind @fd and print it with print_stack().
 *
 * Iteration starts from an all-zero key and follows bpf_get_next_key() until
 * it reports no further key. An entry whose value cannot be read is skipped
 * rather than printed with an uninitialized count.
 */
static void print_stacks(int fd)
{
	struct key_t key = {}, next_key;
	__u64 value;

	while (bpf_get_next_key(fd, &key, &next_key) == 0) {
		if (bpf_lookup_elem(fd, &next_key, &value) == 0)
			print_stack(&next_key, value);
		key = next_key;
	}
}
/* SIGINT handler: dump what has been collected in map_fd[0] and exit with
 * status 0.
 *
 * NOTE(review): print_stacks() ends up in printf(), which is being called
 * from signal context here.
 */
static void int_exit(int sig)
{
	const int counts_fd = map_fd[0];

	(void)sig;
	print_stacks(counts_fd);
	exit(0);
}
/* Usage: offwaketime [delay_in_seconds]
 *
 * Loads "<argv[0]>_kern.o", lets it collect data for the requested number of
 * seconds (default 1) or until SIGINT, then prints the folded stacks from
 * map_fd[0].
 *
 * The delay argument is validated with strtol(): non-numeric, negative or
 * out-of-range values are rejected up front instead of being turned into 0
 * or into a huge unsigned sleep.
 *
 * Returns 0 on success, 1 on a bad argument or BPF load failure, 2 when
 * /proc/kallsyms cannot be processed.
 */
int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	char filename[256];
	unsigned int delay = 1;

	if (argc > 1) {
		char *end;
		long secs;

		errno = 0;
		secs = strtol(argv[1], &end, 10);
		/* (unsigned int)-1 is the largest value sleep() accepts */
		if (errno || end == argv[1] || *end != '\0' || secs < 0 ||
		    (unsigned long)secs > (unsigned int)-1) {
			fprintf(stderr, "usage: %s [delay_in_seconds]\n",
				argv[0]);
			return 1;
		}
		delay = secs;
	}

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	/* best effort: lift the locked-memory limit before loading */
	setrlimit(RLIMIT_MEMLOCK, &r);

	signal(SIGINT, int_exit);

	if (load_kallsyms()) {
		printf("failed to process /proc/kallsyms\n");
		return 2;
	}

	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 1;
	}

	sleep(delay);
	print_stacks(map_fd[0]);

	return 0;
}