mirror of https://gitee.com/openkylin/linux.git
bpf: Separate bpf_get_[stack|stackid] for perf events BPF
Calling get_perf_callchain() on perf_events from PEBS entries may cause
unwinder errors. To fix this issue, the callchain is fetched early. Such
perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.

Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
also cause unwinder errors. To fix this, add separate versions of these
two helpers, bpf_get_[stack|stackid]_pe. These two helpers use the
callchain in bpf_perf_event_data_kern->data->callchain.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723180648.1429892-2-songliubraving@fb.com
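For context, a minimal sketch of the program side (not part of this commit;
map sizing, names, and section layout are illustrative assumptions). A
perf_event BPF program keeps calling the generic helpers; after this change,
pe_prog_func_proto() resolves bpf_get_stackid()/bpf_get_stack() to the new
_pe variants, so program source needs no change:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: a perf_event BPF program that calls
 * bpf_get_stackid(). With this commit, the call is wired to
 * bpf_get_stackid_pe(), which reads the early-fetched callchain
 * for PEBS events. Names and sizes here are assumptions.
 */
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>

#define MAX_STACK_DEPTH 127

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1024);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, MAX_STACK_DEPTH * sizeof(__u64));
} stackmap SEC(".maps");

SEC("perf_event")
int profile(struct bpf_perf_event_data *ctx)
{
	/* kernel stack id; pass BPF_F_USER_STACK for the user stack */
	long id = bpf_get_stackid(ctx, &stackmap, 0);

	if (id >= 0)
		bpf_printk("stackid=%ld", id);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

The same program works on both precise (PEBS) and ordinary events: the _pe
helpers fall back to the generic path when __PERF_SAMPLE_CALLCHAIN_EARLY is
not set on the event.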
parent 909e446b32
commit 7b04d6d60f
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
@@ -1675,6 +1675,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
+extern const struct bpf_func_proto bpf_get_stack_proto_pe;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
+#include <linux/kernel.h>
 #include <linux/stacktrace.h>
 #include <linux/perf_event.h>
 #include <linux/elf.h>
@@ -387,11 +388,10 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
 #endif
 }
 
-BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
-	   u64, flags)
+static long __bpf_get_stackid(struct bpf_map *map,
+			      struct perf_callchain_entry *trace, u64 flags)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
-	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
 	u32 max_depth = map->value_size / stack_map_data_size(map);
 	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
@@ -399,21 +399,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	u32 hash, id, trace_nr, trace_len;
 	bool user = flags & BPF_F_USER_STACK;
-	bool kernel = !user;
 	u64 *ips;
 	bool hash_matches;
 
-	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
-			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
-		return -EINVAL;
-
-	trace = get_perf_callchain(regs, init_nr, kernel, user,
-				   sysctl_perf_event_max_stack, false, false);
-
-	if (unlikely(!trace))
-		/* couldn't fetch the stack trace */
-		return -EFAULT;
-
 	/* get_perf_callchain() guarantees that trace->nr >= init_nr
 	 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
 	 */
@@ -478,6 +466,30 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	return id;
 }
 
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+	   u64, flags)
+{
+	u32 max_depth = map->value_size / stack_map_data_size(map);
+	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+	bool user = flags & BPF_F_USER_STACK;
+	struct perf_callchain_entry *trace;
+	bool kernel = !user;
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+		return -EINVAL;
+
+	trace = get_perf_callchain(regs, init_nr, kernel, user,
+				   sysctl_perf_event_max_stack, false, false);
+
+	if (unlikely(!trace))
+		/* couldn't fetch the stack trace */
+		return -EFAULT;
+
+	return __bpf_get_stackid(map, trace, flags);
+}
+
 const struct bpf_func_proto bpf_get_stackid_proto = {
 	.func		= bpf_get_stackid,
 	.gpl_only	= true,
@@ -487,7 +499,77 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
+{
+	__u64 nr_kernel = 0;
+
+	while (nr_kernel < trace->nr) {
+		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
+			break;
+		nr_kernel++;
+	}
+	return nr_kernel;
+}
+
+BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
+	   struct bpf_map *, map, u64, flags)
+{
+	struct perf_event *event = ctx->event;
+	struct perf_callchain_entry *trace;
+	bool kernel, user;
+	__u64 nr_kernel;
+	int ret;
+
+	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
+	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+		return bpf_get_stackid((unsigned long)(ctx->regs),
+				       (unsigned long) map, flags, 0, 0);
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+		return -EINVAL;
+
+	user = flags & BPF_F_USER_STACK;
+	kernel = !user;
+
+	trace = ctx->data->callchain;
+	if (unlikely(!trace))
+		return -EFAULT;
+
+	nr_kernel = count_kernel_ip(trace);
+
+	if (kernel) {
+		__u64 nr = trace->nr;
+
+		trace->nr = nr_kernel;
+		ret = __bpf_get_stackid(map, trace, flags);
+
+		/* restore nr */
+		trace->nr = nr;
+	} else { /* user */
+		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+		skip += nr_kernel;
+		if (skip > BPF_F_SKIP_FIELD_MASK)
+			return -EFAULT;
+
+		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+		ret = __bpf_get_stackid(map, trace, flags);
+	}
+	return ret;
+}
+
+const struct bpf_func_proto bpf_get_stackid_proto_pe = {
+	.func		= bpf_get_stackid_pe,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+			    struct perf_callchain_entry *trace_in,
 			    void *buf, u32 size, u64 flags)
 {
 	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
@@ -520,7 +602,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	else
 		init_nr = sysctl_perf_event_max_stack - num_elem;
 
-	if (kernel && task)
+	if (trace_in)
+		trace = trace_in;
+	else if (kernel && task)
 		trace = get_callchain_entry_for_task(task, init_nr);
 	else
 		trace = get_perf_callchain(regs, init_nr, kernel, user,
@@ -556,7 +640,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	   u64, flags)
 {
-	return __bpf_get_stack(regs, NULL, buf, size, flags);
+	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
 }
 
 const struct bpf_func_proto bpf_get_stack_proto = {
@@ -574,7 +658,7 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
 {
 	struct pt_regs *regs = task_pt_regs(task);
 
-	return __bpf_get_stack(regs, task, buf, size, flags);
+	return __bpf_get_stack(regs, task, NULL, buf, size, flags);
 }
 
 BTF_ID_LIST(bpf_get_task_stack_btf_ids)
@@ -591,6 +675,70 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
 	.btf_id		= bpf_get_task_stack_btf_ids,
 };
 
+BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
+	   void *, buf, u32, size, u64, flags)
+{
+	struct perf_event *event = ctx->event;
+	struct perf_callchain_entry *trace;
+	bool kernel, user;
+	int err = -EINVAL;
+	__u64 nr_kernel;
+
+	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+		return __bpf_get_stack(ctx->regs, NULL, NULL, buf, size, flags);
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_USER_BUILD_ID)))
+		goto clear;
+
+	user = flags & BPF_F_USER_STACK;
+	kernel = !user;
+
+	err = -EFAULT;
+	trace = ctx->data->callchain;
+	if (unlikely(!trace))
+		goto clear;
+
+	nr_kernel = count_kernel_ip(trace);
+
+	if (kernel) {
+		__u64 nr = trace->nr;
+
+		trace->nr = nr_kernel;
+		err = __bpf_get_stack(ctx->regs, NULL, trace, buf,
+				      size, flags);
+
+		/* restore nr */
+		trace->nr = nr;
+	} else { /* user */
+		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+		skip += nr_kernel;
+		if (skip > BPF_F_SKIP_FIELD_MASK)
+			goto clear;
+
+		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+		err = __bpf_get_stack(ctx->regs, NULL, trace, buf,
+				      size, flags);
+	}
+	return err;
+
+clear:
+	memset(buf, 0, size);
+	return err;
+
+}
+
+const struct bpf_func_proto bpf_get_stack_proto_pe = {
+	.func		= bpf_get_stack_pe,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
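Aside (not part of the commit): a standalone sketch of the split that
count_kernel_ip() performs. In an early-fetched callchain, every entry
before the first PERF_CONTEXT_USER marker belongs to the kernel portion;
for user stacks, that count is folded into the BPF_F_SKIP_FIELD_MASK bits
of flags so the shared helpers skip past it. The addresses below are
invented; only the marker constants match the perf UAPI.

/* Demonstrates the count_kernel_ip() walk on a synthetic callchain.
 * This is ordinary user-space C, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define PERF_CONTEXT_KERNEL	((uint64_t)-128)
#define PERF_CONTEXT_USER	((uint64_t)-512)

struct callchain { uint64_t nr; uint64_t ip[16]; };

static uint64_t count_kernel_ip(const struct callchain *trace)
{
	uint64_t nr_kernel = 0;

	/* stop at the first user-context marker */
	while (nr_kernel < trace->nr) {
		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
			break;
		nr_kernel++;
	}
	return nr_kernel;
}

int main(void)
{
	struct callchain trace = {
		.nr = 6,
		.ip = { PERF_CONTEXT_KERNEL, 0xffffffffa0001000,
			0xffffffffa0002000, PERF_CONTEXT_USER,
			0x400500, 0x400600 },
	};

	/* kernel part = marker + two kernel IPs = 3 entries; for a
	 * user-stack request, 3 is added to the skip field so
	 * __bpf_get_stackid()/__bpf_get_stack() start past them.
	 */
	printf("nr_kernel = %llu\n",
	       (unsigned long long)count_kernel_ip(&trace));
	return 0;
}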
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
@@ -1411,9 +1411,9 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_perf_event_output:
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
-		return &bpf_get_stackid_proto_tp;
+		return &bpf_get_stackid_proto_pe;
 	case BPF_FUNC_get_stack:
-		return &bpf_get_stack_proto_tp;
+		return &bpf_get_stack_proto_pe;
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
 	case BPF_FUNC_read_branch_records:
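For completeness, a hedged user-space sketch of when the _pe path is
exercised (not from the commit; error handling is trimmed, and prog_fd is
assumed to be an already-loaded perf_event BPF program). Opening a precise
cycles event with callchain sampling is what can lead the kernel to fetch
the callchain early and mark the event __PERF_SAMPLE_CALLCHAIN_EARLY:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_pebs_prog(int prog_fd)
{
	struct perf_event_attr attr;
	int pfd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.freq = 1;
	attr.sample_freq = 99;
	attr.sample_type = PERF_SAMPLE_CALLCHAIN;
	attr.precise_ip = 2;	/* request a precise (PEBS) event on x86 */

	pfd = syscall(__NR_perf_event_open, &attr, -1 /* any pid */,
		      0 /* cpu 0 */, -1 /* no group */, 0);
	if (pfd < 0)
		return -1;

	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) ||
	    ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0)) {
		close(pfd);
		return -1;
	}
	return pfd;
}

On such an event, bpf_get_stackid()/bpf_get_stack() calls from the attached
program go through the new bpf_get_stackid_pe()/bpf_get_stack_pe() helpers
and consume ctx->data->callchain instead of re-unwinding from pt_regs.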