bpf: Add per-program recursion prevention mechanism
Since both sleepable and non-sleepable programs execute under migrate_disable, add a recursion prevention mechanism to both types of programs when they're executed via bpf trampoline.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210210033634.62081-5-alexei.starovoitov@gmail.com
commit ca06f55b90
parent f2dd3b3946
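To illustrate the mechanism the diff below implements, here is a minimal standalone C sketch of the recursion guard: a per-CPU counter (prog->active in the patch) is incremented on entry and decremented on exit, and a nested invocation on the same CPU is skipped. This is not kernel code; fake_prog, prog_enter, prog_exit and run_prog are made-up names, and the per-CPU counter is modeled as a plain int on a single CPU.

/* Standalone sketch (not kernel code) of the per-program recursion guard. */
#include <stdio.h>

#define NO_START_TIME 1ULL

struct fake_prog {
	int active;	/* stands in for the per-CPU prog->active counter */
};

/* returns 0 to skip execution, otherwise a non-zero "start time" */
static unsigned long long prog_enter(struct fake_prog *p)
{
	if (++p->active != 1)
		return 0;	/* program already running on this CPU: skip it */
	return NO_START_TIME;	/* would be sched_clock() when stats are enabled */
}

static void prog_exit(struct fake_prog *p)
{
	p->active--;	/* exit always runs, keeping the counter balanced */
}

static void run_prog(struct fake_prog *p, int depth)
{
	unsigned long long start = prog_enter(p);

	if (start) {
		printf("depth %d: executed\n", depth);
		if (depth < 2)
			run_prog(p, depth + 1);	/* simulate re-entry on the same CPU */
	} else {
		printf("depth %d: skipped, recursion detected\n", depth);
	}
	prog_exit(p);
}

int main(void)
{
	struct fake_prog p = { 0 };

	run_prog(&p, 1);
	return 0;
}

Running this prints "depth 1: executed" followed by "depth 2: skipped, recursion detected", which mirrors what __bpf_prog_enter*() returning 0 achieves in the generated trampoline.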
arch/x86/net/bpf_jit_comp.c

@@ -1740,8 +1740,11 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_prog *p, int stack_size, bool mod_ret)
 {
 	u8 *prog = *pprog;
+	u8 *jmp_insn;
 	int cnt = 0;
 
+	/* arg1: mov rdi, progs[i] */
+	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1749,6 +1752,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* remember prog start time returned by __bpf_prog_enter */
 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
+	/* if (__bpf_prog_enter*(prog) == 0)
+	 *	goto skip_exec_of_prog;
+	 */
+	EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
+	/* emit 2 nops that will be replaced with JE insn */
+	jmp_insn = prog;
+	emit_nops(&prog, 2);
+
 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
 	/* arg2: progs[i]->insnsi for interpreter */
@@ -1767,6 +1778,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
+	/* replace 2 nops with JE insn, since jmp target is known */
+	jmp_insn[0] = X86_JE;
+	jmp_insn[1] = prog - jmp_insn - 2;
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */

include/linux/bpf.h

@@ -529,7 +529,7 @@ struct btf_func_model {
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
-#define BPF_MAX_TRAMP_PROGS 40
+#define BPF_MAX_TRAMP_PROGS 38
 
 struct bpf_tramp_progs {
 	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 				struct bpf_tramp_progs *tprogs,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(void);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
 
 struct bpf_ksym {

include/linux/filter.h

@@ -565,6 +565,7 @@ struct bpf_prog {
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_stats __percpu *stats;
+	int __percpu		*active;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */

kernel/bpf/core.c

@@ -91,6 +91,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 		vfree(fp);
 		return NULL;
 	}
+	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	if (!fp->active) {
+		vfree(fp);
+		kfree(aux);
+		return NULL;
+	}
 
 	fp->pages = size / PAGE_SIZE;
 	fp->aux = aux;
@@ -116,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 	if (!prog->stats) {
+		free_percpu(prog->active);
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -253,6 +260,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 		kfree(fp->aux);
 	}
 	free_percpu(fp->stats);
+	free_percpu(fp->active);
 	vfree(fp);
 }
 
kernel/bpf/trampoline.c

@@ -381,13 +381,16 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	mutex_unlock(&trampoline_mutex);
 }
 
-#define NO_START_TIME 0
+#define NO_START_TIME 1
 static u64 notrace bpf_prog_start_time(void)
 {
 	u64 start = NO_START_TIME;
 
-	if (static_branch_unlikely(&bpf_stats_enabled_key))
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
+		if (unlikely(!start))
+			start = NO_START_TIME;
+	}
 	return start;
 }
 
@@ -397,12 +400,20 @@ static u64 notrace bpf_prog_start_time(void)
  * call __bpf_prog_enter
  * call prog->bpf_func
  * call __bpf_prog_exit
+ *
+ * __bpf_prog_enter returns:
+ * 0 - skip execution of the bpf prog
+ * 1 - execute bpf prog
+ * [2..MAX_U64] - excute bpf prog and record execution time.
+ *     This is start time.
  */
-u64 notrace __bpf_prog_enter(void)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
 	__acquires(RCU)
 {
 	rcu_read_lock();
 	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
 	return bpf_prog_start_time();
 }
 
@@ -430,21 +441,25 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	__releases(RCU)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(void)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
 {
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
 	return bpf_prog_start_time();
 }
 
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock_trace();
 }

tools/testing/selftests/bpf/prog_tests/fexit_stress.c

@@ -2,8 +2,8 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
 
-/* x86-64 fits 55 JITed and 43 interpreted progs into half page */
-#define CNT 40
+/* that's kernel internal BPF_MAX_TRAMP_PROGS define */
+#define CNT 38
 
 void test_fexit_stress(void)
 {

tools/testing/selftests/bpf/prog_tests/trampoline_count.c

@@ -4,7 +4,7 @@
 #include <sys/prctl.h>
 #include <test_progs.h>
 
-#define MAX_TRAMP_PROGS 40
+#define MAX_TRAMP_PROGS 38
 
 struct inst {
 	struct bpf_object *obj;
@@ -52,7 +52,7 @@ void test_trampoline_count(void)
 	struct bpf_link *link;
 	char comm[16] = {};
 
-	/* attach 'allowed' 40 trampoline programs */
+	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
 		obj = bpf_object__open_file(object, NULL);
 		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {