2014-09-26 15:17:02 +08:00
|
|
|
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <net/netlink.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
|
|
|
|
/* bpf_check() is a static code analyzer that walks eBPF program
|
|
|
|
* instruction by instruction and updates register/stack state.
|
|
|
|
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
|
|
|
|
*
|
|
|
|
* The first pass is depth-first-search to check that the program is a DAG.
|
|
|
|
* It rejects the following programs:
|
|
|
|
* - larger than BPF_MAXINSNS insns
|
|
|
|
* - if loop is present (detected via back-edge)
|
|
|
|
* - unreachable insns exist (shouldn't be a forest. program = one function)
|
|
|
|
* - out of bounds or malformed jumps
|
|
|
|
* The second pass is all possible path descent from the 1st insn.
|
|
|
|
* Since it's analyzing all pathes through the program, the length of the
|
|
|
|
* analysis is limited to 32k insn, which may be hit even if total number of
|
|
|
|
* insn is less then 4K, but there are too many branches that change stack/regs.
|
|
|
|
* Number of 'branches to be analyzed' is limited to 1k
|
|
|
|
*
|
|
|
|
* On entry to each instruction, each register has a type, and the instruction
|
|
|
|
* changes the types of the registers depending on instruction semantics.
|
|
|
|
* If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
|
|
|
|
* copied to R1.
|
|
|
|
*
|
|
|
|
* All registers are 64-bit.
|
|
|
|
* R0 - return register
|
|
|
|
* R1-R5 argument passing registers
|
|
|
|
* R6-R9 callee saved registers
|
|
|
|
* R10 - frame pointer read-only
|
|
|
|
*
|
|
|
|
* At the start of BPF program the register R1 contains a pointer to bpf_context
|
|
|
|
* and has type PTR_TO_CTX.
|
|
|
|
*
|
|
|
|
* Verifier tracks arithmetic operations on pointers in case:
|
|
|
|
* BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
|
|
|
|
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
|
|
|
|
* 1st insn copies R10 (which has FRAME_PTR) type into R1
|
|
|
|
* and 2nd arithmetic instruction is pattern matched to recognize
|
|
|
|
* that it wants to construct a pointer to some element within stack.
|
|
|
|
* So after 2nd insn, the register R1 has type PTR_TO_STACK
|
|
|
|
* (and -20 constant is saved for further stack bounds checking).
|
|
|
|
* Meaning that this reg is a pointer to stack plus known immediate constant.
|
|
|
|
*
|
|
|
|
* Most of the time the registers have UNKNOWN_VALUE type, which
|
|
|
|
* means the register has some value, but it's not a valid pointer.
|
|
|
|
* (like pointer plus pointer becomes UNKNOWN_VALUE type)
|
|
|
|
*
|
|
|
|
* When verifier sees load or store instructions the type of base register
|
|
|
|
* can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
|
|
|
|
* types recognized by check_mem_access() function.
|
|
|
|
*
|
|
|
|
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
|
|
|
|
* and the range of [ptr, ptr + map's value_size) is accessible.
|
|
|
|
*
|
|
|
|
* registers used to pass values to function calls are checked against
|
|
|
|
* function argument constraints.
|
|
|
|
*
|
|
|
|
* ARG_PTR_TO_MAP_KEY is one of such argument constraints.
|
|
|
|
* It means that the register type passed to this function must be
|
|
|
|
* PTR_TO_STACK and it will be used inside the function as
|
|
|
|
* 'pointer to map element key'
|
|
|
|
*
|
|
|
|
* For example the argument constraints for bpf_map_lookup_elem():
|
|
|
|
* .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
|
|
|
* .arg1_type = ARG_CONST_MAP_PTR,
|
|
|
|
* .arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
*
|
|
|
|
* ret_type says that this function returns 'pointer to map elem value or null'
|
|
|
|
* function expects 1st argument to be a const pointer to 'struct bpf_map' and
|
|
|
|
* 2nd argument should be a pointer to stack, which will be used inside
|
|
|
|
* the helper function as a pointer to map element key.
|
|
|
|
*
|
|
|
|
* On the kernel side the helper function looks like:
|
|
|
|
* u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
|
|
|
* {
|
|
|
|
* struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
|
|
|
* void *key = (void *) (unsigned long) r2;
|
|
|
|
* void *value;
|
|
|
|
*
|
|
|
|
* here kernel can access 'key' and 'map' pointers safely, knowing that
|
|
|
|
* [key, key + map->key_size) bytes are valid and were initialized on
|
|
|
|
* the stack of eBPF program.
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* Corresponding eBPF program may look like:
|
|
|
|
* BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
|
|
|
|
* BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
|
|
|
|
* BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
|
|
|
|
* BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
|
|
|
* here verifier looks at prototype of map_lookup_elem() and sees:
|
|
|
|
* .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
|
|
|
|
* Now verifier knows that this map has key of R1->map_ptr->key_size bytes
|
|
|
|
*
|
|
|
|
* Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
|
|
|
|
* Now verifier checks that [R2, R2 + map's key_size) are within stack limits
|
|
|
|
* and were initialized prior to this call.
|
|
|
|
* If it's ok, then verifier allows this BPF_CALL insn and looks at
|
|
|
|
* .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
|
|
|
|
* R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
|
|
|
|
* returns ether pointer to map value or NULL.
|
|
|
|
*
|
|
|
|
* When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
|
|
|
|
* insn, the register holding that pointer in the true branch changes state to
|
|
|
|
* PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
|
|
|
|
* branch. See check_cond_jmp_op().
|
|
|
|
*
|
|
|
|
* After the call R0 is set to return type of the function and registers R1-R5
|
|
|
|
* are set to NOT_INIT to indicate that they are no longer readable.
|
|
|
|
*/
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* types of values stored in eBPF registers */
|
|
|
|
enum bpf_reg_type {
|
|
|
|
NOT_INIT = 0, /* nothing was written into register */
|
|
|
|
UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */
|
|
|
|
PTR_TO_CTX, /* reg points to bpf_context */
|
|
|
|
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
|
|
|
|
PTR_TO_MAP_VALUE, /* reg points to map element value */
|
|
|
|
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
|
|
|
|
FRAME_PTR, /* reg == frame_pointer */
|
|
|
|
PTR_TO_STACK, /* reg == frame_pointer + imm */
|
|
|
|
CONST_IMM, /* constant integer value */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct reg_state {
|
|
|
|
enum bpf_reg_type type;
|
|
|
|
union {
|
|
|
|
/* valid when type == CONST_IMM | PTR_TO_STACK */
|
2016-04-07 10:39:21 +08:00
|
|
|
long imm;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
|
|
|
|
* PTR_TO_MAP_VALUE_OR_NULL
|
|
|
|
*/
|
|
|
|
struct bpf_map *map_ptr;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
enum bpf_stack_slot_type {
|
|
|
|
STACK_INVALID, /* nothing was stored in this stack slot */
|
2014-10-29 06:11:41 +08:00
|
|
|
STACK_SPILL, /* register spilled into stack */
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
STACK_MISC /* BPF program wrote some data into this slot */
|
|
|
|
};
|
|
|
|
|
2014-10-29 06:11:41 +08:00
|
|
|
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
/* state of the program:
|
|
|
|
* type of all registers and stack info
|
|
|
|
*/
|
|
|
|
struct verifier_state {
|
|
|
|
struct reg_state regs[MAX_BPF_REG];
|
2014-10-29 06:11:41 +08:00
|
|
|
u8 stack_slot_type[MAX_BPF_STACK];
|
|
|
|
struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* linked list of verifier states used to prune search */
|
|
|
|
struct verifier_state_list {
|
|
|
|
struct verifier_state state;
|
|
|
|
struct verifier_state_list *next;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
|
|
|
|
struct verifier_stack_elem {
|
|
|
|
/* verifer state is 'st'
|
|
|
|
* before processing instruction 'insn_idx'
|
|
|
|
* and after processing instruction 'prev_insn_idx'
|
|
|
|
*/
|
|
|
|
struct verifier_state st;
|
|
|
|
int insn_idx;
|
|
|
|
int prev_insn_idx;
|
|
|
|
struct verifier_stack_elem *next;
|
|
|
|
};
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
/* single container for all structs
|
|
|
|
* one verifier_env per bpf_check() call
|
|
|
|
*/
|
|
|
|
struct verifier_env {
|
2014-09-26 15:17:04 +08:00
|
|
|
struct bpf_prog *prog; /* eBPF program being verified */
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
struct verifier_stack_elem *head; /* stack of verifier states to be processed */
|
|
|
|
int stack_size; /* number of states to be processed */
|
|
|
|
struct verifier_state cur_state; /* current verifier state */
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
struct verifier_state_list **explored_states; /* search pruning optimization */
|
2014-09-26 15:17:04 +08:00
|
|
|
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
|
|
|
|
u32 used_map_cnt; /* number of used maps */
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
bool allow_ptr_leaks;
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
};
|
|
|
|
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-06 04:33:17 +08:00
|
|
|
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
|
|
|
|
#define BPF_COMPLEXITY_LIMIT_STACK 1024
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
/* verbose verifier prints what it's seeing
|
|
|
|
* bpf_check() is called under lock, so no race to access these global vars
|
|
|
|
*/
|
|
|
|
static u32 log_level, log_size, log_len;
|
|
|
|
static char *log_buf;
|
|
|
|
|
|
|
|
static DEFINE_MUTEX(bpf_verifier_lock);
|
|
|
|
|
|
|
|
/* log_level controls verbosity level of eBPF verifier.
|
|
|
|
* verbose() is used to dump the verification trace to the log, so the user
|
|
|
|
* can figure out what's wrong with the program
|
|
|
|
*/
|
2015-11-03 18:39:20 +08:00
|
|
|
static __printf(1, 2) void verbose(const char *fmt, ...)
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (log_level == 0 || log_len >= log_size - 1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* string representation of 'enum bpf_reg_type' */
|
|
|
|
static const char * const reg_type_str[] = {
|
|
|
|
[NOT_INIT] = "?",
|
|
|
|
[UNKNOWN_VALUE] = "inv",
|
|
|
|
[PTR_TO_CTX] = "ctx",
|
|
|
|
[CONST_PTR_TO_MAP] = "map_ptr",
|
|
|
|
[PTR_TO_MAP_VALUE] = "map_value",
|
|
|
|
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
|
|
|
|
[FRAME_PTR] = "fp",
|
|
|
|
[PTR_TO_STACK] = "fp",
|
|
|
|
[CONST_IMM] = "imm",
|
|
|
|
};
|
|
|
|
|
2015-08-06 15:02:35 +08:00
|
|
|
static const struct {
|
|
|
|
int map_type;
|
|
|
|
int func_id;
|
|
|
|
} func_limit[] = {
|
|
|
|
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
|
|
|
|
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
|
2015-10-21 11:02:34 +08:00
|
|
|
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
|
2016-02-18 11:58:58 +08:00
|
|
|
{BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
|
2015-08-06 15:02:35 +08:00
|
|
|
};
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
static void print_verifier_state(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
enum bpf_reg_type t;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
|
|
|
t = env->cur_state.regs[i].type;
|
|
|
|
if (t == NOT_INIT)
|
|
|
|
continue;
|
|
|
|
verbose(" R%d=%s", i, reg_type_str[t]);
|
|
|
|
if (t == CONST_IMM || t == PTR_TO_STACK)
|
2016-04-07 10:39:21 +08:00
|
|
|
verbose("%ld", env->cur_state.regs[i].imm);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
|
|
|
|
t == PTR_TO_MAP_VALUE_OR_NULL)
|
|
|
|
verbose("(ks=%d,vs=%d)",
|
|
|
|
env->cur_state.regs[i].map_ptr->key_size,
|
|
|
|
env->cur_state.regs[i].map_ptr->value_size);
|
|
|
|
}
|
2014-10-29 06:11:41 +08:00
|
|
|
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
|
|
|
|
if (env->cur_state.stack_slot_type[i] == STACK_SPILL)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose(" fp%d=%s", -MAX_BPF_STACK + i,
|
2014-10-29 06:11:41 +08:00
|
|
|
reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
}
|
|
|
|
verbose("\n");
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
static const char *const bpf_class_string[] = {
|
|
|
|
[BPF_LD] = "ld",
|
|
|
|
[BPF_LDX] = "ldx",
|
|
|
|
[BPF_ST] = "st",
|
|
|
|
[BPF_STX] = "stx",
|
|
|
|
[BPF_ALU] = "alu",
|
|
|
|
[BPF_JMP] = "jmp",
|
|
|
|
[BPF_RET] = "BUG",
|
|
|
|
[BPF_ALU64] = "alu64",
|
|
|
|
};
|
|
|
|
|
2015-09-09 04:40:01 +08:00
|
|
|
static const char *const bpf_alu_string[16] = {
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
[BPF_ADD >> 4] = "+=",
|
|
|
|
[BPF_SUB >> 4] = "-=",
|
|
|
|
[BPF_MUL >> 4] = "*=",
|
|
|
|
[BPF_DIV >> 4] = "/=",
|
|
|
|
[BPF_OR >> 4] = "|=",
|
|
|
|
[BPF_AND >> 4] = "&=",
|
|
|
|
[BPF_LSH >> 4] = "<<=",
|
|
|
|
[BPF_RSH >> 4] = ">>=",
|
|
|
|
[BPF_NEG >> 4] = "neg",
|
|
|
|
[BPF_MOD >> 4] = "%=",
|
|
|
|
[BPF_XOR >> 4] = "^=",
|
|
|
|
[BPF_MOV >> 4] = "=",
|
|
|
|
[BPF_ARSH >> 4] = "s>>=",
|
|
|
|
[BPF_END >> 4] = "endian",
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *const bpf_ldst_string[] = {
|
|
|
|
[BPF_W >> 3] = "u32",
|
|
|
|
[BPF_H >> 3] = "u16",
|
|
|
|
[BPF_B >> 3] = "u8",
|
|
|
|
[BPF_DW >> 3] = "u64",
|
|
|
|
};
|
|
|
|
|
2015-09-09 04:40:01 +08:00
|
|
|
static const char *const bpf_jmp_string[16] = {
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
[BPF_JA >> 4] = "jmp",
|
|
|
|
[BPF_JEQ >> 4] = "==",
|
|
|
|
[BPF_JGT >> 4] = ">",
|
|
|
|
[BPF_JGE >> 4] = ">=",
|
|
|
|
[BPF_JSET >> 4] = "&",
|
|
|
|
[BPF_JNE >> 4] = "!=",
|
|
|
|
[BPF_JSGT >> 4] = "s>",
|
|
|
|
[BPF_JSGE >> 4] = "s>=",
|
|
|
|
[BPF_CALL >> 4] = "call",
|
|
|
|
[BPF_EXIT >> 4] = "exit",
|
|
|
|
};
|
|
|
|
|
|
|
|
static void print_bpf_insn(struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
u8 class = BPF_CLASS(insn->code);
|
|
|
|
|
|
|
|
if (class == BPF_ALU || class == BPF_ALU64) {
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X)
|
|
|
|
verbose("(%02x) %sr%d %s %sr%d\n",
|
|
|
|
insn->code, class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->dst_reg,
|
|
|
|
bpf_alu_string[BPF_OP(insn->code) >> 4],
|
|
|
|
class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->src_reg);
|
|
|
|
else
|
|
|
|
verbose("(%02x) %sr%d %s %s%d\n",
|
|
|
|
insn->code, class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->dst_reg,
|
|
|
|
bpf_alu_string[BPF_OP(insn->code) >> 4],
|
|
|
|
class == BPF_ALU ? "(u32) " : "",
|
|
|
|
insn->imm);
|
|
|
|
} else if (class == BPF_STX) {
|
|
|
|
if (BPF_MODE(insn->code) == BPF_MEM)
|
|
|
|
verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg,
|
|
|
|
insn->off, insn->src_reg);
|
|
|
|
else if (BPF_MODE(insn->code) == BPF_XADD)
|
|
|
|
verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg, insn->off,
|
|
|
|
insn->src_reg);
|
|
|
|
else
|
|
|
|
verbose("BUG_%02x\n", insn->code);
|
|
|
|
} else if (class == BPF_ST) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM) {
|
|
|
|
verbose("BUG_st_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->dst_reg,
|
|
|
|
insn->off, insn->imm);
|
|
|
|
} else if (class == BPF_LDX) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM) {
|
|
|
|
verbose("BUG_ldx_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->src_reg, insn->off);
|
|
|
|
} else if (class == BPF_LD) {
|
|
|
|
if (BPF_MODE(insn->code) == BPF_ABS) {
|
|
|
|
verbose("(%02x) r0 = *(%s *)skb[%d]\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->imm);
|
|
|
|
} else if (BPF_MODE(insn->code) == BPF_IND) {
|
|
|
|
verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
|
|
|
|
insn->code,
|
|
|
|
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
|
|
|
|
insn->src_reg, insn->imm);
|
|
|
|
} else if (BPF_MODE(insn->code) == BPF_IMM) {
|
|
|
|
verbose("(%02x) r%d = 0x%x\n",
|
|
|
|
insn->code, insn->dst_reg, insn->imm);
|
|
|
|
} else {
|
|
|
|
verbose("BUG_ld_%02x\n", insn->code);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else if (class == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
|
|
|
|
if (opcode == BPF_CALL) {
|
|
|
|
verbose("(%02x) call %d\n", insn->code, insn->imm);
|
|
|
|
} else if (insn->code == (BPF_JMP | BPF_JA)) {
|
|
|
|
verbose("(%02x) goto pc%+d\n",
|
|
|
|
insn->code, insn->off);
|
|
|
|
} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
|
|
|
|
verbose("(%02x) exit\n", insn->code);
|
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
verbose("(%02x) if r%d %s r%d goto pc%+d\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_jmp_string[BPF_OP(insn->code) >> 4],
|
|
|
|
insn->src_reg, insn->off);
|
|
|
|
} else {
|
|
|
|
verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
|
|
|
|
insn->code, insn->dst_reg,
|
|
|
|
bpf_jmp_string[BPF_OP(insn->code) >> 4],
|
|
|
|
insn->imm, insn->off);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
|
|
|
|
{
|
|
|
|
struct verifier_stack_elem *elem;
|
|
|
|
int insn_idx;
|
|
|
|
|
|
|
|
if (env->head == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
|
|
|
|
insn_idx = env->head->insn_idx;
|
|
|
|
if (prev_insn_idx)
|
|
|
|
*prev_insn_idx = env->head->prev_insn_idx;
|
|
|
|
elem = env->head->next;
|
|
|
|
kfree(env->head);
|
|
|
|
env->head = elem;
|
|
|
|
env->stack_size--;
|
|
|
|
return insn_idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
|
|
|
|
int prev_insn_idx)
|
|
|
|
{
|
|
|
|
struct verifier_stack_elem *elem;
|
|
|
|
|
|
|
|
elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
|
|
|
|
if (!elem)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
|
|
|
|
elem->insn_idx = insn_idx;
|
|
|
|
elem->prev_insn_idx = prev_insn_idx;
|
|
|
|
elem->next = env->head;
|
|
|
|
env->head = elem;
|
|
|
|
env->stack_size++;
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-06 04:33:17 +08:00
|
|
|
if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("BPF program is too complex\n");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
return &elem->st;
|
|
|
|
err:
|
|
|
|
/* pop all elements and return */
|
|
|
|
while (pop_stack(env, NULL) >= 0);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define CALLER_SAVED_REGS 6
|
|
|
|
static const int caller_saved[CALLER_SAVED_REGS] = {
|
|
|
|
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
|
|
|
|
};
|
|
|
|
|
|
|
|
static void init_reg_state(struct reg_state *regs)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
|
|
|
regs[i].type = NOT_INIT;
|
|
|
|
regs[i].imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* frame pointer */
|
|
|
|
regs[BPF_REG_FP].type = FRAME_PTR;
|
|
|
|
|
|
|
|
/* 1st arg to a function */
|
|
|
|
regs[BPF_REG_1].type = PTR_TO_CTX;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
|
|
|
|
{
|
|
|
|
BUG_ON(regno >= MAX_BPF_REG);
|
|
|
|
regs[regno].type = UNKNOWN_VALUE;
|
|
|
|
regs[regno].imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
enum reg_arg_type {
|
|
|
|
SRC_OP, /* register is used as source operand */
|
|
|
|
DST_OP, /* register is used as destination operand */
|
|
|
|
DST_OP_NO_MARK /* same as above, check only, don't mark */
|
|
|
|
};
|
|
|
|
|
|
|
|
static int check_reg_arg(struct reg_state *regs, u32 regno,
|
|
|
|
enum reg_arg_type t)
|
|
|
|
{
|
|
|
|
if (regno >= MAX_BPF_REG) {
|
|
|
|
verbose("R%d is invalid\n", regno);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t == SRC_OP) {
|
|
|
|
/* check whether register used as source operand can be read */
|
|
|
|
if (regs[regno].type == NOT_INIT) {
|
|
|
|
verbose("R%d !read_ok\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* check whether register used as dest operand can be written to */
|
|
|
|
if (regno == BPF_REG_FP) {
|
|
|
|
verbose("frame pointer is read only\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
if (t == DST_OP)
|
|
|
|
mark_reg_unknown_value(regs, regno);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bpf_size_to_bytes(int bpf_size)
|
|
|
|
{
|
|
|
|
if (bpf_size == BPF_W)
|
|
|
|
return 4;
|
|
|
|
else if (bpf_size == BPF_H)
|
|
|
|
return 2;
|
|
|
|
else if (bpf_size == BPF_B)
|
|
|
|
return 1;
|
|
|
|
else if (bpf_size == BPF_DW)
|
|
|
|
return 8;
|
|
|
|
else
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
static bool is_spillable_regtype(enum bpf_reg_type type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case PTR_TO_MAP_VALUE:
|
|
|
|
case PTR_TO_MAP_VALUE_OR_NULL:
|
|
|
|
case PTR_TO_STACK:
|
|
|
|
case PTR_TO_CTX:
|
|
|
|
case FRAME_PTR:
|
|
|
|
case CONST_PTR_TO_MAP:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check_stack_read/write functions track spill/fill of registers,
|
|
|
|
* stack boundary and alignment are checked in check_mem_access()
|
|
|
|
*/
|
|
|
|
static int check_stack_write(struct verifier_state *state, int off, int size,
|
|
|
|
int value_regno)
|
|
|
|
{
|
|
|
|
int i;
|
2014-10-29 06:11:41 +08:00
|
|
|
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
|
|
|
|
* so it's aligned access and [off, off + size) are within stack limits
|
|
|
|
*/
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
if (value_regno >= 0 &&
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
is_spillable_regtype(state->regs[value_regno].type)) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
/* register containing pointer is being spilled into stack */
|
2014-10-29 06:11:41 +08:00
|
|
|
if (size != BPF_REG_SIZE) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("invalid size of register spill\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* save register state */
|
2014-10-29 06:11:41 +08:00
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
|
|
|
|
state->regs[value_regno];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
2014-10-29 06:11:41 +08:00
|
|
|
for (i = 0; i < BPF_REG_SIZE; i++)
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
|
|
|
|
} else {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* regular write of data into stack */
|
2014-10-29 06:11:41 +08:00
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
|
|
|
|
(struct reg_state) {};
|
|
|
|
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_stack_read(struct verifier_state *state, int off, int size,
|
|
|
|
int value_regno)
|
|
|
|
{
|
2014-10-29 06:11:41 +08:00
|
|
|
u8 *slot_type;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
int i;
|
|
|
|
|
2014-10-29 06:11:41 +08:00
|
|
|
slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
2014-10-29 06:11:41 +08:00
|
|
|
if (slot_type[0] == STACK_SPILL) {
|
|
|
|
if (size != BPF_REG_SIZE) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("invalid size of register spill\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2014-10-29 06:11:41 +08:00
|
|
|
for (i = 1; i < BPF_REG_SIZE; i++) {
|
|
|
|
if (slot_type[i] != STACK_SPILL) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("corrupted spill memory\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (value_regno >= 0)
|
|
|
|
/* restore register state from stack */
|
2014-10-29 06:11:41 +08:00
|
|
|
state->regs[value_regno] =
|
|
|
|
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < size; i++) {
|
2014-10-29 06:11:41 +08:00
|
|
|
if (slot_type[i] != STACK_MISC) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("invalid read from stack off %d+%d size %d\n",
|
|
|
|
off, i, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (value_regno >= 0)
|
|
|
|
/* have read misc data from the stack */
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check read/write into map element returned by bpf_map_lookup_elem() */
|
|
|
|
static int check_map_access(struct verifier_env *env, u32 regno, int off,
|
|
|
|
int size)
|
|
|
|
{
|
|
|
|
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
|
|
|
|
|
|
|
|
if (off < 0 || off + size > map->value_size) {
|
|
|
|
verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
|
|
|
|
map->value_size, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check access to 'struct bpf_context' fields */
|
|
|
|
static int check_ctx_access(struct verifier_env *env, int off, int size,
|
|
|
|
enum bpf_access_type t)
|
|
|
|
{
|
|
|
|
if (env->prog->aux->ops->is_valid_access &&
|
2016-04-07 09:43:28 +08:00
|
|
|
env->prog->aux->ops->is_valid_access(off, size, t)) {
|
|
|
|
/* remember the offset of last byte accessed in ctx */
|
|
|
|
if (env->prog->aux->max_ctx_offset < off + size)
|
|
|
|
env->prog->aux->max_ctx_offset = off + size;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
return 0;
|
2016-04-07 09:43:28 +08:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
verbose("invalid bpf_context access off=%d size=%d\n", off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
static bool is_pointer_value(struct verifier_env *env, int regno)
|
|
|
|
{
|
|
|
|
if (env->allow_ptr_leaks)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (env->cur_state.regs[regno].type) {
|
|
|
|
case UNKNOWN_VALUE:
|
|
|
|
case CONST_IMM:
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check whether memory at (regno + off) is accessible for t = (read | write)
|
|
|
|
* if t==write, value_regno is a register which value is stored into memory
|
|
|
|
* if t==read, value_regno is a register which will receive the value from memory
|
|
|
|
* if t==write && value_regno==-1, some unknown value is stored into memory
|
|
|
|
* if t==read && value_regno==-1, don't care what we read from memory
|
|
|
|
*/
|
|
|
|
static int check_mem_access(struct verifier_env *env, u32 regno, int off,
|
|
|
|
int bpf_size, enum bpf_access_type t,
|
|
|
|
int value_regno)
|
|
|
|
{
|
|
|
|
struct verifier_state *state = &env->cur_state;
|
|
|
|
int size, err = 0;
|
|
|
|
|
2015-07-24 05:24:40 +08:00
|
|
|
if (state->regs[regno].type == PTR_TO_STACK)
|
|
|
|
off += state->regs[regno].imm;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
size = bpf_size_to_bytes(bpf_size);
|
|
|
|
if (size < 0)
|
|
|
|
return size;
|
|
|
|
|
|
|
|
if (off % size != 0) {
|
|
|
|
verbose("misaligned access off %d size %d\n", off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (t == BPF_WRITE && value_regno >= 0 &&
|
|
|
|
is_pointer_value(env, value_regno)) {
|
|
|
|
verbose("R%d leaks addr into map\n", value_regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
err = check_map_access(env, regno, off, size);
|
|
|
|
if (!err && t == BPF_READ && value_regno >= 0)
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
|
|
|
|
|
|
|
} else if (state->regs[regno].type == PTR_TO_CTX) {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (t == BPF_WRITE && value_regno >= 0 &&
|
|
|
|
is_pointer_value(env, value_regno)) {
|
|
|
|
verbose("R%d leaks addr into ctx\n", value_regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
err = check_ctx_access(env, off, size, t);
|
|
|
|
if (!err && t == BPF_READ && value_regno >= 0)
|
|
|
|
mark_reg_unknown_value(state->regs, value_regno);
|
|
|
|
|
2015-07-24 05:24:40 +08:00
|
|
|
} else if (state->regs[regno].type == FRAME_PTR ||
|
|
|
|
state->regs[regno].type == PTR_TO_STACK) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
if (off >= 0 || off < -MAX_BPF_STACK) {
|
|
|
|
verbose("invalid stack off=%d size=%d\n", off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (t == BPF_WRITE) {
|
|
|
|
if (!env->allow_ptr_leaks &&
|
|
|
|
state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
|
|
|
|
size != BPF_REG_SIZE) {
|
|
|
|
verbose("attempt to corrupt spilled pointer on stack\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
err = check_stack_write(state, off, size, value_regno);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
} else {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
err = check_stack_read(state, off, size, value_regno);
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else {
|
|
|
|
verbose("R%d invalid mem access '%s'\n",
|
|
|
|
regno, reg_type_str[state->regs[regno].type]);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
struct reg_state *regs = env->cur_state.regs;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
|
|
|
|
insn->imm != 0) {
|
|
|
|
verbose("BPF_XADD uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check whether atomic_add can read the memory */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_READ, -1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check whether atomic_add can write into the same memory */
|
|
|
|
return check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE, -1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* when register 'regno' is passed into function that will read 'access_size'
|
|
|
|
* bytes from that pointer, make sure that it's within stack boundary
|
|
|
|
* and all elements of stack are initialized
|
|
|
|
*/
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
static int check_stack_boundary(struct verifier_env *env, int regno,
|
|
|
|
int access_size, bool zero_size_allowed)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
{
|
|
|
|
struct verifier_state *state = &env->cur_state;
|
|
|
|
struct reg_state *regs = state->regs;
|
|
|
|
int off, i;
|
|
|
|
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
if (regs[regno].type != PTR_TO_STACK) {
|
|
|
|
if (zero_size_allowed && access_size == 0 &&
|
|
|
|
regs[regno].type == CONST_IMM &&
|
|
|
|
regs[regno].imm == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
verbose("R%d type=%s expected=%s\n", regno,
|
|
|
|
reg_type_str[regs[regno].type],
|
|
|
|
reg_type_str[PTR_TO_STACK]);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
return -EACCES;
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
off = regs[regno].imm;
|
|
|
|
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
|
|
|
|
access_size <= 0) {
|
|
|
|
verbose("invalid stack type R%d off=%d access_size=%d\n",
|
|
|
|
regno, off, access_size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < access_size; i++) {
|
2014-10-29 06:11:41 +08:00
|
|
|
if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("invalid indirect read from stack off %d+%d size %d\n",
|
|
|
|
off, i, access_size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_func_arg(struct verifier_env *env, u32 regno,
|
|
|
|
enum bpf_arg_type arg_type, struct bpf_map **mapp)
|
|
|
|
{
|
|
|
|
struct reg_state *reg = env->cur_state.regs + regno;
|
|
|
|
enum bpf_reg_type expected_type;
|
|
|
|
int err = 0;
|
|
|
|
|
2015-03-13 00:21:42 +08:00
|
|
|
if (arg_type == ARG_DONTCARE)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (reg->type == NOT_INIT) {
|
|
|
|
verbose("R%d !read_ok\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (arg_type == ARG_ANYTHING) {
|
|
|
|
if (is_pointer_value(env, regno)) {
|
|
|
|
verbose("R%d leaks addr into helper function\n", regno);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2015-03-13 00:21:42 +08:00
|
|
|
return 0;
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
}
|
2015-03-13 00:21:42 +08:00
|
|
|
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
if (arg_type == ARG_PTR_TO_MAP_KEY ||
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
arg_type == ARG_PTR_TO_MAP_VALUE) {
|
|
|
|
expected_type = PTR_TO_STACK;
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
} else if (arg_type == ARG_CONST_STACK_SIZE ||
|
|
|
|
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
expected_type = CONST_IMM;
|
|
|
|
} else if (arg_type == ARG_CONST_MAP_PTR) {
|
|
|
|
expected_type = CONST_PTR_TO_MAP;
|
2015-03-27 10:53:57 +08:00
|
|
|
} else if (arg_type == ARG_PTR_TO_CTX) {
|
|
|
|
expected_type = PTR_TO_CTX;
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
} else if (arg_type == ARG_PTR_TO_STACK) {
|
|
|
|
expected_type = PTR_TO_STACK;
|
|
|
|
/* One exception here. In case function allows for NULL to be
|
|
|
|
* passed in as argument, it's a CONST_IMM type. Final test
|
|
|
|
* happens during stack boundary checking.
|
|
|
|
*/
|
|
|
|
if (reg->type == CONST_IMM && reg->imm == 0)
|
|
|
|
expected_type = CONST_IMM;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else {
|
|
|
|
verbose("unsupported arg_type %d\n", arg_type);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reg->type != expected_type) {
|
|
|
|
verbose("R%d type=%s expected=%s\n", regno,
|
|
|
|
reg_type_str[reg->type], reg_type_str[expected_type]);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (arg_type == ARG_CONST_MAP_PTR) {
|
|
|
|
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
|
|
|
|
*mapp = reg->map_ptr;
|
|
|
|
|
|
|
|
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
|
|
|
|
/* bpf_map_xxx(..., map_ptr, ..., key) call:
|
|
|
|
* check that [key, key + map->key_size) are within
|
|
|
|
* stack limits and initialized
|
|
|
|
*/
|
|
|
|
if (!*mapp) {
|
|
|
|
/* in function declaration map_ptr must come before
|
|
|
|
* map_key, so that it's verified and known before
|
|
|
|
* we have to check map_key here. Otherwise it means
|
|
|
|
* that kernel subsystem misconfigured verifier
|
|
|
|
*/
|
|
|
|
verbose("invalid map_ptr to access map->key\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
err = check_stack_boundary(env, regno, (*mapp)->key_size,
|
|
|
|
false);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
|
|
|
|
/* bpf_map_xxx(..., map_ptr, ..., value) call:
|
|
|
|
* check [value, value + map->value_size) validity
|
|
|
|
*/
|
|
|
|
if (!*mapp) {
|
|
|
|
/* kernel subsystem misconfigured verifier */
|
|
|
|
verbose("invalid map_ptr to access map->value\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
err = check_stack_boundary(env, regno, (*mapp)->value_size,
|
|
|
|
false);
|
|
|
|
} else if (arg_type == ARG_CONST_STACK_SIZE ||
|
|
|
|
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
|
|
|
|
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
/* bpf_xxx(..., buf, len) call will access 'len' bytes
|
|
|
|
* from stack pointer 'buf'. Check it
|
|
|
|
* note: regno == len, regno - 1 == buf
|
|
|
|
*/
|
|
|
|
if (regno == 0) {
|
|
|
|
/* kernel subsystem misconfigured verifier */
|
|
|
|
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: add new arg_type that allows for 0 sized stack buffer
Currently, when we pass a buffer from the eBPF stack into a helper
function, the function proto indicates argument types as ARG_PTR_TO_STACK
and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1>
must be of the latter type. Then, verifier checks whether the buffer
points into eBPF stack, is initialized, etc. The verifier also guarantees
that the constant value passed in R<X+1> is greater than 0, so helper
functions don't need to test for it and can always assume a non-NULL
initialized buffer as well as non-0 buffer size.
This patch adds a new argument types ARG_CONST_STACK_SIZE_OR_ZERO that
allows to also pass NULL as R<X> and 0 as R<X+1> into the helper function.
Such helper functions, of course, need to be able to handle these cases
internally then. Verifier guarantees that either R<X> == NULL && R<X+1> == 0
or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE), any
other combinations are not possible to load.
I went through various options of extending the verifier, and introducing
the type ARG_CONST_STACK_SIZE_OR_ZERO seems to have most minimal changes
needed to the verifier.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-20 06:05:22 +08:00
|
|
|
err = check_stack_boundary(env, regno - 1, reg->imm,
|
|
|
|
zero_size_allowed);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2015-08-06 15:02:35 +08:00
|
|
|
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
|
|
|
|
{
|
|
|
|
bool bool_map, bool_func;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!map)
|
|
|
|
return 0;
|
|
|
|
|
2015-08-12 22:57:12 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
|
2015-08-06 15:02:35 +08:00
|
|
|
bool_map = (map->map_type == func_limit[i].map_type);
|
|
|
|
bool_func = (func_id == func_limit[i].func_id);
|
|
|
|
/* only when map & func pair match it can continue.
|
|
|
|
* don't allow any other map type to be passed into
|
|
|
|
* the special func;
|
|
|
|
*/
|
2016-02-18 11:58:58 +08:00
|
|
|
if (bool_func && bool_map != bool_func) {
|
|
|
|
verbose("cannot pass map_type %d into func %d\n",
|
|
|
|
map->map_type, func_id);
|
2015-08-06 15:02:35 +08:00
|
|
|
return -EINVAL;
|
2016-02-18 11:58:58 +08:00
|
|
|
}
|
2015-08-06 15:02:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
static int check_call(struct verifier_env *env, int func_id)
|
|
|
|
{
|
|
|
|
struct verifier_state *state = &env->cur_state;
|
|
|
|
const struct bpf_func_proto *fn = NULL;
|
|
|
|
struct reg_state *regs = state->regs;
|
|
|
|
struct bpf_map *map = NULL;
|
|
|
|
struct reg_state *reg;
|
|
|
|
int i, err;
|
|
|
|
|
|
|
|
/* find function prototype */
|
|
|
|
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
|
|
|
|
verbose("invalid func %d\n", func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (env->prog->aux->ops->get_func_proto)
|
|
|
|
fn = env->prog->aux->ops->get_func_proto(func_id);
|
|
|
|
|
|
|
|
if (!fn) {
|
|
|
|
verbose("unknown func %d\n", func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* eBPF programs must be GPL compatible to use GPL-ed functions */
|
2015-03-01 19:31:47 +08:00
|
|
|
if (!env->prog->gpl_compatible && fn->gpl_only) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("cannot call GPL only function from proprietary program\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check args */
|
|
|
|
err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* reset caller saved regs */
|
|
|
|
for (i = 0; i < CALLER_SAVED_REGS; i++) {
|
|
|
|
reg = regs + caller_saved[i];
|
|
|
|
reg->type = NOT_INIT;
|
|
|
|
reg->imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* update return register */
|
|
|
|
if (fn->ret_type == RET_INTEGER) {
|
|
|
|
regs[BPF_REG_0].type = UNKNOWN_VALUE;
|
|
|
|
} else if (fn->ret_type == RET_VOID) {
|
|
|
|
regs[BPF_REG_0].type = NOT_INIT;
|
|
|
|
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
|
|
|
|
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
|
|
|
|
/* remember map_ptr, so that check_map_access()
|
|
|
|
* can check 'value_size' boundary of memory access
|
|
|
|
* to map element returned from bpf_map_lookup_elem()
|
|
|
|
*/
|
|
|
|
if (map == NULL) {
|
|
|
|
verbose("kernel subsystem misconfigured verifier\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
regs[BPF_REG_0].map_ptr = map;
|
|
|
|
} else {
|
|
|
|
verbose("unknown return type %d of func %d\n",
|
|
|
|
fn->ret_type, func_id);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
bpf: allow bpf programs to tail-call other bpf programs
introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
...
bpf_tail_call(ctx, &jmp_table, index);
...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
...
if (jmp_table[index])
return (*jmp_table[index])(ctx);
...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.
bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table
Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.
New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.
The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.
Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
==========
- simplify complex programs by splitting them into a sequence of small programs
- dispatch routine
For tracing and future seccomp the program may be triggered on all system
calls, but processing of syscall arguments will be different. It's more
efficient to implement them as:
int syscall_entry(struct seccomp_data *ctx)
{
bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
... default: process unknown syscall ...
}
int sys_write_event(struct seccomp_data *ctx) {...}
int sys_read_event(struct seccomp_data *ctx) {...}
syscall_jmp_table[__NR_write] = sys_write_event;
syscall_jmp_table[__NR_read] = sys_read_event;
For networking the program may call into different parsers depending on
packet format, like:
int packet_parser(struct __sk_buff *skb)
{
... parse L2, L3 here ...
__u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
... default: process unknown protocol ...
}
int parse_tcp(struct __sk_buff *skb) {...}
int parse_udp(struct __sk_buff *skb) {...}
ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
ipproto_jmp_table[IPPROTO_UDP] = parse_udp;
- for TC use case, bpf_tail_call() allows to implement reclassify-like logic
- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
are atomic, so user space can build chains of BPF programs on the fly
Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
It could have been implemented without JIT changes as a wrapper on top of
BPF_PROG_RUN() macro, but with two downsides:
. all programs would have to pay performance penalty for this feature and
tail call itself would be slower, since mandatory stack unwind, return,
stack allocate would be done for every tailcall.
. tailcall would be limited to programs running preempt_disabled, since
generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
need to be either global per_cpu variable accessed by helper and by wrapper
or global variable protected by locks.
In this implementation x64 JIT bypasses stack unwind and jumps into the
callee program after prologue.
- bpf_prog_array_compatible() ensures that prog_type of callee and caller
are the same and JITed/non-JITed flag is the same, since calling JITed
program from non-JITed is invalid, since stack frames are different.
Similarly calling kprobe type program from socket type program is invalid.
- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
abstraction, its user space API and all of verifier logic.
It's in the existing arraymap.c file, since several functions are
shared with regular array map.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 07:59:03 +08:00
|
|
|
|
2015-08-06 15:02:35 +08:00
|
|
|
err = check_map_func_compatibility(map, func_id);
|
|
|
|
if (err)
|
|
|
|
return err;
|
bpf: allow bpf programs to tail-call other bpf programs
introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
...
bpf_tail_call(ctx, &jmp_table, index);
...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
...
if (jmp_table[index])
return (*jmp_table[index])(ctx);
...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.
bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table
Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.
New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.
The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.
Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
==========
- simplify complex programs by splitting them into a sequence of small programs
- dispatch routine
For tracing and future seccomp the program may be triggered on all system
calls, but processing of syscall arguments will be different. It's more
efficient to implement them as:
int syscall_entry(struct seccomp_data *ctx)
{
bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
... default: process unknown syscall ...
}
int sys_write_event(struct seccomp_data *ctx) {...}
int sys_read_event(struct seccomp_data *ctx) {...}
syscall_jmp_table[__NR_write] = sys_write_event;
syscall_jmp_table[__NR_read] = sys_read_event;
For networking the program may call into different parsers depending on
packet format, like:
int packet_parser(struct __sk_buff *skb)
{
... parse L2, L3 here ...
__u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
... default: process unknown protocol ...
}
int parse_tcp(struct __sk_buff *skb) {...}
int parse_udp(struct __sk_buff *skb) {...}
ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
ipproto_jmp_table[IPPROTO_UDP] = parse_udp;
- for TC use case, bpf_tail_call() allows to implement reclassify-like logic
- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
are atomic, so user space can build chains of BPF programs on the fly
Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
It could have been implemented without JIT changes as a wrapper on top of
BPF_PROG_RUN() macro, but with two downsides:
. all programs would have to pay performance penalty for this feature and
tail call itself would be slower, since mandatory stack unwind, return,
stack allocate would be done for every tailcall.
. tailcall would be limited to programs running preempt_disabled, since
generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
need to be either global per_cpu variable accessed by helper and by wrapper
or global variable protected by locks.
In this implementation x64 JIT bypasses stack unwind and jumps into the
callee program after prologue.
- bpf_prog_array_compatible() ensures that prog_type of callee and caller
are the same and JITed/non-JITed flag is the same, since calling JITed
program from non-JITed is invalid, since stack frames are different.
Similarly calling kprobe type program from socket type program is invalid.
- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
abstraction, its user space API and all of verifier logic.
It's in the existing arraymap.c file, since several functions are
shared with regular array map.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 07:59:03 +08:00
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check validity of 32-bit and 64-bit arithmetic operations */
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
{
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
struct reg_state *regs = env->cur_state.regs;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (opcode == BPF_END || opcode == BPF_NEG) {
|
|
|
|
if (opcode == BPF_NEG) {
|
|
|
|
if (BPF_SRC(insn->code) != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->off != 0 || insn->imm != 0) {
|
|
|
|
verbose("BPF_NEG uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
|
|
|
|
(insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
|
|
|
|
verbose("BPF_END uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->dst_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_MOV) {
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0 || insn->off != 0) {
|
|
|
|
verbose("BPF_MOV uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
|
|
|
|
verbose("BPF_MOV uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (BPF_CLASS(insn->code) == BPF_ALU64) {
|
|
|
|
/* case: R1 = R2
|
|
|
|
* copy register state to dest reg
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg] = regs[insn->src_reg];
|
|
|
|
} else {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d partial copy of pointer\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
regs[insn->dst_reg].type = UNKNOWN_VALUE;
|
|
|
|
regs[insn->dst_reg].map_ptr = NULL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* case: R = imm
|
|
|
|
* remember the value we stored into this reg
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
regs[insn->dst_reg].imm = insn->imm;
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (opcode > BPF_END) {
|
|
|
|
verbose("invalid BPF_ALU opcode %x\n", opcode);
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
} else { /* all other ALU ops: and, sub, xor, add, ... */
|
|
|
|
|
|
|
|
bool stack_relative = false;
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0 || insn->off != 0) {
|
|
|
|
verbose("BPF_ALU uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
|
|
|
|
verbose("BPF_ALU uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
|
|
|
|
BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
|
|
|
|
verbose("div by zero\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-01-13 03:17:08 +08:00
|
|
|
if ((opcode == BPF_LSH || opcode == BPF_RSH ||
|
|
|
|
opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
|
|
|
|
int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
|
|
|
|
|
|
|
|
if (insn->imm < 0 || insn->imm >= size) {
|
|
|
|
verbose("invalid shift %d\n", insn->imm);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* pattern match 'bpf_add Rx, imm' instruction */
|
|
|
|
if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
|
|
|
|
regs[insn->dst_reg].type == FRAME_PTR &&
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
BPF_SRC(insn->code) == BPF_K) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
stack_relative = true;
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
} else if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->dst_reg);
|
|
|
|
return -EACCES;
|
|
|
|
} else if (BPF_SRC(insn->code) == BPF_X &&
|
|
|
|
is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d pointer arithmetic prohibited\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
|
|
|
|
/* check dest operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (stack_relative) {
|
|
|
|
regs[insn->dst_reg].type = PTR_TO_STACK;
|
|
|
|
regs[insn->dst_reg].imm = insn->imm;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_cond_jmp_op(struct verifier_env *env,
|
|
|
|
struct bpf_insn *insn, int *insn_idx)
|
|
|
|
{
|
|
|
|
struct reg_state *regs = env->cur_state.regs;
|
|
|
|
struct verifier_state *other_branch;
|
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (opcode > BPF_EXIT) {
|
|
|
|
verbose("invalid BPF_JMP opcode %x\n", opcode);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BPF_SRC(insn->code) == BPF_X) {
|
|
|
|
if (insn->imm != 0) {
|
|
|
|
verbose("BPF_JMP uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
|
|
|
|
if (is_pointer_value(env, insn->src_reg)) {
|
|
|
|
verbose("R%d pointer comparison prohibited\n",
|
|
|
|
insn->src_reg);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else {
|
|
|
|
if (insn->src_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_JMP uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* detect if R == 0 where R was initialized to zero earlier */
|
|
|
|
if (BPF_SRC(insn->code) == BPF_K &&
|
|
|
|
(opcode == BPF_JEQ || opcode == BPF_JNE) &&
|
|
|
|
regs[insn->dst_reg].type == CONST_IMM &&
|
|
|
|
regs[insn->dst_reg].imm == insn->imm) {
|
|
|
|
if (opcode == BPF_JEQ) {
|
|
|
|
/* if (imm == imm) goto pc+off;
|
|
|
|
* only follow the goto, ignore fall-through
|
|
|
|
*/
|
|
|
|
*insn_idx += insn->off;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
/* if (imm != imm) goto pc+off;
|
|
|
|
* only follow fall-through branch, since
|
|
|
|
* that's where the program will go
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
|
|
|
|
if (!other_branch)
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
|
|
|
|
if (BPF_SRC(insn->code) == BPF_K &&
|
|
|
|
insn->imm == 0 && (opcode == BPF_JEQ ||
|
|
|
|
opcode == BPF_JNE) &&
|
|
|
|
regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) {
|
|
|
|
if (opcode == BPF_JEQ) {
|
|
|
|
/* next fallthrough insn can access memory via
|
|
|
|
* this register
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
|
|
|
|
/* branch targer cannot access it, since reg == 0 */
|
|
|
|
other_branch->regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
other_branch->regs[insn->dst_reg].imm = 0;
|
|
|
|
} else {
|
|
|
|
other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
|
|
|
|
regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
regs[insn->dst_reg].imm = 0;
|
|
|
|
}
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
} else if (is_pointer_value(env, insn->dst_reg)) {
|
|
|
|
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
|
|
|
|
return -EACCES;
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else if (BPF_SRC(insn->code) == BPF_K &&
|
|
|
|
(opcode == BPF_JEQ || opcode == BPF_JNE)) {
|
|
|
|
|
|
|
|
if (opcode == BPF_JEQ) {
|
|
|
|
/* detect if (R == imm) goto
|
|
|
|
* and in the target state recognize that R = imm
|
|
|
|
*/
|
|
|
|
other_branch->regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
other_branch->regs[insn->dst_reg].imm = insn->imm;
|
|
|
|
} else {
|
|
|
|
/* detect if (R != imm) goto
|
|
|
|
* and in the fall-through state recognize that R = imm
|
|
|
|
*/
|
|
|
|
regs[insn->dst_reg].type = CONST_IMM;
|
|
|
|
regs[insn->dst_reg].imm = insn->imm;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (log_level)
|
|
|
|
print_verifier_state(env);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
/* return the map pointer stored inside BPF_LD_IMM64 instruction */
|
|
|
|
static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
|
|
|
|
|
|
|
|
return (struct bpf_map *) (unsigned long) imm64;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* verify BPF_LD_IMM64 instruction */
|
|
|
|
static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
struct reg_state *regs = env->cur_state.regs;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (BPF_SIZE(insn->code) != BPF_DW) {
|
|
|
|
verbose("invalid BPF_LD_IMM insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (insn->off != 0) {
|
|
|
|
verbose("BPF_LD_IMM64 uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (insn->src_reg == 0)
|
|
|
|
/* generic move 64-bit immediate into a register */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
|
|
|
|
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
|
|
|
|
|
|
|
|
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
|
|
|
|
regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
ebpf: add sched_cls_type and map it to sk_filter's verifier ops
As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.
The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.
This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.
For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.
That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.
In future, we could place all bpf_prog_type_list at a central place,
perhaps.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 19:31:46 +08:00
|
|
|
static bool may_access_skb(enum bpf_prog_type type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case BPF_PROG_TYPE_SOCKET_FILTER:
|
|
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
2015-03-20 22:11:11 +08:00
|
|
|
case BPF_PROG_TYPE_SCHED_ACT:
|
ebpf: add sched_cls_type and map it to sk_filter's verifier ops
As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.
The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.
This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.
For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.
That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.
In future, we could place all bpf_prog_type_list at a central place,
perhaps.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 19:31:46 +08:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-02 07:06:34 +08:00
|
|
|
/* verify safety of LD_ABS|LD_IND instructions:
|
|
|
|
* - they can only appear in the programs where ctx == skb
|
|
|
|
* - since they are wrappers of function calls, they scratch R1-R5 registers,
|
|
|
|
* preserve R6-R9, and store return value into R0
|
|
|
|
*
|
|
|
|
* Implicit input:
|
|
|
|
* ctx == skb == R6 == CTX
|
|
|
|
*
|
|
|
|
* Explicit input:
|
|
|
|
* SRC == any register
|
|
|
|
* IMM == 32-bit immediate
|
|
|
|
*
|
|
|
|
* Output:
|
|
|
|
* R0 - 8/16/32-bit skb data converted to cpu endianness
|
|
|
|
*/
|
|
|
|
static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
|
|
|
|
{
|
|
|
|
struct reg_state *regs = env->cur_state.regs;
|
|
|
|
u8 mode = BPF_MODE(insn->code);
|
|
|
|
struct reg_state *reg;
|
|
|
|
int i, err;
|
|
|
|
|
2015-03-01 19:31:47 +08:00
|
|
|
if (!may_access_skb(env->prog->type)) {
|
ebpf: add sched_cls_type and map it to sk_filter's verifier ops
As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.
The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.
This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.
For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.
That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.
In future, we could place all bpf_prog_type_list at a central place,
perhaps.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 19:31:46 +08:00
|
|
|
verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n");
|
2014-12-02 07:06:34 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
|
|
|
|
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
|
|
|
|
verbose("BPF_LD_ABS uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check whether implicit source operand (register R6) is readable */
|
|
|
|
err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (regs[BPF_REG_6].type != PTR_TO_CTX) {
|
|
|
|
verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mode == BPF_IND) {
|
|
|
|
/* check explicit source operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reset caller saved regs to unreadable */
|
|
|
|
for (i = 0; i < CALLER_SAVED_REGS; i++) {
|
|
|
|
reg = regs + caller_saved[i];
|
|
|
|
reg->type = NOT_INIT;
|
|
|
|
reg->imm = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mark destination R0 register as readable, since it contains
|
|
|
|
* the value fetched from the packet
|
|
|
|
*/
|
|
|
|
regs[BPF_REG_0].type = UNKNOWN_VALUE;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:05 +08:00
|
|
|
/* non-recursive DFS pseudo code
|
|
|
|
* 1 procedure DFS-iterative(G,v):
|
|
|
|
* 2 label v as discovered
|
|
|
|
* 3 let S be a stack
|
|
|
|
* 4 S.push(v)
|
|
|
|
* 5 while S is not empty
|
|
|
|
* 6 t <- S.pop()
|
|
|
|
* 7 if t is what we're looking for:
|
|
|
|
* 8 return t
|
|
|
|
* 9 for all edges e in G.adjacentEdges(t) do
|
|
|
|
* 10 if edge e is already labelled
|
|
|
|
* 11 continue with the next edge
|
|
|
|
* 12 w <- G.adjacentVertex(t,e)
|
|
|
|
* 13 if vertex w is not discovered and not explored
|
|
|
|
* 14 label e as tree-edge
|
|
|
|
* 15 label w as discovered
|
|
|
|
* 16 S.push(w)
|
|
|
|
* 17 continue at 5
|
|
|
|
* 18 else if vertex w is discovered
|
|
|
|
* 19 label e as back-edge
|
|
|
|
* 20 else
|
|
|
|
* 21 // vertex w is explored
|
|
|
|
* 22 label e as forward- or cross-edge
|
|
|
|
* 23 label t as explored
|
|
|
|
* 24 S.pop()
|
|
|
|
*
|
|
|
|
* convention:
|
|
|
|
* 0x10 - discovered
|
|
|
|
* 0x11 - discovered and fall-through edge labelled
|
|
|
|
* 0x12 - discovered and fall-through and branch edges labelled
|
|
|
|
* 0x20 - explored
|
|
|
|
*/
|
|
|
|
|
|
|
|
enum {
|
|
|
|
DISCOVERED = 0x10,
|
|
|
|
EXPLORED = 0x20,
|
|
|
|
FALLTHROUGH = 1,
|
|
|
|
BRANCH = 2,
|
|
|
|
};
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
|
|
|
|
|
2014-09-26 15:17:05 +08:00
|
|
|
static int *insn_stack; /* stack of insns to process */
|
|
|
|
static int cur_stack; /* current stack index */
|
|
|
|
static int *insn_state;
|
|
|
|
|
|
|
|
/* t, w, e - match pseudo-code above:
|
|
|
|
* t - index of current instruction
|
|
|
|
* w - next instruction
|
|
|
|
* e - edge
|
|
|
|
*/
|
|
|
|
static int push_insn(int t, int w, int e, struct verifier_env *env)
|
|
|
|
{
|
|
|
|
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (w < 0 || w >= env->prog->len) {
|
|
|
|
verbose("jump out of range from insn %d to %d\n", t, w);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
if (e == BRANCH)
|
|
|
|
/* mark branch target for state pruning */
|
|
|
|
env->explored_states[w] = STATE_LIST_MARK;
|
|
|
|
|
2014-09-26 15:17:05 +08:00
|
|
|
if (insn_state[w] == 0) {
|
|
|
|
/* tree-edge */
|
|
|
|
insn_state[t] = DISCOVERED | e;
|
|
|
|
insn_state[w] = DISCOVERED;
|
|
|
|
if (cur_stack >= env->prog->len)
|
|
|
|
return -E2BIG;
|
|
|
|
insn_stack[cur_stack++] = w;
|
|
|
|
return 1;
|
|
|
|
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
|
|
|
|
verbose("back-edge from insn %d to %d\n", t, w);
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (insn_state[w] == EXPLORED) {
|
|
|
|
/* forward- or cross-edge */
|
|
|
|
insn_state[t] = DISCOVERED | e;
|
|
|
|
} else {
|
|
|
|
verbose("insn state internal bug\n");
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* non-recursive depth-first-search to detect loops in BPF program
|
|
|
|
* loop == back-edge in directed graph
|
|
|
|
*/
|
|
|
|
static int check_cfg(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct bpf_insn *insns = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int ret = 0;
|
|
|
|
int i, t;
|
|
|
|
|
|
|
|
insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
|
|
|
|
if (!insn_state)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
|
|
|
|
if (!insn_stack) {
|
|
|
|
kfree(insn_state);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
|
|
|
|
insn_stack[0] = 0; /* 0 is the first instruction */
|
|
|
|
cur_stack = 1;
|
|
|
|
|
|
|
|
peek_stack:
|
|
|
|
if (cur_stack == 0)
|
|
|
|
goto check_state;
|
|
|
|
t = insn_stack[cur_stack - 1];
|
|
|
|
|
|
|
|
if (BPF_CLASS(insns[t].code) == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insns[t].code);
|
|
|
|
|
|
|
|
if (opcode == BPF_EXIT) {
|
|
|
|
goto mark_explored;
|
|
|
|
} else if (opcode == BPF_CALL) {
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-06 04:33:17 +08:00
|
|
|
if (t + 1 < insn_cnt)
|
|
|
|
env->explored_states[t + 1] = STATE_LIST_MARK;
|
2014-09-26 15:17:05 +08:00
|
|
|
} else if (opcode == BPF_JA) {
|
|
|
|
if (BPF_SRC(insns[t].code) != BPF_K) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
/* unconditional jump with single edge */
|
|
|
|
ret = push_insn(t, t + insns[t].off + 1,
|
|
|
|
FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
/* tell verifier to check for equivalent states
|
|
|
|
* after every call and jump
|
|
|
|
*/
|
2015-04-15 06:57:13 +08:00
|
|
|
if (t + 1 < insn_cnt)
|
|
|
|
env->explored_states[t + 1] = STATE_LIST_MARK;
|
2014-09-26 15:17:05 +08:00
|
|
|
} else {
|
|
|
|
/* conditional jump with two edges */
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
|
|
|
|
ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* all other non-branch instructions with single
|
|
|
|
* fall-through edge
|
|
|
|
*/
|
|
|
|
ret = push_insn(t, t + 1, FALLTHROUGH, env);
|
|
|
|
if (ret == 1)
|
|
|
|
goto peek_stack;
|
|
|
|
else if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
mark_explored:
|
|
|
|
insn_state[t] = EXPLORED;
|
|
|
|
if (cur_stack-- <= 0) {
|
|
|
|
verbose("pop stack internal bug\n");
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
goto peek_stack;
|
|
|
|
|
|
|
|
check_state:
|
|
|
|
for (i = 0; i < insn_cnt; i++) {
|
|
|
|
if (insn_state[i] != EXPLORED) {
|
|
|
|
verbose("unreachable insn %d\n", i);
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret = 0; /* cfg looks good */
|
|
|
|
|
|
|
|
err_free:
|
|
|
|
kfree(insn_state);
|
|
|
|
kfree(insn_stack);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
/* compare two verifier states
|
|
|
|
*
|
|
|
|
* all states stored in state_list are known to be valid, since
|
|
|
|
* verifier reached 'bpf_exit' instruction through them
|
|
|
|
*
|
|
|
|
* this function is called when verifier exploring different branches of
|
|
|
|
* execution popped from the state stack. If it sees an old state that has
|
|
|
|
* more strict register state and more strict stack state then this execution
|
|
|
|
* branch doesn't need to be explored further, since verifier already
|
|
|
|
* concluded that more strict state leads to valid finish.
|
|
|
|
*
|
|
|
|
* Therefore two states are equivalent if register state is more conservative
|
|
|
|
* and explored stack state is more conservative than the current one.
|
|
|
|
* Example:
|
|
|
|
* explored current
|
|
|
|
* (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
|
|
|
|
* (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
|
|
|
|
*
|
|
|
|
* In other words if current stack state (one being explored) has more
|
|
|
|
* valid slots than old one that already passed validation, it means
|
|
|
|
* the verifier can stop exploring and conclude that current state is valid too
|
|
|
|
*
|
|
|
|
* Similarly with registers. If explored state has register type as invalid
|
|
|
|
* whereas register type in current state is meaningful, it means that
|
|
|
|
* the current state will reach 'bpf_exit' instruction safely
|
|
|
|
*/
|
|
|
|
static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_REG; i++) {
|
|
|
|
if (memcmp(&old->regs[i], &cur->regs[i],
|
|
|
|
sizeof(old->regs[0])) != 0) {
|
|
|
|
if (old->regs[i].type == NOT_INIT ||
|
2014-10-21 05:54:57 +08:00
|
|
|
(old->regs[i].type == UNKNOWN_VALUE &&
|
|
|
|
cur->regs[i].type != NOT_INIT))
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
continue;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_BPF_STACK; i++) {
|
2014-10-29 06:11:41 +08:00
|
|
|
if (old->stack_slot_type[i] == STACK_INVALID)
|
|
|
|
continue;
|
|
|
|
if (old->stack_slot_type[i] != cur->stack_slot_type[i])
|
|
|
|
/* Ex: old explored (safe) state has STACK_SPILL in
|
|
|
|
* this stack slot, but current has has STACK_MISC ->
|
|
|
|
* this verifier states are not equivalent,
|
|
|
|
* return false to continue verification of this path
|
|
|
|
*/
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
return false;
|
2014-10-29 06:11:41 +08:00
|
|
|
if (i % BPF_REG_SIZE)
|
|
|
|
continue;
|
|
|
|
if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
|
|
|
|
&cur->spilled_regs[i / BPF_REG_SIZE],
|
|
|
|
sizeof(old->spilled_regs[0])))
|
|
|
|
/* when explored and current stack slot types are
|
|
|
|
* the same, check that stored pointers types
|
|
|
|
* are the same as well.
|
|
|
|
* Ex: explored safe path could have stored
|
|
|
|
* (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
|
|
|
|
* but current path has stored:
|
|
|
|
* (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
|
|
|
|
* such verifier states are not equivalent.
|
|
|
|
* return false to continue verification of this path
|
|
|
|
*/
|
|
|
|
return false;
|
|
|
|
else
|
|
|
|
continue;
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int is_state_visited(struct verifier_env *env, int insn_idx)
|
|
|
|
{
|
|
|
|
struct verifier_state_list *new_sl;
|
|
|
|
struct verifier_state_list *sl;
|
|
|
|
|
|
|
|
sl = env->explored_states[insn_idx];
|
|
|
|
if (!sl)
|
|
|
|
/* this 'insn_idx' instruction wasn't marked, so we will not
|
|
|
|
* be doing state search here
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
while (sl != STATE_LIST_MARK) {
|
|
|
|
if (states_equal(&sl->state, &env->cur_state))
|
|
|
|
/* reached equivalent register/stack state,
|
|
|
|
* prune the search
|
|
|
|
*/
|
|
|
|
return 1;
|
|
|
|
sl = sl->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* there were no equivalent states, remember current one.
|
|
|
|
* technically the current state is not proven to be safe yet,
|
|
|
|
* but it will either reach bpf_exit (which means it's safe) or
|
|
|
|
* it will be rejected. Since there are no loops, we won't be
|
|
|
|
* seeing this 'insn_idx' instruction again on the way to bpf_exit
|
|
|
|
*/
|
|
|
|
new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
|
|
|
|
if (!new_sl)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/* add new state to the head of linked list */
|
|
|
|
memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
|
|
|
|
new_sl->next = env->explored_states[insn_idx];
|
|
|
|
env->explored_states[insn_idx] = new_sl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
static int do_check(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct verifier_state *state = &env->cur_state;
|
|
|
|
struct bpf_insn *insns = env->prog->insnsi;
|
|
|
|
struct reg_state *regs = state->regs;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int insn_idx, prev_insn_idx = 0;
|
|
|
|
int insn_processed = 0;
|
|
|
|
bool do_print_state = false;
|
|
|
|
|
|
|
|
init_reg_state(regs);
|
|
|
|
insn_idx = 0;
|
|
|
|
for (;;) {
|
|
|
|
struct bpf_insn *insn;
|
|
|
|
u8 class;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (insn_idx >= insn_cnt) {
|
|
|
|
verbose("invalid insn idx %d insn_cnt %d\n",
|
|
|
|
insn_idx, insn_cnt);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn = &insns[insn_idx];
|
|
|
|
class = BPF_CLASS(insn->code);
|
|
|
|
|
bpf, verifier: further improve search pruning
The verifier needs to go through every path of the program in
order to check that it terminates safely, which can be quite a
lot of instructions that need to be processed f.e. in cases with
more branchy programs. With search pruning from f1bca824dabb ("bpf:
add search pruning optimization to verifier") the search space can
already be reduced significantly when the verifier detects that
a previously walked path with same register and stack contents
terminated already (see verifier's states_equal()), so the search
can skip walking those states.
When working with larger programs of > ~2000 (out of max 4096)
insns, we found that the current limit of 32k instructions is easily
hit. For example, a case we ran into is that the search space cannot
be pruned due to branches at the beginning of the program that make
use of certain stack space slots (STACK_MISC), which are never used
in the remaining program (STACK_INVALID). Therefore, the verifier
needs to walk paths for the slots in STACK_INVALID state, but also
all remaining paths with a stack structure, where the slots are in
STACK_MISC, which can nearly double the search space needed. After
various experiments, we find that a limit of 64k processed insns is
a more reasonable choice when dealing with larger programs in practice.
This still allows to reject extreme crafted cases that can have a
much higher complexity (f.e. > ~300k) within the 4096 insns limit
due to search pruning not being able to take effect.
Furthermore, we found that a lot of states can be pruned after a
call instruction, f.e. we were able to reduce the search state by
~35% in some cases with this heuristic, trade-off is to keep a bit
more states in env->explored_states. Usually, call instructions
have a number of preceding register assignments and/or stack stores,
where search pruning has a better chance to suceed in states_equal()
test. The current code marks the branch targets with STATE_LIST_MARK
in case of conditional jumps, and the next (t + 1) instruction in
case of unconditional jump so that f.e. a backjump will walk it. We
also did experiments with using t + insns[t].off + 1 as a marker in
the unconditionally jump case instead of t + 1 with the rationale
that these two branches of execution that converge after the label
might have more potential of pruning. We found that it was a bit
better, but not necessarily significantly better than the current
state, perhaps also due to clang not generating back jumps often.
Hence, we left that as is for now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-06 04:33:17 +08:00
|
|
|
if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
verbose("BPF program is too large. Proccessed %d insn\n",
|
|
|
|
insn_processed);
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
err = is_state_visited(env, insn_idx);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
if (err == 1) {
|
|
|
|
/* found equivalent state, can prune the search */
|
|
|
|
if (log_level) {
|
|
|
|
if (do_print_state)
|
|
|
|
verbose("\nfrom %d to %d: safe\n",
|
|
|
|
prev_insn_idx, insn_idx);
|
|
|
|
else
|
|
|
|
verbose("%d: safe\n", insn_idx);
|
|
|
|
}
|
|
|
|
goto process_bpf_exit;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
if (log_level && do_print_state) {
|
|
|
|
verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
|
|
|
|
print_verifier_state(env);
|
|
|
|
do_print_state = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (log_level) {
|
|
|
|
verbose("%d: ", insn_idx);
|
|
|
|
print_bpf_insn(insn);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (class == BPF_ALU || class == BPF_ALU64) {
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
err = check_alu_op(env, insn);
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (class == BPF_LDX) {
|
2015-03-14 02:57:42 +08:00
|
|
|
enum bpf_reg_type src_reg_type;
|
|
|
|
|
|
|
|
/* check for reserved fields is already done */
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-04-16 07:19:33 +08:00
|
|
|
src_reg_type = regs[insn->src_reg].type;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check that memory (src_reg + off) is readable,
|
|
|
|
* the state of dst_reg will be updated by this func
|
|
|
|
*/
|
|
|
|
err = check_mem_access(env, insn->src_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_READ,
|
|
|
|
insn->dst_reg);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-04-16 07:19:33 +08:00
|
|
|
if (BPF_SIZE(insn->code) != BPF_W) {
|
|
|
|
insn_idx++;
|
|
|
|
continue;
|
|
|
|
}
|
2015-03-14 02:57:42 +08:00
|
|
|
|
2015-04-16 07:19:33 +08:00
|
|
|
if (insn->imm == 0) {
|
2015-03-14 02:57:42 +08:00
|
|
|
/* saw a valid insn
|
|
|
|
* dst_reg = *(u32 *)(src_reg + off)
|
|
|
|
* use reserved 'imm' field to mark this insn
|
|
|
|
*/
|
|
|
|
insn->imm = src_reg_type;
|
|
|
|
|
|
|
|
} else if (src_reg_type != insn->imm &&
|
|
|
|
(src_reg_type == PTR_TO_CTX ||
|
|
|
|
insn->imm == PTR_TO_CTX)) {
|
|
|
|
/* ABuser program is trying to use the same insn
|
|
|
|
* dst_reg = *(u32*) (src_reg + off)
|
|
|
|
* with different pointer types:
|
|
|
|
* src_reg == ctx in one branch and
|
|
|
|
* src_reg == stack|map in some other branch.
|
|
|
|
* Reject it.
|
|
|
|
*/
|
|
|
|
verbose("same insn cannot be used with different pointers\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else if (class == BPF_STX) {
|
2015-06-05 01:11:54 +08:00
|
|
|
enum bpf_reg_type dst_reg_type;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
if (BPF_MODE(insn->code) == BPF_XADD) {
|
|
|
|
err = check_xadd(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
insn_idx++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src1 operand */
|
|
|
|
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
/* check src2 operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-06-05 01:11:54 +08:00
|
|
|
dst_reg_type = regs[insn->dst_reg].type;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
/* check that memory (dst_reg + off) is writeable */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE,
|
|
|
|
insn->src_reg);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2015-06-05 01:11:54 +08:00
|
|
|
if (insn->imm == 0) {
|
|
|
|
insn->imm = dst_reg_type;
|
|
|
|
} else if (dst_reg_type != insn->imm &&
|
|
|
|
(dst_reg_type == PTR_TO_CTX ||
|
|
|
|
insn->imm == PTR_TO_CTX)) {
|
|
|
|
verbose("same insn cannot be used with different pointers\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else if (class == BPF_ST) {
|
|
|
|
if (BPF_MODE(insn->code) != BPF_MEM ||
|
|
|
|
insn->src_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_ST uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* check src operand */
|
|
|
|
err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* check that memory (dst_reg + off) is writeable */
|
|
|
|
err = check_mem_access(env, insn->dst_reg, insn->off,
|
|
|
|
BPF_SIZE(insn->code), BPF_WRITE,
|
|
|
|
-1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (class == BPF_JMP) {
|
|
|
|
u8 opcode = BPF_OP(insn->code);
|
|
|
|
|
|
|
|
if (opcode == BPF_CALL) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->off != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_CALL uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = check_call(env, insn->imm);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_JA) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->imm != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_JA uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_idx += insn->off + 1;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
} else if (opcode == BPF_EXIT) {
|
|
|
|
if (BPF_SRC(insn->code) != BPF_K ||
|
|
|
|
insn->imm != 0 ||
|
|
|
|
insn->src_reg != BPF_REG_0 ||
|
|
|
|
insn->dst_reg != BPF_REG_0) {
|
|
|
|
verbose("BPF_EXIT uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* eBPF calling convetion is such that R0 is used
|
|
|
|
* to return the value from eBPF program.
|
|
|
|
* Make sure that it's readable at this time
|
|
|
|
* of bpf_exit, which means that program wrote
|
|
|
|
* something into it earlier
|
|
|
|
*/
|
|
|
|
err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
if (is_pointer_value(env, BPF_REG_0)) {
|
|
|
|
verbose("R0 leaks addr as return value\n");
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
process_bpf_exit:
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
insn_idx = pop_stack(env, &prev_insn_idx);
|
|
|
|
if (insn_idx < 0) {
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
do_print_state = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
err = check_cond_jmp_op(env, insn, &insn_idx);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} else if (class == BPF_LD) {
|
|
|
|
u8 mode = BPF_MODE(insn->code);
|
|
|
|
|
|
|
|
if (mode == BPF_ABS || mode == BPF_IND) {
|
2014-12-02 07:06:34 +08:00
|
|
|
err = check_ld_abs(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
} else if (mode == BPF_IMM) {
|
|
|
|
err = check_ld_imm(env, insn);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
insn_idx++;
|
|
|
|
} else {
|
|
|
|
verbose("invalid BPF_LD mode\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
verbose("unknown insn class %d\n", class);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
insn_idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
/* look for pseudo eBPF instructions that access map FDs and
|
|
|
|
* replace them with actual map pointers
|
|
|
|
*/
|
|
|
|
static int replace_map_fd_with_map_ptr(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct bpf_insn *insn = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++) {
|
2015-03-14 02:57:42 +08:00
|
|
|
if (BPF_CLASS(insn->code) == BPF_LDX &&
|
2015-06-05 01:11:54 +08:00
|
|
|
(BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
|
2015-03-14 02:57:42 +08:00
|
|
|
verbose("BPF_LDX uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2015-06-05 01:11:54 +08:00
|
|
|
if (BPF_CLASS(insn->code) == BPF_STX &&
|
|
|
|
((BPF_MODE(insn->code) != BPF_MEM &&
|
|
|
|
BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
|
|
|
|
verbose("BPF_STX uses reserved fields\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct fd f;
|
|
|
|
|
|
|
|
if (i == insn_cnt - 1 || insn[1].code != 0 ||
|
|
|
|
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
|
|
|
|
insn[1].off != 0) {
|
|
|
|
verbose("invalid bpf_ld_imm64 insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insn->src_reg == 0)
|
|
|
|
/* valid generic load 64-bit imm */
|
|
|
|
goto next_insn;
|
|
|
|
|
|
|
|
if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
|
|
|
|
verbose("unrecognized bpf_ld_imm64 insn\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
f = fdget(insn->imm);
|
2015-10-29 21:58:07 +08:00
|
|
|
map = __bpf_map_get(f);
|
2014-09-26 15:17:04 +08:00
|
|
|
if (IS_ERR(map)) {
|
|
|
|
verbose("fd %d is not pointing to valid bpf_map\n",
|
|
|
|
insn->imm);
|
|
|
|
fdput(f);
|
|
|
|
return PTR_ERR(map);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* store map pointer inside BPF_LD_IMM64 instruction */
|
|
|
|
insn[0].imm = (u32) (unsigned long) map;
|
|
|
|
insn[1].imm = ((u64) (unsigned long) map) >> 32;
|
|
|
|
|
|
|
|
/* check whether we recorded this map already */
|
|
|
|
for (j = 0; j < env->used_map_cnt; j++)
|
|
|
|
if (env->used_maps[j] == map) {
|
|
|
|
fdput(f);
|
|
|
|
goto next_insn;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (env->used_map_cnt >= MAX_USED_MAPS) {
|
|
|
|
fdput(f);
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* remember this map */
|
|
|
|
env->used_maps[env->used_map_cnt++] = map;
|
|
|
|
|
|
|
|
/* hold the map. If the program is rejected by verifier,
|
|
|
|
* the map will be released by release_maps() or it
|
|
|
|
* will be used by the valid program until it's unloaded
|
|
|
|
* and all maps are released in free_bpf_prog_info()
|
|
|
|
*/
|
bpf: fix clearing on persistent program array maps
Currently, when having map file descriptors pointing to program arrays,
there's still the issue that we unconditionally flush program array
contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens
when such a file descriptor is released and is independent of the map's
refcount.
Having this flush independent of the refcount is for a reason: there
can be arbitrary complex dependency chains among tail calls, also circular
ones (direct or indirect, nesting limit determined during runtime), and
we need to make sure that the map drops all references to eBPF programs
it holds, so that the map's refcount can eventually drop to zero and
initiate its freeing. Btw, a walk of the whole dependency graph would
not be possible for various reasons, one being complexity and another
one inconsistency, i.e. new programs can be added to parts of the graph
at any time, so there's no guaranteed consistent state for the time of
such a walk.
Now, the program array pinning itself works, but the issue is that each
derived file descriptor on close would nevertheless call unconditionally
into bpf_fd_array_map_clear(). Instead, keep track of users and postpone
this flush until the last reference to a user is dropped. As this only
concerns a subset of references (f.e. a prog array could hold a program
that itself has reference on the prog array holding it, etc), we need to
track them separately.
Short analysis on the refcounting: on map creation time usercnt will be
one, so there's no change in behaviour for bpf_map_release(), if unpinned.
If we already fail in map_create(), we are immediately freed, and no
file descriptor has been made public yet. In bpf_obj_pin_user(), we need
to probe for a possible map in bpf_fd_probe_obj() already with a usercnt
reference, so before we drop the reference on the fd with fdput().
Therefore, if actual pinning fails, we need to drop that reference again
in bpf_any_put(), otherwise we keep holding it. When last reference
drops on the inode, the bpf_any_put() in bpf_evict_inode() will take
care of dropping the usercnt again. In the bpf_obj_get_user() case, the
bpf_any_get() will grab a reference on the usercnt, still at a time when
we have the reference on the path. Should we later on fail to grab a new
file descriptor, bpf_any_put() will drop it, otherwise we hold it until
bpf_map_release() time.
Joint work with Alexei.
Fixes: b2197755b263 ("bpf: add support for persistent maps/progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-11-25 04:28:15 +08:00
|
|
|
bpf_map_inc(map, false);
|
2014-09-26 15:17:04 +08:00
|
|
|
fdput(f);
|
|
|
|
next_insn:
|
|
|
|
insn++;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now all pseudo BPF_LD_IMM64 instructions load valid
|
|
|
|
* 'struct bpf_map *' into a register instead of user map_fd.
|
|
|
|
* These pointers will be used later by verifier to validate map access.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* drop refcnt of maps used by the rejected program */
|
|
|
|
static void release_maps(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < env->used_map_cnt; i++)
|
|
|
|
bpf_map_put(env->used_maps[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
|
|
|
|
static void convert_pseudo_ld_imm64(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct bpf_insn *insn = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++)
|
|
|
|
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
|
|
|
|
insn->src_reg = 0;
|
|
|
|
}
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
|
|
|
|
{
|
|
|
|
struct bpf_insn *insn = prog->insnsi;
|
|
|
|
int insn_cnt = prog->len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++) {
|
|
|
|
if (BPF_CLASS(insn->code) != BPF_JMP ||
|
|
|
|
BPF_OP(insn->code) == BPF_CALL ||
|
|
|
|
BPF_OP(insn->code) == BPF_EXIT)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* adjust offset of jmps if necessary */
|
|
|
|
if (i < pos && i + insn->off + 1 > pos)
|
|
|
|
insn->off += delta;
|
bpf: fix branch offset adjustment on backjumps after patching ctx expansion
When ctx access is used, the kernel often needs to expand/rewrite
instructions, so after that patching, branch offsets have to be
adjusted for both forward and backward jumps in the new eBPF program,
but for backward jumps it fails to account the delta. Meaning, for
example, if the expansion happens exactly on the insn that sits at
the jump target, it doesn't fix up the back jump offset.
Analysis on what the check in adjust_branches() is currently doing:
/* adjust offset of jmps if necessary */
if (i < pos && i + insn->off + 1 > pos)
insn->off += delta;
else if (i > pos && i + insn->off + 1 < pos)
insn->off -= delta;
First condition (forward jumps):
Before: After:
insns[0] insns[0]
insns[1] <--- i/insn insns[1] <--- i/insn
insns[2] <--- pos insns[P] <--- pos
insns[3] insns[P] `------| delta
insns[4] <--- target_X insns[P] `-----|
insns[5] insns[3]
insns[4] <--- target_X
insns[5]
First case is if we cross pos-boundary and the jump instruction was
before pos. This is handeled correctly. I.e. if i == pos, then this
would mean our jump that we currently check was the patchlet itself
that we just injected. Since such patchlets are self-contained and
have no awareness of any insns before or after the patched one, the
delta is correctly not adjusted. Also, for the second condition in
case of i + insn->off + 1 == pos, means we jump to that newly patched
instruction, so no offset adjustment are needed. That part is correct.
Second condition (backward jumps):
Before: After:
insns[0] insns[0]
insns[1] <--- target_X insns[1] <--- target_X
insns[2] <--- pos <-- target_Y insns[P] <--- pos <-- target_Y
insns[3] insns[P] `------| delta
insns[4] <--- i/insn insns[P] `-----|
insns[5] insns[3]
insns[4] <--- i/insn
insns[5]
Second interesting case is where we cross pos-boundary and the jump
instruction was after pos. Backward jump with i == pos would be
impossible and pose a bug somewhere in the patchlet, so the first
condition checking i > pos is okay only by itself. However, i +
insn->off + 1 < pos does not always work as intended to trigger the
adjustment. It works when jump targets would be far off where the
delta wouldn't matter. But, for example, where the fixed insn->off
before pointed to pos (target_Y), it now points to pos + delta, so
that additional room needs to be taken into account for the check.
This means that i) both tests here need to be adjusted into pos + delta,
and ii) for the second condition, the test needs to be <= as pos
itself can be a target in the backjump, too.
Fixes: 9bac3d6d548e ("bpf: allow extended BPF programs access skb fields")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-10 23:47:11 +08:00
|
|
|
else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
|
2015-03-14 02:57:42 +08:00
|
|
|
insn->off -= delta;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* convert load instructions that access fields of 'struct __sk_buff'
|
|
|
|
* into sequence of instructions that access fields of 'struct sk_buff'
|
|
|
|
*/
|
|
|
|
static int convert_ctx_accesses(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct bpf_insn *insn = env->prog->insnsi;
|
|
|
|
int insn_cnt = env->prog->len;
|
|
|
|
struct bpf_insn insn_buf[16];
|
|
|
|
struct bpf_prog *new_prog;
|
|
|
|
u32 cnt;
|
|
|
|
int i;
|
2015-06-05 01:11:54 +08:00
|
|
|
enum bpf_access_type type;
|
2015-03-14 02:57:42 +08:00
|
|
|
|
|
|
|
if (!env->prog->aux->ops->convert_ctx_access)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (i = 0; i < insn_cnt; i++, insn++) {
|
2015-06-05 01:11:54 +08:00
|
|
|
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
|
|
|
|
type = BPF_READ;
|
|
|
|
else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
|
|
|
|
type = BPF_WRITE;
|
|
|
|
else
|
2015-03-14 02:57:42 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (insn->imm != PTR_TO_CTX) {
|
|
|
|
/* clear internal mark */
|
|
|
|
insn->imm = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
cnt = env->prog->aux->ops->
|
2015-06-05 01:11:54 +08:00
|
|
|
convert_ctx_access(type, insn->dst_reg, insn->src_reg,
|
2015-10-08 01:55:41 +08:00
|
|
|
insn->off, insn_buf, env->prog);
|
2015-03-14 02:57:42 +08:00
|
|
|
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
|
|
|
verbose("bpf verifier is misconfigured\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cnt == 1) {
|
|
|
|
memcpy(insn, insn_buf, sizeof(*insn));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* several new insns need to be inserted. Make room for them */
|
|
|
|
insn_cnt += cnt - 1;
|
|
|
|
new_prog = bpf_prog_realloc(env->prog,
|
|
|
|
bpf_prog_size(insn_cnt),
|
|
|
|
GFP_USER);
|
|
|
|
if (!new_prog)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
new_prog->len = insn_cnt;
|
|
|
|
|
|
|
|
memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
|
|
|
|
sizeof(*insn) * (insn_cnt - i - cnt));
|
|
|
|
|
|
|
|
/* copy substitute insns in place of load instruction */
|
|
|
|
memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
|
|
|
|
|
|
|
|
/* adjust branches in the whole program */
|
|
|
|
adjust_branches(new_prog, i, cnt - 1);
|
|
|
|
|
|
|
|
/* keep walking new program and skip insns we just inserted */
|
|
|
|
env->prog = new_prog;
|
|
|
|
insn = new_prog->insnsi + i + cnt - 1;
|
|
|
|
i += cnt - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
static void free_states(struct verifier_env *env)
|
|
|
|
{
|
|
|
|
struct verifier_state_list *sl, *sln;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!env->explored_states)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < env->prog->len; i++) {
|
|
|
|
sl = env->explored_states[i];
|
|
|
|
|
|
|
|
if (sl)
|
|
|
|
while (sl != STATE_LIST_MARK) {
|
|
|
|
sln = sl->next;
|
|
|
|
kfree(sl);
|
|
|
|
sl = sln;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
kfree(env->explored_states);
|
|
|
|
}
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
|
2014-09-26 15:17:02 +08:00
|
|
|
{
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
char __user *log_ubuf = NULL;
|
|
|
|
struct verifier_env *env;
|
2014-09-26 15:17:02 +08:00
|
|
|
int ret = -EINVAL;
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
return -E2BIG;
|
|
|
|
|
|
|
|
/* 'struct verifier_env' can be global, but since it's not small,
|
|
|
|
* allocate/free it every time bpf_check() is called
|
|
|
|
*/
|
|
|
|
env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
|
|
|
|
if (!env)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
env->prog = *prog;
|
2014-09-26 15:17:04 +08:00
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
/* grab the mutex to protect few globals used by verifier */
|
|
|
|
mutex_lock(&bpf_verifier_lock);
|
|
|
|
|
|
|
|
if (attr->log_level || attr->log_buf || attr->log_size) {
|
|
|
|
/* user requested verbose verifier output
|
|
|
|
* and supplied buffer to store the verification trace
|
|
|
|
*/
|
|
|
|
log_level = attr->log_level;
|
|
|
|
log_ubuf = (char __user *) (unsigned long) attr->log_buf;
|
|
|
|
log_size = attr->log_size;
|
|
|
|
log_len = 0;
|
|
|
|
|
|
|
|
ret = -EINVAL;
|
|
|
|
/* log_* values have to be sane */
|
|
|
|
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
|
|
|
|
log_level == 0 || log_ubuf == NULL)
|
|
|
|
goto free_env;
|
|
|
|
|
|
|
|
ret = -ENOMEM;
|
|
|
|
log_buf = vmalloc(log_size);
|
|
|
|
if (!log_buf)
|
|
|
|
goto free_env;
|
|
|
|
} else {
|
|
|
|
log_level = 0;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
ret = replace_map_fd_with_map_ptr(env);
|
|
|
|
if (ret < 0)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
env->explored_states = kcalloc(env->prog->len,
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
sizeof(struct verifier_state_list *),
|
|
|
|
GFP_USER);
|
|
|
|
ret = -ENOMEM;
|
|
|
|
if (!env->explored_states)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
2014-09-26 15:17:05 +08:00
|
|
|
ret = check_cfg(env);
|
|
|
|
if (ret < 0)
|
|
|
|
goto skip_full_check;
|
|
|
|
|
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs
teach verifier to prevent pointer leaks.
Verifier will prevent
- any arithmetic on pointers
(except R10+Imm which is used to compute stack addresses)
- comparison of pointers
(except if (map_value_ptr == 0) ... )
- passing pointers to helper functions
- indirectly passing pointers in stack to helper functions
- returning pointer from bpf program
- storing pointers into ctx or maps
Spill/fill of pointers into stack is allowed, but mangling
of pointers stored in the stack or reading them byte by byte is not.
Within bpf programs the pointers do exist, since programs need to
be able to access maps, pass skb pointer to LD_ABS insns, etc
but programs cannot pass such pointer values to the outside
or obfuscate them.
Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs,
so that socket filters (tcpdump), af_packet (quic acceleration)
and future kcm can use it.
tracing and tc cls/act program types still require root permissions,
since tracing actually needs to be able to see all kernel pointers
and tc is for root only.
For example, the following unprivileged socket filter program is allowed:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += skb->len;
return 0;
}
but the following program is not:
int bpf_prog1(struct __sk_buff *skb)
{
u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
u64 *value = bpf_map_lookup_elem(&my_map, &index);
if (value)
*value += (u64) skb;
return 0;
}
since it would leak the kernel address into the map.
Unprivileged socket filter bpf programs have access to the
following helper functions:
- map lookup/update/delete (but they cannot store kernel pointers into them)
- get_random (it's already exposed to unprivileged user space)
- get_smp_processor_id
- tail_call into another socket filter program
- ktime_get_ns
The feature is controlled by sysctl kernel.unprivileged_bpf_disabled.
This toggle defaults to off (0), but can be set true (1). Once true,
bpf programs and maps cannot be accessed from unprivileged process,
and the toggle cannot be set back to false.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-08 13:23:21 +08:00
|
|
|
env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
|
|
|
|
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
ret = do_check(env);
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
skip_full_check:
|
bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6
This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields
Kernel subsystem configures the verifier with two callbacks:
- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
that provides information to the verifer which fields of 'ctx'
are accessible (remember 'ctx' is the first argument to eBPF program)
- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
returns argument constraints of kernel helper functions that eBPF program
may call, so that verifier can checks that R1-R5 types match the prototype
More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:06 +08:00
|
|
|
while (pop_stack(env, NULL) >= 0);
|
bpf: add search pruning optimization to verifier
consider C program represented in eBPF:
int filter(int arg)
{
int a, b, c, *ptr;
if (arg == 1)
ptr = &a;
else if (arg == 2)
ptr = &b;
else
ptr = &c;
*ptr = 0;
return 0;
}
eBPF verifier has to follow all possible paths through the program
to recognize that '*ptr = 0' instruction would be safe to execute
in all situations.
It's doing it by picking a path towards the end and observes changes
to registers and stack at every insn until it reaches bpf_exit.
Then it comes back to one of the previous branches and goes towards
the end again with potentially different values in registers.
When program has a lot of branches, the number of possible combinations
of branches is huge, so verifer has a hard limit of walking no more
than 32k instructions. This limit can be reached and complex (but valid)
programs could be rejected. Therefore it's important to recognize equivalent
verifier states to prune this depth first search.
Basic idea can be illustrated by the program (where .. are some eBPF insns):
1: ..
2: if (rX == rY) goto 4
3: ..
4: ..
5: ..
6: bpf_exit
In the first pass towards bpf_exit the verifier will walk insns: 1, 2, 3, 4, 5, 6
Since insn#2 is a branch the verifier will remember its state in verifier stack
to come back to it later.
Since insn#4 is marked as 'branch target', the verifier will remember its state
in explored_states[4] linked list.
Once it reaches insn#6 successfully it will pop the state recorded at insn#2 and
will continue.
Without search pruning optimization verifier would have to walk 4, 5, 6 again,
effectively simulating execution of insns 1, 2, 4, 5, 6
With search pruning it will check whether state at #4 after jumping from #2
is equivalent to one recorded in explored_states[4] during first pass.
If there is an equivalent state, verifier can prune the search at #4 and declare
this path to be safe as well.
In other words two states at #4 are equivalent if execution of 1, 2, 3, 4 insns
and 1, 2, 4 insns produces equivalent registers and stack.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 09:50:01 +08:00
|
|
|
free_states(env);
|
2014-09-26 15:17:04 +08:00
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
if (ret == 0)
|
|
|
|
/* program is valid, convert *(u32*)(ctx + off) accesses */
|
|
|
|
ret = convert_ctx_accesses(env);
|
|
|
|
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
if (log_level && log_len >= log_size - 1) {
|
|
|
|
BUG_ON(log_len >= log_size);
|
|
|
|
/* verifier log exceeded user supplied buffer */
|
|
|
|
ret = -ENOSPC;
|
|
|
|
/* fall through to return what was recorded */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy verifier log back to user space including trailing zero */
|
|
|
|
if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_log_buf;
|
|
|
|
}
|
|
|
|
|
2014-09-26 15:17:04 +08:00
|
|
|
if (ret == 0 && env->used_map_cnt) {
|
|
|
|
/* if program passed verifier, update used_maps in bpf_prog_info */
|
2015-03-14 02:57:42 +08:00
|
|
|
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
|
|
|
|
sizeof(env->used_maps[0]),
|
|
|
|
GFP_KERNEL);
|
2014-09-26 15:17:04 +08:00
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
if (!env->prog->aux->used_maps) {
|
2014-09-26 15:17:04 +08:00
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_log_buf;
|
|
|
|
}
|
|
|
|
|
2015-03-14 02:57:42 +08:00
|
|
|
memcpy(env->prog->aux->used_maps, env->used_maps,
|
2014-09-26 15:17:04 +08:00
|
|
|
sizeof(env->used_maps[0]) * env->used_map_cnt);
|
2015-03-14 02:57:42 +08:00
|
|
|
env->prog->aux->used_map_cnt = env->used_map_cnt;
|
2014-09-26 15:17:04 +08:00
|
|
|
|
|
|
|
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
|
|
|
|
* bpf_ld_imm64 instructions
|
|
|
|
*/
|
|
|
|
convert_pseudo_ld_imm64(env);
|
|
|
|
}
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
|
|
|
|
free_log_buf:
|
|
|
|
if (log_level)
|
|
|
|
vfree(log_buf);
|
|
|
|
free_env:
|
2015-03-14 02:57:42 +08:00
|
|
|
if (!env->prog->aux->used_maps)
|
2014-09-26 15:17:04 +08:00
|
|
|
/* if we didn't copy map pointers into bpf_prog_info, release
|
|
|
|
* them now. Otherwise free_bpf_prog_info() will release them.
|
|
|
|
*/
|
|
|
|
release_maps(env);
|
2015-03-14 02:57:42 +08:00
|
|
|
*prog = env->prog;
|
bpf: verifier (add ability to receive verification log)
add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
struct {
...
__u32 log_level; /* verbosity level of eBPF verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied 'char *buffer' */
};
};
when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.
'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
BPF_EXIT_INSN(),
will be rejected with the following multi-line message in log_buf:
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (b7) r1 = 0
4: (85) call 1
5: (15) if r0 == 0x0 goto pc+1
R0=map_ptr R10=fp
6: (7a) *(u64 *)(r0 +4) = 0
misaligned access off 4 size 8
The format of the output can change at any time as verifier evolves.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:03 +08:00
|
|
|
kfree(env);
|
|
|
|
mutex_unlock(&bpf_verifier_lock);
|
2014-09-26 15:17:02 +08:00
|
|
|
return ret;
|
|
|
|
}
|