linux_old1/arch/s390/net/bpf_jit_comp.c

1368 lines
36 KiB
C
Raw Normal View History

/*
* BPF Jit compiler for s390.
*
* Minimum build requirements:
*
* - HAVE_MARCH_Z196_FEATURES: laal, laalg
* - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
* - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
* - PACK_STACK
* - 64BIT
*
* Copyright IBM Corp. 2012,2015
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
#define KMSG_COMPONENT "bpf_jit"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/init.h>
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include "bpf_jit.h"
int bpf_jit_enable __read_mostly;
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
u32 seen_reg[16]; /* Array to remember which registers are used */
u32 *addrs; /* Array with relative instruction addresses */
u8 *prg_buf; /* Start of program */
int size; /* Size of program and literal pool */
int size_prg; /* Size of program */
int prg; /* Current position in program */
int lit_start; /* Start of literal pool */
int lit; /* Current position in literal pool */
int base_ip; /* Base address for literal pool */
int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
int tail_call_start; /* Tail call start offset */
int labels[1]; /* Labels for local jumps */
};
#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
#define SEEN_SKB 1 /* skb access */
#define SEEN_MEM 2 /* use mem[] for temporary storage */
#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
#define SEEN_SKB_CHANGE 64 /* code changes skb data */
#define SEEN_REG_AX 128 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
* s390 registers
*/
#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */
#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */
#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */
#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */
#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
#define REG_14 BPF_REG_0 /* Register 14 */
/*
* Mapping of BPF registers to s390 registers
*/
static const int reg2hex[] = {
/* Return code */
[BPF_REG_0] = 14,
/* Function parameters */
[BPF_REG_1] = 2,
[BPF_REG_2] = 3,
[BPF_REG_3] = 4,
[BPF_REG_4] = 5,
[BPF_REG_5] = 6,
/* Call saved registers */
[BPF_REG_6] = 7,
[BPF_REG_7] = 8,
[BPF_REG_8] = 9,
[BPF_REG_9] = 10,
/* BPF stack pointer */
[BPF_REG_FP] = 13,
/* Register for blinding (shared with REG_SKB_DATA) */
[BPF_REG_AX] = 12,
/* SKB data pointer */
[REG_SKB_DATA] = 12,
/* Work registers for s390x backend */
[REG_W0] = 0,
[REG_W1] = 1,
[REG_L] = 11,
[REG_15] = 15,
};
static inline u32 reg(u32 dst_reg, u32 src_reg)
{
return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
}
static inline u32 reg_high(u32 reg)
{
return reg2hex[reg] << 4;
}
static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
u32 r1 = reg2hex[b1];
if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
jit->seen_reg[r1] = 1;
}
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
})
#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
/*
* EMIT macros for code generation
*/
#define _EMIT2(op) \
({ \
if (jit->prg_buf) \
*(u16 *) (jit->prg_buf + jit->prg) = op; \
jit->prg += 2; \
})
#define EMIT2(op, b1, b2) \
({ \
_EMIT2(op | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define _EMIT4(op) \
({ \
if (jit->prg_buf) \
*(u32 *) (jit->prg_buf + jit->prg) = op; \
jit->prg += 4; \
})
#define EMIT4(op, b1, b2) \
({ \
_EMIT4(op | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_RRF(op, b1, b2, b3) \
({ \
_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
})
#define _EMIT4_DISP(op, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
_EMIT4(op | __disp); \
})
#define EMIT4_DISP(op, b1, b2, disp) \
({ \
_EMIT4_DISP(op | reg_high(b1) << 16 | \
reg_high(b2) << 8, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_IMM(op, b1, imm) \
({ \
unsigned int __imm = (imm) & 0xffff; \
_EMIT4(op | reg_high(b1) << 16 | __imm); \
REG_SET_SEEN(b1); \
})
#define EMIT4_PCREL(op, pcrel) \
({ \
long __pcrel = ((pcrel) >> 1) & 0xffff; \
_EMIT4(op | __pcrel); \
})
#define _EMIT6(op1, op2) \
({ \
if (jit->prg_buf) { \
*(u32 *) (jit->prg_buf + jit->prg) = op1; \
*(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
} \
jit->prg += 6; \
})
#define _EMIT6_DISP(op1, op2, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
_EMIT6(op1 | __disp, op2); \
})
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
u32 _disp = (u32) disp; \
unsigned int __disp_h = _disp & 0xff000; \
unsigned int __disp_l = _disp & 0x00fff; \
_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
})
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
({ \
_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
reg_high(b3) << 8, op2, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
})
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
op2 | mask << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
_EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
(rel & 0xffff), op2 | (imm & 0xff) << 8); \
REG_SET_SEEN(b1); \
BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
})
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
_EMIT6(op | (__imm >> 16), __imm & 0xffff); \
})
#define EMIT6_IMM(op, b1, imm) \
({ \
_EMIT6_IMM(op | reg_high(b1) << 16, imm); \
REG_SET_SEEN(b1); \
})
#define EMIT_CONST_U32(val) \
({ \
unsigned int ret; \
ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
if (jit->prg_buf) \
*(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
jit->lit += 4; \
ret; \
})
#define EMIT_CONST_U64(val) \
({ \
unsigned int ret; \
ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
if (jit->prg_buf) \
*(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
jit->lit += 8; \
ret; \
})
#define EMIT_ZERO(b1) \
({ \
/* llgfr %dst,%dst (zero extend to 64 bit) */ \
EMIT4(0xb9160000, b1, b1); \
REG_SET_SEEN(b1); \
})
/*
* Fill whole space with illegal instructions
*/
static void jit_fill_hole(void *area, unsigned int size)
{
memset(area, 0, size);
}
/*
* Save registers from "rs" (register start) to "re" (register end) on stack
*/
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (rs == re)
/* stg %rs,off(%r15) */
_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
else
/* stmg %rs,%re,off(%r15) */
_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
}
/*
* Restore registers from "rs" (register start) to "re" (register end) on stack
*/
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
u32 off = STK_OFF_R6 + (rs - 6) * 8;
if (jit->seen & SEEN_STACK)
off += STK_OFF;
if (rs == re)
/* lg %rs,off(%r15) */
_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
else
/* lmg %rs,%re,off(%r15) */
_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
}
/*
* Return first seen register (from start)
*/
static int get_start(struct bpf_jit *jit, int start)
{
int i;
for (i = start; i <= 15; i++) {
if (jit->seen_reg[i])
return i;
}
return 0;
}
/*
* Return last seen register (from start) (gap >= 2)
*/
static int get_end(struct bpf_jit *jit, int start)
{
int i;
for (i = start; i < 15; i++) {
if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
return i - 1;
}
return jit->seen_reg[15] ? 15 : 14;
}
#define REGS_SAVE 1
#define REGS_RESTORE 0
/*
* Save and restore clobbered registers (6-15) on stack.
* We save/restore registers in chunks with gap >= 2 registers.
*/
static void save_restore_regs(struct bpf_jit *jit, int op)
{
int re = 6, rs;
do {
rs = get_start(jit, re);
if (!rs)
break;
re = get_end(jit, rs + 1);
if (op == REGS_SAVE)
save_regs(jit, rs, re);
else
restore_regs(jit, rs, re);
re++;
} while (re <= 15);
}
/*
* For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
* we store the SKB header length on the stack and the SKB data
* pointer in REG_SKB_DATA if BPF_REG_AX is not used.
*/
static void emit_load_skb_data_hlen(struct bpf_jit *jit)
{
/* Header length: llgf %w1,<len>(%b1) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
offsetof(struct sk_buff, len));
/* s %w1,<data_len>(%b1) */
EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
offsetof(struct sk_buff, data_len));
/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
if (!(jit->seen & SEEN_REG_AX))
/* lg %skb_data,data_off(%b1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
BPF_REG_1, offsetof(struct sk_buff, data));
}
/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
* See stack frame layout desription in "bpf_jit.h"!
*/
bpf: move clearing of A/X into classic to eBPF migration prologue Back in the days where eBPF (or back then "internal BPF" ;->) was not exposed to user space, and only the classic BPF programs internally translated into eBPF programs, we missed the fact that for classic BPF A and X needed to be cleared. It was fixed back then via 83d5b7ef99c9 ("net: filter: initialize A and X registers"), and thus classic BPF specifics were added to the eBPF interpreter core to work around it. This added some confusion for JIT developers later on that take the eBPF interpreter code as an example for deriving their JIT. F.e. in f75298f5c3fe ("s390/bpf: clear correct BPF accumulator register"), at least X could leak stack memory. Furthermore, since this is only needed for classic BPF translations and not for eBPF (verifier takes care that read access to regs cannot be done uninitialized), more complexity is added to JITs as they need to determine whether they deal with migrations or native eBPF where they can just omit clearing A/X in their prologue and thus reduce image size a bit, see f.e. cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs"). In other cases (x86, arm64), A and X is being cleared in the prologue also for eBPF case, which is unnecessary. Lets move this into the BPF migration in bpf_convert_filter() where it actually belongs as long as the number of eBPF JITs are still few. It can thus be done generically; allowing us to remove the quirk from __bpf_prog_run() and to slightly reduce JIT image size in case of eBPF, while reducing code duplication on this matter in current(/future) eBPF JITs. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Cc: Zi Shen Lim <zlim.lnx@gmail.com> Cc: Yang Shi <yang.shi@linaro.org> Acked-by: Yang Shi <yang.shi@linaro.org> Acked-by: Zi Shen Lim <zlim.lnx@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-12-18 06:51:54 +08:00
static void bpf_jit_prologue(struct bpf_jit *jit)
{
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
if (jit->seen & SEEN_TAIL_CALL) {
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
/* j tail_call_start: NOP if no tail calls are used */
EMIT4_PCREL(0xa7f40000, 6);
_EMIT2(0);
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
/* Save registers */
save_restore_regs(jit, REGS_SAVE);
/* Setup literal pool */
if (jit->seen & SEEN_LITERAL) {
/* basr %r13,0 */
EMIT2(0x0d00, REG_L, REG_0);
jit->base_ip = jit->prg;
}
/* Setup stack and backchain */
if (jit->seen & SEEN_STACK) {
if (jit->seen & SEEN_FUNC)
/* lgr %w1,%r15 (backchain) */
EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
if (jit->seen & SEEN_FUNC)
/* stg %w1,152(%r15) (backchain) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
if (jit->seen & SEEN_SKB)
emit_load_skb_data_hlen(jit);
if (jit->seen & SEEN_SKB_CHANGE)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
}
/*
* Function epilogue
*/
static void bpf_jit_epilogue(struct bpf_jit *jit)
{
/* Return 0 */
if (jit->seen & SEEN_RET0) {
jit->ret0_ip = jit->prg;
/* lghi %b0,0 */
EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
}
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE);
/* br %r14 */
_EMIT2(0x07fe);
}
/*
* Compile one eBPF instruction into s390x code
*
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
unsigned int func_addr, mask;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
jit->seen |= SEEN_REG_AX;
switch (insn->code) {
/*
* BPF_MOV
*/
case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
/* llgfr %dst,%src */
EMIT4(0xb9160000, dst_reg, src_reg);
break;
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
/* lgr %dst,%src */
EMIT4(0xb9040000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
/* llilf %dst,imm */
EMIT6_IMM(0xc00f0000, dst_reg, imm);
break;
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
/* lgfi %dst,imm */
EMIT6_IMM(0xc0010000, dst_reg, imm);
break;
/*
* BPF_LD 64
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
{
/* 16 byte instruction that uses two 'struct bpf_insn' */
u64 imm64;
imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
/* lg %dst,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
EMIT_CONST_U64(imm64));
insn_count = 2;
break;
}
/*
* BPF_ADD
*/
case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
/* ar %dst,%src */
EMIT2(0x1a00, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
/* agr %dst,%src */
EMIT4(0xb9080000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
if (!imm)
break;
/* alfi %dst,imm */
EMIT6_IMM(0xc20b0000, dst_reg, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
if (!imm)
break;
/* agfi %dst,imm */
EMIT6_IMM(0xc2080000, dst_reg, imm);
break;
/*
* BPF_SUB
*/
case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
/* sr %dst,%src */
EMIT2(0x1b00, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
/* sgr %dst,%src */
EMIT4(0xb9090000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
if (!imm)
break;
/* alfi %dst,-imm */
EMIT6_IMM(0xc20b0000, dst_reg, -imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
if (!imm)
break;
/* agfi %dst,-imm */
EMIT6_IMM(0xc2080000, dst_reg, -imm);
break;
/*
* BPF_MUL
*/
case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
/* msr %dst,%src */
EMIT4(0xb2520000, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
/* msgr %dst,%src */
EMIT4(0xb90c0000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
if (imm == 1)
break;
/* msfi %r5,imm */
EMIT6_IMM(0xc2010000, dst_reg, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
if (imm == 1)
break;
/* msgfi %dst,imm */
EMIT6_IMM(0xc2000000, dst_reg, imm);
break;
/*
* BPF_DIV / BPF_MOD
*/
case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
jit->seen |= SEEN_RET0;
/* ltr %src,%src (if src == 0 goto fail) */
EMIT2(0x1200, src_reg, src_reg);
/* jz <ret0> */
EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
/* dlr %w0,%src */
EMIT4(0xb9970000, REG_W0, src_reg);
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
jit->seen |= SEEN_RET0;
/* ltgr %src,%src (if src == 0 goto fail) */
EMIT4(0xb9020000, src_reg, src_reg);
/* jz <ret0> */
EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
/* dlgr %w0,%dst */
EMIT4(0xb9870000, REG_W0, src_reg);
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
}
case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
if (imm == 1) {
if (BPF_OP(insn->code) == BPF_MOD)
/* lhgi %dst,0 */
EMIT4_IMM(0xa7090000, dst_reg, 0);
break;
}
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
/* dl %w0,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
EMIT_CONST_U32(imm));
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
break;
}
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
if (imm == 1) {
if (BPF_OP(insn->code) == BPF_MOD)
/* lhgi %dst,0 */
EMIT4_IMM(0xa7090000, dst_reg, 0);
break;
}
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
/* dlg %w0,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
EMIT_CONST_U64(imm));
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
}
/*
* BPF_AND
*/
case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
/* nr %dst,%src */
EMIT2(0x1400, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
/* ngr %dst,%src */
EMIT4(0xb9800000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
/* nilf %dst,imm */
EMIT6_IMM(0xc00b0000, dst_reg, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
/* ng %dst,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
EMIT_CONST_U64(imm));
break;
/*
* BPF_OR
*/
case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
/* or %dst,%src */
EMIT2(0x1600, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
/* ogr %dst,%src */
EMIT4(0xb9810000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
/* oilf %dst,imm */
EMIT6_IMM(0xc00d0000, dst_reg, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
/* og %dst,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
EMIT_CONST_U64(imm));
break;
/*
* BPF_XOR
*/
case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
/* xr %dst,%src */
EMIT2(0x1700, dst_reg, src_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
/* xgr %dst,%src */
EMIT4(0xb9820000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
if (!imm)
break;
/* xilf %dst,imm */
EMIT6_IMM(0xc0070000, dst_reg, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
/* xg %dst,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
EMIT_CONST_U64(imm));
break;
/*
* BPF_LSH
*/
case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
/* sll %dst,0(%src) */
EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
/* sllg %dst,%dst,0(%src) */
EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
if (imm == 0)
break;
/* sll %dst,imm(%r0) */
EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
if (imm == 0)
break;
/* sllg %dst,%dst,imm(%r0) */
EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
break;
/*
* BPF_RSH
*/
case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
/* srl %dst,0(%src) */
EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
/* srlg %dst,%dst,0(%src) */
EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
if (imm == 0)
break;
/* srl %dst,imm(%r0) */
EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
if (imm == 0)
break;
/* srlg %dst,%dst,imm(%r0) */
EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
break;
/*
* BPF_ARSH
*/
case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
/* srag %dst,%dst,0(%src) */
EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
if (imm == 0)
break;
/* srag %dst,%dst,imm(%r0) */
EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
break;
/*
* BPF_NEG
*/
case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
/* lcr %dst,%dst */
EMIT2(0x1300, dst_reg, dst_reg);
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_NEG: /* dst = -dst */
/* lcgr %dst,%dst */
EMIT4(0xb9130000, dst_reg, dst_reg);
break;
/*
* BPF_FROM_BE/LE
*/
case BPF_ALU | BPF_END | BPF_FROM_BE:
/* s390 is big endian, therefore only clear high order bytes */
switch (imm) {
case 16: /* dst = (u16) cpu_to_be16(dst) */
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
break;
case 32: /* dst = (u32) cpu_to_be32(dst) */
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_be64(dst) */
break;
}
break;
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16: /* dst = (u16) cpu_to_le16(dst) */
/* lrvr %dst,%dst */
EMIT4(0xb91f0000, dst_reg, dst_reg);
/* srl %dst,16(%r0) */
EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
/* llghr %dst,%dst */
EMIT4(0xb9850000, dst_reg, dst_reg);
break;
case 32: /* dst = (u32) cpu_to_le32(dst) */
/* lrvr %dst,%dst */
EMIT4(0xb91f0000, dst_reg, dst_reg);
/* llgfr %dst,%dst */
EMIT4(0xb9160000, dst_reg, dst_reg);
break;
case 64: /* dst = (u64) cpu_to_le64(dst) */
/* lrvgr %dst,%dst */
EMIT4(0xb90f0000, dst_reg, dst_reg);
break;
}
break;
/*
* BPF_ST(X)
*/
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
/* stcy %src,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
/* sthy %src,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
/* sty %src,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
/* stg %src,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
/* stcy %w0,off(dst) */
EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
/* sthy %w0,off(dst) */
EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
/* llilf %w0,imm */
EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
/* sty %w0,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
/* lgfi %w0,imm */
EMIT6_IMM(0xc0010000, REG_W0, imm);
/* stg %w0,off(%dst) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
/*
* BPF_STX XADD (atomic_add)
*/
case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
/* laal %w0,%src,off(%dst) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
dst_reg, off);
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
/* laalg %w0,%src,off(%dst) */
EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
dst_reg, off);
jit->seen |= SEEN_MEM;
break;
/*
* BPF_LDX
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
/* llgc %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
/* llgh %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
/* lg %dst,0(off,%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
break;
/*
* BPF_JMP / CALL
*/
case BPF_JMP | BPF_CALL:
{
/*
* b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
*/
const u64 func = (u64)__bpf_call_base + imm;
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
/* lg %w1,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
EMIT_CONST_U64(func));
/* basr %r14,%w1 */
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if (bpf_helper_changes_skb_data((void *)func)) {
jit->seen |= SEEN_SKB_CHANGE;
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
REG_15, STK_OFF_SKBP);
emit_load_skb_data_hlen(jit);
}
break;
}
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
case BPF_JMP | BPF_CALL | BPF_X:
/*
* Implicit input:
* B1: pointer to ctx
* B2: pointer to bpf_array
* B3: index in bpf_array
*/
jit->seen |= SEEN_TAIL_CALL;
/*
* if (index >= array->map.max_entries)
* goto out;
*/
/* llgf %w1,map.max_entries(%b2) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
/* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */
EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3,
REG_W1, 0, 0xa);
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
* goto out;
*/
if (jit->seen & SEEN_STACK)
off = STK_OFF_TCCNT + STK_OFF;
else
off = STK_OFF_TCCNT;
/* lhi %w0,1 */
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
MAX_TAIL_CALL_CNT, 0, 0x2);
/*
* prog = array->ptrs[index];
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
* if (prog == NULL)
* goto out;
*/
/* sllg %r1,%b3,3: %r1 = index * 8 */
EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3);
/* lg %r1,prog(%b2,%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
s390/bpf: implement bpf_tail_call() helper bpf_tail_call() arguments: - ctx......: Context pointer - jmp_table: One of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table - index....: Index in the jump table In this implementation s390x JIT does stack unwinding and jumps into the callee program prologue. Caller and callee use the same stack. With this patch a tail call generates the following code on s390x: if (index >= array->map.max_entries) goto out 000003ff8001c7e4: e31030100016 llgf %r1,16(%r3) 000003ff8001c7ea: ec41001fa065 clgrj %r4,%r1,10,3ff8001c828 if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) goto out; 000003ff8001c7f0: a7080001 lhi %r0,1 000003ff8001c7f4: eb10f25000fa laal %r1,%r0,592(%r15) 000003ff8001c7fa: ec120017207f clij %r1,32,2,3ff8001c828 prog = array->prog[index]; if (prog == NULL) goto out; 000003ff8001c800: eb140003000d sllg %r1,%r4,3 000003ff8001c806: e31310800004 lg %r1,128(%r3,%r1) 000003ff8001c80c: ec18000e007d clgij %r1,0,8,3ff8001c828 Restore registers before calling function 000003ff8001c812: eb68f2980004 lmg %r6,%r8,664(%r15) 000003ff8001c818: ebbff2c00004 lmg %r11,%r15,704(%r15) goto *(prog->bpf_func + tail_call_start); 000003ff8001c81e: e31100200004 lg %r1,32(%r1,%r0) 000003ff8001c824: 47f01006 bc 15,6(%r1) Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-06-09 12:51:06 +08:00
/* clgij %r1,0,0x8,label0 */
EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
/*
* Restore registers before calling function
*/
save_restore_regs(jit, REGS_RESTORE);
/*
* goto *(prog->bpf_func + tail_call_start);
*/
/* lg %r1,bpf_func(%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
offsetof(struct bpf_prog, bpf_func));
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
/* out: */
jit->labels[0] = jit->prg;
break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
if (last && !(jit->seen & SEEN_RET0))
break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
break;
/*
* Branch relative (number of skipped instructions) to offset on
* condition.
*
* Condition code to mask mapping:
*
* CC | Description | Mask
* ------------------------------
* 0 | Operands equal | 8
* 1 | First operand low | 4
* 2 | First operand high | 2
* 3 | Unused | 1
*
* For s390x relative branches: ip = ip + off_bytes
* For BPF relative branches: insn = insn + off_insns + 1
*
* For example for s390x with offset 0 we jump to the branch
* instruction itself (loop) and for BPF with offset 0 we
* branch to the instruction behind the branch.
*/
case BPF_JMP | BPF_JA: /* if (true) */
mask = 0xf000; /* j */
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
mask = 0x2000; /* jh */
goto branch_ks;
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
mask = 0xa000; /* jhe */
goto branch_ks;
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
mask = 0x2000; /* jh */
goto branch_ku;
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
mask = 0xa000; /* jhe */
goto branch_ku;
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
mask = 0x7000; /* jne */
goto branch_ku;
case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
mask = 0x8000; /* je */
goto branch_ku;
case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
mask = 0x7000; /* jnz */
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* ngr %w1,%dst */
EMIT4(0xb9800000, REG_W1, dst_reg);
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
mask = 0x2000; /* jh */
goto branch_xs;
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
mask = 0xa000; /* jhe */
goto branch_xs;
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
mask = 0x2000; /* jh */
goto branch_xu;
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
mask = 0xa000; /* jhe */
goto branch_xu;
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
mask = 0x7000; /* jne */
goto branch_xu;
case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
mask = 0x8000; /* je */
goto branch_xu;
case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
mask = 0x7000; /* jnz */
/* ngrk %w1,%dst,%src */
EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
goto branch_oc;
branch_ks:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* cgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
break;
branch_ku:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* clgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
break;
branch_xs:
/* cgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
break;
branch_xu:
/* clgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
break;
branch_oc:
/* brc mask,jmp_off (branch instruction needs 4 bytes) */
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
/*
* BPF_LD
*/
case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_byte_pos);
else
func_addr = __pa(sk_load_byte);
goto call_fn;
case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_half_pos);
else
func_addr = __pa(sk_load_half);
goto call_fn;
case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
func_addr = __pa(sk_load_word_pos);
else
func_addr = __pa(sk_load_word);
goto call_fn;
call_fn:
jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
REG_SET_SEEN(REG_14); /* Return address of possible func call */
/*
* Implicit input:
* BPF_REG_6 (R7) : skb pointer
* REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
*
* Calculated input:
* BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
* BPF_REG_5 (R6) : return address
*
* Output:
* BPF_REG_0 (R14): data read from skb
*
* Scratch registers (BPF_REG_1-5)
*/
/* Call function: llilf %w1,func_addr */
EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
/* Offset: lgfi %b2,imm */
EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
if (BPF_MODE(insn->code) == BPF_IND)
/* agfr %b2,%src (%src is s32 here) */
EMIT4(0xb9180000, BPF_REG_2, src_reg);
/* Reload REG_SKB_DATA if BPF_REG_AX is used */
if (jit->seen & SEEN_REG_AX)
/* lg %skb_data,data_off(%b6) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
BPF_REG_6, offsetof(struct sk_buff, data));
/* basr %b5,%w1 (%b5 is call saved) */
EMIT2(0x0d00, BPF_REG_5, REG_W1);
/*
* Note: For fast access we jump directly after the
* jnz instruction from bpf_jit.S
*/
/* jnz <ret0> */
EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
break;
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
}
return insn_count;
}
/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
{
int i, insn_count;
jit->lit = jit->lit_start;
jit->prg = 0;
bpf: move clearing of A/X into classic to eBPF migration prologue Back in the days where eBPF (or back then "internal BPF" ;->) was not exposed to user space, and only the classic BPF programs internally translated into eBPF programs, we missed the fact that for classic BPF A and X needed to be cleared. It was fixed back then via 83d5b7ef99c9 ("net: filter: initialize A and X registers"), and thus classic BPF specifics were added to the eBPF interpreter core to work around it. This added some confusion for JIT developers later on that take the eBPF interpreter code as an example for deriving their JIT. F.e. in f75298f5c3fe ("s390/bpf: clear correct BPF accumulator register"), at least X could leak stack memory. Furthermore, since this is only needed for classic BPF translations and not for eBPF (verifier takes care that read access to regs cannot be done uninitialized), more complexity is added to JITs as they need to determine whether they deal with migrations or native eBPF where they can just omit clearing A/X in their prologue and thus reduce image size a bit, see f.e. cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs"). In other cases (x86, arm64), A and X is being cleared in the prologue also for eBPF case, which is unnecessary. Lets move this into the BPF migration in bpf_convert_filter() where it actually belongs as long as the number of eBPF JITs are still few. It can thus be done generically; allowing us to remove the quirk from __bpf_prog_run() and to slightly reduce JIT image size in case of eBPF, while reducing code duplication on this matter in current(/future) eBPF JITs. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Cc: Zi Shen Lim <zlim.lnx@gmail.com> Cc: Yang Shi <yang.shi@linaro.org> Acked-by: Yang Shi <yang.shi@linaro.org> Acked-by: Zi Shen Lim <zlim.lnx@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-12-18 06:51:54 +08:00
bpf_jit_prologue(jit);
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i);
if (insn_count < 0)
return -1;
jit->addrs[i + 1] = jit->prg; /* Next instruction address */
}
bpf_jit_epilogue(jit);
jit->lit_start = jit->prg;
jit->size = jit->lit;
jit->size_prg = jit->prg;
return 0;
}
/*
* Classic BPF function stub. BPF programs will be converted into
* eBPF and then bpf_int_jit_compile() will be called.
*/
net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-31 11:34:16 +08:00
void bpf_jit_compile(struct bpf_prog *fp)
{
}
/*
* Compile eBPF program "fp"
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
bool tmp_blinded = false;
struct bpf_jit jit;
int pass;
if (!bpf_jit_enable)
return orig_fp;
tmp = bpf_jit_blind_constants(fp);
/*
* If blinding was requested and we failed during blinding,
* we must fall back to the interpreter.
*/
if (IS_ERR(tmp))
return orig_fp;
if (tmp != fp) {
tmp_blinded = true;
fp = tmp;
}
memset(&jit, 0, sizeof(jit));
jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
fp = orig_fp;
goto out;
}
/*
* Three initial passes:
* - 1/2: Determine clobbered registers
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
if (bpf_jit_prog(&jit, fp)) {
fp = orig_fp;
goto free_addrs;
}
}
/*
* Final pass: Allocate and generate program
*/
if (jit.size >= BPF_SIZE_MAX) {
fp = orig_fp;
goto free_addrs;
}
header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
}
if (bpf_jit_prog(&jit, fp)) {
fp = orig_fp;
goto free_addrs;
}
if (bpf_jit_enable > 1) {
bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
if (jit.prg_buf)
print_fn_code(jit.prg_buf, jit.size_prg);
}
if (jit.prg_buf) {
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.prg_buf;
fp->jited = 1;
}
free_addrs:
kfree(jit.addrs);
out:
if (tmp_blinded)
bpf_jit_prog_release_other(fp, fp == orig_fp ?
tmp : orig_fp);
return fp;
}
/*
* Free eBPF program
*/
net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-07-31 11:34:16 +08:00
void bpf_jit_free(struct bpf_prog *fp)
{
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;
if (!fp->jited)
net: fix unsafe set_memory_rw from softirq on x86 system with net.core.bpf_jit_enable = 1 sudo tcpdump -i eth1 'tcp port 22' causes the warning: [ 56.766097] Possible unsafe locking scenario: [ 56.766097] [ 56.780146] CPU0 [ 56.786807] ---- [ 56.793188] lock(&(&vb->lock)->rlock); [ 56.799593] <Interrupt> [ 56.805889] lock(&(&vb->lock)->rlock); [ 56.812266] [ 56.812266] *** DEADLOCK *** [ 56.812266] [ 56.830670] 1 lock held by ksoftirqd/1/13: [ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380 [ 56.849757] [ 56.849757] stack backtrace: [ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45 [ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007 [ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001 [ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001 [ 56.923006] Call Trace: [ 56.929532] [<ffffffff8175a145>] dump_stack+0x55/0x76 [ 56.936067] [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208 [ 56.942445] [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50 [ 56.948932] [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150 [ 56.955470] [<ffffffff810ccb52>] mark_lock+0x282/0x2c0 [ 56.961945] [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50 [ 56.968474] [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50 [ 56.975140] [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90 [ 56.981942] [<ffffffff810cef72>] lock_acquire+0x92/0x1d0 [ 56.988745] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 56.995619] [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50 [ 57.002493] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 57.009447] [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380 [ 57.016477] [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380 [ 57.023607] [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460 [ 57.030818] [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10 [ 57.037896] [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0 [ 57.044789] [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0 [ 57.051720] [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40 [ 57.058727] [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40 [ 57.065577] [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30 [ 57.072338] [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0 [ 57.078962] [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0 [ 57.085373] [<ffffffff81058245>] run_ksoftirqd+0x35/0x70 cannot reuse jited filter memory, since it's readonly, so use original bpf insns memory to hold work_struct defer kfree of sk_filter until jit completed freeing tested on x86_64 and i386 Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-04 15:14:06 +08:00
goto free_filter;
set_memory_rw(addr, header->pages);
bpf_jit_binary_free(header);
net: fix unsafe set_memory_rw from softirq on x86 system with net.core.bpf_jit_enable = 1 sudo tcpdump -i eth1 'tcp port 22' causes the warning: [ 56.766097] Possible unsafe locking scenario: [ 56.766097] [ 56.780146] CPU0 [ 56.786807] ---- [ 56.793188] lock(&(&vb->lock)->rlock); [ 56.799593] <Interrupt> [ 56.805889] lock(&(&vb->lock)->rlock); [ 56.812266] [ 56.812266] *** DEADLOCK *** [ 56.812266] [ 56.830670] 1 lock held by ksoftirqd/1/13: [ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380 [ 56.849757] [ 56.849757] stack backtrace: [ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45 [ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007 [ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001 [ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001 [ 56.923006] Call Trace: [ 56.929532] [<ffffffff8175a145>] dump_stack+0x55/0x76 [ 56.936067] [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208 [ 56.942445] [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50 [ 56.948932] [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150 [ 56.955470] [<ffffffff810ccb52>] mark_lock+0x282/0x2c0 [ 56.961945] [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50 [ 56.968474] [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50 [ 56.975140] [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90 [ 56.981942] [<ffffffff810cef72>] lock_acquire+0x92/0x1d0 [ 56.988745] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 56.995619] [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50 [ 57.002493] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 57.009447] [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380 [ 57.016477] [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380 [ 57.023607] [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460 [ 57.030818] [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10 [ 57.037896] [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0 [ 57.044789] [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0 [ 57.051720] [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40 [ 57.058727] [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40 [ 57.065577] [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30 [ 57.072338] [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0 [ 57.078962] [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0 [ 57.085373] [<ffffffff81058245>] run_ksoftirqd+0x35/0x70 cannot reuse jited filter memory, since it's readonly, so use original bpf insns memory to hold work_struct defer kfree of sk_filter until jit completed freeing tested on x86_64 and i386 Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-04 15:14:06 +08:00
free_filter:
net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and exposure to user space is on it's way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to be interpreted bytecode to read-only pages. In case we execute a corrupted BPF interpreter image for some reason e.g. caused by an attacker which got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can setup the image on immutable made pages in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including classic BPF that are migrated) have to call bpf_prog_select_runtime() to select either interpreter or a JIT image as a last setup step, and they all are being freed via bpf_prog_free(), including non-JIT. Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler <spender@grsecurity.net> Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Kees Cook <keescook@chromium.org> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-03 04:53:44 +08:00
bpf_prog_unlock_free(fp);
}