bpf, arm64: Optimize BPF store/load using arm64 str/ldr(immediate offset)
The current BPF store/load instruction is translated by the JIT into two instructions. The first instruction moves the immediate offset into a temporary register. The second instruction uses this temporary register to do the real store/load. In fact, arm64 supports addressing with immediate offsets, so this patch introduces an optimization that uses the arm64 str/ldr instruction with an immediate offset when the offset fits. Example of generated instructions for r2 = *(u64 *)(r1 + 0): without optimization: mov x10, 0; ldr x1, [x0, x10]. With optimization: ldr x1, [x0, 0]. If the offset is negative, is not aligned correctly, or exceeds the maximum value, fall back to using the temporary register. Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20220321152852.2334294-3-xukuohai@huawei.com
This commit is contained in:
parent
30c90f6757
commit
7db6c0f1d8
|
@ -66,6 +66,20 @@
|
|||
#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
|
||||
#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
|
||||
|
||||
/*
 * Load/store register (immediate offset).
 *
 * A64_LS_IMM() forwards to aarch64_insn_gen_load_store_imm(), pasting the
 * access width onto AARCH64_INSN_SIZE_ and the direction (STORE or LOAD)
 * into AARCH64_INSN_LDST_<type>_IMM_OFFSET.
 */
#define A64_LS_IMM(Rt, Rn, imm, size, type)			\
	aarch64_insn_gen_load_store_imm(Rt, Rn, imm,		\
					AARCH64_INSN_SIZE_##size, \
					AARCH64_INSN_LDST_##type##_IMM_OFFSET)

/* 8-bit accesses */
#define A64_STRBI(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 8, STORE)
#define A64_LDRBI(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 8, LOAD)

/* 16-bit accesses */
#define A64_STRHI(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 16, STORE)
#define A64_LDRHI(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 16, LOAD)

/* 32-bit accesses */
#define A64_STR32I(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 32, STORE)
#define A64_LDR32I(Wt, Xn, imm)	A64_LS_IMM(Wt, Xn, imm, 32, LOAD)

/* 64-bit accesses */
#define A64_STR64I(Xt, Xn, imm)	A64_LS_IMM(Xt, Xn, imm, 64, STORE)
#define A64_LDR64I(Xt, Xn, imm)	A64_LS_IMM(Xt, Xn, imm, 64, LOAD)
|
||||
|
||||
/* Load/store register pair */
|
||||
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
|
||||
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
|
||||
|
|
|
@ -191,6 +191,47 @@ static bool is_addsub_imm(u32 imm)
|
|||
return !(imm & ~0xfff) || !(imm & ~0xfff000);
|
||||
}
|
||||
|
||||
/*
|
||||
* There are 3 types of AArch64 LDR/STR (immediate) instruction:
|
||||
* Post-index, Pre-index, Unsigned offset.
|
||||
*
|
||||
* For BPF ldr/str, the "unsigned offset" type is sufficient.
|
||||
*
|
||||
* "Unsigned offset" type LDR(immediate) format:
|
||||
*
|
||||
* 3 2 1 0
|
||||
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
|
||||
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
* |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt |
|
||||
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
* scale
|
||||
*
|
||||
* "Unsigned offset" type STR(immediate) format:
|
||||
* 3 2 1 0
|
||||
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
|
||||
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
* |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt |
|
||||
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
* scale
|
||||
*
|
||||
* The offset is calculated from imm12 and scale in the following way:
|
||||
*
|
||||
* offset = (u64)imm12 << scale
|
||||
*/
|
||||
/*
 * is_lsi_offset - can @offset be encoded in an "unsigned offset" LDR/STR?
 * @offset: byte offset to test
 * @scale:  log2 of the access size in bytes (0 = byte ... 3 = doubleword)
 *
 * The encoding holds a 12-bit unsigned immediate that is shifted left by
 * @scale, so @offset must be non-negative, a multiple of (1 << scale) and
 * no larger than 0xFFF << scale.
 *
 * @offset is taken as int rather than s16 so that a wider value supplied by
 * a caller is range-checked as-is instead of being narrowed first.
 *
 * Return: true if @offset fits the immediate form, false otherwise.
 */
static bool is_lsi_offset(int offset, int scale)
{
	/* Only the access sizes 1, 2, 4 and 8 bytes (scale 0..3) are valid. */
	if (scale < 0 || scale > 3)
		return false;

	/* The immediate is unsigned: negative offsets need the register form. */
	if (offset < 0)
		return false;

	/* Largest encodable offset is the maximum imm12 scaled by the size. */
	if (offset > (0xFFF << scale))
		return false;

	/* The low @scale bits are not encoded, so they must be zero. */
	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}
|
||||
|
||||
/* Tail call offset to jump into */
|
||||
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
|
||||
#define PROLOGUE_OFFSET 8
|
||||
|
@ -971,19 +1012,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
|
|||
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
|
||||
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
|
||||
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_W:
|
||||
emit(A64_LDR32(dst, src, tmp), ctx);
|
||||
if (is_lsi_offset(off, 2)) {
|
||||
emit(A64_LDR32I(dst, src, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_LDR32(dst, src, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_H:
|
||||
emit(A64_LDRH(dst, src, tmp), ctx);
|
||||
if (is_lsi_offset(off, 1)) {
|
||||
emit(A64_LDRHI(dst, src, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_LDRH(dst, src, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_B:
|
||||
emit(A64_LDRB(dst, src, tmp), ctx);
|
||||
if (is_lsi_offset(off, 0)) {
|
||||
emit(A64_LDRBI(dst, src, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_LDRB(dst, src, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit(A64_LDR64(dst, src, tmp), ctx);
|
||||
if (is_lsi_offset(off, 3)) {
|
||||
emit(A64_LDR64I(dst, src, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_LDR64(dst, src, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1011,20 +1071,39 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
|
|||
case BPF_ST | BPF_MEM | BPF_B:
|
||||
case BPF_ST | BPF_MEM | BPF_DW:
|
||||
/* Load imm to a register then store it */
|
||||
emit_a64_mov_i(1, tmp2, off, ctx);
|
||||
emit_a64_mov_i(1, tmp, imm, ctx);
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_W:
|
||||
emit(A64_STR32(tmp, dst, tmp2), ctx);
|
||||
if (is_lsi_offset(off, 2)) {
|
||||
emit(A64_STR32I(tmp, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp2, off, ctx);
|
||||
emit(A64_STR32(tmp, dst, tmp2), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_H:
|
||||
emit(A64_STRH(tmp, dst, tmp2), ctx);
|
||||
if (is_lsi_offset(off, 1)) {
|
||||
emit(A64_STRHI(tmp, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp2, off, ctx);
|
||||
emit(A64_STRH(tmp, dst, tmp2), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_B:
|
||||
emit(A64_STRB(tmp, dst, tmp2), ctx);
|
||||
if (is_lsi_offset(off, 0)) {
|
||||
emit(A64_STRBI(tmp, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp2, off, ctx);
|
||||
emit(A64_STRB(tmp, dst, tmp2), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit(A64_STR64(tmp, dst, tmp2), ctx);
|
||||
if (is_lsi_offset(off, 3)) {
|
||||
emit(A64_STR64I(tmp, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp2, off, ctx);
|
||||
emit(A64_STR64(tmp, dst, tmp2), ctx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
@ -1034,19 +1113,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
|
|||
case BPF_STX | BPF_MEM | BPF_H:
|
||||
case BPF_STX | BPF_MEM | BPF_B:
|
||||
case BPF_STX | BPF_MEM | BPF_DW:
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_W:
|
||||
emit(A64_STR32(src, dst, tmp), ctx);
|
||||
if (is_lsi_offset(off, 2)) {
|
||||
emit(A64_STR32I(src, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_STR32(src, dst, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_H:
|
||||
emit(A64_STRH(src, dst, tmp), ctx);
|
||||
if (is_lsi_offset(off, 1)) {
|
||||
emit(A64_STRHI(src, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_STRH(src, dst, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_B:
|
||||
emit(A64_STRB(src, dst, tmp), ctx);
|
||||
if (is_lsi_offset(off, 0)) {
|
||||
emit(A64_STRBI(src, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_STRB(src, dst, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit(A64_STR64(src, dst, tmp), ctx);
|
||||
if (is_lsi_offset(off, 3)) {
|
||||
emit(A64_STR64I(src, dst, off), ctx);
|
||||
} else {
|
||||
emit_a64_mov_i(1, tmp, off, ctx);
|
||||
emit(A64_STR64(src, dst, tmp), ctx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue