From ed888cb0d1ebce69f12794e89fbd5e2c86d40b8d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 11 Sep 2020 19:16:11 +0100
Subject: [PATCH 1/3] arm64: Allow CPUs unaffected by ARM erratum 1418040 to come in late

Now that we allow CPUs affected by erratum 1418040 to come in late,
the capability type we picked prevents their unaffected siblings from
coming in late (or coming back after a suspend or hotplug-off, which
amounts to the same thing).

To allow this, we need to add ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU,
which amounts to setting .type to ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE.

Fixes: bf87bb0881d0 ("arm64: Allow booting of late CPUs affected by erratum 1418040")
Reported-by: Matthias Kaehlcke
Signed-off-by: Marc Zyngier
Tested-by: Sai Prakash Ranjan
Tested-by: Matthias Kaehlcke
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20200911181611.2073183-1-maz@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/cpu_errata.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index c332d49780dc..560ba69e13c1 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -910,8 +910,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                 .desc = "ARM erratum 1418040",
                 .capability = ARM64_WORKAROUND_1418040,
                 ERRATA_MIDR_RANGE_LIST(erratum_1418040_list),
-                .type = (ARM64_CPUCAP_SCOPE_LOCAL_CPU |
-                         ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU),
+                /*
+                 * We need to allow affected CPUs to come in late, but
+                 * also need the non-affected CPUs to be able to come
+                 * in at any point in time. Wonderful.
+                 */
+                .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
         },
 #endif
 #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
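
For reference, and to make the commit message concrete: as I read
arch/arm64/include/asm/cpufeature.h around this kernel version, the
weak local CPU feature type is just the local-CPU scope combined with
both late-CPU flags, which is why a single .type value covers both the
affected and the unaffected late CPUs. This is paraphrased from memory
and worth double-checking against the tree the patch applies to; it is
not part of the patch itself:

  /*
   * Paraphrased from arch/arm64/include/asm/cpufeature.h (circa v5.9),
   * shown only to illustrate the commit message above.
   */
  #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE             \
          (ARM64_CPUCAP_SCOPE_LOCAL_CPU           |       \
           ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU     |       \
           ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
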
From 32f6865c7aa3c422f710903baa6eb81abc6f559b Mon Sep 17 00:00:00 2001
From: Ilias Apalodimas
Date: Thu, 17 Sep 2020 11:49:25 +0300
Subject: [PATCH 2/3] arm64: bpf: Fix branch offset in JIT

Running the eBPF test_verifier leads to random errors looking like this:

[ 6525.735488] Unexpected kernel BRK exception at EL1
[ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
[ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
[ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
[ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun 6 2020
[ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
[ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.820832] sp : ffff8000130cbb80
[ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
[ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
[ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
[ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
[ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
[ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
[ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
[ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
[ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
[ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
[ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
[ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
[ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
[ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
[ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
[ 6525.903760] Call trace:
[ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
[ 6525.920398]  bpf_test_run+0x70/0x1b0
[ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
[ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
[ 6525.932072]  __arm64_sys_bpf+0x24/0x30
[ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
[ 6525.940607]  do_el0_svc+0x28/0x88
[ 6525.943920]  el0_sync_handler+0x88/0x190
[ 6525.947838]  el0_sync+0x140/0x180
[ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
[ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---

The reason is the creation and later usage of the offset[] array while
building the eBPF body. The code currently omits the first instruction,
since build_insn() will increase our ctx->idx before saving it. That
was fine up until bounded eBPF loops were introduced. After that
introduction, offset[0] must hold the offset of the end of the
prologue, which is the start of the 1st insn, while offset[n] holds the
offset of the end of the n-th insn.

When the "taken loop with back jump to 1st insn" test runs, it will
eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is
permitted, the current outcome depends on the value stored in
ctx->offset[-1], which has nothing to do with our array. If that value
happens to be 0 the tests will work. If not, this error triggers.

Commit 7c2e988f400e ("bpf: fix x64 JIT code generation for jmp to 1st
insn") fixed an identical bug on x86 when eBPF bounded loops were
introduced.

So let's fix it by creating ctx->offset[] differently: track the
beginning of each instruction and account for the extra instruction
while calculating the arm instruction offsets.

Fixes: 2589726d12a1 ("bpf: introduce bounded loops")
Reported-by: Naresh Kamboju
Reported-by: Jiri Olsa
Co-developed-by: Jean-Philippe Brucker
Co-developed-by: Yauheni Kaliuta
Signed-off-by: Jean-Philippe Brucker
Signed-off-by: Yauheni Kaliuta
Signed-off-by: Ilias Apalodimas
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20200917084925.177348-1-ilias.apalodimas@linaro.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/net/bpf_jit_comp.c | 43 +++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 12 deletions(-)
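
To make the reworked bpf2a64_offset() arithmetic in the diff below
concrete, here is a small userspace sketch with made-up instruction
counts. The helper body mirrors the patched one, but nothing in this
sketch is part of the patch itself:

  /* Illustration only: hypothetical prologue and instruction sizes. */
  #include <assert.h>

  /* Same computation as the patched bpf2a64_offset(), minus the jit_ctx. */
  static int demo_bpf2a64_offset(int bpf_insn, int off, const int *offset)
  {
          /* BPF JMP offsets are relative to the next instruction... */
          bpf_insn++;
          /* ...while A64 branches are relative to the branch itself. */
          return offset[bpf_insn + off] - (offset[bpf_insn] - 1);
  }

  int main(void)
  {
          /*
           * Say the prologue is 4 A64 insns and BPF insns 0..2 emit
           * 2, 3 and 1 A64 insns. offset[i] is the start of BPF insn i;
           * offset[3] is the extra end-of-program element.
           */
          const int offset[] = { 4, 6, 9, 10 };

          /*
           * A BPF_JA in insn 2 with off = -3 targets insn 0 (2 - 3 + 1).
           * The A64 branch sits at offset[3] - 1 = 9 and must jump back
           * to offset[0] = 4, i.e. by -5 instructions.
           */
          assert(demo_bpf2a64_offset(2, -3, offset) == -5);

          /* The old code would have read ctx->offset[-1] for this case. */
          return 0;
  }
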
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f8912e45be7a..ef9f1d5e989d 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -143,14 +143,17 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
         }
 }
 
-static inline int bpf2a64_offset(int bpf_to, int bpf_from,
+static inline int bpf2a64_offset(int bpf_insn, int off,
                                  const struct jit_ctx *ctx)
 {
-        int to = ctx->offset[bpf_to];
-        /* -1 to account for the Branch instruction */
-        int from = ctx->offset[bpf_from] - 1;
-
-        return to - from;
+        /* BPF JMP offset is relative to the next instruction */
+        bpf_insn++;
+        /*
+         * Whereas arm64 branch instructions encode the offset
+         * from the branch itself, so we must subtract 1 from the
+         * instruction offset.
+         */
+        return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
 }
 
 static void jit_fill_hole(void *area, unsigned int size)
@@ -642,7 +645,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 
         /* JUMP off */
         case BPF_JMP | BPF_JA:
-                jmp_offset = bpf2a64_offset(i + off, i, ctx);
+                jmp_offset = bpf2a64_offset(i, off, ctx);
                 check_imm26(jmp_offset);
                 emit(A64_B(jmp_offset), ctx);
                 break;
@@ -669,7 +672,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
         case BPF_JMP32 | BPF_JSLE | BPF_X:
                 emit(A64_CMP(is64, dst, src), ctx);
 emit_cond_jmp:
-                jmp_offset = bpf2a64_offset(i + off, i, ctx);
+                jmp_offset = bpf2a64_offset(i, off, ctx);
                 check_imm19(jmp_offset);
                 switch (BPF_OP(code)) {
                 case BPF_JEQ:
@@ -908,10 +911,21 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
         const struct bpf_prog *prog = ctx->prog;
         int i;
 
+        /*
+         * - offset[0] - offset of the end of prologue,
+         *   start of the 1st instruction.
+         * - offset[1] - offset of the end of 1st instruction,
+         *   start of the 2nd instruction
+         * [....]
+         * - offset[3] - offset of the end of 3rd instruction,
+         *   start of 4th instruction
+         */
         for (i = 0; i < prog->len; i++) {
                 const struct bpf_insn *insn = &prog->insnsi[i];
                 int ret;
 
+                if (ctx->image == NULL)
+                        ctx->offset[i] = ctx->idx;
                 ret = build_insn(insn, ctx, extra_pass);
                 if (ret > 0) {
                         i++;
@@ -919,11 +933,16 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
                                 ctx->offset[i] = ctx->idx;
                         continue;
                 }
-                if (ctx->image == NULL)
-                        ctx->offset[i] = ctx->idx;
                 if (ret)
                         return ret;
         }
+        /*
+         * offset is allocated with prog->len + 1 so fill in
+         * the last element with the offset after the last
+         * instruction (end of program).
+         */
+        if (ctx->image == NULL)
+                ctx->offset[i] = ctx->idx;
 
         return 0;
 }
@@ -1002,7 +1021,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         memset(&ctx, 0, sizeof(ctx));
         ctx.prog = prog;
-        ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+        ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
         if (ctx.offset == NULL) {
                 prog = orig_prog;
                 goto out_off;
         }
@@ -1089,7 +1108,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         prog->jited_len = prog_size;
 
         if (!prog->is_func || extra_pass) {
-                bpf_prog_fill_jited_linfo(prog, ctx.offset);
+                bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
                 kfree(ctx.offset);
                 kfree(jit_data);
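
A note on the last two hunks, as I read them (my gloss, not taken from
the commit message): ctx->offset[] now needs prog->len + 1 entries
because it stores the start of every instruction plus one final
end-of-program offset, and passing ctx.offset + 1 to
bpf_prog_fill_jited_linfo() keeps handing that function end-of-instruction
offsets, the same values it received before this change. Reusing the
made-up numbers from the earlier sketch:

  /*
   * Hypothetical numbers (4-insn prologue; BPF insns 0..2 emitting
   * 2, 3 and 1 A64 insns):
   *
   *                 index:    0    1    2    3
   *   new ctx->offset[]  :    4    6    9   10   (start of insn i)
   *   old ctx->offset[]  :    6    9   10        (end of insn i)
   *
   * ctx.offset + 1 therefore yields { 6, 9, 10 }, matching what the
   * old code handed to bpf_prog_fill_jited_linfo().
   */
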
From 75df529bec9110dad43ab30e2d9490242529e8b8 Mon Sep 17 00:00:00 2001
From: Andrew Jones
Date: Wed, 16 Sep 2020 17:45:30 +0200
Subject: [PATCH 3/3] arm64: paravirt: Initialize steal time when cpu is online

Steal time initialization requires mapping a memory region which
invokes a memory allocation. Doing this at CPU starting time results
in the following trace when CONFIG_DEBUG_ATOMIC_SLEEP is enabled:

BUG: sleeping function called from invalid context at mm/slab.h:498
in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/1
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc5+ #1
Call trace:
 dump_backtrace+0x0/0x208
 show_stack+0x1c/0x28
 dump_stack+0xc4/0x11c
 ___might_sleep+0xf8/0x130
 __might_sleep+0x58/0x90
 slab_pre_alloc_hook.constprop.101+0xd0/0x118
 kmem_cache_alloc_node_trace+0x84/0x270
 __get_vm_area_node+0x88/0x210
 get_vm_area_caller+0x38/0x40
 __ioremap_caller+0x70/0xf8
 ioremap_cache+0x78/0xb0
 memremap+0x9c/0x1a8
 init_stolen_time_cpu+0x54/0xf0
 cpuhp_invoke_callback+0xa8/0x720
 notify_cpu_starting+0xc8/0xd8
 secondary_start_kernel+0x114/0x180
CPU1: Booted secondary processor 0x0000000001 [0x431f0a11]

However, we don't need to initialize steal time at CPU starting time.
We can simply wait until CPU online time, just sacrificing a bit of
accuracy by returning zero for steal time until we know better.

While at it, add __init to the functions that are only called by
pv_time_init() which is __init.

Signed-off-by: Andrew Jones
Fixes: e0685fa228fd ("arm64: Retrieve stolen time as paravirtualized guest")
Cc: stable@vger.kernel.org
Reviewed-by: Steven Price
Link: https://lore.kernel.org/r/20200916154530.40809-1-drjones@redhat.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/paravirt.c | 26 +++++++++++++++-----------
 include/linux/cpuhotplug.h   |  1 -
 2 files changed, 15 insertions(+), 12 deletions(-)
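
Some context on why moving the work helps, as I understand the cpuhp
API (this is my summary, not taken from the commit message):
CPUHP_AP_..._STARTING callbacks run on the incoming CPU from
notify_cpu_starting() with interrupts disabled, so they must not
allocate or sleep, whereas callbacks attached to an online-time state
such as a dynamically allocated CPUHP_AP_ONLINE_DYN slot run in a
sleepable context where memremap() is fine. A minimal sketch of that
registration pattern, with hypothetical names rather than the patched
code:

  #include <linux/cpuhotplug.h>
  #include <linux/init.h>

  /* Hypothetical example of the pattern the patch switches to. */
  static int demo_cpu_online(unsigned int cpu)
  {
          /* Sleepable context: mapping or allocating memory here is fine. */
          return 0;
  }

  static int demo_cpu_down_prepare(unsigned int cpu)
  {
          /* Undo the per-CPU setup before the CPU goes away. */
          return 0;
  }

  static int __init demo_init(void)
  {
          int ret;

          /* A negative value is an error; >= 0 is the allocated dynamic state. */
          ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo/example:online",
                                  demo_cpu_online, demo_cpu_down_prepare);
          return ret < 0 ? ret : 0;
  }
  device_initcall(demo_init);
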
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 295d66490584..c07d7a034941 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -50,16 +50,19 @@ static u64 pv_steal_clock(int cpu)
         struct pv_time_stolen_time_region *reg;
 
         reg = per_cpu_ptr(&stolen_time_region, cpu);
-        if (!reg->kaddr) {
-                pr_warn_once("stolen time enabled but not configured for cpu %d\n",
-                             cpu);
+
+        /*
+         * paravirt_steal_clock() may be called before the CPU
+         * online notification callback runs. Until the callback
+         * has run we just return zero.
+         */
+        if (!reg->kaddr)
                 return 0;
-        }
 
         return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
 }
 
-static int stolen_time_dying_cpu(unsigned int cpu)
+static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
         struct pv_time_stolen_time_region *reg;
 
@@ -73,7 +76,7 @@ static int stolen_time_dying_cpu(unsigned int cpu)
         return 0;
 }
 
-static int init_stolen_time_cpu(unsigned int cpu)
+static int stolen_time_cpu_online(unsigned int cpu)
 {
         struct pv_time_stolen_time_region *reg;
         struct arm_smccc_res res;
@@ -103,19 +106,20 @@ static int init_stolen_time_cpu(unsigned int cpu)
         return 0;
 }
 
-static int pv_time_init_stolen_time(void)
+static int __init pv_time_init_stolen_time(void)
 {
         int ret;
 
-        ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
-                                "hypervisor/arm/pvtime:starting",
-                                init_stolen_time_cpu, stolen_time_dying_cpu);
+        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+                                "hypervisor/arm/pvtime:online",
+                                stolen_time_cpu_online,
+                                stolen_time_cpu_down_prepare);
         if (ret < 0)
                 return ret;
         return 0;
 }
 
-static bool has_pv_steal_clock(void)
+static bool __init has_pv_steal_clock(void)
 {
         struct arm_smccc_res res;
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 3215023d4852..bf9181cef444 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -142,7 +142,6 @@ enum cpuhp_state {
         /* Must be the last timer callback */
         CPUHP_AP_DUMMY_TIMER_STARTING,
         CPUHP_AP_ARM_XEN_STARTING,
-        CPUHP_AP_ARM_KVMPV_STARTING,
         CPUHP_AP_ARM_CORESIGHT_STARTING,
         CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
         CPUHP_AP_ARM64_ISNDEP_STARTING,