From 099eae11a8beb13e17f6b9371c7c090c61bc061d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 10 Dec 2014 10:23:59 +0100 Subject: [PATCH 1/9] s390/zcrypt: kernel oops at insmod of the z90crypt device driver Kernel oops caused by invalid parameter at TAPQ instruction: On older systems where the QCI instruction is not available all possible domains are probed via TAPQ instruction. The range for the probe has been extended with the > 16 domain support now leading to a possible specification exception when this instruction is called for probing higher values within the new range. This may happen during insmod and/or ap bus reset only on machines without a QCI instruction (z10, z196, z114), zEC12 and newer systems are not affected. The fix modifies the domain checking function to limit the allowed range if no QCI info is available. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 91e97ec01418..4d41bf75c233 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1163,9 +1163,13 @@ static inline int ap_test_config_card_id(unsigned int id) */ static inline int ap_test_config_domain(unsigned int domain) { - if (!ap_configuration) - return 1; - return ap_test_config(ap_configuration->aqm, domain); + if (!ap_configuration) /* QCI not supported */ + if (domain < 16) + return 1; /* then domains 0...15 are configured */ + else + return 0; + else + return ap_test_config(ap_configuration->aqm, domain); } /** From 032014bc04ae97fd0474f311259db7d669e47507 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 12 Dec 2014 10:44:20 +0100 Subject: [PATCH 2/9] s390/vtime: Get rid of redundant WARN_ON in the cpu time accounting function vtime_account_irq_enter (vtime_account_system) we use a WARN_ON_ONCE(!irqs_disabled()). This is redundant as the function virt_timer_forward is always called and has a BUG_ON(!irqs_disabled()). This saves several nanoseconds in my specific testcase (KVM entry/exit) and probably all other callers like (soft)irq entry/exit. Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vtime.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7f0089d9a4aa..e34122e539a1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -128,8 +128,6 @@ void vtime_account_irq_enter(struct task_struct *tsk) struct thread_info *ti = task_thread_info(tsk); u64 timer, system; - WARN_ON_ONCE(!irqs_disabled()); - timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; From 81fc77fbfc1553aee35e33af89959be1d8c81db2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 16 Dec 2014 10:25:37 +0100 Subject: [PATCH 3/9] s390/kernel: use stnsm 255 instead of stosm 0 On some models, stnsm 255 might be slightly faster than stosm 0. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 37b9091ab8c0..16aa0c779e07 100644 --- a/arch/s390/include/asm/irqflags.h +++ b/arch/s390/include/asm/irqflags.h @@ -36,7 +36,7 @@ static inline notrace void __arch_local_irq_ssm(unsigned long flags) static inline notrace unsigned long arch_local_save_flags(void) { - return __arch_local_irq_stosm(0x00); + return __arch_local_irq_stnsm(0xff); } static inline notrace unsigned long arch_local_irq_save(void) From 98590460d44f4dc2020e594c5ac3357c8278f45b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Dec 2014 10:04:21 +0100 Subject: [PATCH 4/9] s390: wire up execveat syscall Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 3 ++- arch/s390/kernel/syscalls.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 2b446cf0cc65..67878af257a0 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -289,7 +289,8 @@ #define __NR_bpf 351 #define __NR_s390_pci_mmio_write 352 #define __NR_s390_pci_mmio_read 353 -#define NR_syscalls 354 +#define __NR_execveat 354 +#define NR_syscalls 355 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index a2987243bc76..939ec474b1dd 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -362,3 +362,4 @@ SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */ SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf) SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write) SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) +SYSCALL(sys_execveat,sys_execveat,compat_sys_execveat) From e38f97813302065fbc9c9eab5c1a94dc021d71e2 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 1 Jan 2015 22:27:32 +0800 Subject: [PATCH 5/9] s390/timex: fix get_tod_clock_ext() inline assembly For C language, it treats array parameter as a pointer, so sizeof for an array parameter is equal to sizeof for a pointer, which causes compiler warning (with allmodconfig by gcc 5): ./arch/s390/include/asm/timex.h: In function 'get_tod_clock_ext': ./arch/s390/include/asm/timex.h:76:32: warning: 'sizeof' on array function parameter 'clk' will return size of 'char *' [-Wsizeof-array-argument] typedef struct { char _[sizeof(clk)]; } addrtype; ^ Can use macro CLOCK_STORE_SIZE instead of all related hard code numbers, which also can avoid this warning. And also add a tab to CLOCK_TICK_RATE definition to match coding styles. [heiko.carstens@de.ibm.com]: Chen's patch actually fixes a bug within the get_tod_clock_ext() inline assembly where we incorrectly tell the compiler that only 8 bytes of memory get changed instead of 16 bytes. This would allow gcc to generate incorrect code. Right now this doesn't seem to be the case. Also slightly changed the patch a bit. - renamed CLOCK_STORE_SIZE to STORE_CLOCK_EXT_SIZE - changed get_tod_clock_ext() to receive a char pointer parameter Signed-off-by: Chen Gang Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_vm.c | 2 +- arch/s390/include/asm/timex.h | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 32040ace00ea..afbe07907c10 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -231,7 +231,7 @@ int hypfs_vm_create_files(struct dentry *root) struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ - char tod_ext[16]; /* TOD clock for d2fc */ + char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */ u64 count; /* Number of VM guests in d2fc buffer */ char reserved[30]; } __attribute__ ((packed)); diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 8beee1cceba4..98eb2a579223 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -67,20 +67,22 @@ static inline void local_tick_enable(unsigned long long comp) set_clock_comparator(S390_lowcore.clock_comparator); } -#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ typedef unsigned long long cycles_t; -static inline void get_tod_clock_ext(char clk[16]) +static inline void get_tod_clock_ext(char *clk) { - typedef struct { char _[sizeof(clk)]; } addrtype; + typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); } static inline unsigned long long get_tod_clock(void) { - unsigned char clk[16]; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); } From fbc89c952f004fb9191c23605a1428df6dd39a90 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 Jan 2015 11:00:02 +0100 Subject: [PATCH 6/9] s390/mm: avoid using pmd_to_page for !USE_SPLIT_PMD_PTLOCKS pmd_to_page() is only available if USE_SPLIT_PMD_PTLOCKS is defined. The use of pmd_to_page in the gmap code can cause compile errors if NR_CPUS is smaller than SPLIT_PTLOCK_CPUS. Do not use pmd_to_page outside of USE_SPLIT_PMD_PTLOCKS sections. Reported-by: Mike Frysinger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 71c7eff2c89f..601deb85d2a0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -322,11 +322,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, static unsigned long __gmap_segment_gaddr(unsigned long *entry) { struct page *page; - unsigned long offset; + unsigned long offset, mask; offset = (unsigned long) entry / sizeof(unsigned long); offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_to_page((pmd_t *) entry); + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); return page->index + offset; } From df3eed3d282f2fe1ffb73d3545fcde4e9b80a0d3 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 8 Jan 2015 14:36:21 +0100 Subject: [PATCH 7/9] s390/bpf: Fix ALU_NEG (A = -A) Currently the LOAD NEGATIVE (lnr) instruction is used for ALU_NEG. This instruction always loads the negative value. Therefore, if A is already negative, it remains unchanged. To fix this use LOAD COMPLEMENT (lcr) instead. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c52ac77408ca..8bc474fb52fd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -431,8 +431,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, EMIT4_DISP(0x88500000, K); break; case BPF_ALU | BPF_NEG: /* A = -A */ - /* lnr %r5,%r5 */ - EMIT2(0x1155); + /* lcr %r5,%r5 */ + EMIT2(0x1355); break; case BPF_JMP | BPF_JA: /* ip += K */ offset = addrs[i + K] + jit->start - jit->prg; From ae750974591bb9431b1f84b1323dc2fb7d8fe360 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 8 Jan 2015 14:46:18 +0100 Subject: [PATCH 8/9] s390/bpf: Fix JMP_JGE_X (A > X) and JMP_JGT_X (A >= X) Currently the signed COMPARE (cr) instruction is used to compare "A" with "X". This is not correct because "A" and "X" are both unsigned. To fix this use the unsigned COMPARE LOGICAL (clr) instruction instead. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8bc474fb52fd..524496d47ef5 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -502,8 +502,8 @@ branch: if (filter->jt == filter->jf) { xbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { jit->seen |= SEEN_XREG; - /* cr %r5,%r12 */ - EMIT2(0x195c); + /* clr %r5,%r12 */ + EMIT2(0x155c); } goto branch; case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ From 8d1a2427d8fc0cb26ee72dfad7ad1033420089a1 Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Thu, 8 Jan 2015 16:56:01 +0100 Subject: [PATCH 9/9] s390/uprobes: fix user space PER events If uprobes are single stepped for example with gdb, the behavior should now be correct. Before this patch, when gdb was single stepping a uprobe, the result was a SIGILL. When PER is active for any storage alteration and a uprobe is hit, a storage alteration event is indicated. These over indications are filterd out by gdb, if no change has happened within the observed area. Signed-off-by: Jan Willeke Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/uprobes.c | 69 +++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index f6b3cd056ec2..cc7328080b60 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -48,6 +48,30 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) return false; } +static int check_per_event(unsigned short cause, unsigned long control, + struct pt_regs *regs) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return 0; + /* user space single step */ + if (control == 0) + return 1; + /* over indication for storage alteration */ + if ((control & 0x20200000) && (cause & 0x2000)) + return 1; + if (cause & 0x8000) { + /* all branches */ + if ((control & 0x80800000) == 0x80000000) + return 1; + /* branch into selected range */ + if (((control & 0x80800000) == 0x80800000) && + regs->psw.addr >= current->thread.per_user.start && + regs->psw.addr <= current->thread.per_user.end) + return 1; + } + return 0; +} + int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { int fixup = probe_get_fixup_type(auprobe->insn); @@ -71,9 +95,13 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (regs->psw.addr - utask->xol_vaddr == ilen) regs->psw.addr = utask->vaddr + ilen; } - /* If per tracing was active generate trap */ - if (regs->psw.mask & PSW_MASK_PER) - do_per_trap(regs); + if (check_per_event(current->thread.per_event.cause, + current->thread.per_user.control, regs)) { + /* fix per address */ + current->thread.per_event.address = utask->vaddr; + /* trigger per event */ + set_pt_regs_flag(regs, PIF_PER_TRAP); + } return 0; } @@ -106,6 +134,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) clear_thread_flag(TIF_UPROBE_SINGLESTEP); regs->int_code = auprobe->saved_int_code; regs->psw.addr = current->utask->vaddr; + current->thread.per_event.address = current->utask->vaddr; } unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, @@ -146,17 +175,20 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __rc; \ }) -#define emu_store_ril(ptr, input) \ +#define emu_store_ril(regs, ptr, input) \ ({ \ unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(ptr) __ptr = (ptr); \ int __rc = 0; \ \ if (!test_facility(34)) \ __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + else if ((u64 __force)__ptr & mask) \ __rc = EMU_SPECIFICATION; \ - else if (put_user(*(input), ptr)) \ + else if (put_user(*(input), __ptr)) \ __rc = EMU_ADDRESSING; \ + if (__rc == 0) \ + sim_stor_event(regs, __ptr, mask + 1); \ __rc; \ }) @@ -197,6 +229,25 @@ union split_register { s16 s16[4]; }; +/* + * If user per registers are setup to trace storage alterations and an + * emulated store took place on a fitting address a user trap is generated. + */ +static void sim_stor_event(struct pt_regs *regs, void *addr, int len) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return; + if (!(current->thread.per_user.control & PER_EVENT_STORE)) + return; + if ((void *)current->thread.per_user.start > (addr + len)) + return; + if ((void *)current->thread.per_user.end < addr) + return; + current->thread.per_event.address = regs->psw.addr; + current->thread.per_event.cause = PER_EVENT_STORE >> 16; + set_pt_regs_flag(regs, PIF_PER_TRAP); +} + /* * pc relative instructions are emulated, since parameters may not be * accessible from the xol area due to range limitations. @@ -249,13 +300,13 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) rc = emu_load_ril((u32 __user *)uptr, &rx->u64); break; case 0x07: /* sthrl */ - rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]); + rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]); break; case 0x0b: /* stgrl */ - rc = emu_store_ril((u64 __user *)uptr, &rx->u64); + rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64); break; case 0x0f: /* strl */ - rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]); + rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]); break; } break;