From 916cda1aa1b412d7cf2991c3af7479544942d121 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 26 Jan 2016 14:10:34 +0100 Subject: [PATCH] s390: add a system call for guarded storage This adds a new system call to enable the use of guarded storage for user space processes. The system call takes two arguments, a command and pointer to a guarded storage control block: s390_guarded_storage(int command, struct gs_cb *gs_cb); The second argument is relevant only for the GS_SET_BC_CB command. The commands in detail: 0 - GS_ENABLE Enable the guarded storage facility for the current task. The initial content of the guarded storage control block will be all zeros. After the enablement the user space code can use load-guarded-storage-controls instruction (LGSC) to load an arbitrary control block. While a task is enabled the kernel will save and restore the current content of the guarded storage registers on context switch. 1 - GS_DISABLE Disables the use of the guarded storage facility for the current task. The kernel will cease to save and restore the content of the guarded storage registers, the task specific content of these registers is lost. 2 - GS_SET_BC_CB Set a broadcast guarded storage control block. This is called per thread and stores a specific guarded storage control block in the task struct of the current task. This control block will be used for the broadcast event GS_BROADCAST. 3 - GS_CLEAR_BC_CB Clears the broadcast guarded storage control block. The guarded- storage control block is removed from the task struct that was established by GS_SET_BC_CB. 4 - GS_BROADCAST Sends a broadcast to all thread siblings of the current task. Every sibling that has established a broadcast guarded storage control block will load this control block and will be enabled for guarded storage. The broadcast guarded storage control block is used up, a second broadcast without a refresh of the stored control block with GS_SET_BC_CB will not have any effect. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 1 + arch/s390/include/asm/lowcore.h | 9 +- arch/s390/include/asm/nmi.h | 12 +- arch/s390/include/asm/processor.h | 5 + arch/s390/include/asm/setup.h | 2 + arch/s390/include/asm/switch_to.h | 3 + arch/s390/include/asm/thread_info.h | 12 +- arch/s390/include/uapi/asm/Kbuild | 1 + arch/s390/include/uapi/asm/guarded_storage.h | 77 +++++++++++ arch/s390/include/uapi/asm/unistd.h | 2 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/early.c | 2 + arch/s390/kernel/entry.S | 26 +++- arch/s390/kernel/entry.h | 2 + arch/s390/kernel/guarded_storage.c | 128 +++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 13 +- arch/s390/kernel/nmi.c | 19 ++- arch/s390/kernel/process.c | 7 +- arch/s390/kernel/processor.c | 2 +- arch/s390/kernel/ptrace.c | 84 ++++++++++-- arch/s390/kernel/setup.c | 18 ++- arch/s390/kernel/smp.c | 43 ++++++- arch/s390/kernel/syscalls.S | 2 +- arch/s390/kvm/interrupt.c | 4 +- include/uapi/linux/elf.h | 1 + 27 files changed, 435 insertions(+), 45 deletions(-) create mode 100644 arch/s390/include/uapi/asm/guarded_storage.h create mode 100644 arch/s390/kernel/guarded_storage.c diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -105,6 +105,7 @@ #define HWCAP_S390_VXRS 2048 #define HWCAP_S390_VXRS_BCD 4096 #define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 /* Internal bits, not exposed via elf */ #define HWCAP_INT_SIE 1UL diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,8 +157,8 @@ struct lowcore { __u64 stfle_fac_list[32]; /* 0x0f00 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ - /* Pointer to vector register save area */ - __u64 vector_save_area_addr; /* 0x11b0 */ + /* Pointer to the machine check extended save area */ + __u64 mcesad; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -182,10 +182,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ - __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - - /* Software defined save area for vector registers */ - __u8 vector_save_area[1024]; /* 0x1c00 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -58,7 +58,9 @@ union mci { u64 ie : 1; /* 32 indirect storage error */ u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ - u64 : 7; /* 35-41 */ + u64 : 1; /* 35 */ + u64 gs : 1; /* 36 guarded storage registers */ + u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ u64 ap : 1; /* 44 ancillary report */ @@ -69,6 +71,14 @@ union mci { }; }; +#define MCESA_ORIGIN_MASK (~0x3ffUL) +#define MCESA_LC_MASK (0xfUL) + +struct mcesa { + u8 vector_save_area[1024]; + u8 guarded_storage_save_area[32]; +}; + struct pt_regs; extern void s390_handle_mcck(void); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..cc101f9371cb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -135,6 +135,8 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at @@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); +/* Free guarded storage control block for current */ +void exit_thread_gs(void); + /* * Return saved PC of a blocked thread. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..383bd8358a8c 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -31,6 +31,7 @@ #define MACHINE_FLAG_VX _BITUL(13) #define MACHINE_FLAG_CAD _BITUL(14) #define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -70,6 +71,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,6 +10,7 @@ #include #include #include +#include extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) save_fpu_regs(); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..f36e6e2b73f0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SECCOMP 5 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_UPROBE 3 /* breakpointed or single-stepping */ +#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_UPROBE _BITUL(TIF_UPROBE) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) +#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..86b761e583e3 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -12,6 +12,7 @@ header-y += dasd.h header-y += debug.h header-y += errno.h header-y += fcntl.h +header-y += guarded_storage.h header-y += hypfs.h header-y += ioctl.h header-y += ioctls.h diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h @@ -0,0 +1,77 @@ +#ifndef _GUARDED_STORAGE_H +#define _GUARDED_STORAGE_H + +#include + +struct gs_cb { + __u64 reserved; + __u64 gsd; + __u64 gssm; + __u64 gs_epl_a; +}; + +struct gs_epl { + __u8 pad1; + union { + __u8 gs_eam; + struct { + __u8 : 6; + __u8 e : 1; + __u8 b : 1; + }; + }; + union { + __u8 gs_eci; + struct { + __u8 tx : 1; + __u8 cx : 1; + __u8 : 5; + __u8 in : 1; + }; + }; + union { + __u8 gs_eai; + struct { + __u8 : 1; + __u8 t : 1; + __u8 as : 2; + __u8 ar : 4; + }; + }; + __u32 pad2; + __u64 gs_eha; + __u64 gs_eia; + __u64 gs_eoa; + __u64 gs_eir; + __u64 gs_era; +}; + +#define GS_ENABLE 0 +#define GS_DISABLE 1 +#define GS_SET_BC_CB 2 +#define GS_CLEAR_BC_CB 3 +#define GS_BROADCAST 4 + +static inline void load_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); +} + +static inline void store_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); +} + +static inline void save_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + store_gs_cb(gs_cb); +} + +static inline void restore_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + load_gs_cb(gs_cb); +} + +#endif /* _GUARDED_STORAGE_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 152de9b796e1..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,7 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -/* Number 378 is reserved for guarded storage */ +#define __NR_s390_guarded_storage 378 #define __NR_statx 379 #define NR_syscalls 380 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..aa5adbdaf200 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..95298a41076f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..fa8b8f28e08b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ @@ -332,6 +332,8 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep TSTMSK __TI_flags(%r12),_TIF_SIGPENDING @@ -408,6 +410,14 @@ ENTRY(system_call) jg uprobe_notify_resume #endif +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb + # # _PIF_PER_TRAP is set, call do_per_trap # @@ -663,6 +673,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -696,6 +708,18 @@ ENTRY(io_int_handler) larl %r14,.Lio_return jg load_fpu_regs +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j .Lio_return + # # _TIF_NEED_RESCHED is set, call schedule # diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include #include #include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current processes uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..c73709869447 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..c933e255b5d5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 12. + */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..286bcee800f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) -NI_SYSCALL +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0f8f14199734..169558dc7daf 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, save_access_regs(vcpu->run->s.regs.acrs); /* Extended save area */ - rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, - sizeof(unsigned long)); + rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, + sizeof(unsigned long)); /* Only bits 0-53 are used for address formation */ ext_sa_addr &= ~0x3ffUL; if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b59ee077a596..8c6d3bdb9a00 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -409,6 +409,7 @@ typedef struct elf64_shdr { #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */