Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "The biggest changes in this cycle were:

   - Revamp, simplify (and in some cases fix) Time Stamp Counter (TSC)
     primitives. (Andy Lutomirski)

   - Add new, comprehensible entry and exit handlers written in C.
     (Andy Lutomirski)

   - vm86 mode cleanups and fixes. (Brian Gerst)

   - 32-bit compat code cleanups. (Brian Gerst)

  The amount of simplification in low level assembly code is already
  palpable:

     arch/x86/entry/entry_32.S | 130 +----
     arch/x86/entry/entry_64.S | 197 ++-----

  but more simplifications are planned.

  There's also the usual laundry mix of low level changes - see the
  changelog for details"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (83 commits)
  x86/asm: Drop repeated macro of X86_EFLAGS_AC definition
  x86/asm/msr: Make wrmsrl() a function
  x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer
  x86/asm: Add MONITORX/MWAITX instruction support
  x86/traps: Weaken context tracking entry assertions
  x86/asm/tsc: Add rdtscll() merge helper
  selftests/x86: Add syscall_nt selftest
  selftests/x86: Disable sigreturn_64
  x86/vdso: Emit a GNU hash
  x86/entry: Remove do_notify_resume(), syscall_trace_leave(), and their TIF masks
  x86/entry/32: Migrate to C exit path
  x86/entry/32: Remove 32-bit syscall audit optimizations
  x86/vm86: Rename vm86->v86flags and v86mask
  x86/vm86: Rename vm86->vm86_info to user_vm86
  x86/vm86: Clean up vm86.h includes
  x86/vm86: Move the vm86 IRQ definitions to vm86.h
  x86/vm86: Use the normal pt_regs area for vm86
  x86/vm86: Eliminate 'struct kernel_vm86_struct'
  x86/vm86: Move fields from 'struct kernel_vm86_struct' to 'struct vm86'
  x86/vm86: Move vm86 fields out of 'thread_struct'
  ...
commit 5778077d03
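
For reference, the "TSC primitives" item above refers to replacing the old __native_read_tsc()/rdtscll() macros with plain functions, rdtsc() and rdtsc_ordered(); the corresponding arch/x86/include/asm/msr.h hunks appear later in this diff. A minimal user-space sketch of the same pattern (an illustration only, not the kernel's code: it assumes an x86-64 compiler with GNU inline asm and uses a plain LFENCE where the kernel picks the fence at runtime via alternative_2()):

	#include <stdint.h>
	#include <stdio.h>

	/* Unordered read: the CPU may speculate RDTSC past earlier loads. */
	static inline uint64_t rdtsc(void)
	{
		uint32_t lo, hi;

		asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
		return ((uint64_t)hi << 32) | lo;
	}

	/* Ordered read: LFENCE keeps RDTSC from passing earlier loads. */
	static inline uint64_t rdtsc_ordered(void)
	{
		asm volatile("lfence" ::: "memory");
		return rdtsc();
	}

	int main(void)
	{
		uint64_t t0 = rdtsc_ordered();
		uint64_t t1 = rdtsc_ordered();

		printf("delta: %llu cycles\n", (unsigned long long)(t1 - t0));
		return 0;
	}
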
@@ -22,7 +22,8 @@ extern int kmalloc_ok;
extern unsigned long alloc_stack(int order, int atomic);
extern void free_stack(unsigned long stack, int order);

extern int do_signal(void);
struct pt_regs;
extern void do_signal(struct pt_regs *regs);
extern void interrupt_end(void);
extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs);

@@ -90,12 +90,14 @@ void *__switch_to(struct task_struct *from, struct task_struct *to)

void interrupt_end(void)
{
        struct pt_regs *regs = &current->thread.regs;

        if (need_resched())
                schedule();
        if (test_thread_flag(TIF_SIGPENDING))
                do_signal();
                do_signal(regs);
        if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
                tracehook_notify_resume(&current->thread.regs);
                tracehook_notify_resume(regs);
}

void exit_thread(void)

@@ -64,7 +64,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
        signal_setup_done(err, ksig, singlestep);
}

static int kern_do_signal(struct pt_regs *regs)
void do_signal(struct pt_regs *regs)
{
        struct ksignal ksig;
        int handled_sig = 0;

@@ -110,10 +110,4 @@ static int kern_do_signal(struct pt_regs *regs)
         */
        if (!handled_sig)
                restore_saved_sigmask();
        return handled_sig;
}

int do_signal(void)
{
        return kern_do_signal(&current->thread.regs);
}

@@ -291,7 +291,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                /* We are under mmap_sem, release it such that current can terminate */
                up_write(&current->mm->mmap_sem);
                force_sig(SIGKILL, current);
                do_signal();
                do_signal(&current->thread.regs);
        }
}

@@ -173,7 +173,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip)
void fatal_sigsegv(void)
{
        force_sigsegv(SIGSEGV, current);
        do_signal();
        do_signal(&current->thread.regs);
        /*
         * This is to tell gcc that we're not returning - do_signal
         * can, in general, return, but in this case, it's not, since

@@ -133,7 +133,7 @@ config X86
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_UID16 if X86_32
        select HAVE_UID16 if X86_32 || IA32_EMULATION
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_USER_RETURN_NOTIFIER
        select IRQ_FORCED_THREADING

@@ -1003,19 +1003,41 @@ config X86_THERMAL_VECTOR
        def_bool y
        depends on X86_MCE_INTEL

config VM86
        bool "Enable VM86 support" if EXPERT
        default y
config X86_LEGACY_VM86
        bool "Legacy VM86 support (obsolete)"
        default n
        depends on X86_32
        ---help---
          This option is required by programs like DOSEMU to run
          16-bit real mode legacy code on x86 processors. It also may
          be needed by software like XFree86 to initialize some video
          cards via BIOS. Disabling this option saves about 6K.
          This option allows user programs to put the CPU into V8086
          mode, which is an 80286-era approximation of 16-bit real mode.

          Some very old versions of X and/or vbetool require this option
          for user mode setting. Similarly, DOSEMU will use it if
          available to accelerate real mode DOS programs. However, any
          recent version of DOSEMU, X, or vbetool should be fully
          functional even without kernel VM86 support, as they will all
          fall back to (pretty well performing) software emulation.

          Anything that works on a 64-bit kernel is unlikely to need
          this option, as 64-bit kernels don't, and can't, support V8086
          mode. This option is also unrelated to 16-bit protected mode
          and is not needed to run most 16-bit programs under Wine.

          Enabling this option adds considerable attack surface to the
          kernel and slows down system calls and exception handling.

          Unless you use very old userspace or need the last drop of
          performance in your real mode DOS games and can't use KVM,
          say N here.

config VM86
        bool
        default X86_LEGACY_VM86

config X86_16BIT
        bool "Enable support for 16-bit segments" if EXPERT
        default y
        depends on MODIFY_LDT_SYSCALL
        ---help---
          This option is required by programs like Wine to run 16-bit
          protected mode legacy code on x86 processors. Disabling

@@ -1510,6 +1532,7 @@ config X86_RESERVE_LOW

config MATH_EMULATION
        bool
        depends on MODIFY_LDT_SYSCALL
        prompt "Math emulation" if X86_32
        ---help---
          Linux can emulate a math coprocessor (used for floating point

@@ -2054,6 +2077,22 @@ config CMDLINE_OVERRIDE
          This is used to work around broken boot loaders. This should
          be set to 'N' under normal conditions.

config MODIFY_LDT_SYSCALL
        bool "Enable the LDT (local descriptor table)" if EXPERT
        default y
        ---help---
          Linux can allow user programs to install a per-process x86
          Local Descriptor Table (LDT) using the modify_ldt(2) system
          call. This is required to run 16-bit or segmented code such as
          DOSEMU or some Wine programs. It is also used by some very old
          threading libraries.

          Enabling this feature adds a small amount of overhead to
          context switches and increases the low-level kernel attack
          surface. Disabling it removes the modify_ldt(2) system call.

          Saying 'N' here may make sense for embedded or server kernels.

source "kernel/livepatch/Kconfig"

endmenu

@@ -2523,7 +2562,7 @@ config IA32_EMULATION
        depends on X86_64
        select BINFMT_ELF
        select COMPAT_BINFMT_ELF
        select HAVE_UID16
        select ARCH_WANT_OLD_COMPAT_IPC
        ---help---
          Include code to run legacy 32-bit programs under a
          64-bit kernel. You should likely turn this on, unless you're

@@ -2537,7 +2576,7 @@ config IA32_AOUT

config X86_X32
        bool "x32 ABI for 64-bit mode"
        depends on X86_64 && IA32_EMULATION
        depends on X86_64
        ---help---
          Include code to run binaries for the x32 native 32-bit ABI
          for 64-bit processors. An x32 process gets access to the

@@ -2551,7 +2590,6 @@ config X86_X32
config COMPAT
        def_bool y
        depends on IA32_EMULATION || X86_X32
        select ARCH_WANT_OLD_COMPAT_IPC

if COMPAT
config COMPAT_FOR_U64_ALIGNMENT

@@ -39,6 +39,16 @@ ifdef CONFIG_X86_NEED_RELOCS
LDFLAGS_vmlinux := --emit-relocs
endif

#
# Prevent GCC from generating any FP code by mistake.
#
# This must happen before we try the -mpreferred-stack-boundary, see:
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)

ifeq ($(CONFIG_X86_32),y)
        BITS := 32
        UTS_MACHINE := i386

@@ -167,9 +177,6 @@ KBUILD_CFLAGS += -pipe
KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)

KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)

@@ -82,7 +82,7 @@ static unsigned long get_random_long(void)

        if (has_cpuflag(X86_FEATURE_TSC)) {
                debug_putstr(" RDTSC");
                rdtscll(raw);
                raw = rdtsc();

                random ^= raw;
                use_i8254 = false;

@@ -2,6 +2,7 @@
# Makefile for the x86 low level entry code
#
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o

obj-y += vdso/
obj-y += vsyscall/

@ -135,9 +135,6 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
movq %rbp, 4*8+\offset(%rsp)
|
||||
movq %rbx, 5*8+\offset(%rsp)
|
||||
.endm
|
||||
.macro SAVE_EXTRA_REGS_RBP offset=0
|
||||
movq %rbp, 4*8+\offset(%rsp)
|
||||
.endm
|
||||
|
||||
.macro RESTORE_EXTRA_REGS offset=0
|
||||
movq 0*8+\offset(%rsp), %r15
|
||||
|
@ -193,12 +190,6 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
RESTORE_C_REGS_HELPER 1,0,0,1,1
|
||||
.endm
|
||||
.macro RESTORE_RSI_RDI
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,0
|
||||
.endm
|
||||
.macro RESTORE_RSI_RDI_RDX
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,1
|
||||
.endm
|
||||
|
||||
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
|
||||
subq $-(15*8+\addskip), %rsp
|
||||
|
|
|
@ -0,0 +1,318 @@
|
|||
/*
|
||||
* common.c - C code for kernel entry and exit
|
||||
* Copyright (c) 2015 Andrew Lutomirski
|
||||
* GPL v2
|
||||
*
|
||||
* Based on asm and ptrace code by many authors. The code here originated
|
||||
* in ptrace.c and signal.c.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
#include <linux/uprobes.h>
|
||||
|
||||
#include <asm/desc.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
/* Called on entry from user mode with IRQs off. */
|
||||
__visible void enter_from_user_mode(void)
|
||||
{
|
||||
CT_WARN_ON(ct_state() != CONTEXT_USER);
|
||||
user_exit();
|
||||
}
|
||||
#endif
|
||||
|
||||
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
audit_syscall_entry(regs->orig_ax, regs->di,
|
||||
regs->si, regs->dx, regs->r10);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
audit_syscall_entry(regs->orig_ax, regs->bx,
|
||||
regs->cx, regs->dx, regs->si);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We can return 0 to resume the syscall or anything else to go to phase
|
||||
* 2. If we resume the syscall, we need to put something appropriate in
|
||||
* regs->orig_ax.
|
||||
*
|
||||
* NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
|
||||
* are fully functional.
|
||||
*
|
||||
* For phase 2's benefit, our return value is:
|
||||
* 0: resume the syscall
|
||||
* 1: go to phase 2; no seccomp phase 2 needed
|
||||
* anything else: go to phase 2; pass return value to seccomp
|
||||
*/
|
||||
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
|
||||
{
|
||||
unsigned long ret = 0;
|
||||
u32 work;
|
||||
|
||||
BUG_ON(regs != task_pt_regs(current));
|
||||
|
||||
work = ACCESS_ONCE(current_thread_info()->flags) &
|
||||
_TIF_WORK_SYSCALL_ENTRY;
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
/*
|
||||
* If TIF_NOHZ is set, we are required to call user_exit() before
|
||||
* doing anything that could touch RCU.
|
||||
*/
|
||||
if (work & _TIF_NOHZ) {
|
||||
enter_from_user_mode();
|
||||
work &= ~_TIF_NOHZ;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Do seccomp first -- it should minimize exposure of other
|
||||
* code, and keeping seccomp fast is probably more valuable
|
||||
* than the rest of this.
|
||||
*/
|
||||
if (work & _TIF_SECCOMP) {
|
||||
struct seccomp_data sd;
|
||||
|
||||
sd.arch = arch;
|
||||
sd.nr = regs->orig_ax;
|
||||
sd.instruction_pointer = regs->ip;
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
sd.args[0] = regs->di;
|
||||
sd.args[1] = regs->si;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->r10;
|
||||
sd.args[4] = regs->r8;
|
||||
sd.args[5] = regs->r9;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
sd.args[0] = regs->bx;
|
||||
sd.args[1] = regs->cx;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->si;
|
||||
sd.args[4] = regs->di;
|
||||
sd.args[5] = regs->bp;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
|
||||
BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
|
||||
|
||||
ret = seccomp_phase1(&sd);
|
||||
if (ret == SECCOMP_PHASE1_SKIP) {
|
||||
regs->orig_ax = -1;
|
||||
ret = 0;
|
||||
} else if (ret != SECCOMP_PHASE1_OK) {
|
||||
return ret; /* Go directly to phase 2 */
|
||||
}
|
||||
|
||||
work &= ~_TIF_SECCOMP;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Do our best to finish without phase 2. */
|
||||
if (work == 0)
|
||||
return ret; /* seccomp and/or nohz only (ret == 0 here) */
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
if (work == _TIF_SYSCALL_AUDIT) {
|
||||
/*
|
||||
* If there is no more work to be done except auditing,
|
||||
* then audit in phase 1. Phase 2 always audits, so, if
|
||||
* we audit here, then we can't go on to phase 2.
|
||||
*/
|
||||
do_audit_syscall_entry(regs, arch);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 1; /* Something is enabled that we can't handle in phase 1 */
|
||||
}
|
||||
|
||||
/* Returns the syscall nr to run (which should match regs->orig_ax). */
|
||||
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
|
||||
unsigned long phase1_result)
|
||||
{
|
||||
long ret = 0;
|
||||
u32 work = ACCESS_ONCE(current_thread_info()->flags) &
|
||||
_TIF_WORK_SYSCALL_ENTRY;
|
||||
|
||||
BUG_ON(regs != task_pt_regs(current));
|
||||
|
||||
/*
|
||||
* If we stepped into a sysenter/syscall insn, it trapped in
|
||||
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
|
||||
* If user-mode had set TF itself, then it's still clear from
|
||||
* do_debug() and we need to set it again to restore the user
|
||||
* state. If we entered on the slow path, TF was already set.
|
||||
*/
|
||||
if (work & _TIF_SINGLESTEP)
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Call seccomp_phase2 before running the other hooks so that
|
||||
* they can see any changes made by a seccomp tracer.
|
||||
*/
|
||||
if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
|
||||
/* seccomp failures shouldn't expose any additional code. */
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(work & _TIF_SYSCALL_EMU))
|
||||
ret = -1L;
|
||||
|
||||
if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
|
||||
tracehook_report_syscall_entry(regs))
|
||||
ret = -1L;
|
||||
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
|
||||
trace_sys_enter(regs, regs->orig_ax);
|
||||
|
||||
do_audit_syscall_entry(regs, arch);
|
||||
|
||||
return ret ?: regs->orig_ax;
|
||||
}
|
||||
|
||||
long syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
|
||||
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
|
||||
|
||||
if (phase1_result == 0)
|
||||
return regs->orig_ax;
|
||||
else
|
||||
return syscall_trace_enter_phase2(regs, arch, phase1_result);
|
||||
}
|
||||
|
||||
static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long top_of_stack =
|
||||
(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
|
||||
return (struct thread_info *)(top_of_stack - THREAD_SIZE);
|
||||
}
|
||||
|
||||
/* Called with IRQs disabled. */
|
||||
__visible void prepare_exit_to_usermode(struct pt_regs *regs)
|
||||
{
|
||||
if (WARN_ON(!irqs_disabled()))
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
* In order to return to user mode, we need to have IRQs off with
|
||||
* none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
|
||||
* _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags
|
||||
* can be set at any time on preemptable kernels if we have IRQs on,
|
||||
* so we need to loop. Disabling preemption wouldn't help: doing the
|
||||
* work to clear some of the flags can sleep.
|
||||
*/
|
||||
while (true) {
|
||||
u32 cached_flags =
|
||||
READ_ONCE(pt_regs_to_thread_info(regs)->flags);
|
||||
|
||||
if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
|
||||
_TIF_UPROBE | _TIF_NEED_RESCHED |
|
||||
_TIF_USER_RETURN_NOTIFY)))
|
||||
break;
|
||||
|
||||
/* We have work to do. */
|
||||
local_irq_enable();
|
||||
|
||||
if (cached_flags & _TIF_NEED_RESCHED)
|
||||
schedule();
|
||||
|
||||
if (cached_flags & _TIF_UPROBE)
|
||||
uprobe_notify_resume(regs);
|
||||
|
||||
/* deal with pending signal delivery */
|
||||
if (cached_flags & _TIF_SIGPENDING)
|
||||
do_signal(regs);
|
||||
|
||||
if (cached_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
}
|
||||
|
||||
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
|
||||
fire_user_return_notifiers();
|
||||
|
||||
/* Disable IRQs and retry */
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
user_enter();
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
|
||||
* state such that we can immediately switch to user mode.
|
||||
*/
|
||||
__visible void syscall_return_slowpath(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = pt_regs_to_thread_info(regs);
|
||||
u32 cached_flags = READ_ONCE(ti->flags);
|
||||
bool step;
|
||||
|
||||
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
|
||||
|
||||
if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled",
|
||||
regs->orig_ax))
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* First do one-time work. If these work items are enabled, we
|
||||
* want to run them exactly once per syscall exit with IRQs on.
|
||||
*/
|
||||
if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |
|
||||
_TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) {
|
||||
audit_syscall_exit(regs);
|
||||
|
||||
if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
|
||||
trace_sys_exit(regs, regs->ax);
|
||||
|
||||
/*
|
||||
* If TIF_SYSCALL_EMU is set, we only get here because of
|
||||
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
|
||||
* We already reported this syscall instruction in
|
||||
* syscall_trace_enter().
|
||||
*/
|
||||
step = unlikely(
|
||||
(cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
|
||||
== _TIF_SINGLESTEP);
|
||||
if (step || cached_flags & _TIF_SYSCALL_TRACE)
|
||||
tracehook_report_syscall_exit(regs, step);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
|
||||
* Compat syscalls set TS_COMPAT. Make sure we clear it before
|
||||
* returning to user mode.
|
||||
*/
|
||||
ti->status &= ~TS_COMPAT;
|
||||
#endif
|
||||
|
||||
local_irq_disable();
|
||||
prepare_exit_to_usermode(regs);
|
||||
}
|
|
@ -45,16 +45,6 @@
|
|||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
|
||||
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
|
||||
#include <linux/elf-em.h>
|
||||
#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
|
||||
#define __AUDIT_ARCH_LE 0x40000000
|
||||
|
||||
#ifndef CONFIG_AUDITSYSCALL
|
||||
# define sysenter_audit syscall_trace_entry
|
||||
# define sysexit_audit syscall_exit_work
|
||||
#endif
|
||||
|
||||
.section .entry.text, "ax"
|
||||
|
||||
/*
|
||||
|
@ -266,14 +256,10 @@ ret_from_intr:
|
|||
|
||||
ENTRY(resume_userspace)
|
||||
LOCKDEP_SYS_EXIT
|
||||
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
||||
# setting need_resched or sigpending
|
||||
# between sampling and the iret
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
|
||||
# int/exception return?
|
||||
jne work_pending
|
||||
movl %esp, %eax
|
||||
call prepare_exit_to_usermode
|
||||
jmp restore_all
|
||||
END(ret_from_exception)
|
||||
|
||||
|
@ -339,7 +325,7 @@ sysenter_past_esp:
|
|||
GET_THREAD_INFO(%ebp)
|
||||
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
|
||||
jnz sysenter_audit
|
||||
jnz syscall_trace_entry
|
||||
sysenter_do_call:
|
||||
cmpl $(NR_syscalls), %eax
|
||||
jae sysenter_badsys
|
||||
|
@ -351,7 +337,7 @@ sysenter_after_call:
|
|||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $_TIF_ALLWORK_MASK, %ecx
|
||||
jnz sysexit_audit
|
||||
jnz syscall_exit_work_irqs_off
|
||||
sysenter_exit:
|
||||
/* if something modifies registers it must also disable sysexit */
|
||||
movl PT_EIP(%esp), %edx
|
||||
|
@ -362,40 +348,6 @@ sysenter_exit:
|
|||
PTGS_TO_GS
|
||||
ENABLE_INTERRUPTS_SYSEXIT
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
sysenter_audit:
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
|
||||
jnz syscall_trace_entry
|
||||
/* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
|
||||
movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
|
||||
/* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
|
||||
pushl PT_ESI(%esp) /* a3: 5th arg */
|
||||
pushl PT_EDX+4(%esp) /* a2: 4th arg */
|
||||
call __audit_syscall_entry
|
||||
popl %ecx /* get that remapped edx off the stack */
|
||||
popl %ecx /* get that remapped esi off the stack */
|
||||
movl PT_EAX(%esp), %eax /* reload syscall number */
|
||||
jmp sysenter_do_call
|
||||
|
||||
sysexit_audit:
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
||||
jnz syscall_exit_work
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_ANY)
|
||||
movl %eax, %edx /* second arg, syscall return value */
|
||||
cmpl $-MAX_ERRNO, %eax /* is it an error ? */
|
||||
setbe %al /* 1 if so, 0 if not */
|
||||
movzbl %al, %eax /* zero-extend that */
|
||||
call __audit_syscall_exit
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
||||
jnz syscall_exit_work
|
||||
movl PT_EAX(%esp), %eax /* reload syscall return value */
|
||||
jmp sysenter_exit
|
||||
#endif
|
||||
|
||||
.pushsection .fixup, "ax"
|
||||
2: movl $0, PT_FS(%esp)
|
||||
jmp 1b
|
||||
|
@ -421,13 +373,7 @@ syscall_after_call:
|
|||
movl %eax, PT_EAX(%esp) # store the return value
|
||||
syscall_exit:
|
||||
LOCKDEP_SYS_EXIT
|
||||
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
||||
# setting need_resched or sigpending
|
||||
# between sampling and the iret
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $_TIF_ALLWORK_MASK, %ecx # current->work
|
||||
jnz syscall_exit_work
|
||||
jmp syscall_exit_work
|
||||
|
||||
restore_all:
|
||||
TRACE_IRQS_IRET
|
||||
|
@ -504,57 +450,6 @@ ldt_ss:
|
|||
#endif
|
||||
ENDPROC(entry_INT80_32)
|
||||
|
||||
# perform work that needs to be done immediately before resumption
|
||||
ALIGN
|
||||
work_pending:
|
||||
testb $_TIF_NEED_RESCHED, %cl
|
||||
jz work_notifysig
|
||||
work_resched:
|
||||
call schedule
|
||||
LOCKDEP_SYS_EXIT
|
||||
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
||||
# setting need_resched or sigpending
|
||||
# between sampling and the iret
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
|
||||
# than syscall tracing?
|
||||
jz restore_all
|
||||
testb $_TIF_NEED_RESCHED, %cl
|
||||
jnz work_resched
|
||||
|
||||
work_notifysig: # deal with pending signals and
|
||||
# notify-resume requests
|
||||
#ifdef CONFIG_VM86
|
||||
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
|
||||
movl %esp, %eax
|
||||
jnz work_notifysig_v86 # returning to kernel-space or
|
||||
# vm86-space
|
||||
1:
|
||||
#else
|
||||
movl %esp, %eax
|
||||
#endif
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movb PT_CS(%esp), %bl
|
||||
andb $SEGMENT_RPL_MASK, %bl
|
||||
cmpb $USER_RPL, %bl
|
||||
jb resume_kernel
|
||||
xorl %edx, %edx
|
||||
call do_notify_resume
|
||||
jmp resume_userspace
|
||||
|
||||
#ifdef CONFIG_VM86
|
||||
ALIGN
|
||||
work_notifysig_v86:
|
||||
pushl %ecx # save ti_flags for do_notify_resume
|
||||
call save_v86_state # %eax contains pt_regs pointer
|
||||
popl %ecx
|
||||
movl %eax, %esp
|
||||
jmp 1b
|
||||
#endif
|
||||
END(work_pending)
|
||||
|
||||
# perform syscall exit tracing
|
||||
ALIGN
|
||||
syscall_trace_entry:
|
||||
|
@ -569,15 +464,14 @@ END(syscall_trace_entry)
|
|||
|
||||
# perform syscall exit tracing
|
||||
ALIGN
|
||||
syscall_exit_work:
|
||||
testl $_TIF_WORK_SYSCALL_EXIT, %ecx
|
||||
jz work_pending
|
||||
syscall_exit_work_irqs_off:
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
|
||||
# schedule() instead
|
||||
ENABLE_INTERRUPTS(CLBR_ANY)
|
||||
|
||||
syscall_exit_work:
|
||||
movl %esp, %eax
|
||||
call syscall_trace_leave
|
||||
jmp resume_userspace
|
||||
call syscall_return_slowpath
|
||||
jmp restore_all
|
||||
END(syscall_exit_work)
|
||||
|
||||
syscall_fault:
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#include <asm/paravirt.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/context_tracking.h>
|
||||
#include <asm/smap.h>
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <linux/err.h>
|
||||
|
@ -229,6 +228,11 @@ entry_SYSCALL_64_fastpath:
|
|||
*/
|
||||
USERGS_SYSRET64
|
||||
|
||||
GLOBAL(int_ret_from_sys_call_irqs_off)
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
/* Do syscall entry tracing */
|
||||
tracesys:
|
||||
movq %rsp, %rdi
|
||||
|
@ -272,69 +276,11 @@ tracesys_phase2:
|
|||
* Has correct iret frame.
|
||||
*/
|
||||
GLOBAL(int_ret_from_sys_call)
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
|
||||
TRACE_IRQS_OFF
|
||||
movl $_TIF_ALLWORK_MASK, %edi
|
||||
/* edi: mask to check */
|
||||
GLOBAL(int_with_check)
|
||||
LOCKDEP_SYS_EXIT_IRQ
|
||||
GET_THREAD_INFO(%rcx)
|
||||
movl TI_flags(%rcx), %edx
|
||||
andl %edi, %edx
|
||||
jnz int_careful
|
||||
andl $~TS_COMPAT, TI_status(%rcx)
|
||||
jmp syscall_return
|
||||
|
||||
/*
|
||||
* Either reschedule or signal or syscall exit tracking needed.
|
||||
* First do a reschedule test.
|
||||
* edx: work, edi: workmask
|
||||
*/
|
||||
int_careful:
|
||||
bt $TIF_NEED_RESCHED, %edx
|
||||
jnc int_very_careful
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
pushq %rdi
|
||||
SCHEDULE_USER
|
||||
popq %rdi
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
jmp int_with_check
|
||||
|
||||
/* handle signals and tracing -- both require a full pt_regs */
|
||||
int_very_careful:
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
SAVE_EXTRA_REGS
|
||||
/* Check for syscall exit trace */
|
||||
testl $_TIF_WORK_SYSCALL_EXIT, %edx
|
||||
jz int_signal
|
||||
pushq %rdi
|
||||
leaq 8(%rsp), %rdi /* &ptregs -> arg1 */
|
||||
call syscall_trace_leave
|
||||
popq %rdi
|
||||
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi
|
||||
jmp int_restore_rest
|
||||
|
||||
int_signal:
|
||||
testl $_TIF_DO_NOTIFY_MASK, %edx
|
||||
jz 1f
|
||||
movq %rsp, %rdi /* &ptregs -> arg1 */
|
||||
xorl %esi, %esi /* oldset -> arg2 */
|
||||
call do_notify_resume
|
||||
1: movl $_TIF_WORK_MASK, %edi
|
||||
int_restore_rest:
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
RESTORE_EXTRA_REGS
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
jmp int_with_check
|
||||
|
||||
syscall_return:
|
||||
/* The IRETQ could re-enable interrupts: */
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_IRETQ
|
||||
TRACE_IRQS_IRETQ /* we're about to change IF */
|
||||
|
||||
/*
|
||||
* Try to use SYSRET instead of IRET if we're returning to
|
||||
|
@ -555,23 +501,22 @@ END(irq_entries_start)
|
|||
/* 0(%rsp): ~(interrupt number) */
|
||||
.macro interrupt func
|
||||
cld
|
||||
/*
|
||||
* Since nothing in interrupt handling code touches r12...r15 members
|
||||
* of "struct pt_regs", and since interrupts can nest, we can save
|
||||
* four stack slots and simultaneously provide
|
||||
* an unwind-friendly stack layout by saving "truncated" pt_regs
|
||||
* exactly up to rbp slot, without these members.
|
||||
*/
|
||||
ALLOC_PT_GPREGS_ON_STACK -RBP
|
||||
SAVE_C_REGS -RBP
|
||||
/* this goes to 0(%rsp) for unwinder, not for saving the value: */
|
||||
SAVE_EXTRA_REGS_RBP -RBP
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
SAVE_C_REGS
|
||||
SAVE_EXTRA_REGS
|
||||
|
||||
leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */
|
||||
|
||||
testb $3, CS-RBP(%rsp)
|
||||
testb $3, CS(%rsp)
|
||||
jz 1f
|
||||
|
||||
/*
|
||||
* IRQ from user mode. Switch to kernel gsbase and inform context
|
||||
* tracking that we're in kernel mode.
|
||||
*/
|
||||
SWAPGS
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
call enter_from_user_mode
|
||||
#endif
|
||||
|
||||
1:
|
||||
/*
|
||||
* Save previous stack pointer, optionally switch to interrupt stack.
|
||||
|
@ -580,14 +525,14 @@ END(irq_entries_start)
|
|||
* a little cheaper to use a separate counter in the PDA (short of
|
||||
* moving irq_enter into assembly, which would be too much work)
|
||||
*/
|
||||
movq %rsp, %rsi
|
||||
movq %rsp, %rdi
|
||||
incl PER_CPU_VAR(irq_count)
|
||||
cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
|
||||
pushq %rsi
|
||||
pushq %rdi
|
||||
/* We entered an interrupt context - irqs are off: */
|
||||
TRACE_IRQS_OFF
|
||||
|
||||
call \func
|
||||
call \func /* rdi points to pt_regs */
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
@ -606,34 +551,19 @@ ret_from_intr:
|
|||
decl PER_CPU_VAR(irq_count)
|
||||
|
||||
/* Restore saved previous stack */
|
||||
popq %rsi
|
||||
/* return code expects complete pt_regs - adjust rsp accordingly: */
|
||||
leaq -RBP(%rsi), %rsp
|
||||
popq %rsp
|
||||
|
||||
testb $3, CS(%rsp)
|
||||
jz retint_kernel
|
||||
|
||||
/* Interrupt came from user space */
|
||||
retint_user:
|
||||
GET_THREAD_INFO(%rcx)
|
||||
|
||||
/* %rcx: thread info. Interrupts are off. */
|
||||
retint_with_reschedule:
|
||||
movl $_TIF_WORK_MASK, %edi
|
||||
retint_check:
|
||||
LOCKDEP_SYS_EXIT_IRQ
|
||||
movl TI_flags(%rcx), %edx
|
||||
andl %edi, %edx
|
||||
jnz retint_careful
|
||||
|
||||
retint_swapgs: /* return to user-space */
|
||||
/*
|
||||
* The iretq could re-enable interrupts:
|
||||
*/
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
GLOBAL(retint_user)
|
||||
mov %rsp,%rdi
|
||||
call prepare_exit_to_usermode
|
||||
TRACE_IRQS_IRETQ
|
||||
|
||||
SWAPGS
|
||||
jmp restore_c_regs_and_iret
|
||||
jmp restore_regs_and_iret
|
||||
|
||||
/* Returning to kernel space */
|
||||
retint_kernel:
|
||||
|
@ -657,6 +587,8 @@ retint_kernel:
|
|||
* At this label, code paths which return to kernel and to user,
|
||||
* which come from interrupts/exception and from syscalls, merge.
|
||||
*/
|
||||
restore_regs_and_iret:
|
||||
RESTORE_EXTRA_REGS
|
||||
restore_c_regs_and_iret:
|
||||
RESTORE_C_REGS
|
||||
REMOVE_PT_GPREGS_FROM_STACK 8
|
||||
|
@ -707,37 +639,6 @@ native_irq_return_ldt:
|
|||
popq %rax
|
||||
jmp native_irq_return_iret
|
||||
#endif
|
||||
|
||||
/* edi: workmask, edx: work */
|
||||
retint_careful:
|
||||
bt $TIF_NEED_RESCHED, %edx
|
||||
jnc retint_signal
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
pushq %rdi
|
||||
SCHEDULE_USER
|
||||
popq %rdi
|
||||
GET_THREAD_INFO(%rcx)
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
jmp retint_check
|
||||
|
||||
retint_signal:
|
||||
testl $_TIF_DO_NOTIFY_MASK, %edx
|
||||
jz retint_swapgs
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
SAVE_EXTRA_REGS
|
||||
movq $-1, ORIG_RAX(%rsp)
|
||||
xorl %esi, %esi /* oldset */
|
||||
movq %rsp, %rdi /* &pt_regs */
|
||||
call do_notify_resume
|
||||
RESTORE_EXTRA_REGS
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
GET_THREAD_INFO(%rcx)
|
||||
jmp retint_with_reschedule
|
||||
|
||||
END(common_interrupt)
|
||||
|
||||
/*
|
||||
|
@ -1143,12 +1044,22 @@ ENTRY(error_entry)
|
|||
SAVE_EXTRA_REGS 8
|
||||
xorl %ebx, %ebx
|
||||
testb $3, CS+8(%rsp)
|
||||
jz error_kernelspace
|
||||
jz .Lerror_kernelspace
|
||||
|
||||
/* We entered from user mode */
|
||||
.Lerror_entry_from_usermode_swapgs:
|
||||
/*
|
||||
* We entered from user mode or we're pretending to have entered
|
||||
* from user mode due to an IRET fault.
|
||||
*/
|
||||
SWAPGS
|
||||
|
||||
error_entry_done:
|
||||
.Lerror_entry_from_usermode_after_swapgs:
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
call enter_from_user_mode
|
||||
#endif
|
||||
|
||||
.Lerror_entry_done:
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
ret
|
||||
|
||||
|
@ -1158,31 +1069,30 @@ error_entry_done:
|
|||
* truncated RIP for IRET exceptions returning to compat mode. Check
|
||||
* for these here too.
|
||||
*/
|
||||
error_kernelspace:
|
||||
.Lerror_kernelspace:
|
||||
incl %ebx
|
||||
leaq native_irq_return_iret(%rip), %rcx
|
||||
cmpq %rcx, RIP+8(%rsp)
|
||||
je error_bad_iret
|
||||
je .Lerror_bad_iret
|
||||
movl %ecx, %eax /* zero extend */
|
||||
cmpq %rax, RIP+8(%rsp)
|
||||
je bstep_iret
|
||||
je .Lbstep_iret
|
||||
cmpq $gs_change, RIP+8(%rsp)
|
||||
jne error_entry_done
|
||||
jne .Lerror_entry_done
|
||||
|
||||
/*
|
||||
* hack: gs_change can fail with user gsbase. If this happens, fix up
|
||||
* gsbase and proceed. We'll fix up the exception and land in
|
||||
* gs_change's error handler with kernel gsbase.
|
||||
*/
|
||||
SWAPGS
|
||||
jmp error_entry_done
|
||||
jmp .Lerror_entry_from_usermode_swapgs
|
||||
|
||||
bstep_iret:
|
||||
.Lbstep_iret:
|
||||
/* Fix truncated RIP */
|
||||
movq %rcx, RIP+8(%rsp)
|
||||
/* fall through */
|
||||
|
||||
error_bad_iret:
|
||||
.Lerror_bad_iret:
|
||||
/*
|
||||
* We came from an IRET to user mode, so we have user gsbase.
|
||||
* Switch to kernel gsbase:
|
||||
|
@ -1198,7 +1108,7 @@ error_bad_iret:
|
|||
call fixup_bad_iret
|
||||
mov %rax, %rsp
|
||||
decl %ebx
|
||||
jmp error_entry_done
|
||||
jmp .Lerror_entry_from_usermode_after_swapgs
|
||||
END(error_entry)
|
||||
|
||||
|
||||
|
@ -1209,7 +1119,6 @@ END(error_entry)
|
|||
*/
|
||||
ENTRY(error_exit)
|
||||
movl %ebx, %eax
|
||||
RESTORE_EXTRA_REGS
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl %eax, %eax
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
#define __AUDIT_ARCH_LE 0x40000000
|
||||
|
||||
#ifndef CONFIG_AUDITSYSCALL
|
||||
# define sysexit_audit ia32_ret_from_sys_call
|
||||
# define sysretl_audit ia32_ret_from_sys_call
|
||||
# define sysexit_audit ia32_ret_from_sys_call_irqs_off
|
||||
# define sysretl_audit ia32_ret_from_sys_call_irqs_off
|
||||
#endif
|
||||
|
||||
.section .entry.text, "ax"
|
||||
|
@ -141,7 +141,8 @@ sysexit_from_sys_call:
|
|||
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
movl RIP(%rsp), %ecx /* User %eip */
|
||||
movq RAX(%rsp), %rax
|
||||
RESTORE_RSI_RDI
|
||||
movl RSI(%rsp), %esi
|
||||
movl RDI(%rsp), %edi
|
||||
xorl %edx, %edx /* Do not leak kernel information */
|
||||
xorq %r8, %r8
|
||||
xorq %r9, %r9
|
||||
|
@ -209,10 +210,10 @@ sysexit_from_sys_call:
|
|||
.endm
|
||||
|
||||
.macro auditsys_exit exit
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_ret_from_sys_call
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_ret_from_sys_call
|
||||
movl %eax, %esi /* second arg, syscall return value */
|
||||
cmpl $-MAX_ERRNO, %eax /* is it an error ? */
|
||||
jbe 1f
|
||||
|
@ -230,7 +231,7 @@ sysexit_from_sys_call:
|
|||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
jmp int_with_check
|
||||
jmp int_ret_from_sys_call_irqs_off
|
||||
.endm
|
||||
|
||||
sysenter_auditsys:
|
||||
|
@ -365,7 +366,9 @@ cstar_dispatch:
|
|||
|
||||
sysretl_from_sys_call:
|
||||
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
RESTORE_RSI_RDI_RDX
|
||||
movl RDX(%rsp), %edx
|
||||
movl RSI(%rsp), %esi
|
||||
movl RDI(%rsp), %edi
|
||||
movl RIP(%rsp), %ecx
|
||||
movl EFLAGS(%rsp), %r11d
|
||||
movq RAX(%rsp), %rax
|
||||
|
@ -430,8 +433,48 @@ cstar_tracesys:
|
|||
END(entry_SYSCALL_compat)
|
||||
|
||||
ia32_badarg:
|
||||
ASM_CLAC
|
||||
movq $-EFAULT, RAX(%rsp)
|
||||
/*
|
||||
* So far, we've entered kernel mode, set AC, turned on IRQs, and
|
||||
* saved C regs except r8-r11. We haven't done any of the other
|
||||
* standard entry work, though. We want to bail, but we shouldn't
|
||||
* treat this as a syscall entry since we don't even know what the
|
||||
* args are. Instead, treat this as a non-syscall entry, finish
|
||||
* the entry work, and immediately exit after setting AX = -EFAULT.
|
||||
*
|
||||
* We're really just being polite here. Killing the task outright
|
||||
* would be a reasonable action, too. Given that the only valid
|
||||
* way to have gotten here is through the vDSO, and we already know
|
||||
* that the stack pointer is bad, the task isn't going to survive
|
||||
* for long no matter what we do.
|
||||
*/
|
||||
|
||||
ASM_CLAC /* undo STAC */
|
||||
movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */
|
||||
|
||||
/* Fill in the rest of pt_regs */
|
||||
xorl %eax, %eax
|
||||
movq %rax, R11(%rsp)
|
||||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
SAVE_EXTRA_REGS
|
||||
|
||||
/* Turn IRQs back off. */
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
|
||||
/* Now finish entering normal kernel mode. */
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
call enter_from_user_mode
|
||||
#endif
|
||||
|
||||
/* And exit again. */
|
||||
jmp retint_user
|
||||
|
||||
ia32_ret_from_sys_call_irqs_off:
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
ia32_ret_from_sys_call:
|
||||
xorl %eax, %eax /* Do not leak kernel information */
|
||||
movq %rax, R11(%rsp)
|
||||
|
|
|
@ -365,3 +365,18 @@
|
|||
356 i386 memfd_create sys_memfd_create
|
||||
357 i386 bpf sys_bpf
|
||||
358 i386 execveat sys_execveat stub32_execveat
|
||||
359 i386 socket sys_socket
|
||||
360 i386 socketpair sys_socketpair
|
||||
361 i386 bind sys_bind
|
||||
362 i386 connect sys_connect
|
||||
363 i386 listen sys_listen
|
||||
364 i386 accept4 sys_accept4
|
||||
365 i386 getsockopt sys_getsockopt compat_sys_getsockopt
|
||||
366 i386 setsockopt sys_setsockopt compat_sys_setsockopt
|
||||
367 i386 getsockname sys_getsockname
|
||||
368 i386 getpeername sys_getpeername
|
||||
369 i386 sendto sys_sendto
|
||||
370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
|
||||
371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
|
||||
372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
|
||||
373 i386 shutdown sys_shutdown
|
||||
|
|
|
@ -8,7 +8,7 @@ KASAN_SANITIZE := n
|
|||
VDSO64-$(CONFIG_X86_64) := y
|
||||
VDSOX32-$(CONFIG_X86_X32_ABI) := y
|
||||
VDSO32-$(CONFIG_X86_32) := y
|
||||
VDSO32-$(CONFIG_COMPAT) := y
|
||||
VDSO32-$(CONFIG_IA32_EMULATION) := y
|
||||
|
||||
# files to link into the vdso
|
||||
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
|
||||
|
@ -20,7 +20,7 @@ obj-y += vma.o
|
|||
vdso_img-$(VDSO64-y) += 64
|
||||
vdso_img-$(VDSOX32-y) += x32
|
||||
vdso_img-$(VDSO32-y) += 32-int80
|
||||
vdso_img-$(CONFIG_COMPAT) += 32-syscall
|
||||
vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall
|
||||
vdso_img-$(VDSO32-y) += 32-sysenter
|
||||
|
||||
obj-$(VDSO32-y) += vdso32-setup.o
|
||||
|
@ -126,7 +126,7 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
|
|||
# Build multiple 32-bit vDSO images to choose from at boot time.
|
||||
#
|
||||
vdso32.so-$(VDSO32-y) += int80
|
||||
vdso32.so-$(CONFIG_COMPAT) += syscall
|
||||
vdso32.so-$(CONFIG_IA32_EMULATION) += syscall
|
||||
vdso32.so-$(VDSO32-y) += sysenter
|
||||
|
||||
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
|
||||
|
@ -175,7 +175,7 @@ quiet_cmd_vdso = VDSO $@
|
|||
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
|
||||
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
|
||||
|
||||
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
|
||||
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \
|
||||
$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
|
||||
GCOV_PROFILE := n
|
||||
|
||||
|
|
|
@ -175,20 +175,8 @@ static notrace cycle_t vread_pvclock(int *mode)
|
|||
|
||||
notrace static cycle_t vread_tsc(void)
|
||||
{
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
|
||||
/*
|
||||
* Empirically, a fence (of type that depends on the CPU)
|
||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||
* with respect to loads. The various CPU manuals are unclear
|
||||
* as to whether rdtsc can be reordered with later loads,
|
||||
* but no one has ever seen it happen.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
ret = (cycle_t)__native_read_tsc();
|
||||
|
||||
last = gtod->cycle_last;
|
||||
cycle_t ret = (cycle_t)rdtsc_ordered();
|
||||
u64 last = gtod->cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
|
|
@ -177,7 +177,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
||||
static int load_vdso32(void)
|
||||
{
|
||||
int ret;
|
||||
|
@ -219,8 +219,11 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
|
|||
return map_vdso(&vdso_image_x32, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
return load_vdso32();
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
|
|
|
@ -290,7 +290,7 @@ static struct vm_area_struct gate_vma = {
|
|||
|
||||
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (!mm || mm->context.ia32_compat)
|
||||
return NULL;
|
||||
#endif
|
||||
|
|
|
@ -34,99 +34,6 @@
|
|||
#include <asm/sys_ia32.h>
|
||||
#include <asm/smap.h>
|
||||
|
||||
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
|
||||
{
|
||||
int err = 0;
|
||||
bool ia32 = test_thread_flag(TIF_IA32);
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
|
||||
return -EFAULT;
|
||||
|
||||
put_user_try {
|
||||
/* If you change siginfo_t structure, please make sure that
|
||||
this code is fixed accordingly.
|
||||
It should never copy any pad contained in the structure
|
||||
to avoid security leaks, but must copy the generic
|
||||
3 ints plus the relevant union member. */
|
||||
put_user_ex(from->si_signo, &to->si_signo);
|
||||
put_user_ex(from->si_errno, &to->si_errno);
|
||||
put_user_ex((short)from->si_code, &to->si_code);
|
||||
|
||||
if (from->si_code < 0) {
|
||||
put_user_ex(from->si_pid, &to->si_pid);
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
|
||||
} else {
|
||||
/*
|
||||
* First 32bits of unions are always present:
|
||||
* si_pid === si_band === si_tid === si_addr(LS half)
|
||||
*/
|
||||
put_user_ex(from->_sifields._pad[0],
|
||||
&to->_sifields._pad[0]);
|
||||
switch (from->si_code >> 16) {
|
||||
case __SI_FAULT >> 16:
|
||||
break;
|
||||
case __SI_SYS >> 16:
|
||||
put_user_ex(from->si_syscall, &to->si_syscall);
|
||||
put_user_ex(from->si_arch, &to->si_arch);
|
||||
break;
|
||||
case __SI_CHLD >> 16:
|
||||
if (ia32) {
|
||||
put_user_ex(from->si_utime, &to->si_utime);
|
||||
put_user_ex(from->si_stime, &to->si_stime);
|
||||
} else {
|
||||
put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
|
||||
put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
|
||||
}
|
||||
put_user_ex(from->si_status, &to->si_status);
|
||||
/* FALL THROUGH */
|
||||
default:
|
||||
case __SI_KILL >> 16:
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
break;
|
||||
case __SI_POLL >> 16:
|
||||
put_user_ex(from->si_fd, &to->si_fd);
|
||||
break;
|
||||
case __SI_TIMER >> 16:
|
||||
put_user_ex(from->si_overrun, &to->si_overrun);
|
||||
put_user_ex(ptr_to_compat(from->si_ptr),
|
||||
&to->si_ptr);
|
||||
break;
|
||||
/* This is not generated by the kernel as of now. */
|
||||
case __SI_RT >> 16:
|
||||
case __SI_MESGQ >> 16:
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
put_user_ex(from->si_int, &to->si_int);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} put_user_catch(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
|
||||
{
|
||||
int err = 0;
|
||||
u32 ptr32;
|
||||
|
||||
if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
|
||||
return -EFAULT;
|
||||
|
||||
get_user_try {
|
||||
get_user_ex(to->si_signo, &from->si_signo);
|
||||
get_user_ex(to->si_errno, &from->si_errno);
|
||||
get_user_ex(to->si_code, &from->si_code);
|
||||
|
||||
get_user_ex(to->si_pid, &from->si_pid);
|
||||
get_user_ex(to->si_uid, &from->si_uid);
|
||||
get_user_ex(ptr32, &from->si_ptr);
|
||||
to->si_ptr = compat_ptr(ptr32);
|
||||
} get_user_catch(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do a signal return; undo the signal stack.
|
||||
*/
|
||||
|
|
|
@@ -91,15 +91,4 @@ do { \
#define smp_mb__before_atomic() barrier()
#define smp_mb__after_atomic() barrier()

/*
 * Stop RDTSC speculation. This is needed when you need to use RDTSC
 * (or get_cycles or vread that possibly accesses the TSC) in a defined
 * code region.
 */
static __always_inline void rdtsc_barrier(void)
{
        alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
                      "lfence", X86_FEATURE_LFENCE_RDTSC);
}

#endif /* _ASM_X86_BARRIER_H */

@ -1,10 +0,0 @@
|
|||
#ifndef _ASM_X86_CONTEXT_TRACKING_H
|
||||
#define _ASM_X86_CONTEXT_TRACKING_H
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
# define SCHEDULE_USER call schedule_user
|
||||
#else
|
||||
# define SCHEDULE_USER call schedule
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -176,6 +176,7 @@
|
|||
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
|
||||
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
|
||||
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
|
||||
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
|
||||
|
||||
/*
|
||||
* Auxiliary flags: Linux defined - For features scattered in various
|
||||
|
|
|
@ -4,5 +4,6 @@
|
|||
#include <asm-generic/delay.h>
|
||||
|
||||
void use_tsc_delay(void);
|
||||
void use_mwaitx_delay(void);
|
||||
|
||||
#endif /* _ASM_X86_DELAY_H */
|
||||
|
|
|
@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
|
|||
#ifdef CONFIG_X86_64
|
||||
extern unsigned int vdso64_enabled;
|
||||
#endif
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
||||
extern unsigned int vdso32_enabled;
|
||||
#endif
|
||||
|
||||
|
@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t,
|
|||
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
|
||||
elf_common_init(¤t->thread, regs, __USER_DS)
|
||||
|
||||
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
|
||||
#define compat_start_thread start_thread_ia32
|
||||
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
|
||||
#define compat_start_thread compat_start_thread
|
||||
|
||||
void set_personality_ia32(bool);
|
||||
#define COMPAT_SET_PERSONALITY(ex) \
|
||||
|
@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
|
|||
*/
|
||||
static inline int mmap_is_ia32(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return 1;
|
||||
#endif
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (test_thread_flag(TIF_ADDR32))
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
return config_enabled(CONFIG_X86_32) ||
|
||||
(config_enabled(CONFIG_COMPAT) &&
|
||||
test_thread_flag(TIF_ADDR32));
|
||||
}
|
||||
|
||||
/* Do not change the values. See get_align_mask() */
|
||||
|
|
|
@ -22,15 +22,6 @@ struct ucontext_ia32 {
|
|||
compat_sigset_t uc_sigmask; /* mask last for extensibility */
|
||||
};
|
||||
|
||||
struct ucontext_x32 {
|
||||
unsigned int uc_flags;
|
||||
unsigned int uc_link;
|
||||
compat_stack_t uc_stack;
|
||||
unsigned int uc__pad0; /* needed for alignment */
|
||||
struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
|
||||
compat_sigset_t uc_sigmask; /* mask last for extensibility */
|
||||
};
|
||||
|
||||
/* This matches struct stat64 in glibc2.2, hence the absolutely
|
||||
* insane amounts of padding around dev_t's.
|
||||
*/
|
||||
|
|
|
@ -117,16 +117,6 @@
|
|||
|
||||
#define FPU_IRQ 13
|
||||
|
||||
#define FIRST_VM86_IRQ 3
|
||||
#define LAST_VM86_IRQ 15
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
static inline int invalid_vm86_irq(int irq)
|
||||
{
|
||||
return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Size the maximum number of interrupts.
|
||||
*
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
#define _ASM_X86_MATH_EMU_H
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
/* This structure matches the layout of the data saved to the stack
|
||||
following a device-not-present interrupt, part of it saved
|
||||
|
@ -10,9 +9,6 @@
|
|||
*/
|
||||
struct math_emu_info {
|
||||
long ___orig_eip;
|
||||
union {
|
||||
struct pt_regs *regs;
|
||||
struct kernel_vm86_regs *vm86;
|
||||
};
|
||||
struct pt_regs *regs;
|
||||
};
|
||||
#endif /* _ASM_X86_MATH_EMU_H */
|
||||
|
|
|
@ -9,7 +9,9 @@
|
|||
* we put the segment information here.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* True if mm supports a task running in 32 bit compatibility mode. */
|
||||
|
|
|
@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm)
|
|||
static inline void load_mm_cr4(struct mm_struct *mm) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
/*
|
||||
* ldt_structs can be allocated, used, and freed, but they are never
|
||||
* modified while live.
|
||||
|
@ -48,8 +49,23 @@ struct ldt_struct {
|
|||
int size;
|
||||
};
|
||||
|
||||
/*
|
||||
* Used for LDT copy/destruction.
|
||||
*/
|
||||
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
||||
void destroy_context(struct mm_struct *mm);
|
||||
#else /* CONFIG_MODIFY_LDT_SYSCALL */
|
||||
static inline int init_new_context(struct task_struct *tsk,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void destroy_context(struct mm_struct *mm) {}
|
||||
#endif
|
||||
|
||||
static inline void load_mm_ldt(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
/* lockless_dereference synchronizes with smp_store_release */
|
||||
|
@ -73,17 +89,13 @@ static inline void load_mm_ldt(struct mm_struct *mm)
|
|||
set_ldt(ldt->entries, ldt->size);
|
||||
else
|
||||
clear_LDT();
|
||||
#else
|
||||
clear_LDT();
|
||||
#endif
|
||||
|
||||
DEBUG_LOCKS_WARN_ON(preemptible());
|
||||
}
|
||||
|
||||
/*
|
||||
* Used for LDT copy/destruction.
|
||||
*/
|
||||
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
||||
void destroy_context(struct mm_struct *mm);
|
||||
|
||||
|
||||
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
|
@ -114,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|||
/* Load per-mm CR4 state */
|
||||
load_mm_cr4(next);
|
||||
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
/*
|
||||
* Load the LDT, if the LDT is different.
|
||||
*
|
||||
|
@ -128,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|||
*/
|
||||
if (unlikely(prev->context.ldt != next->context.ldt))
|
||||
load_mm_ldt(next);
|
||||
#endif
|
||||
}
|
||||
#ifdef CONFIG_SMP
|
||||
else {
|
||||
|
|
|
@@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
 * it means rax *or* rdx.
 */
#ifdef CONFIG_X86_64
#define DECLARE_ARGS(val, low, high) unsigned low, high
#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
/* Using 64-bit values saves one instruction clearing the high half of low */
#define DECLARE_ARGS(val, low, high) unsigned long low, high
#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
#define DECLARE_ARGS(val, low, high) unsigned long long val
#define EAX_EDX_VAL(val, low, high) (val)
#define EAX_EDX_ARGS(val, low, high) "A" (val)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif

@@ -106,12 +105,19 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
        return err;
}

extern unsigned long long native_read_tsc(void);

extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);

static __always_inline unsigned long long __native_read_tsc(void)
/**
 * rdtsc() - returns the current TSC without ordering constraints
 *
 * rdtsc() returns the result of RDTSC as a 64-bit integer. The
 * only ordering constraint it supplies is the ordering implied by
 * "asm volatile": it will put the RDTSC in the place you expect. The
 * CPU can and will speculatively execute that RDTSC, though, so the
 * results can be non-monotonic if compared on different CPUs.
 */
static __always_inline unsigned long long rdtsc(void)
{
        DECLARE_ARGS(val, low, high);

@ -120,6 +126,35 @@ static __always_inline unsigned long long __native_read_tsc(void)
|
|||
return EAX_EDX_VAL(val, low, high);
|
||||
}
|
||||
|
||||
/**
|
||||
* rdtsc_ordered() - read the current TSC in program order
|
||||
*
|
||||
* rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
|
||||
* It is ordered like a load to a global in-memory counter. It should
|
||||
* be impossible to observe non-monotonic rdtsc_unordered() behavior
|
||||
* across multiple CPUs as long as the TSC is synced.
|
||||
*/
|
||||
static __always_inline unsigned long long rdtsc_ordered(void)
|
||||
{
|
||||
/*
|
||||
* The RDTSC instruction is not ordered relative to memory
|
||||
* access. The Intel SDM and the AMD APM are both vague on this
|
||||
* point, but empirically an RDTSC instruction can be
|
||||
* speculatively executed before prior loads. An RDTSC
|
||||
* immediately after an appropriate barrier appears to be
|
||||
* ordered as a normal load, that is, it provides the same
|
||||
* ordering guarantees as reading from a global memory location
|
||||
* that some other imaginary CPU is updating continuously with a
|
||||
* time stamp.
|
||||
*/
|
||||
alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
|
||||
"lfence", X86_FEATURE_LFENCE_RDTSC);
|
||||
return rdtsc();
|
||||
}
|
||||
|
||||
/* Deprecated, keep it for a cycle for easier merging: */
|
||||
#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0)
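The same ordering concern shows up in user space when timing short code sequences. A minimal user-space sketch of the difference, using compiler intrinsics rather than the kernel helpers above (it assumes that LFENCE orders RDTSC against earlier instructions, which is what the kernel flags with X86_FEATURE_LFENCE_RDTSC; on other CPUs MFENCE would be needed):

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>		/* __rdtsc() */
#include <emmintrin.h>		/* _mm_lfence() */

static inline uint64_t tsc_unordered(void)
{
	return __rdtsc();	/* may be speculated past earlier work */
}

static inline uint64_t tsc_ordered(void)
{
	_mm_lfence();		/* keep RDTSC behind prior instructions */
	return __rdtsc();
}

int main(void)
{
	volatile unsigned long sink = 0;
	uint64_t t0, t1;
	unsigned long i;

	t0 = tsc_ordered();
	for (i = 0; i < 1000000; i++)
		sink += i;
	t1 = tsc_ordered();

	printf("loop took ~%llu TSC ticks\n", (unsigned long long)(t1 - t0));
	(void)tsc_unordered();
	return 0;
}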

static inline unsigned long long native_read_pmc(int counter)
{
	DECLARE_ARGS(val, low, high);

@@ -153,8 +188,10 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
#define rdmsrl(msr, val)			\
	((val) = native_read_msr((msr)))

#define wrmsrl(msr, val)						\
	native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32))
static inline void wrmsrl(unsigned msr, u64 val)
{
	native_write_msr(msr, (u32)val, (u32)(val >> 32));
}
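Turning wrmsrl() from a macro into a real inline function gets type checking on the value argument; the body is just the usual split of a 64-bit value into the EDX:EAX halves that the WRMSR instruction takes. A stand-alone illustration of that split (msr_split() is a made-up helper name for the example, not a kernel API):

#include <stdio.h>
#include <stdint.h>

static void msr_split(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)val;		/* bits 31:0  -> EAX */
	*hi = (uint32_t)(val >> 32);	/* bits 63:32 -> EDX */
}

int main(void)
{
	uint32_t lo, hi;

	msr_split(0x123456789abcdef0ULL, &lo, &hi);
	printf("lo=%#x hi=%#x\n", lo, hi);	/* lo=0x9abcdef0, hi=0x12345678 */
	return 0;
}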

/* wrmsr with exception handling */
static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)

@@ -180,12 +217,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
	return err;
}

#define rdtscl(low)						\
	((low) = (u32)__native_read_tsc())

#define rdtscll(val)						\
	((val) = __native_read_tsc())

#define rdpmc(counter, low, high)			\
do {							\
	u64 _l = native_read_pmc((counter));		\

@@ -195,15 +226,6 @@ do {							\

#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))

#define rdtscp(low, high, aux)					\
do {								\
	unsigned long long _val = native_read_tscp(&(aux));	\
	(low) = (u32)_val;					\
	(high) = (u32)(_val >> 32);				\
} while (0)

#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
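For reference, RDTSCP differs from RDTSC in that it also returns the IA32_TSC_AUX MSR in the aux output, which Linux fills in so a task can tell which CPU (and node) the timestamp came from. A user-space sketch using the compiler intrinsic rather than these kernel macros (the aux bit layout, CPU in the low 12 bits and node above, is an assumption about how the kernel programs TSC_AUX):

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>		/* __rdtscp() */

int main(void)
{
	unsigned int aux;
	uint64_t tsc = __rdtscp(&aux);

	printf("tsc=%llu aux=%#x (cpu=%u, node=%u)\n",
	       (unsigned long long)tsc, aux, aux & 0xfff, aux >> 12);
	return 0;
}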

#endif /* !CONFIG_PARAVIRT */

/*
|
||||
|
|
|

@@ -14,6 +14,9 @@
#define CPUID5_ECX_INTERRUPT_BREAK	0x2

#define MWAIT_ECX_INTERRUPT_BREAK	0x1
#define MWAITX_ECX_TIMER_ENABLE		BIT(1)
#define MWAITX_MAX_LOOPS		((u32)-1)
#define MWAITX_DISABLE_CSTATES		0xf

static inline void __monitor(const void *eax, unsigned long ecx,
			     unsigned long edx)

@@ -23,6 +26,14 @@ static inline void __monitor(const void *eax, unsigned long ecx,
		     :: "a" (eax), "c" (ecx), "d"(edx));
}

static inline void __monitorx(const void *eax, unsigned long ecx,
			      unsigned long edx)
{
	/* "monitorx %eax, %ecx, %edx;" */
	asm volatile(".byte 0x0f, 0x01, 0xfa;"
		     :: "a" (eax), "c" (ecx), "d"(edx));
}

static inline void __mwait(unsigned long eax, unsigned long ecx)
{
	/* "mwait %eax, %ecx;" */

@@ -30,6 +41,40 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
		     :: "a" (eax), "c" (ecx));
}

/*
 * MWAITX allows for a timer expiration to get the core out a wait state in
 * addition to the default MWAIT exit condition of a store appearing at a
 * monitored virtual address.
 *
 * Registers:
 *
 * MWAITX ECX[1]: enable timer if set
 * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0
 *		     frequency is the same as the TSC frequency.
 *
 * Below is a comparison between MWAIT and MWAITX on AMD processors:
 *
 *                 MWAIT                           MWAITX
 * opcode          0f 01 c9           |            0f 01 fb
 * ECX[0]                  value of RFLAGS.IF seen by instruction
 * ECX[1]          unused/#GP if set  |            enable timer if set
 * ECX[31:2]                     unused/#GP if set
 * EAX                           unused (reserve for hint)
 * EBX[31:0]       unused             |            max wait time (P0 clocks)
 *
 *                 MONITOR                         MONITORX
 * opcode          0f 01 c8           |            0f 01 fa
 * EAX                     (logical) address to monitor
 * ECX                     #GP if not zero
 */
static inline void __mwaitx(unsigned long eax, unsigned long ebx,
			    unsigned long ecx)
{
	/* "mwaitx %eax, %ebx, %ecx;" */
	asm volatile(".byte 0x0f, 0x01, 0xfb;"
		     :: "a" (eax), "b" (ebx), "c" (ecx));
}

static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
	trace_hardirqs_on();
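The changelog's MWAITX-based delay combines the helpers above with rdtsc_ordered(): arm a monitor, then sleep with the timer enabled so each iteration blocks for at most the remaining number of TSC ticks. A rough sketch of that loop, assuming kernel context (min_t() comes from <linux/kernel.h>); this is illustrative and not the exact delay.c implementation:

static void mwaitx_delay_sketch(u64 cycles)
{
	u64 start = rdtsc_ordered();
	u64 elapsed, remaining;

	for (;;) {
		elapsed = rdtsc_ordered() - start;
		if (elapsed >= cycles)
			break;

		remaining = min_t(u64, MWAITX_MAX_LOOPS, cycles - elapsed);

		/* Arm the monitor on a cacheline we own ... */
		__monitorx(&start, 0, 0);
		/* ... and wait: timer enabled, deepest C-states disabled. */
		__mwaitx(MWAITX_DISABLE_CSTATES, remaining, MWAITX_ECX_TIMER_ENABLE);
	}
}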
|
|
@@ -153,7 +153,11 @@ do { \
	val = paravirt_read_msr(msr, &_err);	\
} while (0)

#define wrmsrl(msr, val)	wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
static inline void wrmsrl(unsigned msr, u64 val)
{
	wrmsr(msr, (u32)val, (u32)(val>>32));
}

#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
|
@ -174,19 +178,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
|
|||
return err;
|
||||
}
|
||||
|
||||
static inline u64 paravirt_read_tsc(void)
|
||||
{
|
||||
return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
|
||||
}
|
||||
|
||||
#define rdtscl(low) \
|
||||
do { \
|
||||
u64 _l = paravirt_read_tsc(); \
|
||||
low = (int)_l; \
|
||||
} while (0)
|
||||
|
||||
#define rdtscll(val) (val = paravirt_read_tsc())
|
||||
|
||||
static inline unsigned long long paravirt_sched_clock(void)
|
||||
{
|
||||
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
|
||||
|
@ -215,27 +206,6 @@ do { \
|
|||
|
||||
#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
|
||||
|
||||
static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
|
||||
{
|
||||
return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
|
||||
}
|
||||
|
||||
#define rdtscp(low, high, aux) \
|
||||
do { \
|
||||
int __aux; \
|
||||
unsigned long __val = paravirt_rdtscp(&__aux); \
|
||||
(low) = (u32)__val; \
|
||||
(high) = (u32)(__val >> 32); \
|
||||
(aux) = __aux; \
|
||||
} while (0)
|
||||
|
||||
#define rdtscpll(val, aux) \
|
||||
do { \
|
||||
unsigned long __aux; \
|
||||
val = paravirt_rdtscp(&__aux); \
|
||||
(aux) = __aux; \
|
||||
} while (0)
|
||||
|
||||
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
|
||||
{
|
||||
PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
|
||||
|
|
|
@ -156,9 +156,7 @@ struct pv_cpu_ops {
|
|||
u64 (*read_msr)(unsigned int msr, int *err);
|
||||
int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
|
||||
|
||||
u64 (*read_tsc)(void);
|
||||
u64 (*read_pmc)(int counter);
|
||||
unsigned long long (*read_tscp)(unsigned int *aux);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
/* Forward declaration, a strange C thing */
|
||||
struct task_struct;
|
||||
struct mm_struct;
|
||||
struct vm86;
|
||||
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/math_emu.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/types.h>
|
||||
|
@ -400,15 +400,9 @@ struct thread_struct {
|
|||
unsigned long cr2;
|
||||
unsigned long trap_nr;
|
||||
unsigned long error_code;
|
||||
#ifdef CONFIG_X86_32
|
||||
#ifdef CONFIG_VM86
|
||||
/* Virtual 86 mode info */
|
||||
struct vm86_struct __user *vm86_info;
|
||||
unsigned long screen_bitmap;
|
||||
unsigned long v86flags;
|
||||
unsigned long v86mask;
|
||||
unsigned long saved_sp0;
|
||||
unsigned int saved_fs;
|
||||
unsigned int saved_gs;
|
||||
struct vm86 *vm86;
|
||||
#endif
|
||||
/* IO permissions: */
|
||||
unsigned long *io_bitmap_ptr;
|
||||
|
@ -720,7 +714,6 @@ static inline void spin_lock_prefetch(const void *x)
|
|||
|
||||
#define INIT_THREAD { \
|
||||
.sp0 = TOP_OF_INIT_STACK, \
|
||||
.vm86_info = NULL, \
|
||||
.sysenter_cs = __KERNEL_CS, \
|
||||
.io_bitmap_ptr = NULL, \
|
||||
}
|
||||
|
|
|
@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
|
|||
unsigned long phase1_result);
|
||||
|
||||
extern long syscall_trace_enter(struct pt_regs *);
|
||||
extern void syscall_trace_leave(struct pt_regs *);
|
||||
|
||||
static inline unsigned long regs_return_value(struct pt_regs *regs)
|
||||
{
|
||||
|
|
|
@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
|
|||
static __always_inline
|
||||
u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
|
||||
{
|
||||
u64 delta = __native_read_tsc() - src->tsc_timestamp;
|
||||
u64 delta = rdtsc_ordered() - src->tsc_timestamp;
|
||||
return pvclock_scale_delta(delta, src->tsc_to_system_mul,
|
||||
src->tsc_shift);
|
||||
}
|
||||
|
@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
|
|||
u8 ret_flags;
|
||||
|
||||
version = src->version;
|
||||
/* Note: emulated platforms which do not advertise SSE2 support
|
||||
* result in kvmclock not using the necessary RDTSC barriers.
|
||||
* Without barriers, it is possible that RDTSC instruction reads from
|
||||
* the time stamp counter outside rdtsc_barrier protected section
|
||||
* below, resulting in violation of monotonicity.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
|
||||
offset = pvclock_get_nsec_offset(src);
|
||||
ret = src->system_time + offset;
|
||||
ret_flags = src->flags;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <asm/sigcontext.h>
|
||||
#include <asm/siginfo.h>
|
||||
#include <asm/ucontext.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define sigframe_ia32 sigframe
|
||||
|
@ -69,6 +70,15 @@ struct rt_sigframe {
|
|||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
|
||||
struct ucontext_x32 {
|
||||
unsigned int uc_flags;
|
||||
unsigned int uc_link;
|
||||
compat_stack_t uc_stack;
|
||||
unsigned int uc__pad0; /* needed for alignment */
|
||||
struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
|
||||
compat_sigset_t uc_sigmask; /* mask last for extensibility */
|
||||
};
|
||||
|
||||
struct rt_sigframe_x32 {
|
||||
u64 pretcode;
|
||||
struct ucontext_x32 uc;
|
||||
|
|
|
@ -30,7 +30,7 @@ typedef sigset_t compat_sigset_t;
|
|||
#endif /* __ASSEMBLY__ */
|
||||
#include <uapi/asm/signal.h>
|
||||
#ifndef __ASSEMBLY__
|
||||
extern void do_notify_resume(struct pt_regs *, void *, __u32);
|
||||
extern void do_signal(struct pt_regs *regs);
|
||||
|
||||
#define __ARCH_HAS_SA_RESTORER
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void)
|
|||
* on during the bootup the random pool has true entropy too.
|
||||
*/
|
||||
get_random_bytes(&canary, sizeof(canary));
|
||||
tsc = __native_read_tsc();
|
||||
tsc = rdtsc();
|
||||
canary += tsc + (tsc << 32UL);
|
||||
|
||||
current->stack_canary = canary;
|
||||
|
|
|
@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *);
|
|||
asmlinkage unsigned long sys_sigreturn(void);
|
||||
|
||||
/* kernel/vm86_32.c */
|
||||
struct vm86_struct;
|
||||
asmlinkage long sys_vm86old(struct vm86_struct __user *);
|
||||
asmlinkage long sys_vm86(unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@@ -27,14 +27,17 @@
 * Without this offset, that can result in a page fault. (We are
 * careful that, in this case, the value we read doesn't matter.)
 *
 * In vm86 mode, the hardware frame is much longer still, but we neither
 * access the extra members from NMI context, nor do we write such a
 * frame at sp0 at all.
 * In vm86 mode, the hardware frame is much longer still, so add 16
 * bytes to make room for the real-mode segments.
 *
 * x86_64 has a fixed-length stack frame.
 */
#ifdef CONFIG_X86_32
# define TOP_OF_KERNEL_STACK_PADDING 8
# ifdef CONFIG_VM86
#  define TOP_OF_KERNEL_STACK_PADDING 16
# else
#  define TOP_OF_KERNEL_STACK_PADDING 8
# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
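The 16 bytes come from the CPU itself: an interrupt or exception taken while running in virtual-8086 mode pushes the four real-mode data segment registers (ES, DS, FS, GS), each widened to 32 bits, on top of the usual SS:SP/FLAGS/CS:IP frame. A sketch of that extra tail, mirroring the layout of struct kernel_vm86_regs shown later in this diff (the struct name here is illustrative):

struct vm86_hw_frame_tail {	/* pushed by hardware after the normal frame */
	unsigned short es, __esh;
	unsigned short ds, __dsh;
	unsigned short fs, __fsh;
	unsigned short gs, __gsh;	/* 4 x 4 bytes = the 16 extra bytes */
};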
@ -140,27 +143,11 @@ struct thread_info {
|
|||
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
|
||||
_TIF_NOHZ)
|
||||
|
||||
/* work to do in syscall_trace_leave() */
|
||||
#define _TIF_WORK_SYSCALL_EXIT \
|
||||
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
|
||||
_TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
|
||||
|
||||
/* work to do on interrupt/exception return */
|
||||
#define _TIF_WORK_MASK \
|
||||
(0x0000FFFF & \
|
||||
~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
|
||||
_TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
|
||||
|
||||
/* work to do on any return to user space */
|
||||
#define _TIF_ALLWORK_MASK \
|
||||
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
|
||||
_TIF_NOHZ)
|
||||
|
||||
/* Only used for 64 bit */
|
||||
#define _TIF_DO_NOTIFY_MASK \
|
||||
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
|
||||
_TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
|
||||
|
||||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW \
|
||||
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
|
||||
|
|
|
@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void);
|
|||
asmlinkage void smp_deferred_error_interrupt(void);
|
||||
#endif
|
||||
|
||||
extern enum ctx_state ist_enter(struct pt_regs *regs);
|
||||
extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
|
||||
extern void ist_enter(struct pt_regs *regs);
|
||||
extern void ist_exit(struct pt_regs *regs);
|
||||
extern void ist_begin_non_atomic(struct pt_regs *regs);
|
||||
extern void ist_end_non_atomic(void);
|
||||
|
||||
|
|
|
@ -21,28 +21,12 @@ extern void disable_TSC(void);
|
|||
|
||||
static inline cycles_t get_cycles(void)
|
||||
{
|
||||
unsigned long long ret = 0;
|
||||
|
||||
#ifndef CONFIG_X86_TSC
|
||||
if (!cpu_has_tsc)
|
||||
return 0;
|
||||
#endif
|
||||
rdtscll(ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline cycles_t vget_cycles(void)
|
||||
{
|
||||
/*
|
||||
* We only do VDSOs on TSC capable CPUs, so this shouldn't
|
||||
* access boot_cpu_data (which is not VDSO-safe):
|
||||
*/
|
||||
#ifndef CONFIG_X86_TSC
|
||||
if (!cpu_has_tsc)
|
||||
return 0;
|
||||
#endif
|
||||
return (cycles_t)__native_read_tsc();
|
||||
return rdtsc();
|
||||
}
|
||||
|
||||
extern void tsc_init(void);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef _ASM_X86_VM86_H
|
||||
#define _ASM_X86_VM86_H
|
||||
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <uapi/asm/vm86.h>
|
||||
|
||||
|
@ -28,43 +27,49 @@ struct kernel_vm86_regs {
|
|||
unsigned short gs, __gsh;
|
||||
};
|
||||
|
||||
struct kernel_vm86_struct {
|
||||
struct kernel_vm86_regs regs;
|
||||
/*
|
||||
* the below part remains on the kernel stack while we are in VM86 mode.
|
||||
* 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we
|
||||
* get forced back from VM86, the CPU and "SAVE_ALL" will restore the above
|
||||
* 'struct kernel_vm86_regs' with the then actual values.
|
||||
* Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct'
|
||||
* in kernelspace, hence we need not reget the data from userspace.
|
||||
*/
|
||||
#define VM86_TSS_ESP0 flags
|
||||
struct vm86 {
|
||||
struct vm86plus_struct __user *user_vm86;
|
||||
struct pt_regs regs32;
|
||||
unsigned long veflags;
|
||||
unsigned long veflags_mask;
|
||||
unsigned long saved_sp0;
|
||||
|
||||
unsigned long flags;
|
||||
unsigned long screen_bitmap;
|
||||
unsigned long cpu_type;
|
||||
struct revectored_struct int_revectored;
|
||||
struct revectored_struct int21_revectored;
|
||||
struct vm86plus_info_struct vm86plus;
|
||||
struct pt_regs *regs32; /* here we save the pointer to the old regs */
|
||||
/*
|
||||
* The below is not part of the structure, but the stack layout continues
|
||||
* this way. In front of 'return-eip' may be some data, depending on
|
||||
* compilation, so we don't rely on this and save the pointer to 'oldregs'
|
||||
* in 'regs32' above.
|
||||
* However, with GCC-2.7.2 and the current CFLAGS you see exactly this:
|
||||
|
||||
long return-eip; from call to vm86()
|
||||
struct pt_regs oldregs; user space registers as saved by syscall
|
||||
*/
|
||||
};
|
||||
|
||||
#ifdef CONFIG_VM86
|
||||
|
||||
void handle_vm86_fault(struct kernel_vm86_regs *, long);
|
||||
int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
|
||||
struct pt_regs *save_v86_state(struct kernel_vm86_regs *);
|
||||
void save_v86_state(struct kernel_vm86_regs *, int);
|
||||
|
||||
struct task_struct;
|
||||
|
||||
#define free_vm86(t) do { \
|
||||
struct thread_struct *__t = (t); \
|
||||
if (__t->vm86 != NULL) { \
|
||||
kfree(__t->vm86); \
|
||||
__t->vm86 = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Support for VM86 programs to request interrupts for
|
||||
* real mode hardware drivers:
|
||||
*/
|
||||
#define FIRST_VM86_IRQ 3
|
||||
#define LAST_VM86_IRQ 15
|
||||
|
||||
static inline int invalid_vm86_irq(int irq)
|
||||
{
|
||||
return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
|
||||
}
|
||||
|
||||
void release_vm86_irqs(struct task_struct *);
|
||||
|
||||
#else
|
||||
|
@ -77,6 +82,10 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
|
||||
|
||||
#define free_vm86(t) do { } while(0)
|
||||
|
||||
#endif /* CONFIG_VM86 */
|
||||
|
||||
#endif /* _ASM_X86_VM86_H */
|
||||
|
|
|
@@ -37,8 +37,6 @@
#define X86_EFLAGS_VM		_BITUL(X86_EFLAGS_VM_BIT)
#define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
#define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
#define X86_EFLAGS_VIF_BIT	19 /* Virtual Interrupt Flag */
#define X86_EFLAGS_VIF		_BITUL(X86_EFLAGS_VIF_BIT)
#define X86_EFLAGS_VIP_BIT	20 /* Virtual Interrupt Pending */
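The two dropped lines are byte-for-byte repeats of the pair kept just above, so nothing changes: with _BITUL(x) effectively expanding to (1UL << (x)), the surviving definition still yields the usual AC mask. A throwaway check (the local _BITUL here only mimics the uapi definition so the example is self-contained):

#include <assert.h>

#define _BITUL(x)		(1UL << (x))
#define X86_EFLAGS_AC_BIT	18
#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)

int main(void)
{
	assert(X86_EFLAGS_AC == 0x00040000UL);	/* bit 18 of EFLAGS */
	return 0;
}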
|
|
|
@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
|||
CFLAGS_irq.o := -I$(src)/../include/asm/trace
|
||||
|
||||
obj-y := process_$(BITS).o signal.o
|
||||
obj-$(CONFIG_COMPAT) += signal_compat.o
|
||||
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
||||
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
|
||||
obj-y += time.o ioport.o dumpstack.o nmi.o
|
||||
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
|
||||
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
|
||||
obj-$(CONFIG_IRQ_WORK) += irq_work.o
|
||||
obj-y += probe_roms.o
|
||||
|
|
|
@ -263,7 +263,7 @@ static int apbt_clocksource_register(void)
|
|||
|
||||
/* Verify whether apbt counter works */
|
||||
t1 = dw_apb_clocksource_read(clocksource_apbt);
|
||||
rdtscll(start);
|
||||
start = rdtsc();
|
||||
|
||||
/*
|
||||
* We don't know the TSC frequency yet, but waiting for
|
||||
|
@ -273,7 +273,7 @@ static int apbt_clocksource_register(void)
|
|||
*/
|
||||
do {
|
||||
rep_nop();
|
||||
rdtscll(now);
|
||||
now = rdtsc();
|
||||
} while ((now - start) < 200000UL);
|
||||
|
||||
/* APBT is the only always on clocksource, it has to work! */
|
||||
|
@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void)
|
|||
old = dw_apb_clocksource_read(clocksource_apbt);
|
||||
old += loop;
|
||||
|
||||
t1 = __native_read_tsc();
|
||||
t1 = rdtsc();
|
||||
|
||||
do {
|
||||
new = dw_apb_clocksource_read(clocksource_apbt);
|
||||
} while (new < old);
|
||||
|
||||
t2 = __native_read_tsc();
|
||||
t2 = rdtsc();
|
||||
|
||||
shift = 5;
|
||||
if (unlikely(loop >> shift == 0)) {
|
||||
|
|
|
@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta,
|
|||
{
|
||||
u64 tsc;
|
||||
|
||||
rdtscll(tsc);
|
||||
tsc = rdtsc();
|
||||
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
|
||||
return 0;
|
||||
}
|
||||
|
@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
|
|||
unsigned long pm = acpi_pm_read_early();
|
||||
|
||||
if (cpu_has_tsc)
|
||||
rdtscll(tsc);
|
||||
tsc = rdtsc();
|
||||
|
||||
switch (lapic_cal_loops++) {
|
||||
case 0:
|
||||
|
@ -1209,7 +1209,7 @@ void setup_local_APIC(void)
|
|||
long long max_loops = cpu_khz ? cpu_khz : 1000000;
|
||||
|
||||
if (cpu_has_tsc)
|
||||
rdtscll(tsc);
|
||||
tsc = rdtsc();
|
||||
|
||||
if (disable_apic) {
|
||||
disable_ioapic_support();
|
||||
|
@ -1293,7 +1293,7 @@ void setup_local_APIC(void)
|
|||
}
|
||||
if (queued) {
|
||||
if (cpu_has_tsc && cpu_khz) {
|
||||
rdtscll(ntsc);
|
||||
ntsc = rdtsc();
|
||||
max_loops = (cpu_khz << 10) - (ntsc - tsc);
|
||||
} else
|
||||
max_loops--;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <asm/cpu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/delay.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/mmconfig.h>
|
||||
|
@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
|||
const int K6_BUG_LOOP = 1000000;
|
||||
int n;
|
||||
void (*f_vide)(void);
|
||||
unsigned long d, d2;
|
||||
u64 d, d2;
|
||||
|
||||
printk(KERN_INFO "AMD K6 stepping B detected - ");
|
||||
|
||||
|
@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
|||
|
||||
n = K6_BUG_LOOP;
|
||||
f_vide = vide;
|
||||
rdtscl(d);
|
||||
d = rdtsc();
|
||||
while (n--)
|
||||
f_vide();
|
||||
rdtscl(d2);
|
||||
d2 = rdtsc();
|
||||
d = d2-d;
|
||||
|
||||
if (d > 20*K6_BUG_LOOP)
|
||||
|
@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
|||
/* A random value per boot for bit slice [12:upper_bit) */
|
||||
va_align.bits = get_random_int() & va_align.mask;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_MWAITX))
|
||||
use_mwaitx_delay();
|
||||
}
|
||||
|
||||
static void early_init_amd(struct cpuinfo_x86 *c)
|
||||
|
|
|
@ -1185,10 +1185,10 @@ void syscall_init(void)
|
|||
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
||||
*/
|
||||
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
||||
wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
|
||||
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
|
||||
/*
|
||||
* This only works on Intel CPUs.
|
||||
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
|
||||
|
@ -1199,7 +1199,7 @@ void syscall_init(void)
|
|||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, ignore_sysret);
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
|
||||
|
|
|
@ -127,7 +127,7 @@ void mce_setup(struct mce *m)
|
|||
{
|
||||
memset(m, 0, sizeof(struct mce));
|
||||
m->cpu = m->extcpu = smp_processor_id();
|
||||
rdtscll(m->tsc);
|
||||
m->tsc = rdtsc();
|
||||
/* We hope get_seconds stays lockless */
|
||||
m->time = get_seconds();
|
||||
m->cpuvendor = boot_cpu_data.x86_vendor;
|
||||
|
@ -974,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
{
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
struct mce m, *final;
|
||||
enum ctx_state prev_state;
|
||||
int i;
|
||||
int worst = 0;
|
||||
int severity;
|
||||
|
@ -1000,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
int flags = MF_ACTION_REQUIRED;
|
||||
int lmce = 0;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
this_cpu_inc(mce_exception_count);
|
||||
|
||||
|
@ -1166,7 +1165,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
local_irq_disable();
|
||||
ist_end_non_atomic();
|
||||
done:
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||
|
||||
|
@ -1754,7 +1753,7 @@ static void collect_tscs(void *data)
|
|||
{
|
||||
unsigned long *cpu_tsc = (unsigned long *)data;
|
||||
|
||||
rdtscll(cpu_tsc[smp_processor_id()]);
|
||||
cpu_tsc[smp_processor_id()] = rdtsc();
|
||||
}
|
||||
|
||||
static int mce_apei_read_done;
|
||||
|
|
|
@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly;
|
|||
/* Machine check handler for Pentium class Intel CPUs: */
|
||||
static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
u32 loaddr, hi, lotype;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
|
@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
|||
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
|
|
|
@ -15,12 +15,12 @@
|
|||
/* Machine check handler for WinChip C6: */
|
||||
static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting on the Winchip C6 series */
|
||||
|
|
|
@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment)
|
|||
int idx = segment >> 3;
|
||||
|
||||
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
if (idx > LDT_ENTRIES)
|
||||
|
@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment)
|
|||
return 0;
|
||||
|
||||
desc = &ldt->entries[idx];
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
} else {
|
||||
if (idx > GDT_ENTRIES)
|
||||
return 0;
|
||||
|
@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment)
|
|||
return get_desc_base(desc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
||||
#include <asm/compat.h>
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ static void init_espfix_random(void)
|
|||
*/
|
||||
if (!arch_get_random_long(&rand)) {
|
||||
/* The constant is an arbitrary large prime */
|
||||
rdtscll(rand);
|
||||
rand = rdtsc();
|
||||
rand *= 0xc345c6b72fd16123UL;
|
||||
}
|
||||
|
||||
|
|
|
@ -735,7 +735,7 @@ static int hpet_clocksource_register(void)
|
|||
|
||||
/* Verify whether hpet counter works */
|
||||
t1 = hpet_readl(HPET_COUNTER);
|
||||
rdtscll(start);
|
||||
start = rdtsc();
|
||||
|
||||
/*
|
||||
* We don't know the TSC frequency yet, but waiting for
|
||||
|
@ -745,7 +745,7 @@ static int hpet_clocksource_register(void)
|
|||
*/
|
||||
do {
|
||||
rep_nop();
|
||||
rdtscll(now);
|
||||
now = rdtsc();
|
||||
} while ((now - start) < 200000UL);
|
||||
|
||||
if (t1 == hpet_readl(HPET_COUNTER)) {
|
||||
|
|
|
@@ -216,8 +216,23 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
	unsigned vector = ~regs->orig_ax;
	unsigned irq;

	/*
	 * NB: Unlike exception entries, IRQ entries do not reliably
	 * handle context tracking in the low-level entry code. This is
	 * because syscall entries execute briefly with IRQs on before
	 * updating context tracking state, so we can take an IRQ from
	 * kernel mode with CONTEXT_USER. The low-level entry code only
	 * updates the context if we came from user mode, so we won't
	 * switch to CONTEXT_KERNEL. We'll fix that once the syscall
	 * code is cleaned up enough that we can cleanly defer enabling
	 * IRQs.
	 */

	entering_irq();

	/* entering_irq() tells RCU that we're not quiescent. Check it. */
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");

	irq = __this_cpu_read(vector_irq[vector]);

	if (!handle_irq(irq, regs)) {
|
|
@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
|
|||
a->handler, whole_msecs, decimal_msecs);
|
||||
}
|
||||
|
||||
static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
|
||||
static int nmi_handle(unsigned int type, struct pt_regs *regs)
|
||||
{
|
||||
struct nmi_desc *desc = nmi_to_desc(type);
|
||||
struct nmiaction *a;
|
||||
|
@ -213,7 +213,7 @@ static void
|
|||
pci_serr_error(unsigned char reason, struct pt_regs *regs)
|
||||
{
|
||||
/* check to see if anyone registered against these types of errors */
|
||||
if (nmi_handle(NMI_SERR, regs, false))
|
||||
if (nmi_handle(NMI_SERR, regs))
|
||||
return;
|
||||
|
||||
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
|
||||
|
@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
|
|||
unsigned long i;
|
||||
|
||||
/* check to see if anyone registered against these types of errors */
|
||||
if (nmi_handle(NMI_IO_CHECK, regs, false))
|
||||
if (nmi_handle(NMI_IO_CHECK, regs))
|
||||
return;
|
||||
|
||||
pr_emerg(
|
||||
|
@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
|
|||
* as only the first one is ever run (unless it can actually determine
|
||||
* if it caused the NMI)
|
||||
*/
|
||||
handled = nmi_handle(NMI_UNKNOWN, regs, false);
|
||||
handled = nmi_handle(NMI_UNKNOWN, regs);
|
||||
if (handled) {
|
||||
__this_cpu_add(nmi_stats.unknown, handled);
|
||||
return;
|
||||
|
@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs)
|
|||
|
||||
__this_cpu_write(last_nmi_rip, regs->ip);
|
||||
|
||||
handled = nmi_handle(NMI_LOCAL, regs, b2b);
|
||||
handled = nmi_handle(NMI_LOCAL, regs);
|
||||
__this_cpu_add(nmi_stats.normal, handled);
|
||||
if (handled) {
|
||||
/*
|
||||
|
|
|
@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
|
|||
.wbinvd = native_wbinvd,
|
||||
.read_msr = native_read_msr_safe,
|
||||
.write_msr = native_write_msr_safe,
|
||||
.read_tsc = native_read_tsc,
|
||||
.read_pmc = native_read_pmc,
|
||||
.read_tscp = native_read_tscp,
|
||||
.load_tr_desc = native_load_tr_desc,
|
||||
.set_ldt = native_set_ldt,
|
||||
.load_gdt = native_load_gdt,
|
||||
|
|
|
@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
|
|||
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
|
||||
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
|
||||
DEF_NATIVE(pv_cpu_ops, clts, "clts");
|
||||
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
|
||||
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
|
||||
|
@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
|||
PATCH_SITE(pv_mmu_ops, read_cr3);
|
||||
PATCH_SITE(pv_mmu_ops, write_cr3);
|
||||
PATCH_SITE(pv_cpu_ops, clts);
|
||||
PATCH_SITE(pv_cpu_ops, read_tsc);
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
|
||||
if (pv_is_native_spin_unlock()) {
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <asm/nmi.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
/*
|
||||
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
||||
|
@ -111,6 +112,8 @@ void exit_thread(void)
|
|||
kfree(bp);
|
||||
}
|
||||
|
||||
free_vm86(t);
|
||||
|
||||
fpu__drop(fpu);
|
||||
}
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include <asm/syscalls.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
||||
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
|
||||
|
|
|
@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all)
|
|||
void release_thread(struct task_struct *dead_task)
|
||||
{
|
||||
if (dead_task->mm) {
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
if (dead_task->mm->context.ldt) {
|
||||
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
|
||||
dead_task->comm,
|
||||
|
@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task)
|
|||
dead_task->mm->context.ldt->size);
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|||
__USER_CS, __USER_DS, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
|
||||
#ifdef CONFIG_COMPAT
|
||||
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
|
||||
{
|
||||
start_thread_common(regs, new_ip, new_sp,
|
||||
test_thread_flag(TIF_X32)
|
||||
|
|
|
@ -37,12 +37,10 @@
|
|||
#include <asm/proto.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#include "tls.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
enum x86_regset {
|
||||
REGSET_GENERAL,
|
||||
REGSET_FP,
|
||||
|
@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request,
|
||||
compat_ulong_t caddr, compat_ulong_t cdata)
|
||||
{
|
||||
unsigned long addr = caddr;
|
||||
unsigned long data = cdata;
|
||||
void __user *datap = compat_ptr(data);
|
||||
int ret;
|
||||
__u32 val;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKUSR:
|
||||
ret = getreg32(child, addr, &val);
|
||||
if (ret == 0)
|
||||
ret = put_user(val, (__u32 __user *)datap);
|
||||
break;
|
||||
|
||||
case PTRACE_POKEUSR:
|
||||
ret = putreg32(child, addr, data);
|
||||
break;
|
||||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct32),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child, &user_x86_32_view,
|
||||
REGSET_GENERAL, 0,
|
||||
sizeof(struct user_regs_struct32),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_FP, 0,
|
||||
sizeof(struct user_i387_ia32_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(
|
||||
child, &user_x86_32_view, REGSET_FP,
|
||||
0, sizeof(struct user_i387_ia32_struct), datap);
|
||||
|
||||
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_XFP, 0,
|
||||
sizeof(struct user32_fxsr_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
|
||||
return copy_regset_from_user(child, &user_x86_32_view,
|
||||
REGSET_XFP, 0,
|
||||
sizeof(struct user32_fxsr_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GET_THREAD_AREA:
|
||||
case PTRACE_SET_THREAD_AREA:
|
||||
return arch_ptrace(child, request, addr, data);
|
||||
|
||||
default:
|
||||
return compat_ptrace_request(child, request, addr, data);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_IA32_EMULATION */
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
static long x32_arch_ptrace(struct task_struct *child,
|
||||
compat_long_t request, compat_ulong_t caddr,
|
||||
|
@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
|
||||
compat_ulong_t caddr, compat_ulong_t cdata)
|
||||
{
|
||||
unsigned long addr = caddr;
|
||||
unsigned long data = cdata;
|
||||
void __user *datap = compat_ptr(data);
|
||||
int ret;
|
||||
__u32 val;
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
if (!is_ia32_task())
|
||||
return x32_arch_ptrace(child, request, caddr, cdata);
|
||||
#endif
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKUSR:
|
||||
ret = getreg32(child, addr, &val);
|
||||
if (ret == 0)
|
||||
ret = put_user(val, (__u32 __user *)datap);
|
||||
break;
|
||||
|
||||
case PTRACE_POKEUSR:
|
||||
ret = putreg32(child, addr, data);
|
||||
break;
|
||||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct32),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child, &user_x86_32_view,
|
||||
REGSET_GENERAL, 0,
|
||||
sizeof(struct user_regs_struct32),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_FP, 0,
|
||||
sizeof(struct user_i387_ia32_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(
|
||||
child, &user_x86_32_view, REGSET_FP,
|
||||
0, sizeof(struct user_i387_ia32_struct), datap);
|
||||
|
||||
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
|
||||
return copy_regset_to_user(child, &user_x86_32_view,
|
||||
REGSET_XFP, 0,
|
||||
sizeof(struct user32_fxsr_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
|
||||
return copy_regset_from_user(child, &user_x86_32_view,
|
||||
REGSET_XFP, 0,
|
||||
sizeof(struct user32_fxsr_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GET_THREAD_AREA:
|
||||
case PTRACE_SET_THREAD_AREA:
|
||||
return arch_ptrace(child, request, addr, data);
|
||||
|
||||
default:
|
||||
return compat_ptrace_request(child, request, addr, data);
|
||||
}
|
||||
|
||||
return ret;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
return ia32_arch_ptrace(child, request, caddr, cdata);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* CONFIG_IA32_EMULATION */
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
|
@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
|
|||
/* Send us the fake SIGTRAP */
|
||||
force_sig_info(SIGTRAP, &info, tsk);
|
||||
}
|
||||
|
||||
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
audit_syscall_entry(regs->orig_ax, regs->di,
|
||||
regs->si, regs->dx, regs->r10);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
audit_syscall_entry(regs->orig_ax, regs->bx,
|
||||
regs->cx, regs->dx, regs->si);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We can return 0 to resume the syscall or anything else to go to phase
|
||||
* 2. If we resume the syscall, we need to put something appropriate in
|
||||
* regs->orig_ax.
|
||||
*
|
||||
* NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
|
||||
* are fully functional.
|
||||
*
|
||||
* For phase 2's benefit, our return value is:
|
||||
* 0: resume the syscall
|
||||
* 1: go to phase 2; no seccomp phase 2 needed
|
||||
* anything else: go to phase 2; pass return value to seccomp
|
||||
*/
|
||||
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
|
||||
{
|
||||
unsigned long ret = 0;
|
||||
u32 work;
|
||||
|
||||
BUG_ON(regs != task_pt_regs(current));
|
||||
|
||||
work = ACCESS_ONCE(current_thread_info()->flags) &
|
||||
_TIF_WORK_SYSCALL_ENTRY;
|
||||
|
||||
/*
|
||||
* If TIF_NOHZ is set, we are required to call user_exit() before
|
||||
* doing anything that could touch RCU.
|
||||
*/
|
||||
if (work & _TIF_NOHZ) {
|
||||
user_exit();
|
||||
work &= ~_TIF_NOHZ;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Do seccomp first -- it should minimize exposure of other
|
||||
* code, and keeping seccomp fast is probably more valuable
|
||||
* than the rest of this.
|
||||
*/
|
||||
if (work & _TIF_SECCOMP) {
|
||||
struct seccomp_data sd;
|
||||
|
||||
sd.arch = arch;
|
||||
sd.nr = regs->orig_ax;
|
||||
sd.instruction_pointer = regs->ip;
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
sd.args[0] = regs->di;
|
||||
sd.args[1] = regs->si;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->r10;
|
||||
sd.args[4] = regs->r8;
|
||||
sd.args[5] = regs->r9;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
sd.args[0] = regs->bx;
|
||||
sd.args[1] = regs->cx;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->si;
|
||||
sd.args[4] = regs->di;
|
||||
sd.args[5] = regs->bp;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
|
||||
BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
|
||||
|
||||
ret = seccomp_phase1(&sd);
|
||||
if (ret == SECCOMP_PHASE1_SKIP) {
|
||||
regs->orig_ax = -1;
|
||||
ret = 0;
|
||||
} else if (ret != SECCOMP_PHASE1_OK) {
|
||||
return ret; /* Go directly to phase 2 */
|
||||
}
|
||||
|
||||
work &= ~_TIF_SECCOMP;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Do our best to finish without phase 2. */
|
||||
if (work == 0)
|
||||
return ret; /* seccomp and/or nohz only (ret == 0 here) */
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
if (work == _TIF_SYSCALL_AUDIT) {
|
||||
/*
|
||||
* If there is no more work to be done except auditing,
|
||||
* then audit in phase 1. Phase 2 always audits, so, if
|
||||
* we audit here, then we can't go on to phase 2.
|
||||
*/
|
||||
do_audit_syscall_entry(regs, arch);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 1; /* Something is enabled that we can't handle in phase 1 */
|
||||
}
|
||||
|
||||
/* Returns the syscall nr to run (which should match regs->orig_ax). */
|
||||
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
|
||||
unsigned long phase1_result)
|
||||
{
|
||||
long ret = 0;
|
||||
u32 work = ACCESS_ONCE(current_thread_info()->flags) &
|
||||
_TIF_WORK_SYSCALL_ENTRY;
|
||||
|
||||
BUG_ON(regs != task_pt_regs(current));
|
||||
|
||||
/*
|
||||
* If we stepped into a sysenter/syscall insn, it trapped in
|
||||
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
|
||||
* If user-mode had set TF itself, then it's still clear from
|
||||
* do_debug() and we need to set it again to restore the user
|
||||
* state. If we entered on the slow path, TF was already set.
|
||||
*/
|
||||
if (work & _TIF_SINGLESTEP)
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Call seccomp_phase2 before running the other hooks so that
|
||||
* they can see any changes made by a seccomp tracer.
|
||||
*/
|
||||
if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
|
||||
/* seccomp failures shouldn't expose any additional code. */
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(work & _TIF_SYSCALL_EMU))
|
||||
ret = -1L;
|
||||
|
||||
if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
|
||||
tracehook_report_syscall_entry(regs))
|
||||
ret = -1L;
|
||||
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
|
||||
trace_sys_enter(regs, regs->orig_ax);
|
||||
|
||||
do_audit_syscall_entry(regs, arch);
|
||||
|
||||
return ret ?: regs->orig_ax;
|
||||
}
|
||||
|
||||
long syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
|
||||
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
|
||||
|
||||
if (phase1_result == 0)
|
||||
return regs->orig_ax;
|
||||
else
|
||||
return syscall_trace_enter_phase2(regs, arch, phase1_result);
|
||||
}
|
||||
|
||||
void syscall_trace_leave(struct pt_regs *regs)
|
||||
{
|
||||
bool step;
|
||||
|
||||
/*
|
||||
* We may come here right after calling schedule_user()
|
||||
* or do_notify_resume(), in which case we can be in RCU
|
||||
* user mode.
|
||||
*/
|
||||
user_exit();
|
||||
|
||||
audit_syscall_exit(regs);
|
||||
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
|
||||
trace_sys_exit(regs, regs->ax);
|
||||
|
||||
/*
|
||||
* If TIF_SYSCALL_EMU is set, we only get here because of
|
||||
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
|
||||
* We already reported this syscall instruction in
|
||||
* syscall_trace_enter().
|
||||
*/
|
||||
step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
|
||||
!test_thread_flag(TIF_SYSCALL_EMU);
|
||||
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
tracehook_report_syscall_exit(regs, step);
|
||||
|
||||
user_enter();
|
||||
}
|
||||
|
|
|
@ -31,11 +31,11 @@
|
|||
#include <asm/vdso.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/sighandling.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/proto.h>
|
||||
#include <asm/ia32_unistd.h>
|
||||
#include <asm/sys_ia32.h>
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#include <asm/syscall.h>
|
||||
|
@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
|||
bool stepping, failed;
|
||||
struct fpu *fpu = ¤t->thread.fpu;
|
||||
|
||||
if (v8086_mode(regs))
|
||||
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
|
||||
|
||||
/* Are we from a system call? */
|
||||
if (syscall_get_nr(current, regs) >= 0) {
|
||||
/* If so, check system call restarting.. */
|
||||
|
@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
|||
* want to handle. Thus you cannot kill init even with a SIGKILL even by
|
||||
* mistake.
|
||||
*/
|
||||
static void do_signal(struct pt_regs *regs)
|
||||
void do_signal(struct pt_regs *regs)
|
||||
{
|
||||
struct ksignal ksig;
|
||||
|
||||
|
@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs)
|
|||
restore_saved_sigmask();
|
||||
}
|
||||
|
||||
/*
|
||||
* notification of userspace execution resumption
|
||||
* - triggered by the TIF_WORK_MASK flags
|
||||
*/
|
||||
__visible void
|
||||
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
{
|
||||
user_exit();
|
||||
|
||||
if (thread_info_flags & _TIF_UPROBE)
|
||||
uprobe_notify_resume(regs);
|
||||
|
||||
/* deal with pending signal delivery */
|
||||
if (thread_info_flags & _TIF_SIGPENDING)
|
||||
do_signal(regs);
|
||||
|
||||
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
}
|
||||
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
|
||||
fire_user_return_notifiers();
|
||||
|
||||
user_enter();
|
||||
}
|
||||
|
||||
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
|
||||
{
|
||||
struct task_struct *me = current;
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
#include <linux/compat.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
|
||||
{
|
||||
int err = 0;
|
||||
bool ia32 = test_thread_flag(TIF_IA32);
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
|
||||
return -EFAULT;
|
||||
|
||||
put_user_try {
|
||||
/* If you change siginfo_t structure, please make sure that
|
||||
this code is fixed accordingly.
|
||||
It should never copy any pad contained in the structure
|
||||
to avoid security leaks, but must copy the generic
|
||||
3 ints plus the relevant union member. */
|
||||
put_user_ex(from->si_signo, &to->si_signo);
|
||||
put_user_ex(from->si_errno, &to->si_errno);
|
||||
put_user_ex((short)from->si_code, &to->si_code);
|
||||
|
||||
if (from->si_code < 0) {
|
||||
put_user_ex(from->si_pid, &to->si_pid);
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
|
||||
} else {
|
||||
/*
|
||||
* First 32bits of unions are always present:
|
||||
* si_pid === si_band === si_tid === si_addr(LS half)
|
||||
*/
|
||||
put_user_ex(from->_sifields._pad[0],
|
||||
&to->_sifields._pad[0]);
|
||||
switch (from->si_code >> 16) {
|
||||
case __SI_FAULT >> 16:
|
||||
break;
|
||||
case __SI_SYS >> 16:
|
||||
put_user_ex(from->si_syscall, &to->si_syscall);
|
||||
put_user_ex(from->si_arch, &to->si_arch);
|
||||
break;
|
||||
case __SI_CHLD >> 16:
|
||||
if (ia32) {
|
||||
put_user_ex(from->si_utime, &to->si_utime);
|
||||
put_user_ex(from->si_stime, &to->si_stime);
|
||||
} else {
|
||||
put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
|
||||
put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
|
||||
}
|
||||
put_user_ex(from->si_status, &to->si_status);
|
||||
/* FALL THROUGH */
|
||||
default:
|
||||
case __SI_KILL >> 16:
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
break;
|
||||
case __SI_POLL >> 16:
|
||||
put_user_ex(from->si_fd, &to->si_fd);
|
||||
break;
|
||||
case __SI_TIMER >> 16:
|
||||
put_user_ex(from->si_overrun, &to->si_overrun);
|
||||
put_user_ex(ptr_to_compat(from->si_ptr),
|
||||
&to->si_ptr);
|
||||
break;
|
||||
/* This is not generated by the kernel as of now. */
|
||||
case __SI_RT >> 16:
|
||||
case __SI_MESGQ >> 16:
|
||||
put_user_ex(from->si_uid, &to->si_uid);
|
||||
put_user_ex(from->si_int, &to->si_int);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} put_user_catch(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
|
||||
{
|
||||
int err = 0;
|
||||
u32 ptr32;
|
||||
|
||||
if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
|
||||
return -EFAULT;
|
||||
|
||||
get_user_try {
|
||||
get_user_ex(to->si_signo, &from->si_signo);
|
||||
get_user_ex(to->si_errno, &from->si_errno);
|
||||
get_user_ex(to->si_code, &from->si_code);
|
||||
|
||||
get_user_ex(to->si_pid, &from->si_pid);
|
||||
get_user_ex(to->si_uid, &from->si_uid);
|
||||
get_user_ex(ptr32, &from->si_ptr);
|
||||
to->si_ptr = compat_ptr(ptr32);
|
||||
} get_user_catch(err);
|
||||
|
||||
return err;
|
||||
}
|
|
@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
|
|||
return addr;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
/*
|
||||
* We'll assume that the code segments in the GDT
|
||||
* are all zero-based. That is largely true: the
|
||||
|
@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
|
|||
}
|
||||
mutex_unlock(&child->mm->context.lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
|
|
@ -12,10 +12,5 @@
|
|||
*/
|
||||
u64 notrace trace_clock_x86_tsc(void)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
rdtsc_barrier();
|
||||
rdtscll(ret);
|
||||
|
||||
return ret;
|
||||
return rdtsc_ordered();
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@
|
|||
#include <asm/fpu/xstate.h>
|
||||
#include <asm/trace/mpx.h>
|
||||
#include <asm/mpx.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/x86_init.h>
|
||||
|
@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
|
|||
preempt_count_dec();
|
||||
}
|
||||
|
||||
enum ctx_state ist_enter(struct pt_regs *regs)
|
||||
void ist_enter(struct pt_regs *regs)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
if (user_mode(regs)) {
|
||||
/* Other than that, we're just an exception. */
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
} else {
|
||||
/*
|
||||
* We might have interrupted pretty much anything. In
|
||||
|
@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs)
|
|||
* but we need to notify RCU.
|
||||
*/
|
||||
rcu_nmi_enter();
|
||||
prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
|
||||
}
|
||||
|
||||
/*
|
||||
* We are atomic because we're on the IST stack (or we're on x86_32,
|
||||
* in which case we still shouldn't schedule).
|
||||
*
|
||||
* This must be after exception_enter(), because exception_enter()
|
||||
* won't do anything if in_interrupt() returns true.
|
||||
* We are atomic because we're on the IST stack; or we're on
|
||||
* x86_32, in which case we still shouldn't schedule; or we're
|
||||
* on x86_64 and entered from user mode, in which case we're
|
||||
* still atomic unless ist_begin_non_atomic is called.
|
||||
*/
|
||||
preempt_count_add(HARDIRQ_OFFSET);
|
||||
|
||||
/* This code is a bit fragile. Test it. */
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
|
||||
|
||||
return prev_state;
|
||||
}
|
||||
|
||||
void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
|
||||
void ist_exit(struct pt_regs *regs)
|
||||
{
|
||||
/* Must be before exception_exit. */
|
||||
preempt_count_sub(HARDIRQ_OFFSET);
|
||||
|
||||
if (user_mode(regs))
|
||||
return exception_exit(prev_state);
|
||||
else
|
||||
if (!user_mode(regs))
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
|
||||
|
@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
|
|||
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
|
||||
* begins a non-atomic section within an ist_enter()/ist_exit() region.
|
||||
* Callers are responsible for enabling interrupts themselves inside
|
||||
* the non-atomic section, and callers must call is_end_non_atomic()
|
||||
* the non-atomic section, and callers must call ist_end_non_atomic()
|
||||
* before ist_exit().
|
||||
*/
|
||||
void ist_begin_non_atomic(struct pt_regs *regs)
|
||||
|
@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap);
|
|||
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
|
||||
unsigned long trapnr, int signr)
|
||||
{
|
||||
enum ctx_state prev_state = exception_enter();
|
||||
siginfo_t info;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
|
||||
NOTIFY_STOP) {
|
||||
conditional_sti(regs);
|
||||
do_trap(trapnr, signr, str, regs, error_code,
|
||||
fill_trap_info(regs, signr, trapnr, &info));
|
||||
}
|
||||
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
|
||||
#define DO_ERROR(trapnr, signr, str, name) \
|
||||
|
@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
|||
}
|
||||
#endif
|
||||
|
||||
ist_enter(regs); /* Discard prev_state because we won't return. */
|
||||
ist_enter(regs);
|
||||
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
|
@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
|||
|
||||
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
const struct bndcsr *bndcsr;
|
||||
siginfo_t *info;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
|
||||
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
|
||||
goto exit;
|
||||
return;
|
||||
conditional_sti(regs);
|
||||
|
||||
if (!user_mode(regs))
|
||||
|
@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
|||
die("bounds", regs, error_code);
|
||||
}
|
||||
|
||||
exit:
|
||||
exception_exit(prev_state);
|
||||
return;
|
||||
|
||||
exit_trap:
|
||||
/*
|
||||
* This path out is for all the cases where we could not
|
||||
|
@ -447,35 +435,33 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
|||
* time..
|
||||
*/
|
||||
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
|
||||
dotraplinkage void
|
||||
do_general_protection(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
enum ctx_state prev_state;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
conditional_sti(regs);
|
||||
|
||||
if (v8086_mode(regs)) {
|
||||
local_irq_enable();
|
||||
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
|
||||
goto exit;
|
||||
return;
|
||||
}
|
||||
|
||||
tsk = current;
|
||||
if (!user_mode(regs)) {
|
||||
if (fixup_exception(regs))
|
||||
goto exit;
|
||||
return;
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_nr = X86_TRAP_GP;
|
||||
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
|
||||
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
|
||||
die("general protection fault", regs, error_code);
|
||||
goto exit;
|
||||
return;
|
||||
}
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
|
@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
|||
}
|
||||
|
||||
force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
|
||||
exit:
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_general_protection);
|
||||
|
||||
/* May run on IST stack. */
|
||||
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
/*
|
||||
* ftrace must be first, everything else may cause a recursive crash.
|
||||
|
@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
|||
if (poke_int3_handler(regs))
|
||||
return;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
|
||||
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
|
@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
|||
preempt_conditional_cli(regs);
|
||||
debug_stack_usage_dec();
|
||||
exit:
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_int3);
|
||||
|
||||
|
@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
|
|||
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
enum ctx_state prev_state;
|
||||
int user_icebp = 0;
|
||||
unsigned long dr6;
|
||||
int si_code;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
get_debugreg(dr6, 6);
|
||||
|
||||
|
@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
|||
debug_stack_usage_dec();
|
||||
|
||||
exit:
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_debug);
|
||||
|
||||
|
@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
|
|||
|
||||
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
math_error(regs, error_code, X86_TRAP_MF);
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
|
||||
dotraplinkage void
|
||||
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
math_error(regs, error_code, X86_TRAP_XF);
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
|
||||
dotraplinkage void
|
||||
|
@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
|||
dotraplinkage void
|
||||
do_device_not_available(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
BUG_ON(use_eager_fpu());
|
||||
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
|
@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|||
|
||||
info.regs = regs;
|
||||
math_emulate(&info);
|
||||
exception_exit(prev_state);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|||
#ifdef CONFIG_X86_32
|
||||
conditional_sti(regs);
|
||||
#endif
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_device_not_available);
|
||||
|
||||
|
@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available);
|
|||
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
siginfo_t info;
|
||||
enum ctx_state prev_state;
|
||||
|
||||
prev_state = exception_enter();
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
local_irq_enable();
|
||||
|
||||
info.si_signo = SIGILL;
|
||||
|
@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
|||
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
|
||||
&info);
|
||||
}
|
||||
exception_exit(prev_state);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
|
|||
|
||||
data = cyc2ns_write_begin(cpu);
|
||||
|
||||
rdtscll(tsc_now);
|
||||
tsc_now = rdtsc();
|
||||
ns_now = cycles_2_ns(tsc_now);
|
||||
|
||||
/*
|
||||
|
@ -290,7 +290,7 @@ u64 native_sched_clock(void)
|
|||
}
|
||||
|
||||
/* read the Time Stamp Counter: */
|
||||
rdtscll(tsc_now);
|
||||
tsc_now = rdtsc();
|
||||
|
||||
/* return the value in ns */
|
||||
return cycles_2_ns(tsc_now);
|
||||
|
@@ -316,12 +316,6 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

unsigned long long native_read_tsc(void)
{
return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);

int check_tsc_unstable(void)
{
return tsc_unstable;
|
||||
|
@ -984,7 +978,7 @@ static struct clocksource clocksource_tsc;
|
|||
*/
|
||||
static cycle_t read_tsc(struct clocksource *cs)
|
||||
{
|
||||
return (cycle_t)get_cycles();
|
||||
return (cycle_t)rdtsc_ordered();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -39,16 +39,15 @@ static cycles_t max_warp;
|
|||
static int nr_warps;
|
||||
|
||||
/*
|
||||
* TSC-warp measurement loop running on both CPUs:
|
||||
* TSC-warp measurement loop running on both CPUs. This is not called
|
||||
* if there is no TSC.
|
||||
*/
|
||||
static void check_tsc_warp(unsigned int timeout)
|
||||
{
|
||||
cycles_t start, now, prev, end;
|
||||
int i;
|
||||
|
||||
rdtsc_barrier();
|
||||
start = get_cycles();
|
||||
rdtsc_barrier();
|
||||
start = rdtsc_ordered();
|
||||
/*
|
||||
* The measurement runs for 'timeout' msecs:
|
||||
*/
|
||||
|
@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout)
|
|||
*/
|
||||
arch_spin_lock(&sync_lock);
|
||||
prev = last_tsc;
|
||||
rdtsc_barrier();
|
||||
now = get_cycles();
|
||||
rdtsc_barrier();
|
||||
now = rdtsc_ordered();
|
||||
last_tsc = now;
|
||||
arch_spin_unlock(&sync_lock);
|
||||
|
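
/*
 * What the last_tsc bookkeeping above is for, with assumed numbers: if CPU0
 * records last_tsc = 1000 and CPU1 then reads now = 940 under the same lock,
 * prev (1000) > now (940) and a 60-cycle warp is reported. rdtsc_ordered()
 * only orders each CPU's own read; the cross-CPU comparison still needs this
 * lock-protected last_tsc handoff.
 */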
||||
|
@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu)
|
|||
|
||||
/*
|
||||
* No need to check if we already know that the TSC is not
|
||||
* synchronized:
|
||||
* synchronized or if we have no TSC.
|
||||
*/
|
||||
if (unsynchronized_tsc())
|
||||
return;
|
||||
|
@ -190,6 +187,7 @@ void check_tsc_sync_target(void)
|
|||
{
|
||||
int cpus = 2;
|
||||
|
||||
/* Also aborts if there is no TSC. */
|
||||
if (unsynchronized_tsc() || tsc_clocksource_reliable)
|
||||
return;
|
||||
|
||||
|
|
|
@ -44,11 +44,14 @@
|
|||
#include <linux/ptrace.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
/*
|
||||
* Known problems:
|
||||
|
@ -66,10 +69,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#define KVM86 ((struct kernel_vm86_struct *)regs)
|
||||
#define VMPI KVM86->vm86plus
|
||||
|
||||
|
||||
/*
|
||||
* 8- and 16-bit register defines..
|
||||
*/
|
||||
|
@ -81,8 +80,8 @@
|
|||
/*
|
||||
* virtual flags (16 and 32-bit versions)
|
||||
*/
|
||||
#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
|
||||
#define VEFLAGS (current->thread.v86flags)
|
||||
#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags))
|
||||
#define VEFLAGS (current->thread.vm86->veflags)
|
||||
|
||||
#define set_flags(X, new, mask) \
|
||||
((X) = ((X) & ~(mask)) | ((new) & (mask)))
|
||||
|
@ -90,46 +89,13 @@
|
|||
#define SAFE_MASK (0xDD5)
|
||||
#define RETURN_MASK (0xDFF)
|
||||
|
||||
/* convert kernel_vm86_regs to vm86_regs */
|
||||
static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
|
||||
const struct kernel_vm86_regs *regs)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* kernel_vm86_regs is missing gs, so copy everything up to
|
||||
* (but not including) orig_eax, and then rest including orig_eax.
|
||||
*/
|
||||
ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax,
|
||||
sizeof(struct kernel_vm86_regs) -
|
||||
offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* convert vm86_regs to kernel_vm86_regs */
|
||||
static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
|
||||
const struct vm86_regs __user *user,
|
||||
unsigned extra)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* copy ax-fs inclusive */
|
||||
ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
/* copy orig_ax-__gsh+extra */
|
||||
ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax,
|
||||
sizeof(struct kernel_vm86_regs) -
|
||||
offsetof(struct kernel_vm86_regs, pt.orig_ax) +
|
||||
extra);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
|
||||
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
||||
{
|
||||
struct tss_struct *tss;
|
||||
struct pt_regs *ret;
|
||||
unsigned long tmp;
|
||||
struct task_struct *tsk = current;
|
||||
struct vm86plus_struct __user *user;
|
||||
struct vm86 *vm86 = current->thread.vm86;
|
||||
long err = 0;
|
||||
|
||||
/*
|
||||
* This gets called from entry.S with interrupts disabled, but
|
||||
|
@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
|
|||
*/
|
||||
local_irq_enable();
|
||||
|
||||
if (!current->thread.vm86_info) {
|
||||
pr_alert("no vm86_info: BAD\n");
|
||||
if (!vm86 || !vm86->user_vm86) {
|
||||
pr_alert("no user_vm86: BAD\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
|
||||
tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs);
|
||||
tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap);
|
||||
if (tmp) {
|
||||
pr_alert("could not access userspace vm86_info\n");
|
||||
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
|
||||
user = vm86->user_vm86;
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
|
||||
sizeof(struct vm86plus_struct) :
|
||||
sizeof(struct vm86_struct))) {
|
||||
pr_alert("could not access userspace vm86 info\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
put_user_try {
|
||||
put_user_ex(regs->pt.bx, &user->regs.ebx);
|
||||
put_user_ex(regs->pt.cx, &user->regs.ecx);
|
||||
put_user_ex(regs->pt.dx, &user->regs.edx);
|
||||
put_user_ex(regs->pt.si, &user->regs.esi);
|
||||
put_user_ex(regs->pt.di, &user->regs.edi);
|
||||
put_user_ex(regs->pt.bp, &user->regs.ebp);
|
||||
put_user_ex(regs->pt.ax, &user->regs.eax);
|
||||
put_user_ex(regs->pt.ip, &user->regs.eip);
|
||||
put_user_ex(regs->pt.cs, &user->regs.cs);
|
||||
put_user_ex(regs->pt.flags, &user->regs.eflags);
|
||||
put_user_ex(regs->pt.sp, &user->regs.esp);
|
||||
put_user_ex(regs->pt.ss, &user->regs.ss);
|
||||
put_user_ex(regs->es, &user->regs.es);
|
||||
put_user_ex(regs->ds, &user->regs.ds);
|
||||
put_user_ex(regs->fs, &user->regs.fs);
|
||||
put_user_ex(regs->gs, &user->regs.gs);
|
||||
|
||||
put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
|
||||
} put_user_catch(err);
|
||||
if (err) {
|
||||
pr_alert("could not access userspace vm86 info\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
current->thread.sp0 = current->thread.saved_sp0;
|
||||
current->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, ¤t->thread);
|
||||
current->thread.saved_sp0 = 0;
|
||||
tsk->thread.sp0 = vm86->saved_sp0;
|
||||
tsk->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, &tsk->thread);
|
||||
vm86->saved_sp0 = 0;
|
||||
put_cpu();
|
||||
|
||||
ret = KVM86->regs32;
|
||||
memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs));
|
||||
|
||||
ret->fs = current->thread.saved_fs;
|
||||
set_user_gs(ret, current->thread.saved_gs);
|
||||
lazy_load_gs(vm86->regs32.gs);
|
||||
|
||||
return ret;
|
||||
regs->pt.ax = retval;
|
||||
}
|
||||
|
||||
static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
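
/*
 * Sketch of the new exit convention, assuming a generic vm86 exit site:
 * instead of the old return_to_32bit(regs, retval), which jumped straight
 * back into the entry code, callers now do
 *
 *	save_v86_state(regs, VM86_UNKNOWN);
 *	return;
 *
 * and the normal pt_regs exit path delivers the return value via the
 * restored pt_regs' ax.
 */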
|
@ -200,45 +192,16 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|||
|
||||
|
||||
static int do_vm86_irq_handling(int subfunction, int irqnumber);
|
||||
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
|
||||
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
|
||||
|
||||
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86)
|
||||
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
|
||||
{
|
||||
struct kernel_vm86_struct info; /* declare this _on top_,
|
||||
* this avoids wasting of stack space.
|
||||
* This remains on the stack until we
|
||||
* return to 32 bit user space.
|
||||
*/
|
||||
struct task_struct *tsk = current;
|
||||
int tmp;
|
||||
|
||||
if (tsk->thread.saved_sp0)
|
||||
return -EPERM;
|
||||
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
|
||||
offsetof(struct kernel_vm86_struct, vm86plus) -
|
||||
sizeof(info.regs));
|
||||
if (tmp)
|
||||
return -EFAULT;
|
||||
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
|
||||
info.regs32 = current_pt_regs();
|
||||
tsk->thread.vm86_info = v86;
|
||||
do_sys_vm86(&info, tsk);
|
||||
return 0; /* we never return here */
|
||||
return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
|
||||
}
|
||||
|
||||
|
||||
SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
|
||||
{
|
||||
struct kernel_vm86_struct info; /* declare this _on top_,
|
||||
* this avoids wasting of stack space.
|
||||
* This remains on the stack until we
|
||||
* return to 32 bit user space.
|
||||
*/
|
||||
struct task_struct *tsk;
|
||||
int tmp;
|
||||
struct vm86plus_struct __user *v86;
|
||||
|
||||
tsk = current;
|
||||
switch (cmd) {
|
||||
case VM86_REQUEST_IRQ:
|
||||
case VM86_FREE_IRQ:
|
||||
|
@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
|
|||
}
|
||||
|
||||
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
|
||||
if (tsk->thread.saved_sp0)
|
||||
return -EPERM;
|
||||
v86 = (struct vm86plus_struct __user *)arg;
|
||||
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
|
||||
offsetof(struct kernel_vm86_struct, regs32) -
|
||||
sizeof(info.regs));
|
||||
if (tmp)
|
||||
return -EFAULT;
|
||||
info.regs32 = current_pt_regs();
|
||||
info.vm86plus.is_vm86pus = 1;
|
||||
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
|
||||
do_sys_vm86(&info, tsk);
|
||||
return 0; /* we never return here */
|
||||
return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
|
||||
}
|
||||
|
||||
|
||||
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
|
||||
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
||||
{
|
||||
struct tss_struct *tss;
|
||||
/*
|
||||
* make sure the vm86() system call doesn't try to do anything silly
|
||||
*/
|
||||
info->regs.pt.ds = 0;
|
||||
info->regs.pt.es = 0;
|
||||
info->regs.pt.fs = 0;
|
||||
#ifndef CONFIG_X86_32_LAZY_GS
|
||||
info->regs.pt.gs = 0;
|
||||
#endif
|
||||
struct task_struct *tsk = current;
|
||||
struct vm86 *vm86 = tsk->thread.vm86;
|
||||
struct kernel_vm86_regs vm86regs;
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
unsigned long err = 0;
|
||||
|
||||
if (!vm86) {
|
||||
if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
|
||||
return -ENOMEM;
|
||||
tsk->thread.vm86 = vm86;
|
||||
}
|
||||
if (vm86->saved_sp0)
|
||||
return -EPERM;
|
||||
|
||||
if (!access_ok(VERIFY_READ, user_vm86, plus ?
|
||||
sizeof(struct vm86_struct) :
|
||||
sizeof(struct vm86plus_struct)))
|
||||
return -EFAULT;
|
||||
|
||||
memset(&vm86regs, 0, sizeof(vm86regs));
|
||||
get_user_try {
|
||||
unsigned short seg;
|
||||
get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
|
||||
get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
|
||||
get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
|
||||
get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
|
||||
get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
|
||||
get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
|
||||
get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
|
||||
get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
|
||||
get_user_ex(seg, &user_vm86->regs.cs);
|
||||
vm86regs.pt.cs = seg;
|
||||
get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
|
||||
get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
|
||||
get_user_ex(seg, &user_vm86->regs.ss);
|
||||
vm86regs.pt.ss = seg;
|
||||
get_user_ex(vm86regs.es, &user_vm86->regs.es);
|
||||
get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
|
||||
get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
|
||||
get_user_ex(vm86regs.gs, &user_vm86->regs.gs);
|
||||
|
||||
get_user_ex(vm86->flags, &user_vm86->flags);
|
||||
get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
|
||||
get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
|
||||
} get_user_catch(err);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (copy_from_user(&vm86->int_revectored,
|
||||
&user_vm86->int_revectored,
|
||||
sizeof(struct revectored_struct)))
|
||||
return -EFAULT;
|
||||
if (copy_from_user(&vm86->int21_revectored,
|
||||
&user_vm86->int21_revectored,
|
||||
sizeof(struct revectored_struct)))
|
||||
return -EFAULT;
|
||||
if (plus) {
|
||||
if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
|
||||
sizeof(struct vm86plus_info_struct)))
|
||||
return -EFAULT;
|
||||
vm86->vm86plus.is_vm86pus = 1;
|
||||
} else
|
||||
memset(&vm86->vm86plus, 0,
|
||||
sizeof(struct vm86plus_info_struct));
|
||||
|
||||
memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
|
||||
vm86->user_vm86 = user_vm86;
|
||||
|
||||
/*
|
||||
* The flags register is also special: we cannot trust that the user
|
||||
* has set it up safely, so this makes sure interrupt etc flags are
|
||||
* inherited from protected mode.
|
||||
*/
|
||||
VEFLAGS = info->regs.pt.flags;
|
||||
info->regs.pt.flags &= SAFE_MASK;
|
||||
info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
|
||||
info->regs.pt.flags |= X86_VM_MASK;
|
||||
VEFLAGS = vm86regs.pt.flags;
|
||||
vm86regs.pt.flags &= SAFE_MASK;
|
||||
vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
|
||||
vm86regs.pt.flags |= X86_VM_MASK;
|
||||
|
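
/*
 * Worked example of the eflags sanitizing above: SAFE_MASK is 0xDD5, i.e.
 * CF|PF|AF|ZF|SF|TF|DF|OF, so
 *
 *   flags & SAFE_MASK             keeps only the caller's arithmetic/trap bits,
 *   | (regs->flags & ~SAFE_MASK)  inherits IF, IOPL, NT, ... from the 32-bit regs,
 *   | X86_VM_MASK                 forces EFLAGS.VM on so the CPU enters vm86 mode,
 *
 * which is why a vm86 caller cannot grant itself IF or IOPL values it does
 * not already have.
 */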
||||
switch (info->cpu_type) {
|
||||
vm86regs.pt.orig_ax = regs->orig_ax;
|
||||
|
||||
switch (vm86->cpu_type) {
|
||||
case CPU_286:
|
||||
tsk->thread.v86mask = 0;
|
||||
vm86->veflags_mask = 0;
|
||||
break;
|
||||
case CPU_386:
|
||||
tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
break;
|
||||
case CPU_486:
|
||||
tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
break;
|
||||
default:
|
||||
tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
|
||||
* Save old state
|
||||
*/
|
||||
info->regs32->ax = VM86_SIGNAL;
|
||||
tsk->thread.saved_sp0 = tsk->thread.sp0;
|
||||
tsk->thread.saved_fs = info->regs32->fs;
|
||||
tsk->thread.saved_gs = get_user_gs(info->regs32);
|
||||
vm86->saved_sp0 = tsk->thread.sp0;
|
||||
lazy_save_gs(vm86->regs32.gs);
|
||||
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
|
||||
/* make room for real-mode segments */
|
||||
tsk->thread.sp0 += 16;
|
||||
if (cpu_has_sep)
|
||||
tsk->thread.sysenter_cs = 0;
|
||||
load_sp0(tss, &tsk->thread);
|
||||
put_cpu();
|
||||
|
||||
tsk->thread.screen_bitmap = info->screen_bitmap;
|
||||
if (info->flags & VM86_SCREEN_BITMAP)
|
||||
if (vm86->flags & VM86_SCREEN_BITMAP)
|
||||
mark_screen_rdonly(tsk->mm);
|
||||
|
||||
/*call __audit_syscall_exit since we do not exit via the normal paths */
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
if (unlikely(current->audit_context))
|
||||
__audit_syscall_exit(1, 0);
|
||||
#endif
|
||||
|
||||
__asm__ __volatile__(
|
||||
"movl %0,%%esp\n\t"
|
||||
"movl %1,%%ebp\n\t"
|
||||
#ifdef CONFIG_X86_32_LAZY_GS
|
||||
"mov %2, %%gs\n\t"
|
||||
#endif
|
||||
"jmp resume_userspace"
|
||||
: /* no outputs */
|
||||
:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
|
||||
/* we never return here */
|
||||
}
|
||||
|
||||
static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
|
||||
{
|
||||
struct pt_regs *regs32;
|
||||
|
||||
regs32 = save_v86_state(regs16);
|
||||
regs32->ax = retval;
|
||||
__asm__ __volatile__("movl %0,%%esp\n\t"
|
||||
"movl %1,%%ebp\n\t"
|
||||
"jmp resume_userspace"
|
||||
: : "r" (regs32), "r" (current_thread_info()));
|
||||
memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
|
||||
force_iret();
|
||||
return regs->ax;
|
||||
}
|
||||
|
||||
static inline void set_IF(struct kernel_vm86_regs *regs)
|
||||
{
|
||||
VEFLAGS |= X86_EFLAGS_VIF;
|
||||
if (VEFLAGS & X86_EFLAGS_VIP)
|
||||
return_to_32bit(regs, VM86_STI);
|
||||
}
|
||||
|
||||
static inline void clear_IF(struct kernel_vm86_regs *regs)
|
||||
|
@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs)
|
|||
|
||||
static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
|
||||
{
|
||||
set_flags(VEFLAGS, flags, current->thread.v86mask);
|
||||
set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
|
||||
set_flags(regs->pt.flags, flags, SAFE_MASK);
|
||||
if (flags & X86_EFLAGS_IF)
|
||||
set_IF(regs);
|
||||
|
@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs
|
|||
|
||||
static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
|
||||
{
|
||||
set_flags(VFLAGS, flags, current->thread.v86mask);
|
||||
set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
|
||||
set_flags(regs->pt.flags, flags, SAFE_MASK);
|
||||
if (flags & X86_EFLAGS_IF)
|
||||
set_IF(regs);
|
||||
|
@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
|
|||
if (VEFLAGS & X86_EFLAGS_VIF)
|
||||
flags |= X86_EFLAGS_IF;
|
||||
flags |= X86_EFLAGS_IOPL;
|
||||
return flags | (VEFLAGS & current->thread.v86mask);
|
||||
return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
|
||||
}
|
||||
|
||||
static inline int is_revectored(int nr, struct revectored_struct *bitmap)
|
||||
|
@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
|
|||
{
|
||||
unsigned long __user *intr_ptr;
|
||||
unsigned long segoffs;
|
||||
struct vm86 *vm86 = current->thread.vm86;
|
||||
|
||||
if (regs->pt.cs == BIOSSEG)
|
||||
goto cannot_handle;
|
||||
if (is_revectored(i, &KVM86->int_revectored))
|
||||
if (is_revectored(i, &vm86->int_revectored))
|
||||
goto cannot_handle;
|
||||
if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
|
||||
if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
|
||||
goto cannot_handle;
|
||||
intr_ptr = (unsigned long __user *) (i << 2);
|
||||
if (get_user(segoffs, intr_ptr))
|
||||
|
@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
|
|||
return;
|
||||
|
||||
cannot_handle:
|
||||
return_to_32bit(regs, VM86_INTx + (i << 8));
|
||||
save_v86_state(regs, VM86_INTx + (i << 8));
|
||||
}
|
||||
|
||||
int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
|
||||
{
|
||||
if (VMPI.is_vm86pus) {
|
||||
struct vm86 *vm86 = current->thread.vm86;
|
||||
|
||||
if (vm86->vm86plus.is_vm86pus) {
|
||||
if ((trapno == 3) || (trapno == 1)) {
|
||||
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
|
||||
/* setting this flag forces the code in entry_32.S to
|
||||
the path where we call save_v86_state() and change
|
||||
the stack pointer to KVM86->regs32 */
|
||||
set_thread_flag(TIF_NOTIFY_RESUME);
|
||||
save_v86_state(regs, VM86_TRAP + (trapno << 8));
|
||||
return 0;
|
||||
}
|
||||
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
|
||||
|
@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
unsigned char __user *ssp;
|
||||
unsigned short ip, sp, orig_flags;
|
||||
int data32, pref_done;
|
||||
struct vm86plus_info_struct *vmpi = ¤t->thread.vm86->vm86plus;
|
||||
|
||||
#define CHECK_IF_IN_TRAP \
|
||||
if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
|
||||
if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
|
||||
newflags |= X86_EFLAGS_TF
|
||||
#define VM86_FAULT_RETURN do { \
|
||||
if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
|
||||
return_to_32bit(regs, VM86_PICRETURN); \
|
||||
if (orig_flags & X86_EFLAGS_TF) \
|
||||
handle_vm86_trap(regs, 0, 1); \
|
||||
return; } while (0)
|
||||
|
||||
orig_flags = *(unsigned short *)®s->pt.flags;
|
||||
|
||||
|
@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
SP(regs) -= 2;
|
||||
}
|
||||
IP(regs) = ip;
|
||||
VM86_FAULT_RETURN;
|
||||
goto vm86_fault_return;
|
||||
|
||||
/* popf */
|
||||
case 0x9d:
|
||||
|
@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
else
|
||||
set_vflags_short(newflags, regs);
|
||||
|
||||
VM86_FAULT_RETURN;
|
||||
goto check_vip;
|
||||
}
|
||||
|
||||
/* int xx */
|
||||
case 0xcd: {
|
||||
int intno = popb(csp, ip, simulate_sigsegv);
|
||||
IP(regs) = ip;
|
||||
if (VMPI.vm86dbg_active) {
|
||||
if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
|
||||
return_to_32bit(regs, VM86_INTx + (intno << 8));
|
||||
if (vmpi->vm86dbg_active) {
|
||||
if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
|
||||
save_v86_state(regs, VM86_INTx + (intno << 8));
|
||||
return;
|
||||
}
|
||||
}
|
||||
do_int(regs, intno, ssp, sp);
|
||||
return;
|
||||
|
@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
} else {
|
||||
set_vflags_short(newflags, regs);
|
||||
}
|
||||
VM86_FAULT_RETURN;
|
||||
goto check_vip;
|
||||
}
|
||||
|
||||
/* cli */
|
||||
case 0xfa:
|
||||
IP(regs) = ip;
|
||||
clear_IF(regs);
|
||||
VM86_FAULT_RETURN;
|
||||
goto vm86_fault_return;
|
||||
|
||||
/* sti */
|
||||
/*
|
||||
|
@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
case 0xfb:
|
||||
IP(regs) = ip;
|
||||
set_IF(regs);
|
||||
VM86_FAULT_RETURN;
|
||||
goto check_vip;
|
||||
|
||||
default:
|
||||
return_to_32bit(regs, VM86_UNKNOWN);
|
||||
save_v86_state(regs, VM86_UNKNOWN);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
check_vip:
|
||||
if (VEFLAGS & X86_EFLAGS_VIP) {
|
||||
save_v86_state(regs, VM86_STI);
|
||||
return;
|
||||
}
|
||||
|
||||
vm86_fault_return:
|
||||
if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
|
||||
save_v86_state(regs, VM86_PICRETURN);
|
||||
return;
|
||||
}
|
||||
if (orig_flags & X86_EFLAGS_TF)
|
||||
handle_vm86_trap(regs, 0, X86_TRAP_DB);
|
||||
return;
|
||||
|
||||
simulate_sigsegv:
|
||||
/* FIXME: After a long discussion with Stas we finally
|
||||
* agreed, that this is wrong. Here we should
|
||||
|
@ -720,7 +713,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
|
|||
* should be a mixture of the two, but how do we
|
||||
* get the information? [KD]
|
||||
*/
|
||||
return_to_32bit(regs, VM86_UNKNOWN);
|
||||
save_v86_state(regs, VM86_UNKNOWN);
|
||||
}
|
||||
|
||||
/* ---------------- vm86 special IRQ passing stuff ----------------- */
|
||||
|
|
|
@ -1172,7 +1172,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
|
|||
|
||||
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
|
||||
apic->lapic_timer.expired_tscdeadline = 0;
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
|
||||
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
|
||||
|
||||
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
|
||||
|
@ -1240,7 +1240,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
|||
local_irq_save(flags);
|
||||
|
||||
now = apic->lapic_timer.timer.base->get_time();
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
|
||||
if (likely(tscdeadline > guest_tsc)) {
|
||||
ns = (tscdeadline - guest_tsc) * 1000000ULL;
|
||||
do_div(ns, this_tsc_khz);
|
||||
|
|
|
@ -1139,7 +1139,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
|||
{
|
||||
u64 tsc;
|
||||
|
||||
tsc = svm_scale_tsc(vcpu, native_read_tsc());
|
||||
tsc = svm_scale_tsc(vcpu, rdtsc());
|
||||
|
||||
return target_tsc - tsc;
|
||||
}
|
||||
|
@ -3174,7 +3174,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
|||
switch (msr_info->index) {
|
||||
case MSR_IA32_TSC: {
|
||||
msr_info->data = svm->vmcb->control.tsc_offset +
|
||||
svm_scale_tsc(vcpu, native_read_tsc());
|
||||
svm_scale_tsc(vcpu, rdtsc());
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void)
|
|||
{
|
||||
u64 host_tsc, tsc_offset;
|
||||
|
||||
rdtscll(host_tsc);
|
||||
host_tsc = rdtsc();
|
||||
tsc_offset = vmcs_read64(TSC_OFFSET);
|
||||
return host_tsc + tsc_offset;
|
||||
}
|
||||
|
@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
|
|||
|
||||
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
||||
{
|
||||
return target_tsc - native_read_tsc();
|
||||
return target_tsc - rdtsc();
|
||||
}
|
||||
|
||||
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
|
||||
|
|
|
@ -1441,20 +1441,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
|
|||
|
||||
static cycle_t read_tsc(void)
|
||||
{
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
|
||||
/*
|
||||
* Empirically, a fence (of type that depends on the CPU)
|
||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||
* with respect to loads. The various CPU manuals are unclear
|
||||
* as to whether rdtsc can be reordered with later loads,
|
||||
* but no one has ever seen it happen.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
ret = (cycle_t)vget_cycles();
|
||||
|
||||
last = pvclock_gtod_data.clock.cycle_last;
|
||||
cycle_t ret = (cycle_t)rdtsc_ordered();
|
||||
u64 last = pvclock_gtod_data.clock.cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
@ -1643,7 +1631,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|||
return 1;
|
||||
}
|
||||
if (!use_master_clock) {
|
||||
host_tsc = native_read_tsc();
|
||||
host_tsc = rdtsc();
|
||||
kernel_ns = get_kernel_ns();
|
||||
}
|
||||
|
||||
|
@ -2620,7 +2608,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
|
||||
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
|
||||
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
|
||||
native_read_tsc() - vcpu->arch.last_host_tsc;
|
||||
rdtsc() - vcpu->arch.last_host_tsc;
|
||||
if (tsc_delta < 0)
|
||||
mark_tsc_unstable("KVM discovered backwards TSC");
|
||||
if (check_tsc_unstable()) {
|
||||
|
@ -2648,7 +2636,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
kvm_x86_ops->vcpu_put(vcpu);
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
vcpu->arch.last_host_tsc = native_read_tsc();
|
||||
vcpu->arch.last_host_tsc = rdtsc();
|
||||
}
|
||||
|
||||
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
|
||||
|
@ -6387,7 +6375,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
hw_breakpoint_restore();
|
||||
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
|
||||
native_read_tsc());
|
||||
rdtsc());
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
smp_wmb();
|
||||
|
@ -7196,7 +7184,7 @@ int kvm_arch_hardware_enable(void)
|
|||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
local_tsc = native_read_tsc();
|
||||
local_tsc = rdtsc();
|
||||
stable = !check_tsc_unstable();
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <asm/processor.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/mwait.h>
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# include <asm/smp.h>
|
||||
|
@ -49,16 +50,14 @@ static void delay_loop(unsigned long loops)
|
|||
/* TSC based delay: */
|
||||
static void delay_tsc(unsigned long __loops)
|
||||
{
|
||||
u32 bclock, now, loops = __loops;
|
||||
u64 bclock, now, loops = __loops;
|
||||
int cpu;
|
||||
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
rdtsc_barrier();
|
||||
rdtscl(bclock);
|
||||
bclock = rdtsc_ordered();
|
||||
for (;;) {
|
||||
rdtsc_barrier();
|
||||
rdtscl(now);
|
||||
now = rdtsc_ordered();
|
||||
if ((now - bclock) >= loops)
|
||||
break;
|
||||
|
||||
|
@ -79,13 +78,50 @@ static void delay_tsc(unsigned long __loops)
|
|||
if (unlikely(cpu != smp_processor_id())) {
|
||||
loops -= (now - bclock);
|
||||
cpu = smp_processor_id();
|
||||
rdtsc_barrier();
|
||||
rdtscl(bclock);
|
||||
bclock = rdtsc_ordered();
|
||||
}
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer that
* counts at TSC frequency. The value passed in sets that timer; the
* instruction exits when the timer expires.
*/
|
||||
static void delay_mwaitx(unsigned long __loops)
|
||||
{
|
||||
u64 start, end, delay, loops = __loops;
|
||||
|
||||
start = rdtsc_ordered();
|
||||
|
||||
for (;;) {
|
||||
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
|
||||
|
||||
/*
* Use cpu_tss as a cacheline-aligned, seldom-accessed per-cpu
* variable as the monitor target.
*/
|
||||
__monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
|
||||
|
||||
/*
|
||||
* AMD, like Intel, supports the EAX hint and EAX=0xf
|
||||
* means, do not enter any deep C-state and we use it
|
||||
* here in delay() to minimize wakeup latency.
|
||||
*/
|
||||
__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
|
||||
|
||||
end = rdtsc_ordered();
|
||||
|
||||
if (loops <= end - start)
|
||||
break;
|
||||
|
||||
loops -= end - start;
|
||||
|
||||
start = end;
|
||||
}
|
||||
}
|
||||
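
/*
 * For reference, a sketch of what the __monitorx()/__mwaitx() wrappers used
 * above boil down to, assuming the <asm/mwait.h> additions from this series;
 * simplified to the documented opcode bytes and register assignment.
 */
static inline void sketch_monitorx(const void *eax, unsigned long ecx,
				   unsigned long edx)
{
	/* "monitorx %eax, %ecx, %edx" - arm the monitor on the given address */
	asm volatile(".byte 0x0f, 0x01, 0xfa"
		     :: "a" (eax), "c" (ecx), "d" (edx));
}

static inline void sketch_mwaitx(unsigned long eax, unsigned long ebx,
				 unsigned long ecx)
{
	/* "mwaitx %eax, %ebx, %ecx" - EBX carries the TSC-based timeout */
	asm volatile(".byte 0x0f, 0x01, 0xfb"
		     :: "a" (eax), "b" (ebx), "c" (ecx));
}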
|
||||
/*
|
||||
* Since we calibrate only once at boot, this
|
||||
* function should be set once at boot and not changed
|
||||
|
@@ -94,13 +130,19 @@ static void (*delay_fn)(unsigned long) = delay_loop;

void use_tsc_delay(void)
{
delay_fn = delay_tsc;
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
}

void use_mwaitx_delay(void)
{
delay_fn = delay_mwaitx;
}

int read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscll(*timer_val);
*timer_val = rdtsc();
return 0;
}
return -1;
|
||||
|
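
/*
 * Sketch of how a caller opts in to the new delay, assuming CPU setup code
 * that has already detected MWAITX support; the function and the exact
 * condition are illustrative, only use_mwaitx_delay()/use_tsc_delay() come
 * from the patch.
 */
static void sketch_select_delay(struct cpuinfo_x86 *c)
{
	if (cpu_has(c, X86_FEATURE_MWAITX))
		use_mwaitx_delay();	/* MWAITX timer based delay */
	else if (cpu_has(c, X86_FEATURE_TSC))
		use_tsc_delay();	/* TSC based delay */
	/* otherwise the calibrated delay_loop() default stays in place */
}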
|
|
@ -20,6 +20,7 @@
|
|||
#include <linux/stddef.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
|
||||
#include <asm/fixmap.h> /* VSYSCALL_ADDR */
|
||||
#include <asm/vsyscall.h> /* emulate_vsyscall */
|
||||
#include <asm/vm86.h> /* struct vm86 */
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/exceptions.h>
|
||||
|
@@ -301,14 +302,16 @@ static inline void
check_v8086_mode(struct pt_regs *regs, unsigned long address,
struct task_struct *tsk)
{
#ifdef CONFIG_VM86
unsigned long bit;

if (!v8086_mode(regs))
if (!v8086_mode(regs) || !tsk->thread.vm86)
return;

bit = (address - 0xA0000) >> PAGE_SHIFT;
if (bit < 32)
tsk->thread.screen_bitmap |= 1 << bit;
tsk->thread.vm86->screen_bitmap |= 1 << bit;
#endif
}

static bool low_pfn(unsigned long pfn)
|
||||
|
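
/*
 * Worked example of the screen_bitmap update above, assuming a vm86 fault at
 * linear address 0xA3000 with 4 KiB pages (PAGE_SHIFT == 12):
 *
 *   bit = (0xA3000 - 0xA0000) >> 12 = 3
 *
 * so bit 3 is set, marking the fourth page of the 128 KiB legacy VGA window
 * at 0xA0000-0xBFFFF (32 pages, hence the "bit < 32" check).
 */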
|
|
@@ -45,17 +45,4 @@
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)

/*
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
* code region.
*
* (Could use an alternative three way for this if there was one.)
*/
static inline void rdtsc_barrier(void)
{
alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
"lfence", X86_FEATURE_LFENCE_RDTSC);
}

#endif
|
||||
|
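
/*
 * For reference, a simplified sketch of the rdtsc()/rdtsc_ordered() helpers
 * that replace rdtsc_barrier() + get_cycles(); this approximates the
 * <asm/msr.h> versions (no paravirt, no alternatives patching, mfence shown
 * where the real code picks mfence or lfence per CPU).
 */
static __always_inline unsigned long long sketch_rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return low | ((unsigned long long)high << 32);
}

static __always_inline unsigned long long sketch_rdtsc_ordered(void)
{
	barrier();				/* compiler barrier */
	asm volatile("mfence" ::: "memory");	/* keep RDTSC from being hoisted */
	return sketch_rdtsc();
}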
|
|
@ -1215,11 +1215,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
|
|||
.read_msr = xen_read_msr_safe,
|
||||
.write_msr = xen_write_msr_safe,
|
||||
|
||||
.read_tsc = native_read_tsc,
|
||||
.read_pmc = native_read_pmc,
|
||||
|
||||
.read_tscp = native_read_tscp,
|
||||
|
||||
.iret = xen_iret,
|
||||
#ifdef CONFIG_X86_64
|
||||
.usergs_sysret32 = xen_sysret32,
|
||||
|
|
|
@ -766,7 +766,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
|
|||
local_irq_save(flags);
|
||||
rdmsrl(MSR_IA32_APERF, aperf);
|
||||
rdmsrl(MSR_IA32_MPERF, mperf);
|
||||
tsc = native_read_tsc();
|
||||
tsc = rdtsc();
|
||||
local_irq_restore(flags);
|
||||
|
||||
cpu->last_sample_time = cpu->sample.time;
|
||||
|
|
|
@ -149,9 +149,9 @@ static int old_gameport_measure_speed(struct gameport *gameport)
|
|||
|
||||
for(i = 0; i < 50; i++) {
|
||||
local_irq_save(flags);
|
||||
rdtscl(t1);
|
||||
t1 = rdtsc();
|
||||
for (t = 0; t < 50; t++) gameport_read(gameport);
|
||||
rdtscl(t2);
|
||||
t2 = rdtsc();
|
||||
local_irq_restore(flags);
|
||||
udelay(i * 10);
|
||||
if (t2 - t1 < tx) tx = t2 - t1;
|
||||
|
|
|
@@ -143,7 +143,7 @@ struct analog_port {

#include <linux/i8253.h>

#define GET_TIME(x) do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
#define TIME_NAME (cpu_has_tsc?"TSC":"PIT")
static unsigned int get_time_pit(void)

@@ -160,7 +160,7 @@ static unsigned int get_time_pit(void)
return count;
}
#elif defined(__x86_64__)
#define GET_TIME(x) rdtscl(x)
#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0)
#define DELTA(x,y) ((y)-(x))
#define TIME_NAME "TSC"
#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE)
|
||||
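
/*
 * Illustrative only: how drivers use the GET_TIME()/DELTA() pair above to
 * time a bit-banged read; the function is hypothetical, only the macros and
 * the (unsigned int) truncation of rdtsc() come from the patch.
 */
static unsigned int sketch_measure(struct gameport *gameport)
{
	unsigned int t1, t2;
	unsigned long flags;

	local_irq_save(flags);
	GET_TIME(t1);			/* now (unsigned int)rdtsc() on TSC systems */
	gameport_read(gameport);	/* the operation being timed */
	GET_TIME(t2);
	local_irq_restore(flags);

	return DELTA(t1, t2);		/* elapsed ticks, wrap-safe in 32 bits */
}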
|
|
|
@ -638,7 +638,7 @@ static int receive(struct net_device *dev, int cnt)
|
|||
#define GETTICK(x) \
|
||||
({ \
|
||||
if (cpu_has_tsc) \
|
||||
rdtscl(x); \
|
||||
x = (unsigned int)rdtsc(); \
|
||||
})
|
||||
#else /* __i386__ */
|
||||
#define GETTICK(x)
|
||||
|
|
|
@ -1924,6 +1924,9 @@ static void adpt_alpha_info(sysInfo_S* si)
|
|||
#endif
|
||||
|
||||
#if defined __i386__
|
||||
|
||||
#include <uapi/asm/vm86.h>
|
||||
|
||||
static void adpt_i386_info(sysInfo_S* si)
|
||||
{
|
||||
// This is all the info we need for now
|
||||
|
|
|
@ -327,9 +327,6 @@ static void safe_udelay(unsigned long usecs)
|
|||
* time
|
||||
*/
|
||||
|
||||
/* So send_pulse can quickly convert microseconds to clocks */
|
||||
static unsigned long conv_us_to_clocks;
|
||||
|
||||
static int init_timing_params(unsigned int new_duty_cycle,
|
||||
unsigned int new_freq)
|
||||
{
|
||||
|
@ -344,7 +341,6 @@ static int init_timing_params(unsigned int new_duty_cycle,
|
|||
/* How many clocks in a microsecond?, avoiding long long divide */
|
||||
work = loops_per_sec;
|
||||
work *= 4295; /* 4295 = 2^32 / 1e6 */
|
||||
conv_us_to_clocks = work >> 32;
|
||||
|
||||
/*
|
||||
* Carrier period in clocks, approach good up to 32GHz clock,
|
||||
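
/*
 * Worked example of the 2^32 / 1e6 trick in the lines above, assuming
 * loops_per_sec = 100,000,000 (a 100 MHz clock):
 *
 *   work = 100000000 * 4295 = 429,500,000,000      (fits in 64 bits)
 *   work >> 32 = 429500000000 / 4294967296 ~= 100  clocks per microsecond
 *
 * i.e. multiplying by 4295 (~ 2^32 / 10^6) and shifting right by 32
 * approximates a divide by one million without a 64-by-64 division.
 */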
|
@ -357,10 +353,9 @@ static int init_timing_params(unsigned int new_duty_cycle,
|
|||
pulse_width = period * duty_cycle / 100;
|
||||
space_width = period - pulse_width;
|
||||
dprintk("in init_timing_params, freq=%d, duty_cycle=%d, "
|
||||
"clk/jiffy=%ld, pulse=%ld, space=%ld, "
|
||||
"conv_us_to_clocks=%ld\n",
|
||||
"clk/jiffy=%ld, pulse=%ld, space=%ld\n",
|
||||
freq, duty_cycle, __this_cpu_read(cpu_info.loops_per_jiffy),
|
||||
pulse_width, space_width, conv_us_to_clocks);
|
||||
pulse_width, space_width);
|
||||
return 0;
|
||||
}
|
||||
#else /* ! USE_RDTSC */
|
||||
|
@ -431,63 +426,14 @@ static long send_pulse_irdeo(unsigned long length)
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifdef USE_RDTSC
|
||||
/* Version that uses Pentium rdtsc instruction to measure clocks */
|
||||
|
||||
/*
|
||||
* This version does sub-microsecond timing using rdtsc instruction,
|
||||
* and does away with the fudged LIRC_SERIAL_TRANSMITTER_LATENCY
|
||||
* Implicitly i586 architecture... - Steve
|
||||
*/
|
||||
|
||||
static long send_pulse_homebrew_softcarrier(unsigned long length)
|
||||
{
|
||||
int flag;
|
||||
unsigned long target, start, now;
|
||||
|
||||
/* Get going quick as we can */
|
||||
rdtscl(start);
|
||||
on();
|
||||
/* Convert length from microseconds to clocks */
|
||||
length *= conv_us_to_clocks;
|
||||
/* And loop till time is up - flipping at right intervals */
|
||||
now = start;
|
||||
target = pulse_width;
|
||||
flag = 1;
|
||||
/*
|
||||
* FIXME: This looks like a hard busy wait, without even an occasional,
|
||||
* polite, cpu_relax() call. There's got to be a better way?
|
||||
*
|
||||
* The i2c code has the result of a lot of bit-banging work, I wonder if
|
||||
* there's something there which could be helpful here.
|
||||
*/
|
||||
while ((now - start) < length) {
|
||||
/* Delay till flip time */
|
||||
do {
|
||||
rdtscl(now);
|
||||
} while ((now - start) < target);
|
||||
|
||||
/* flip */
|
||||
if (flag) {
|
||||
rdtscl(now);
|
||||
off();
|
||||
target += space_width;
|
||||
} else {
|
||||
rdtscl(now); on();
|
||||
target += pulse_width;
|
||||
}
|
||||
flag = !flag;
|
||||
}
|
||||
rdtscl(now);
|
||||
return ((now - start) - length) / conv_us_to_clocks;
|
||||
}
|
||||
#else /* ! USE_RDTSC */
|
||||
/* Version using udelay() */
|
||||
|
||||
/*
|
||||
* here we use fixed point arithmetic, with 8
|
||||
* fractional bits. that gets us within 0.1% or so of the right average
|
||||
* frequency, albeit with some jitter in pulse length - Steve
|
||||
*
|
||||
* This should use ndelay instead.
|
||||
*/
|
||||
|
||||
/* To match 8 fractional bits used for pulse/space length */
|
||||
|
@ -520,7 +466,6 @@ static long send_pulse_homebrew_softcarrier(unsigned long length)
|
|||
}
|
||||
return (actual-length) >> 8;
|
||||
}
|
||||
#endif /* USE_RDTSC */
|
||||
|
||||
static long send_pulse_homebrew(unsigned long length)
|
||||
{
|
||||
|
|
|
@ -340,7 +340,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
|
|||
|
||||
/* check result for the last window */
|
||||
msr_now = pkg_state_counter();
|
||||
rdtscll(tsc_now);
|
||||
tsc_now = rdtsc();
|
||||
|
||||
/* calculate pkg cstate vs tsc ratio */
|
||||
if (!msr_last || !tsc_last)
|
||||
|
@ -482,7 +482,7 @@ static void poll_pkg_cstate(struct work_struct *dummy)
|
|||
u64 val64;
|
||||
|
||||
msr_now = pkg_state_counter();
|
||||
rdtscll(tsc_now);
|
||||
tsc_now = rdtsc();
|
||||
jiffies_now = jiffies;
|
||||
|
||||
/* calculate pkg cstate vs tsc ratio */
|
||||
|
|
|
@@ -49,13 +49,28 @@ static inline void exception_exit(enum ctx_state prev_ctx)
}
}

/**
* ct_state() - return the current context tracking state if known
*
* Returns the current cpu's context tracking state if context tracking
* is enabled. If context tracking is disabled, returns
* CONTEXT_DISABLED. This should be used primarily for debugging.
*/
static inline enum ctx_state ct_state(void)
{
return context_tracking_is_enabled() ?
this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
}
#else
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
#endif /* !CONFIG_CONTEXT_TRACKING */

#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond))

#ifdef CONFIG_CONTEXT_TRACKING_FORCE
extern void context_tracking_init(void);
|
||||
|
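
/*
 * Illustrative only: how the new ct_state()/CT_WARN_ON() pair is meant to
 * be used from a C entry path; the function name is hypothetical, the two
 * helpers are the ones defined above.
 */
static inline void sketch_enter_from_user_mode(void)
{
	CT_WARN_ON(ct_state() != CONTEXT_USER);	/* must be arriving from user */
	user_exit();				/* flip context tracking to kernel */
}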
|
|
@ -14,6 +14,7 @@ struct context_tracking {
|
|||
bool active;
|
||||
int recursion;
|
||||
enum ctx_state {
|
||||
CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */
|
||||
CONTEXT_KERNEL = 0,
|
||||
CONTEXT_USER,
|
||||
CONTEXT_GUEST,
|
||||
|
|
|
@ -286,7 +286,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
|
|||
* Map the spin_lock functions to the raw variants for PREEMPT_RT=n
|
||||
*/
|
||||
|
||||
static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
|
||||
static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
|
||||
{
|
||||
return &lock->rlock;
|
||||
}
|
||||
|
@ -297,17 +297,17 @@ do { \
|
|||
raw_spin_lock_init(&(_lock)->rlock); \
|
||||
} while (0)
|
||||
|
||||
static inline void spin_lock(spinlock_t *lock)
|
||||
static __always_inline void spin_lock(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_lock(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline void spin_lock_bh(spinlock_t *lock)
|
||||
static __always_inline void spin_lock_bh(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_lock_bh(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline int spin_trylock(spinlock_t *lock)
|
||||
static __always_inline int spin_trylock(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_trylock(&lock->rlock);
|
||||
}
|
||||
|
@ -327,7 +327,7 @@ do { \
|
|||
raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \
|
||||
} while (0)
|
||||
|
||||
static inline void spin_lock_irq(spinlock_t *lock)
|
||||
static __always_inline void spin_lock_irq(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_lock_irq(&lock->rlock);
|
||||
}
|
||||
|
@ -342,32 +342,32 @@ do { \
|
|||
raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
|
||||
} while (0)
|
||||
|
||||
static inline void spin_unlock(spinlock_t *lock)
|
||||
static __always_inline void spin_unlock(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_unlock(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline void spin_unlock_bh(spinlock_t *lock)
|
||||
static __always_inline void spin_unlock_bh(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_unlock_bh(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline void spin_unlock_irq(spinlock_t *lock)
|
||||
static __always_inline void spin_unlock_irq(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_unlock_irq(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
|
||||
static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
|
||||
{
|
||||
raw_spin_unlock_irqrestore(&lock->rlock, flags);
|
||||
}
|
||||
|
||||
static inline int spin_trylock_bh(spinlock_t *lock)
|
||||
static __always_inline int spin_trylock_bh(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_trylock_bh(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline int spin_trylock_irq(spinlock_t *lock)
|
||||
static __always_inline int spin_trylock_irq(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_trylock_irq(&lock->rlock);
|
||||
}
|
||||
|
@ -377,22 +377,22 @@ static inline int spin_trylock_irq(spinlock_t *lock)
|
|||
raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
|
||||
})
|
||||
|
||||
static inline void spin_unlock_wait(spinlock_t *lock)
|
||||
static __always_inline void spin_unlock_wait(spinlock_t *lock)
|
||||
{
|
||||
raw_spin_unlock_wait(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline int spin_is_locked(spinlock_t *lock)
|
||||
static __always_inline int spin_is_locked(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_is_locked(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline int spin_is_contended(spinlock_t *lock)
|
||||
static __always_inline int spin_is_contended(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_is_contended(&lock->rlock);
|
||||
}
|
||||
|
||||
static inline int spin_can_lock(spinlock_t *lock)
|
||||
static __always_inline int spin_can_lock(spinlock_t *lock)
|
||||
{
|
||||
return raw_spin_can_lock(&lock->rlock);
|
||||
}
|
||||
|
|
|
@ -544,6 +544,8 @@ int notrace notify_die(enum die_val val, const char *str,
|
|||
.signr = sig,
|
||||
|
||||
};
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(),
|
||||
"notify_die called but RCU thinks we're quiescent");
|
||||
return atomic_notifier_call_chain(&die_chain, val, &args);
|
||||
}
|
||||
NOKPROBE_SYMBOL(notify_die);
|
||||
|
|
|
@ -140,6 +140,7 @@ cond_syscall(sys_sgetmask);
|
|||
cond_syscall(sys_ssetmask);
|
||||
cond_syscall(sys_vm86old);
|
||||
cond_syscall(sys_vm86);
|
||||
cond_syscall(sys_modify_ldt);
|
||||
cond_syscall(sys_ipc);
|
||||
cond_syscall(compat_sys_ipc);
|
||||
cond_syscall(compat_sys_sysctl);
|
||||
|
|
|
@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void)
|
|||
|
||||
printk(KERN_DEBUG "start--> \n");
|
||||
then = read_pmtmr();
|
||||
rdtscll(then_tsc);
|
||||
then_tsc = rdtsc();
|
||||
for (i=0;i<20;i++) {
|
||||
mdelay(100);
|
||||
now = read_pmtmr();
|
||||
rdtscll(now_tsc);
|
||||
now_tsc = rdtsc();
|
||||
diff = (now - then) & 0xFFFFFF;
|
||||
diff_tsc = now_tsc - then_tsc;
|
||||
printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc);
|
||||
|
|
|
@ -4,8 +4,8 @@ include ../lib.mk
|
|||
|
||||
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
||||
|
||||
TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn
|
||||
|
||||
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
|
||||
BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
|
||||
|
|
|
@ -28,6 +28,55 @@
|
|||
static unsigned long load_addr = 0x10000;
|
||||
static int nerrs = 0;
|
||||
|
||||
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
||||
int flags)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO | flags;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(sig, &sa, 0))
|
||||
err(1, "sigaction");
|
||||
}
|
||||
|
||||
static void clearhandler(int sig)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_handler = SIG_DFL;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(sig, &sa, 0))
|
||||
err(1, "sigaction");
|
||||
}
|
||||
|
||||
static sig_atomic_t got_signal;
|
||||
|
||||
static void sighandler(int sig, siginfo_t *info, void *ctx_void)
|
||||
{
|
||||
ucontext_t *ctx = (ucontext_t*)ctx_void;
|
||||
|
||||
if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
|
||||
(ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
|
||||
printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
|
||||
nerrs++;
|
||||
}
|
||||
|
||||
const char *signame;
|
||||
if (sig == SIGSEGV)
|
||||
signame = "SIGSEGV";
|
||||
else if (sig == SIGILL)
|
||||
signame = "SIGILL";
|
||||
else
|
||||
signame = "unexpected signal";
|
||||
|
||||
printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
|
||||
(unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
|
||||
(unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
|
||||
|
||||
got_signal = 1;
|
||||
}
|
||||
|
||||
asm (
|
||||
".pushsection .rodata\n\t"
|
||||
".type vmcode_bound, @object\n\t"
|
||||
|
@ -38,6 +87,14 @@ asm (
|
|||
"int3\n\t"
|
||||
"vmcode_sysenter:\n\t"
|
||||
"sysenter\n\t"
|
||||
"vmcode_syscall:\n\t"
|
||||
"syscall\n\t"
|
||||
"vmcode_sti:\n\t"
|
||||
"sti\n\t"
|
||||
"vmcode_int3:\n\t"
|
||||
"int3\n\t"
|
||||
"vmcode_int80:\n\t"
|
||||
"int $0x80\n\t"
|
||||
".size vmcode, . - vmcode\n\t"
|
||||
"end_vmcode:\n\t"
|
||||
".code32\n\t"
|
||||
|
@ -45,9 +102,12 @@ asm (
|
|||
);
|
||||
|
||||
extern unsigned char vmcode[], end_vmcode[];
|
||||
extern unsigned char vmcode_bound[], vmcode_sysenter[];
|
||||
extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
	vmcode_sti[], vmcode_int3[], vmcode_int80[];

static void do_test(struct vm86plus_struct *v86, unsigned long eip,
/* Returns false if the test was skipped. */
static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
		    unsigned int rettype, unsigned int retarg,
		    const char *text)
{
	long ret;

@@ -58,7 +118,7 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,

	if (ret == -1 && errno == ENOSYS) {
		printf("[SKIP]\tvm86 not supported\n");
		return;
		return false;
	}

	if (VM86_TYPE(ret) == VM86_INTx) {

@@ -73,13 +133,30 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
		else
			sprintf(trapname, "%d", trapno);

		printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
		printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
	} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
		printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
		printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
	} else if (VM86_TYPE(ret) == VM86_TRAP) {
		printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
		       VM86_ARG(ret));
	} else if (VM86_TYPE(ret) == VM86_SIGNAL) {
		printf("[INFO]\tExited vm86 mode due to a signal\n");
	} else if (VM86_TYPE(ret) == VM86_STI) {
		printf("[INFO]\tExited vm86 mode due to STI\n");
	} else {
		printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
		printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
		       VM86_TYPE(ret), VM86_ARG(ret));
	}

	if (rettype == -1 ||
	    (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
		printf("[OK]\tReturned correctly\n");
	} else {
		printf("[FAIL]\tIncorrect return reason\n");
		nerrs++;
	}

	return true;
}

int main(void)

@@ -105,10 +182,52 @@ int main(void)
	assert((v86.regs.cs & 3) == 0);	/* Looks like RPL = 0 */

	/* #BR -- should deliver SIG??? */
	do_test(&v86, vmcode_bound - vmcode, "#BR");
	do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");

	/* SYSENTER -- should cause #GP or #UD depending on CPU */
	do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
	/*
	 * SYSENTER -- should cause #GP or #UD depending on CPU.
	 * Expected return type -1 means that we shouldn't validate
	 * the vm86 return value.  This will avoid problems on non-SEP
	 * CPUs.
	 */
	sethandler(SIGILL, sighandler, 0);
	do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
	clearhandler(SIGILL);

	/*
	 * SYSCALL would be a disaster in VM86 mode.  Fortunately,
	 * there is no kernel that both enables SYSCALL and sets
	 * EFER.SCE, so it's #UD on all systems.  But vm86 is
	 * buggy (or has a "feature"), so the SIGILL will actually
	 * be delivered.
	 */
	sethandler(SIGILL, sighandler, 0);
	do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
	clearhandler(SIGILL);

	/* STI with VIP set */
	v86.regs.eflags |= X86_EFLAGS_VIP;
	v86.regs.eflags &= ~X86_EFLAGS_IF;
	do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");

	/* INT3 -- should cause #BP */
	do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");

	/* INT80 -- should exit with "INTx 0x80" */
	v86.regs.eax = (unsigned int)-1;
	do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");

	/* Execute a null pointer */
	v86.regs.cs = 0;
	v86.regs.ss = 0;
	sethandler(SIGSEGV, sighandler, 0);
	got_signal = 0;
	if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
	    !got_signal) {
		printf("[FAIL]\tDid not receive SIGSEGV\n");
		nerrs++;
	}
	clearhandler(SIGSEGV);

	return (nerrs == 0 ? 0 : 1);
}
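Note: the rettype/retarg checks added above decode the vm86() return value with the VM86_TYPE()/VM86_ARG() macros from <asm/vm86.h>. A minimal sketch of that decoding (not part of this commit; the helper name and the raw-syscall entry are assumptions, 32-bit x86 only):

#ifdef __i386__
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/vm86.h>	/* struct vm86plus_struct, VM86_ENTER, VM86_TYPE(), VM86_ARG() */

/* Hypothetical helper: enter vm86 mode via the raw syscall and report why it exited. */
static long enter_and_report(struct vm86plus_struct *v86)
{
	long ret = syscall(SYS_vm86, VM86_ENTER, v86);

	/* The low byte of the return value is the exit reason; the rest is its argument. */
	printf("exit type=%ld arg=%ld\n", VM86_TYPE(ret), VM86_ARG(ret));
	return ret;
}
#endif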
@@ -0,0 +1,576 @@
/*
 * ldt_gdt.c - Test cases for LDT and GDT access
 * Copyright (c) 2015 Andrew Lutomirski
 */

#define _GNU_SOURCE
#include <err.h>
#include <stdio.h>
#include <stdint.h>
#include <signal.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdbool.h>
#include <pthread.h>
#include <sched.h>
#include <linux/futex.h>

#define AR_ACCESSED		(1<<8)

#define AR_TYPE_RODATA		(0 * (1<<9))
#define AR_TYPE_RWDATA		(1 * (1<<9))
#define AR_TYPE_RODATA_EXPDOWN	(2 * (1<<9))
#define AR_TYPE_RWDATA_EXPDOWN	(3 * (1<<9))
#define AR_TYPE_XOCODE		(4 * (1<<9))
#define AR_TYPE_XRCODE		(5 * (1<<9))
#define AR_TYPE_XOCODE_CONF	(6 * (1<<9))
#define AR_TYPE_XRCODE_CONF	(7 * (1<<9))

#define AR_DPL3			(3 * (1<<13))

#define AR_S			(1 << 12)
#define AR_P			(1 << 15)
#define AR_AVL			(1 << 20)
#define AR_L			(1 << 21)
#define AR_DB			(1 << 22)
#define AR_G			(1 << 23)

static int nerrs;

static void check_invalid_segment(uint16_t index, int ldt)
{
	uint32_t has_limit = 0, has_ar = 0, limit, ar;
	uint32_t selector = (index << 3) | (ldt << 2) | 3;

	asm ("lsl %[selector], %[limit]\n\t"
	     "jnz 1f\n\t"
	     "movl $1, %[has_limit]\n\t"
	     "1:"
	     : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
	     : [selector] "r" (selector));
	asm ("larl %[selector], %[ar]\n\t"
	     "jnz 1f\n\t"
	     "movl $1, %[has_ar]\n\t"
	     "1:"
	     : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
	     : [selector] "r" (selector));

	if (has_limit || has_ar) {
		printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
		       (ldt ? "LDT" : "GDT"), index);
		nerrs++;
	} else {
		printf("[OK]\t%s entry %hu is invalid\n",
		       (ldt ? "LDT" : "GDT"), index);
	}
}

static void check_valid_segment(uint16_t index, int ldt,
				uint32_t expected_ar, uint32_t expected_limit,
				bool verbose)
{
	uint32_t has_limit = 0, has_ar = 0, limit, ar;
	uint32_t selector = (index << 3) | (ldt << 2) | 3;

	asm ("lsl %[selector], %[limit]\n\t"
	     "jnz 1f\n\t"
	     "movl $1, %[has_limit]\n\t"
	     "1:"
	     : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
	     : [selector] "r" (selector));
	asm ("larl %[selector], %[ar]\n\t"
	     "jnz 1f\n\t"
	     "movl $1, %[has_ar]\n\t"
	     "1:"
	     : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
	     : [selector] "r" (selector));

	if (!has_limit || !has_ar) {
		printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
		       (ldt ? "LDT" : "GDT"), index);
		nerrs++;
		return;
	}

	if (ar != expected_ar) {
		printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
		       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
		nerrs++;
	} else if (limit != expected_limit) {
		printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
		       (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
		nerrs++;
	} else if (verbose) {
		printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
		       (ldt ? "LDT" : "GDT"), index, ar, limit);
	}
}

static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
			       bool oldmode)
{
	int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
			  desc, sizeof(*desc));
	if (ret < -1)
		errno = -ret;
	if (ret == 0) {
		uint32_t limit = desc->limit;
		if (desc->limit_in_pages)
			limit = (limit << 12) + 4095;
		check_valid_segment(desc->entry_number, 1, ar, limit, true);
		return true;
	} else if (errno == ENOSYS) {
		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
		return false;
	} else {
		if (desc->seg_32bit) {
			printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
			       errno);
			nerrs++;
			return false;
		} else {
			printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
			return false;
		}
	}
}

static bool install_valid(const struct user_desc *desc, uint32_t ar)
{
	return install_valid_mode(desc, ar, false);
}

static void install_invalid(const struct user_desc *desc, bool oldmode)
{
	int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
			  desc, sizeof(*desc));
	if (ret < -1)
		errno = -ret;
	if (ret == 0) {
		check_invalid_segment(desc->entry_number, 1);
	} else if (errno == ENOSYS) {
		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
	} else {
		if (desc->seg_32bit) {
			printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
			       errno);
			nerrs++;
		} else {
			printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
		}
	}
}

static int safe_modify_ldt(int func, struct user_desc *ptr,
			   unsigned long bytecount)
{
	int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
	if (ret < -1)
		errno = -ret;
	return ret;
}

static void fail_install(struct user_desc *desc)
{
	if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
		printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
		nerrs++;
	} else if (errno == ENOSYS) {
		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
	} else {
		printf("[OK]\tmodify_ldt failure %d\n", errno);
	}
}

static void do_simple_tests(void)
{
	struct user_desc desc = {
		.entry_number    = 0,
		.base_addr       = 0,
		.limit           = 10,
		.seg_32bit       = 1,
		.contents        = 2, /* Code, not conforming */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 0,
		.useable         = 0
	};
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);

	desc.limit_in_pages = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_P | AR_DB | AR_G);

	check_invalid_segment(1, 1);

	desc.entry_number = 2;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_P | AR_DB | AR_G);

	check_invalid_segment(1, 1);

	desc.base_addr = 0xf0000000;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_P | AR_DB | AR_G);

	desc.useable = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_P | AR_DB | AR_G | AR_AVL);

	desc.seg_not_present = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_DB | AR_G | AR_AVL);

	desc.seg_32bit = 0;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_G | AR_AVL);

	desc.seg_32bit = 1;
	desc.contents = 0;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
		      AR_S | AR_DB | AR_G | AR_AVL);

	desc.read_exec_only = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
		      AR_S | AR_DB | AR_G | AR_AVL);

	desc.contents = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
		      AR_S | AR_DB | AR_G | AR_AVL);

	desc.read_exec_only = 0;
	desc.limit_in_pages = 0;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
		      AR_S | AR_DB | AR_AVL);

	desc.contents = 3;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
		      AR_S | AR_DB | AR_AVL);

	desc.read_exec_only = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
		      AR_S | AR_DB | AR_AVL);

	desc.read_exec_only = 0;
	desc.contents = 2;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
		      AR_S | AR_DB | AR_AVL);

	desc.read_exec_only = 1;

#ifdef __x86_64__
	desc.lm = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
		      AR_S | AR_DB | AR_AVL);
	desc.lm = 0;
#endif

	bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
					 AR_S | AR_DB | AR_AVL);

	if (entry1_okay) {
		printf("[RUN]\tTest fork\n");
		pid_t child = fork();
		if (child == 0) {
			nerrs = 0;
			check_valid_segment(desc.entry_number, 1,
					    AR_DPL3 | AR_TYPE_XOCODE |
					    AR_S | AR_DB | AR_AVL, desc.limit,
					    true);
			check_invalid_segment(1, 1);
			exit(nerrs ? 1 : 0);
		} else {
			int status;
			if (waitpid(child, &status, 0) != child ||
			    !WIFEXITED(status)) {
				printf("[FAIL]\tChild died\n");
				nerrs++;
			} else if (WEXITSTATUS(status) != 0) {
				printf("[FAIL]\tChild failed\n");
				nerrs++;
			} else {
				printf("[OK]\tChild succeeded\n");
			}
		}

		printf("[RUN]\tTest size\n");
		int i;
		for (i = 0; i < 8192; i++) {
			desc.entry_number = i;
			desc.limit = i;
			if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
				printf("[FAIL]\tFailed to install entry %d\n", i);
				nerrs++;
				break;
			}
		}
		for (int j = 0; j < i; j++) {
			check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
					    AR_S | AR_DB | AR_AVL, j, false);
		}
		printf("[DONE]\tSize test\n");
	} else {
		printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
	}

	/* Test entry_number too high. */
	desc.entry_number = 8192;
	fail_install(&desc);

	/* Test deletion and actions mistakeable for deletion. */
	memset(&desc, 0, sizeof(desc));
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);

	desc.seg_not_present = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);

	desc.seg_not_present = 0;
	desc.read_exec_only = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);

	desc.read_exec_only = 0;
	desc.seg_not_present = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);

	desc.read_exec_only = 1;
	desc.limit = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);

	desc.limit = 0;
	desc.base_addr = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);

	desc.base_addr = 0;
	install_invalid(&desc, false);

	desc.seg_not_present = 0;
	desc.read_exec_only = 0;
	desc.seg_32bit = 1;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
	install_invalid(&desc, true);
}

/*
 * 0: thread is idle
 * 1: thread armed
 * 2: thread should clear LDT entry 0
 * 3: thread should exit
 */
static volatile unsigned int ftx;

static void *threadproc(void *ctx)
{
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(1, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
		err(1, "sched_setaffinity to CPU 1");	/* should never fail */

	while (1) {
		syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
		while (ftx != 2) {
			if (ftx >= 3)
				return NULL;
		}

		/* clear LDT entry 0 */
		const struct user_desc desc = {};
		if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
			err(1, "modify_ldt");

		/* If ftx == 2, set it to zero.  If ftx == 100, quit. */
		unsigned int x = -2;
		asm volatile ("lock xaddl %[x], %[ftx]" :
			      [x] "+r" (x), [ftx] "+m" (ftx));
		if (x != 2)
			return NULL;
	}
}

static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | flags;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");

}

static jmp_buf jmpbuf;

static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
	siglongjmp(jmpbuf, 1);
}

static void do_multicpu_tests(void)
{
	cpu_set_t cpuset;
	pthread_t thread;
	int failures = 0, iters = 5, i;
	unsigned short orig_ss;

	CPU_ZERO(&cpuset);
	CPU_SET(1, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
		printf("[SKIP]\tCannot set affinity to CPU 1\n");
		return;
	}

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
		printf("[SKIP]\tCannot set affinity to CPU 0\n");
		return;
	}

	sethandler(SIGSEGV, sigsegv, 0);
#ifdef __i386__
	/* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
	sethandler(SIGILL, sigsegv, 0);
#endif

	printf("[RUN]\tCross-CPU LDT invalidation\n");

	if (pthread_create(&thread, 0, threadproc, 0) != 0)
		err(1, "pthread_create");

	asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));

	for (i = 0; i < 5; i++) {
		if (sigsetjmp(jmpbuf, 1) != 0)
			continue;

		/* Make sure the thread is ready after the last test. */
		while (ftx != 0)
			;

		struct user_desc desc = {
			.entry_number    = 0,
			.base_addr       = 0,
			.limit           = 0xfffff,
			.seg_32bit       = 1,
			.contents        = 0, /* Data */
			.read_exec_only  = 0,
			.limit_in_pages  = 1,
			.seg_not_present = 0,
			.useable         = 0
		};

		if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
			if (errno != ENOSYS)
				err(1, "modify_ldt");
			printf("[SKIP]\tmodify_ldt unavailable\n");
			break;
		}

		/* Arm the thread. */
		ftx = 1;
		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);

		asm volatile ("mov %0, %%ss" : : "r" (0x7));

		/* Go! */
		ftx = 2;

		while (ftx != 0)
			;

		/*
		 * On success, modify_ldt will segfault us synchronously,
		 * and we'll escape via siglongjmp.
		 */

		failures++;
		asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
	};

	ftx = 100;	/* Kill the thread. */
	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);

	if (pthread_join(thread, NULL) != 0)
		err(1, "pthread_join");

	if (failures) {
		printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
		nerrs++;
	} else {
		printf("[OK]\tAll %d iterations succeeded\n", iters);
	}
}

static int finish_exec_test(void)
{
	/*
	 * In a sensible world, this would be check_invalid_segment(0, 1);
	 * For better or for worse, though, the LDT is inherited across exec.
	 * We can probably change this safely, but for now we test it.
	 */
	check_valid_segment(0, 1,
			    AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
			    42, true);

	return nerrs ? 1 : 0;
}

static void do_exec_test(void)
{
	printf("[RUN]\tTest exec\n");

	struct user_desc desc = {
		.entry_number    = 0,
		.base_addr       = 0,
		.limit           = 42,
		.seg_32bit       = 1,
		.contents        = 2, /* Code, not conforming */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 0,
		.useable         = 0
	};
	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);

	pid_t child = fork();
	if (child == 0) {
		execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
		printf("[FAIL]\tCould not exec self\n");
		exit(1);	/* exec failed */
	} else {
		int status;
		if (waitpid(child, &status, 0) != child ||
		    !WIFEXITED(status)) {
			printf("[FAIL]\tChild died\n");
			nerrs++;
		} else if (WEXITSTATUS(status) != 0) {
			printf("[FAIL]\tChild failed\n");
			nerrs++;
		} else {
			printf("[OK]\tChild succeeded\n");
		}
	}
}

int main(int argc, char **argv)
{
	if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
		return finish_exec_test();

	do_simple_tests();

	do_multicpu_tests();

	do_exec_test();

	return nerrs ? 1 : 0;
}
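Note: the check_*_segment() helpers above probe descriptors with LAR/LSL through a selector built as (index << 3) | (ldt << 2) | rpl, i.e. bit 2 is the table indicator (LDT vs GDT) and the low two bits are the requested privilege level. A small worked sketch of that encoding (illustrative only, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: build an x86 segment selector from its three fields. */
static uint16_t make_selector(uint16_t index, int use_ldt, int rpl)
{
	return (index << 3) | ((use_ldt ? 1 : 0) << 2) | (rpl & 3);
}

int main(void)
{
	/* LDT entry 0 at RPL 3 is 0x0007 -- the value the multicpu test loads into %ss. */
	printf("0x%04x\n", make_selector(0, 1, 3));
	/* GDT entry 2 at RPL 3 would be 0x0013. */
	printf("0x%04x\n", make_selector(2, 0, 3));
	return 0;
}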
@@ -0,0 +1,130 @@
/*
 * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
 * Copyright (c) 2015 Andrew Lutomirski
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/signal.h>
#include <sys/ucontext.h>
#include <err.h>
#include <setjmp.h>
#include <errno.h>

/* Our sigaltstack scratch space. */
static unsigned char altstack_data[SIGSTKSZ];

static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | flags;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");
}

static volatile sig_atomic_t sig_traps;
static sigjmp_buf jmpbuf;

static volatile sig_atomic_t n_errs;

static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
		printf("[FAIL]\tAX had the wrong value: 0x%x\n",
		       ctx->uc_mcontext.gregs[REG_EAX]);
		n_errs++;
	} else {
		printf("[OK]\tSeems okay\n");
	}

	siglongjmp(jmpbuf, 1);
}

static void sigill(int sig, siginfo_t *info, void *ctx_void)
{
	printf("[SKIP]\tIllegal instruction\n");
	siglongjmp(jmpbuf, 1);
}

int main()
{
	stack_t stack = {
		.ss_sp = altstack_data,
		.ss_size = SIGSTKSZ,
	};
	if (sigaltstack(&stack, NULL) != 0)
		err(1, "sigaltstack");

	sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
	sethandler(SIGILL, sigill, SA_ONSTACK);

	/*
	 * Exercise another nasty special case.  The 32-bit SYSCALL
	 * and SYSENTER instructions (even in compat mode) each
	 * clobber one register.  A Linux system call has a syscall
	 * number and six arguments, and the user stack pointer
	 * needs to live in some register on return.  That means
	 * that we need eight registers, but SYSCALL and SYSENTER
	 * only preserve seven registers.  As a result, one argument
	 * ends up on the stack.  The stack is user memory, which
	 * means that the kernel can fail to read it.
	 *
	 * The 32-bit fast system calls don't have a defined ABI:
	 * we're supposed to invoke them through the vDSO.  So we'll
	 * fudge it: we set all regs to invalid pointer values and
	 * invoke the entry instruction.  The return will fail no
	 * matter what, and we completely lose our program state,
	 * but we can fix it up with a signal handler.
	 */

	printf("[RUN]\tSYSENTER with invalid state\n");
	if (sigsetjmp(jmpbuf, 1) == 0) {
		asm volatile (
			"movl $-1, %%eax\n\t"
			"movl $-1, %%ebx\n\t"
			"movl $-1, %%ecx\n\t"
			"movl $-1, %%edx\n\t"
			"movl $-1, %%esi\n\t"
			"movl $-1, %%edi\n\t"
			"movl $-1, %%ebp\n\t"
			"movl $-1, %%esp\n\t"
			"sysenter"
			: : : "memory", "flags");
	}

	printf("[RUN]\tSYSCALL with invalid state\n");
	if (sigsetjmp(jmpbuf, 1) == 0) {
		asm volatile (
			"movl $-1, %%eax\n\t"
			"movl $-1, %%ebx\n\t"
			"movl $-1, %%ecx\n\t"
			"movl $-1, %%edx\n\t"
			"movl $-1, %%esi\n\t"
			"movl $-1, %%edi\n\t"
			"movl $-1, %%ebp\n\t"
			"movl $-1, %%esp\n\t"
			"syscall\n\t"
			"pushl $0"	/* make sure we segfault cleanly */
			: : : "memory", "flags");
	}

	return 0;
}
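Note: the comment above refers to the 32-bit argument convention, where eax carries the syscall number and ebx, ecx, edx, esi, edi, ebp carry arguments one through six -- one register more than the fast-path entries can preserve. A minimal sketch of the classic int $0x80 form of that convention (illustrative, 32-bit only, not part of the commit):

#ifdef __i386__
/* Illustrative only: the legacy int $0x80 convention that the fast paths emulate.
 * eax = syscall number, ebx..ebp = arguments 1..6; the result comes back in eax. */
static long int80_syscall3(long nr, long a1, long a2, long a3)
{
	long ret;
	asm volatile ("int $0x80"
		      : "=a" (ret)
		      : "a" (nr), "b" (a1), "c" (a2), "d" (a3)
		      : "memory");
	return ret;
}
#endif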
@@ -0,0 +1,54 @@
/*
 * syscall_nt.c - checks syscalls with NT set
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Some obscure user-space code requires the ability to make system calls
 * with FLAGS.NT set.  Make sure it works.
 */

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/processor-flags.h>

#ifdef __x86_64__
# define WIDTH "q"
#else
# define WIDTH "l"
#endif

static unsigned long get_eflags(void)
{
	unsigned long eflags;
	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
	return eflags;
}

static void set_eflags(unsigned long eflags)
{
	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
		      : : "rm" (eflags) : "flags");
}

int main()
{
	printf("[RUN]\tSet NT and issue a syscall\n");
	set_eflags(get_eflags() | X86_EFLAGS_NT);
	syscall(SYS_getpid);
	if (get_eflags() & X86_EFLAGS_NT) {
		printf("[OK]\tThe syscall worked and NT is still set\n");
		return 0;
	} else {
		printf("[FAIL]\tThe syscall worked but NT was cleared\n");
		return 1;
	}
}
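Note: the pushf/popf helpers above could in principle be reused to check whether other user-controlled EFLAGS bits survive a system call. A hypothetical extension sketch, not part of the commit, reusing get_eflags()/set_eflags() from the test above (the helper name and the choice of candidate flag are assumptions):

/* Hypothetical: verify that an arbitrary EFLAGS bit survives a syscall. */
static int check_flag_preserved(unsigned long flag, const char *name)
{
	int ok;

	set_eflags(get_eflags() | flag);
	syscall(SYS_getpid);
	ok = !!(get_eflags() & flag);
	set_eflags(get_eflags() & ~flag);	/* restore before doing anything else */

	printf("[%s]\t%s %s the syscall\n", ok ? "OK" : "FAIL",
	       name, ok ? "survived" : "was cleared by");
	return ok ? 0 : 1;
}

/* Possible usage: check_flag_preserved(X86_EFLAGS_AC, "AC"); */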