x86/vdso: Remove runtime 32-bit vDSO selection
32-bit userspace will now always see the same vDSO, which is exactly what used
to be the int80 vDSO. Subsequent patches will clean it up and make it support
SYSENTER and SYSCALL using alternatives.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/e7e6b3526fa442502e6125fe69486aab50813c32.1444091584.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 0a6d1fa0d2
parent b611acf473
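For illustration only (not part of this commit): the entry point that the single 32-bit image exports, __kernel_vsyscall, is what a 32-bit process finds through the AT_SYSINFO entry of its ELF auxiliary vector. A hypothetical userspace sketch of calling through it, assuming a 32-bit build (gcc -m32) and glibc's getauxval():

/* Hypothetical test program: issue getpid() through the vDSO entry
 * point advertised in AT_SYSINFO.  Not part of the patch. */
#include <stdio.h>
#include <sys/auxv.h>		/* getauxval(), AT_SYSINFO via <elf.h> */

int main(void)
{
	unsigned long vsyscall = getauxval(AT_SYSINFO);	/* set for 32-bit tasks only */
	long ret;

	if (!vsyscall)
		return 1;

	/* %eax carries the syscall number (__NR_getpid is 20 on i386);
	 * which instruction the stub uses internally is invisible here. */
	asm volatile("call *%1"
		     : "=a" (ret)
		     : "r" (vsyscall), "0" (20)
		     : "memory");

	printf("getpid() via __kernel_vsyscall: %ld\n", ret);
	return 0;
}

Whatever the stub ends up executing internally (int $0x80 today, SYSENTER or SYSCALL once the alternatives-based follow-ups land), the caller's side of the interface stays exactly like this.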
@@ -19,9 +19,7 @@ obj-y += vma.o
 # vDSO images to build
 vdso_img-$(VDSO64-y) += 64
 vdso_img-$(VDSOX32-y) += x32
-vdso_img-$(VDSO32-y) += 32-int80
-vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall
-vdso_img-$(VDSO32-y) += 32-sysenter
+vdso_img-$(VDSO32-y) += 32
 
 obj-$(VDSO32-y) += vdso32-setup.o
 
@@ -122,15 +120,6 @@ $(obj)/%.so: $(obj)/%.so.dbg
 $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
 	$(call if_changed,vdso)
 
-#
-# Build multiple 32-bit vDSO images to choose from at boot time.
-#
-vdso32.so-$(VDSO32-y) += int80
-vdso32.so-$(CONFIG_IA32_EMULATION) += syscall
-vdso32.so-$(VDSO32-y) += sysenter
-
-vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
-
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 
@@ -139,14 +128,12 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
 targets += vdso32/vclock_gettime.o
 
-$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
-
 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
+$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
 
 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
@@ -157,13 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 
-$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
-	$(obj)/vdso32/vdso32.lds \
-	$(obj)/vdso32/vclock_gettime.o \
-	$(obj)/vdso32/note.o \
-	$(obj)/vdso32/%.o
+$(obj)/vdso32.so.dbg: FORCE \
+	$(obj)/vdso32/vdso32.lds \
+	$(obj)/vdso32/vclock_gettime.o \
+	$(obj)/vdso32/note.o \
+	$(obj)/vdso32/system_call.o
 	$(call if_changed,vdso)
 
 #
@@ -206,4 +193,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE
 
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
+clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
@@ -98,7 +98,6 @@ struct vdso_sym required_syms[] = {
 		"VDSO_FAKE_SECTION_TABLE_END", false
 	},
 	{"VDSO32_NOTE_MASK", true},
-	{"VDSO32_SYSENTER_RETURN", true},
 	{"__kernel_vsyscall", true},
 	{"__kernel_sigreturn", true},
 	{"__kernel_rt_sigreturn", true},
@@ -48,35 +48,9 @@ __setup("vdso32=", vdso32_setup);
 __setup_param("vdso=", vdso_setup, vdso32_setup, 0);
 #endif
 
-#ifdef CONFIG_X86_64
-
-#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
-#define vdso32_syscall()  (boot_cpu_has(X86_FEATURE_SYSCALL32))
-
-#else /* CONFIG_X86_32 */
-
-#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
-#define vdso32_syscall()  (0)
-
-#endif /* CONFIG_X86_64 */
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
-const struct vdso_image *selected_vdso32;
-#endif
-
 int __init sysenter_setup(void)
 {
-#ifdef CONFIG_COMPAT
-	if (vdso32_syscall())
-		selected_vdso32 = &vdso_image_32_syscall;
-	else
-#endif
-	if (vdso32_sysenter())
-		selected_vdso32 = &vdso_image_32_sysenter;
-	else
-		selected_vdso32 = &vdso_image_32_int80;
-
-	init_vdso_image(selected_vdso32);
+	init_vdso_image(&vdso_image_32);
 
 	return 0;
 }
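The boot-time image selection deleted above is what the commit message says will later be replaced by alternatives: the single 32-bit image carries one entry sequence, and the boot-time patcher rewrites it according to CPU feature bits instead of choosing between whole images with boot_cpu_has(). As a rough, generic illustration of that mechanism only (kernel-style C; the read_tsc_patched() helper and the RDTSCP example are assumptions for illustration, not part of this patch or its follow-ups, and this only builds in kernel context):

/* Illustrative kernel-style sketch: ALTERNATIVE() emits the first
 * instruction and lets apply_alternatives() patch in the second at
 * boot when the CPU advertises the named feature bit. */
#include <asm/alternative.h>
#include <asm/cpufeature.h>

static unsigned long long read_tsc_patched(void)
{
	unsigned int lo, hi;

	/* rdtsc by default; patched to rdtscp on CPUs with the feature */
	asm volatile(ALTERNATIVE("rdtsc", "rdtscp", X86_FEATURE_RDTSCP)
		     : "=a" (lo), "=d" (hi) : : "ecx");

	return ((unsigned long long)hi << 32) | lo;
}

The same patch-at-boot idea is how a single __kernel_vsyscall stub can end up containing int $0x80, SYSENTER, or SYSCALL on different machines without any runtime selection code.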
@@ -1,75 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the syscall instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#define SYSCALL_ENTER_KERNEL	syscall
-#include "sigreturn.S"
-
-#include <asm/segment.h>
-
-	.text
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-	ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push	%ebp
-.Lpush_ebp:
-	movl	%ecx, %ebp
-	syscall
-	movl	%ebp, %ecx
-	popl	%ebp
-.Lpop_ebp:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-	.long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIE:
-
-	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
-.LSTARTFDE1:
-	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 8
-	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
-	.byte 0x40 + .Lpop_ebp-.Lpush_ebp	/* DW_CFA_advance_loc */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 4
-	.align 4
-.LENDFDE1:
-	.previous
-
-/*
- * Pad out the segment to match the size of the sysenter.S version.
- */
-VDSO32_vsyscall_eh_frame_size = 0x40
-	.section .data,"aw",@progbits
-	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
-	.previous
@@ -1,116 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the sysenter instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-/*
- * The caller puts arg2 in %ecx, which gets pushed.  The kernel will use
- * %ecx itself for arg2.  The pushing is because the sysexit instruction
- * (found in entry.S) requires that we clobber %ecx with the desired %esp.
- * User code might expect that %ecx is unclobbered though, as it would be
- * for returning via the iret instruction, so we must push and pop.
- *
- * The caller puts arg3 in %edx, which the sysexit instruction requires
- * for %eip.  Thus, exactly as for arg2, we must push and pop.
- *
- * Arg6 is different.  The caller puts arg6 in %ebp.  Since the sysenter
- * instruction clobbers %esp, the user's %esp won't even survive entry
- * into the kernel.  We store %esp in %ebp.  Code in entry.S must fetch
- * arg6 from the stack.
- *
- * You can not use this vsyscall for the clone() syscall because the
- * three words on the parent stack do not get copied to the child.
- */
-	.text
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-	ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push %ecx
-.Lpush_ecx:
-	push %edx
-.Lpush_edx:
-	push %ebp
-.Lenter_kernel:
-	movl %esp,%ebp
-	sysenter
-
-	/* 7: align return point with nop's to make disassembly easier */
-	.space 7,0x90
-
-	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-	int $0x80
-	/* 16: System call normal return point is here! */
-VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
-	pop %ebp
-.Lpop_ebp:
-	pop %edx
-.Lpop_edx:
-	pop %ecx
-.Lpop_ecx:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-	.previous
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
-	.long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIEDLSI:
-	.long .LENDFDEDLSI-.LSTARTFDEDLSI	/* Length FDE */
-.LSTARTFDEDLSI:
-	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x40 + (.Lpush_edx-.Lpush_ecx)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0x40 + (.Lenter_kernel-.Lpush_edx)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x10		/* RA at offset 16 now */
-	.byte 0x85, 0x04	/* DW_CFA_offset %ebp -16 */
-	/* Finally the epilogue.  */
-	.byte 0x40 + (.Lpop_ebp-.Lenter_kernel)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x40 + (.Lpop_edx-.Lpop_ebp)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x40 + (.Lpop_ecx-.Lpop_edx)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x04		/* RA at offset 4 now */
-	.align 4
-.LENDFDEDLSI:
-	.previous
-
-/*
- * Emit a symbol with the size of this .eh_frame data,
- * to verify it matches the other versions.
- */
-VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
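The long comment at the top of the deleted sysenter.S describes the i386 syscall ABI the stub has to preserve: arg6 travels in %ebp, and because SYSENTER clobbers %esp, the stub parks %esp in %ebp so the kernel can still fetch arg6 from the user stack. The caller's half of that register dance shows up in any six-argument syscall. A hypothetical userspace sketch (not part of the patch; it uses a raw int $0x80 rather than the vDSO stub for simplicity, and is built with gcc -m32):

/* Hypothetical sketch of the i386 six-argument syscall convention:
 * save the caller's %ebp, load arg6 (pgoff) into it, trap, restore. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>		/* PROT_*, MAP_* */

#define NR_mmap2 192		/* i386 syscall number for mmap2 */

static long mmap2_int80(unsigned long addr, unsigned long len,
			unsigned long prot, unsigned long flags,
			long fd, unsigned long pgoff)
{
	long ret;

	asm volatile("pushl %7\n\t"		/* stash arg6 on the stack */
		     "pushl %%ebp\n\t"		/* save the caller's %ebp */
		     "movl 4(%%esp), %%ebp\n\t"	/* arg6 now lives in %ebp */
		     "int $0x80\n\t"
		     "popl %%ebp\n\t"		/* restore the caller's %ebp */
		     "addl $4, %%esp"		/* drop the stashed arg6 */
		     : "=a" (ret)
		     : "a" (NR_mmap2), "b" (addr), "c" (len), "d" (prot),
		       "S" (flags), "D" (fd), "g" (pgoff)
		     : "memory");
	return ret;
}

int main(void)
{
	long p = mmap2_int80(0, 4096, PROT_READ | PROT_WRITE,
			     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	printf("mmap2 via int $0x80: %#lx\n", (unsigned long)p);
	return 0;
}

Because all six general-purpose argument registers are occupied, the sixth argument has to be bounced through the stack around the trap, which is exactly the constraint the removed stub's push/mov/pop sequence was working around.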
@@ -180,21 +180,10 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static int load_vdso32(void)
 {
-	int ret;
-
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	ret = map_vdso(selected_vdso32, false);
-	if (ret)
-		return ret;
-
-	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
-		current_thread_info()->sysenter_return =
-			current->mm->context.vdso +
-			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
-
-	return 0;
+	return map_vdso(&vdso_image_32, false);
 }
 #endif
 
@@ -289,7 +289,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
 		/* Return stub is in 32bit vsyscall page */
 		if (current->mm->context.vdso)
 			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_sigreturn;
+				vdso_image_32.sym___kernel_sigreturn;
 		else
 			restorer = &frame->retcode;
 	}
@@ -368,7 +368,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 			restorer = ksig->ka.sa.sa_restorer;
 		else
 			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_rt_sigreturn;
+				vdso_image_32.sym___kernel_rt_sigreturn;
 		put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
 
 		/*
@@ -328,7 +328,7 @@ else \
 
 #define VDSO_ENTRY \
 	((unsigned long)current->mm->context.vdso + \
-	 selected_vdso32->sym___kernel_vsyscall)
+	 vdso_image_32.sym___kernel_vsyscall)
 
 struct linux_binprm;
 
@@ -26,7 +26,6 @@ struct vdso_image {
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
 	long sym___kernel_vsyscall;
-	long sym_VDSO32_SYSENTER_RETURN;
 };
 
 #ifdef CONFIG_X86_64
@@ -38,13 +37,7 @@ extern const struct vdso_image vdso_image_x32;
 #endif
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_int80;
-#ifdef CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_syscall;
-#endif
-extern const struct vdso_image vdso_image_32_sysenter;
-
-extern const struct vdso_image *selected_vdso32;
+extern const struct vdso_image vdso_image_32;
 #endif
 
 extern void __init init_vdso_image(const struct vdso_image *image);
@@ -299,7 +299,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
 
 	if (current->mm->context.vdso)
 		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_sigreturn;
+			vdso_image_32.sym___kernel_sigreturn;
 	else
 		restorer = &frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
@@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 	/* Set up to return from userspace.  */
 	restorer = current->mm->context.vdso +
-		selected_vdso32->sym___kernel_rt_sigreturn;
+		vdso_image_32.sym___kernel_rt_sigreturn;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
 	put_user_ex(restorer, &frame->pretcode);
@@ -965,17 +965,8 @@ char * __init xen_auto_xlated_memory_setup(void)
 static void __init fiddle_vdso(void)
 {
 #ifdef CONFIG_X86_32
-	/*
-	 * This could be called before selected_vdso32 is initialized, so
-	 * just fiddle with both possible images.  vdso_image_32_syscall
-	 * can't be selected, since it only exists on 64-bit systems.
-	 */
-	u32 *mask;
-	mask = vdso_image_32_int80.data +
-		vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
-	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-	mask = vdso_image_32_sysenter.data +
-		vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
+	u32 *mask = vdso_image_32.data +
+		vdso_image_32.sym_VDSO32_NOTE_MASK;
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 #endif
 }