mirror of https://gitee.com/openkylin/linux.git
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc fixes: a binutils fix, an lguest fix, an mcelog fix and a missing
  documentation fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Avoid using object after free in genpool
  lguest, x86/entry/32: Fix handling of guest syscalls using interrupt gates
  x86/build: Build compressed x86 kernels as PIE
  x86/mm/pkeys: Add missing Documentation
This commit is contained in:
commit
806fdcce01
|
@ -0,0 +1,27 @@
|
||||||
|
Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
|
||||||
|
which will be found on future Intel CPUs.
|
||||||
|
|
||||||
|
Memory Protection Keys provides a mechanism for enforcing page-based
|
||||||
|
protections, but without requiring modification of the page tables
|
||||||
|
when an application changes protection domains. It works by
|
||||||
|
dedicating 4 previously ignored bits in each page table entry to a
|
||||||
|
"protection key", giving 16 possible keys.
|
||||||
|
|
||||||
|
There is also a new user-accessible register (PKRU) with two separate
|
||||||
|
bits (Access Disable and Write Disable) for each key. Being a CPU
|
||||||
|
register, PKRU is inherently thread-local, potentially giving each
|
||||||
|
thread a different set of protections from every other thread.
|
||||||
|
|
||||||
|
There are two new instructions (RDPKRU/WRPKRU) for reading and writing
|
||||||
|
to the new register. The feature is only available in 64-bit mode,
|
||||||
|
even though there is theoretically space in the PAE PTEs. These
|
||||||
|
permissions are enforced on data access only and have no effect on
|
||||||
|
instruction fetches.
|
||||||
|
|
||||||
|
=========================== Config Option ===========================
|
||||||
|
|
||||||
|
This config option adds approximately 1.5kb of text and 50 bytes of
|
||||||
|
data to the executable. A workload which does large O_DIRECT reads
|
||||||
|
of holes in XFS files was run to exercise get_user_pages_fast(). No
|
||||||
|
performance delta was observed with the config option
|
||||||
|
enabled or disabled.
|
|
@ -26,7 +26,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
|
||||||
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
|
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
|
||||||
|
|
||||||
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
|
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
|
||||||
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
|
KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
|
||||||
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
|
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
|
||||||
cflags-$(CONFIG_X86_32) := -march=i386
|
cflags-$(CONFIG_X86_32) := -march=i386
|
||||||
cflags-$(CONFIG_X86_64) := -mcmodel=small
|
cflags-$(CONFIG_X86_64) := -mcmodel=small
|
||||||
|
@ -40,6 +40,18 @@ GCOV_PROFILE := n
|
||||||
UBSAN_SANITIZE :=n
|
UBSAN_SANITIZE :=n
|
||||||
|
|
||||||
LDFLAGS := -m elf_$(UTS_MACHINE)
|
LDFLAGS := -m elf_$(UTS_MACHINE)
|
||||||
|
ifeq ($(CONFIG_RELOCATABLE),y)
|
||||||
|
# If kernel is relocatable, build compressed kernel as PIE.
|
||||||
|
ifeq ($(CONFIG_X86_32),y)
|
||||||
|
LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
|
||||||
|
else
|
||||||
|
# To build 64-bit compressed kernel as PIE, we disable relocation
|
||||||
|
# overflow check to avoid relocation overflow error with a new linker
|
||||||
|
# command-line option, -z noreloc-overflow.
|
||||||
|
LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
|
||||||
|
&& echo "-z noreloc-overflow -pie --no-dynamic-linker")
|
||||||
|
endif
|
||||||
|
endif
|
||||||
LDFLAGS_vmlinux := -T
|
LDFLAGS_vmlinux := -T
|
||||||
|
|
||||||
hostprogs-y := mkpiggy
|
hostprogs-y := mkpiggy
|
||||||
|
|
|
@ -31,6 +31,34 @@
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/bootparam.h>
|
#include <asm/bootparam.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
|
||||||
|
* relocation to get the symbol address in PIC. When the compressed x86
|
||||||
|
* kernel isn't built as PIC, the linker optimizes R_386_GOT32X
|
||||||
|
* relocations to their fixed symbol addresses. However, when the
|
||||||
|
* compressed x86 kernel is loaded at a different address, it leads
|
||||||
|
* to the following load failure:
|
||||||
|
*
|
||||||
|
* Failed to allocate space for phdrs
|
||||||
|
*
|
||||||
|
* during the decompression stage.
|
||||||
|
*
|
||||||
|
* If the compressed x86 kernel is relocatable at run-time, it should be
|
||||||
|
* compiled with -fPIE, instead of -fPIC, if possible and should be built as
|
||||||
|
* Position Independent Executable (PIE) so that linker won't optimize
|
||||||
|
* R_386_GOT32X relocation to its fixed symbol address. Older
|
||||||
|
* linkers generate R_386_32 relocations against locally defined symbols,
|
||||||
|
* _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less
|
||||||
|
* optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
|
||||||
|
* R_386_32 relocations when relocating the kernel. To generate
|
||||||
|
* R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as
|
||||||
|
* hidden:
|
||||||
|
*/
|
||||||
|
.hidden _bss
|
||||||
|
.hidden _ebss
|
||||||
|
.hidden _got
|
||||||
|
.hidden _egot
|
||||||
|
|
||||||
__HEAD
|
__HEAD
|
||||||
ENTRY(startup_32)
|
ENTRY(startup_32)
|
||||||
#ifdef CONFIG_EFI_STUB
|
#ifdef CONFIG_EFI_STUB
|
||||||
|
|
|
@ -33,6 +33,14 @@
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/bootparam.h>
|
#include <asm/bootparam.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Locally defined symbols should be marked hidden:
|
||||||
|
*/
|
||||||
|
.hidden _bss
|
||||||
|
.hidden _ebss
|
||||||
|
.hidden _got
|
||||||
|
.hidden _egot
|
||||||
|
|
||||||
__HEAD
|
__HEAD
|
||||||
.code32
|
.code32
|
||||||
ENTRY(startup_32)
|
ENTRY(startup_32)
|
||||||
|
|
|
@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ];
|
||||||
void mce_gen_pool_process(void)
|
void mce_gen_pool_process(void)
|
||||||
{
|
{
|
||||||
struct llist_node *head;
|
struct llist_node *head;
|
||||||
struct mce_evt_llist *node;
|
struct mce_evt_llist *node, *tmp;
|
||||||
struct mce *mce;
|
struct mce *mce;
|
||||||
|
|
||||||
head = llist_del_all(&mce_event_llist);
|
head = llist_del_all(&mce_event_llist);
|
||||||
|
@ -37,7 +37,7 @@ void mce_gen_pool_process(void)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
head = llist_reverse_order(head);
|
head = llist_reverse_order(head);
|
||||||
llist_for_each_entry(node, head, llnode) {
|
llist_for_each_entry_safe(node, tmp, head, llnode) {
|
||||||
mce = &node->mce;
|
mce = &node->mce;
|
||||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||||
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
|
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
|
||||||
|
|
|
@ -331,7 +331,7 @@ void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
|
||||||
* Actually now I think of it, it's possible that Ron *is* half the Plan 9
|
* Actually now I think of it, it's possible that Ron *is* half the Plan 9
|
||||||
* userbase. Oh well.
|
* userbase. Oh well.
|
||||||
*/
|
*/
|
||||||
static bool could_be_syscall(unsigned int num)
|
bool could_be_syscall(unsigned int num)
|
||||||
{
|
{
|
||||||
/* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */
|
/* Normal Linux IA32_SYSCALL_VECTOR or reserved vector? */
|
||||||
return num == IA32_SYSCALL_VECTOR || num == syscall_vector;
|
return num == IA32_SYSCALL_VECTOR || num == syscall_vector;
|
||||||
|
@ -416,6 +416,10 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
|
||||||
*
|
*
|
||||||
* This routine indicates if a particular trap number could be delivered
|
* This routine indicates if a particular trap number could be delivered
|
||||||
* directly.
|
* directly.
|
||||||
|
*
|
||||||
|
* Unfortunately, Linux 4.6 started using an interrupt gate instead of a
|
||||||
|
* trap gate for syscalls, so this trick is ineffective. See Mastery for
|
||||||
|
* how we could do this anyway...
|
||||||
*/
|
*/
|
||||||
static bool direct_trap(unsigned int num)
|
static bool direct_trap(unsigned int num)
|
||||||
{
|
{
|
||||||
|
|
|
@ -167,6 +167,7 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
|
||||||
bool send_notify_to_eventfd(struct lg_cpu *cpu);
|
bool send_notify_to_eventfd(struct lg_cpu *cpu);
|
||||||
void init_clockdev(struct lg_cpu *cpu);
|
void init_clockdev(struct lg_cpu *cpu);
|
||||||
bool check_syscall_vector(struct lguest *lg);
|
bool check_syscall_vector(struct lguest *lg);
|
||||||
|
bool could_be_syscall(unsigned int num);
|
||||||
int init_interrupts(void);
|
int init_interrupts(void);
|
||||||
void free_interrupts(void);
|
void free_interrupts(void);
|
||||||
|
|
||||||
|
|
|
@ -429,8 +429,12 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
|
||||||
return;
|
return;
|
||||||
break;
|
break;
|
||||||
case 32 ... 255:
|
case 32 ... 255:
|
||||||
|
/* This might be a syscall. */
|
||||||
|
if (could_be_syscall(cpu->regs->trapnum))
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These values mean a real interrupt occurred, in which case
|
* Other values mean a real interrupt occurred, in which case
|
||||||
* the Host handler has already been run. We just do a
|
* the Host handler has already been run. We just do a
|
||||||
* friendly check if another process should now be run, then
|
* friendly check if another process should now be run, then
|
||||||
* return to run the Guest again.
|
* return to run the Guest again.
|
||||||
|
|
Loading…
Reference in New Issue