Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
 "A laundry list of changes: KASAN improvements/fixes for ptdump, a
  self-test fix, PAT cleanup and wbinvd() avoidance, removal of stale
  code and documentation updates"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/ptdump: Add address marker for KASAN shadow region
  x86/mm/ptdump: Optimize check for W+X mappings for CONFIG_KASAN=y
  x86/mm/pat: Use rb_entry()
  x86/mpx: Re-add MPX to selftests Makefile
  x86/mm: Remove CONFIG_DEBUG_NX_TEST
  x86/mm/cpa: Avoid wbinvd() for PREEMPT
  x86/mm: Improve documentation for low-level device I/O functions
This commit is contained in:
commit
8b5abde16b
|
@ -120,14 +120,6 @@ config DEBUG_SET_MODULE_RONX
|
|||
against certain classes of kernel exploits.
|
||||
If in doubt, say "N".
|
||||
|
||||
config DEBUG_NX_TEST
|
||||
tristate "Testcase for the NX non-executable stack feature"
|
||||
depends on DEBUG_KERNEL && m
|
||||
---help---
|
||||
This option enables a testcase for the CPU NX capability
|
||||
and the software setup of this feature.
|
||||
If in doubt, say "N"
|
||||
|
||||
config DOUBLEFAULT
|
||||
default y
|
||||
bool "Enable doublefault exception handler" if EXPERT
|
||||
|
|
|
@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
|
|||
#define virt_to_bus virt_to_phys
|
||||
#define bus_to_virt phys_to_virt
|
||||
|
||||
/*
|
||||
* The default ioremap() behavior is non-cached; if you need something
|
||||
* else, you probably want one of the following.
|
||||
*/
|
||||
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
|
||||
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
|
||||
#define ioremap_uc ioremap_uc
|
||||
|
||||
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
|
||||
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
|
||||
|
||||
/**
|
||||
* ioremap - map bus memory into CPU space
|
||||
* @offset: bus address of the memory
|
||||
|
@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
|
|||
* If the area you are trying to map is a PCI BAR you should have a
|
||||
* look at pci_iomap().
|
||||
*/
|
||||
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
|
||||
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
|
||||
#define ioremap_uc ioremap_uc
|
||||
|
||||
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
|
||||
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
|
||||
unsigned long prot_val);
|
||||
|
||||
/*
|
||||
* The default ioremap() behavior is non-cached:
|
||||
*/
|
||||
static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
|
||||
{
|
||||
return ioremap_nocache(offset, size);
|
||||
|
@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void);
|
|||
*/
|
||||
#define xlate_dev_kmem_ptr(p) p
|
||||
|
||||
/**
|
||||
* memset_io Set a range of I/O memory to a constant value
|
||||
* @addr: The beginning of the I/O-memory range to set
|
||||
* @val: The value to set the memory to
|
||||
* @count: The number of bytes to set
|
||||
*
|
||||
* Set a range of I/O memory to a given value.
|
||||
*/
|
||||
static inline void
|
||||
memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
|
||||
{
|
||||
memset((void __force *)addr, val, count);
|
||||
}
|
||||
|
||||
/**
|
||||
* memcpy_fromio Copy a block of data from I/O memory
|
||||
* @dst: The (RAM) destination for the copy
|
||||
* @src: The (I/O memory) source for the data
|
||||
* @count: The number of bytes to copy
|
||||
*
|
||||
* Copy a block of data from I/O memory.
|
||||
*/
|
||||
static inline void
|
||||
memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
|
||||
{
|
||||
memcpy(dst, (const void __force *)src, count);
|
||||
}
|
||||
|
||||
/**
|
||||
* memcpy_toio Copy a block of data into I/O memory
|
||||
* @dst: The (I/O memory) destination for the copy
|
||||
* @src: The (RAM) source for the data
|
||||
* @count: The number of bytes to copy
|
||||
*
|
||||
* Copy a block of data to I/O memory.
|
||||
*/
|
||||
static inline void
|
||||
memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
|
||||
{
|
||||
|
|
|
@ -101,7 +101,6 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
|
|||
|
||||
obj-$(CONFIG_AMD_NB) += amd_nb.o
|
||||
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
|
||||
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
|
||||
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
|
||||
|
||||
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
|
||||
|
|
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* test_nx.c: functional test for NX functionality
|
||||
*
|
||||
* (C) Copyright 2008 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/asm.h>
|
||||
|
||||
extern int rodata_test_data;
|
||||
|
||||
/*
|
||||
* This file checks 4 things:
|
||||
* 1) Check if the stack is not executable
|
||||
* 2) Check if kmalloc memory is not executable
|
||||
* 3) Check if the .rodata section is not executable
|
||||
* 4) Check if the .data section of a module is not executable
|
||||
*
|
||||
* To do this, the test code tries to execute memory in stack/kmalloc/etc,
|
||||
* and then checks if the expected trap happens.
|
||||
*
|
||||
* Sadly, this implies having a dynamic exception handling table entry.
|
||||
* ... which can be done (and will make Rusty cry)... but it can only
|
||||
* be done in a stand-alone module with only 1 entry total.
|
||||
* (otherwise we'd have to sort and that's just too messy)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* We want to set up an exception handling point on our stack,
|
||||
* which means a variable value. This function is rather dirty
|
||||
* and walks the exception table of the module, looking for a magic
|
||||
* marker and replaces it with a specific function.
|
||||
*/
|
||||
static void fudze_exception_table(void *marker, void *new)
|
||||
{
|
||||
struct module *mod = THIS_MODULE;
|
||||
struct exception_table_entry *extable;
|
||||
|
||||
/*
|
||||
* Note: This module has only 1 exception table entry,
|
||||
* so searching and sorting is not needed. If that changes,
|
||||
* this would be the place to search and re-sort the exception
|
||||
* table.
|
||||
*/
|
||||
if (mod->num_exentries > 1) {
|
||||
printk(KERN_ERR "test_nx: too many exception table entries!\n");
|
||||
printk(KERN_ERR "test_nx: test results are not reliable.\n");
|
||||
return;
|
||||
}
|
||||
extable = (struct exception_table_entry *)mod->extable;
|
||||
extable[0].insn = (unsigned long)new;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* exception tables get their symbols translated so we need
|
||||
* to use a fake function to put in there, which we can then
|
||||
* replace at runtime.
|
||||
*/
|
||||
void foo_label(void);
|
||||
|
||||
/*
|
||||
* returns 0 for not-executable, negative for executable
|
||||
*
|
||||
* Note: we cannot allow this function to be inlined, because
|
||||
* that would give us more than 1 exception table entry.
|
||||
* This in turn would break the assumptions above.
|
||||
*/
|
||||
static noinline int test_address(void *address)
|
||||
{
|
||||
unsigned long result;
|
||||
|
||||
/* Set up an exception table entry for our address */
|
||||
fudze_exception_table(&foo_label, address);
|
||||
result = 1;
|
||||
asm volatile(
|
||||
"foo_label:\n"
|
||||
"0: call *%[fake_code]\n"
|
||||
"1:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"2: mov %[zero], %[rslt]\n"
|
||||
" ret\n"
|
||||
".previous\n"
|
||||
_ASM_EXTABLE(0b,2b)
|
||||
: [rslt] "=r" (result)
|
||||
: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
|
||||
);
|
||||
/* change the exception table back for the next round */
|
||||
fudze_exception_table(address, &foo_label);
|
||||
|
||||
if (result)
|
||||
return -ENODEV;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
|
||||
|
||||
static int test_NX(void)
|
||||
{
|
||||
int ret = 0;
|
||||
/* 0xC3 is the opcode for "ret" */
|
||||
char stackcode[] = {0xC3, 0x90, 0 };
|
||||
char *heap;
|
||||
|
||||
test_data = 0xC3;
|
||||
|
||||
printk(KERN_INFO "Testing NX protection\n");
|
||||
|
||||
/* Test 1: check if the stack is not executable */
|
||||
if (test_address(&stackcode)) {
|
||||
printk(KERN_ERR "test_nx: stack was executable\n");
|
||||
ret = -ENODEV;
|
||||
}
|
||||
|
||||
|
||||
/* Test 2: Check if the heap is executable */
|
||||
heap = kmalloc(64, GFP_KERNEL);
|
||||
if (!heap)
|
||||
return -ENOMEM;
|
||||
heap[0] = 0xC3; /* opcode for "ret" */
|
||||
|
||||
if (test_address(heap)) {
|
||||
printk(KERN_ERR "test_nx: heap was executable\n");
|
||||
ret = -ENODEV;
|
||||
}
|
||||
kfree(heap);
|
||||
|
||||
/*
|
||||
* The following 2 tests currently fail, this needs to get fixed
|
||||
* Until then, don't run them to avoid too many people getting scared
|
||||
* by the error message
|
||||
*/
|
||||
|
||||
/* Test 3: Check if the .rodata section is executable */
|
||||
if (rodata_test_data != 0xC3) {
|
||||
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
|
||||
ret = -ENODEV;
|
||||
} else if (test_address(&rodata_test_data)) {
|
||||
printk(KERN_ERR "test_nx: .rodata section is executable\n");
|
||||
ret = -ENODEV;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Test 4: Check if the .data section of a module is executable */
|
||||
if (test_address(&test_data)) {
|
||||
printk(KERN_ERR "test_nx: .data section is executable\n");
|
||||
ret = -ENODEV;
|
||||
}
|
||||
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void test_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(test_NX);
|
||||
module_exit(test_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Testcase for the NX infrastructure");
|
||||
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
|
|
@ -18,6 +18,7 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include <asm/kasan.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
/*
|
||||
|
@ -51,6 +52,10 @@ enum address_markers_idx {
|
|||
LOW_KERNEL_NR,
|
||||
VMALLOC_START_NR,
|
||||
VMEMMAP_START_NR,
|
||||
#ifdef CONFIG_KASAN
|
||||
KASAN_SHADOW_START_NR,
|
||||
KASAN_SHADOW_END_NR,
|
||||
#endif
|
||||
# ifdef CONFIG_X86_ESPFIX64
|
||||
ESPFIX_START_NR,
|
||||
# endif
|
||||
|
@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = {
|
|||
{ 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
|
||||
{ 0/* VMALLOC_START */, "vmalloc() Area" },
|
||||
{ 0/* VMEMMAP_START */, "Vmemmap" },
|
||||
#ifdef CONFIG_KASAN
|
||||
{ KASAN_SHADOW_START, "KASAN shadow" },
|
||||
{ KASAN_SHADOW_END, "KASAN shadow end" },
|
||||
#endif
|
||||
# ifdef CONFIG_X86_ESPFIX64
|
||||
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
|
||||
# endif
|
||||
|
@ -327,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
|
|||
|
||||
#if PTRS_PER_PUD > 1
|
||||
|
||||
/*
|
||||
* This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
|
||||
* KASAN fills page tables with the same values. Since there is no
|
||||
* point in checking page table more than once we just skip repeated
|
||||
* entries. This saves us dozens of seconds during boot.
|
||||
*/
|
||||
static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
|
||||
{
|
||||
return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
|
||||
}
|
||||
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
unsigned long P)
|
||||
{
|
||||
int i;
|
||||
pud_t *start;
|
||||
pgprotval_t prot;
|
||||
pud_t *prev_pud = NULL;
|
||||
|
||||
start = (pud_t *) pgd_page_vaddr(addr);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PUD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
|
||||
if (!pud_none(*start)) {
|
||||
if (!pud_none(*start) &&
|
||||
!pud_already_checked(prev_pud, start, st->check_wx)) {
|
||||
if (pud_large(*start) || !pud_present(*start)) {
|
||||
prot = pud_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 2);
|
||||
|
@ -349,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
|||
} else
|
||||
note_page(m, st, __pgprot(0), 2);
|
||||
|
||||
prev_pud = start;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
|
|||
int in_flags, struct page **pages)
|
||||
{
|
||||
unsigned int i, level;
|
||||
#ifdef CONFIG_PREEMPT
|
||||
/*
|
||||
* Avoid wbinvd() because it causes latencies on all CPUs,
|
||||
* regardless of any CPU isolation that may be in effect.
|
||||
*
|
||||
* This should be extended for CAT enabled systems independent of
|
||||
* PREEMPT because wbinvd() does not respect the CAT partitions and
|
||||
* this is exposed to unprivileged users through the graphics
|
||||
* subsystem.
|
||||
*/
|
||||
unsigned long do_wbinvd = 0;
|
||||
#else
|
||||
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
|
||||
#endif
|
||||
|
||||
BUG_ON(irqs_disabled());
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
|
|||
{
|
||||
u64 ret = 0;
|
||||
if (node) {
|
||||
struct memtype *data = container_of(node, struct memtype, rb);
|
||||
struct memtype *data = rb_entry(node, struct memtype, rb);
|
||||
ret = data->subtree_max_end;
|
||||
}
|
||||
return ret;
|
||||
|
@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
|
|||
struct memtype *last_lower = NULL;
|
||||
|
||||
while (node) {
|
||||
struct memtype *data = container_of(node, struct memtype, rb);
|
||||
struct memtype *data = rb_entry(node, struct memtype, rb);
|
||||
|
||||
if (get_subtree_max_end(node->rb_left) > start) {
|
||||
/* Lowest overlap if any must be on left side */
|
||||
|
@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root,
|
|||
|
||||
node = rb_next(&match->rb);
|
||||
if (node)
|
||||
match = container_of(node, struct memtype, rb);
|
||||
match = rb_entry(node, struct memtype, rb);
|
||||
else
|
||||
match = NULL;
|
||||
}
|
||||
|
@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root,
|
|||
|
||||
node = rb_next(&match->rb);
|
||||
while (node) {
|
||||
match = container_of(node, struct memtype, rb);
|
||||
match = rb_entry(node, struct memtype, rb);
|
||||
|
||||
if (match->start >= end) /* Checked all possible matches */
|
||||
goto success;
|
||||
|
@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
|
|||
struct rb_node *parent = NULL;
|
||||
|
||||
while (*node) {
|
||||
struct memtype *data = container_of(*node, struct memtype, rb);
|
||||
struct memtype *data = rb_entry(*node, struct memtype, rb);
|
||||
|
||||
parent = *node;
|
||||
if (data->subtree_max_end < newdata->end)
|
||||
|
@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
|
|||
}
|
||||
|
||||
if (node) { /* pos == i */
|
||||
struct memtype *this = container_of(node, struct memtype, rb);
|
||||
struct memtype *this = rb_entry(node, struct memtype, rb);
|
||||
*out = *this;
|
||||
return 0;
|
||||
} else {
|
||||
|
|
|
@ -5,7 +5,7 @@ include ../lib.mk
|
|||
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
||||
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
|
||||
check_initial_reg_state sigreturn ldt_gdt iopl \
|
||||
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
|
||||
protection_keys test_vdso
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP \
|
||||
|
|
Loading…
Reference in New Issue