Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:

 - the high-latency PIT detection fix, which slipped through the cracks
   for rc1

 - a regression fix for the early printk mechanism

 - the x86 part to plug irq/vector related hotplug races

 - move the allocation of the espfix pages on cpu hotplug to non-atomic
   context.  The current code triggers a might_sleep() warning.

 - a series of KASAN fixes addressing boot crashes and usability

 - a trivial typo fix for Kconfig help text

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kconfig: Fix typo in the CONFIG_CMDLINE_BOOL help text
  x86/irq: Retrieve irq data after locking irq_desc
  x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable()
  x86/irq: Plug irq vector hotplug race
  x86/earlyprintk: Allow early_printk() to use console style parameters like '115200n8'
  x86/espfix: Init espfix on the boot CPU side
  x86/espfix: Add 'cpu' parameter to init_espfix_ap()
  x86/kasan: Move KASAN_SHADOW_OFFSET to the arch Kconfig
  x86/kasan: Add message about KASAN being initialized
  x86/kasan: Fix boot crash on AMD processors
  x86/kasan: Flush TLBs after switching CR3
  x86/kasan: Fix KASAN shadow region page tables
  x86/init: Clear 'init_level4_pgt' earlier
  x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate()
commit 1daa1cfb7a
Author: Linus Torvalds
Date:   2015-07-12 10:02:38 -07:00

13 changed files with 116 additions and 91 deletions

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -254,6 +254,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0xdffffc0000000000
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
@@ -2015,7 +2020,7 @@ config CMDLINE_BOOL
 	  To compile command line arguments into the kernel,
 	  set this option to 'Y', then fill in the
-	  the boot arguments in CONFIG_CMDLINE.
+	  boot arguments in CONFIG_CMDLINE.
 
 	  Systems with fully functional boot loaders (i.e. non-embedded)
 	  should leave this option set to 'N'.
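
The 0xdffffc0000000000 default is the offset in the x86-64 shadow translation:
every 8 bytes of address space map to one shadow byte. A minimal sketch of the
mapping (the kernel's kasan_mem_to_shadow() helper implements the equivalent;
the standalone function and name below are illustrative only):

	/* Illustrative: x86-64 KASAN shadow translation. Each shadow byte
	 * describes an 8-byte granule, so the shadow address is the
	 * original address divided by 8 plus the Kconfig offset above. */
	#define KASAN_SHADOW_OFFSET 0xdffffc0000000000UL

	static inline unsigned long mem_to_shadow(unsigned long addr)
	{
		return (addr >> 3) + KASAN_SHADOW_OFFSET;
	}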

--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h

@@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
 DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);
 
 #endif /* CONFIG_X86_64 */

--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h

@@ -14,15 +14,11 @@
 #ifndef __ASSEMBLY__
 
-extern pte_t kasan_zero_pte[];
-extern pte_t kasan_zero_pmd[];
-extern pte_t kasan_zero_pud[];
-
 #ifdef CONFIG_KASAN
-void __init kasan_map_early_shadow(pgd_t *pgd);
+void __init kasan_early_init(void);
 void __init kasan_init(void);
 #else
-static inline void kasan_map_early_shadow(pgd_t *pgd) { }
+static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
 #endif

--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c

@@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu)
 	int irq, vector;
 	struct apic_chip_data *data;
 
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
 	for_each_active_irq(irq) {
 		data = apic_chip_data(irq_get_irq_data(irq));
@@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu)
 		if (!cpumask_test_cpu(cpu, data->domain))
 			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 	}
-	raw_spin_unlock(&vector_lock);
 }
 
 /*
- * Setup the vector to irq mappings.
+ * Setup the vector to irq mappings. Must be called with vector_lock held.
  */
 void setup_vector_irq(int cpu)
 {
 	int irq;
 
+	lockdep_assert_held(&vector_lock);
 	/*
 	 * On most of the platforms, legacy PIC delivers the interrupts on the
 	 * boot cpu. But there are certain platforms where PIC interrupts are
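
With the lock/unlock pair removed from __setup_vector_irq(), the locking
responsibility moves to the caller, and lockdep_assert_held() turns the new
calling convention into a checkable contract. The general pattern looks
roughly like this (a sketch with a hypothetical struct, not kernel code):

	/* Sketch: the callee documents and verifies the caller's locking.
	 * On lockdep-enabled kernels a missing lock splats immediately;
	 * on production kernels the assertion compiles away. */
	static void table_update_locked(struct demo_table *t) /* hypothetical */
	{
		lockdep_assert_held(&t->lock);
		/* ... safe to mutate t->entries here ... */
	}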

--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c

@@ -175,7 +175,9 @@ static __init void early_serial_init(char *s)
 	}
 
 	if (*s) {
-		if (kstrtoul(s, 0, &baud) < 0 || baud == 0)
+		baud = simple_strtoull(s, &e, 0);
+
+		if (baud == 0 || s == e)
 			baud = DEFAULT_BAUD;
 	}
 
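
The behavioral difference driving this change: kstrtoul() fails unless the
whole string is numeric, so a console-style argument such as '115200n8' never
parsed, while simple_strtoull() converts the leading digits and reports via
the end pointer where it stopped. A userspace analogue using standard
strtoul() (illustrative only, not kernel code):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		const char *s = "115200n8";	/* baud + parity + data bits */
		char *e;
		unsigned long baud = strtoul(s, &e, 0);	/* stops at 'n' */

		/* baud == 115200 and e points at "n8"; a strict
		 * all-or-nothing parser rejects the string outright. */
		printf("baud=%lu rest=%s\n", baud, e);
		return 0;
	}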

--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c

@@ -131,25 +131,24 @@ void __init init_espfix_bsp(void)
 	init_espfix_random();
 
 	/* The rest is the same as for any other processor */
-	init_espfix_ap();
+	init_espfix_ap(0);
 }
 
-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
 {
-	unsigned int cpu, page;
+	unsigned int page;
 	unsigned long addr;
 	pud_t pud, *pud_p;
 	pmd_t pmd, *pmd_p;
 	pte_t pte, *pte_p;
-	int n;
+	int n, node;
 	void *stack_page;
 	pteval_t ptemask;
 
 	/* We only have to do this once... */
-	if (likely(this_cpu_read(espfix_stack)))
+	if (likely(per_cpu(espfix_stack, cpu)))
 		return;		/* Already initialized */
 
-	cpu = smp_processor_id();
 	addr = espfix_base_addr(cpu);
 	page = cpu/ESPFIX_STACKS_PER_PAGE;
@@ -165,12 +164,15 @@ void init_espfix_ap(void)
 	if (stack_page)
 		goto unlock_done;
 
+	node = cpu_to_node(cpu);
 	ptemask = __supported_pte_mask;
 
 	pud_p = &espfix_pud_page[pud_index(addr)];
 	pud = *pud_p;
 	if (!pud_present(pud)) {
-		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pmd_p = (pmd_t *)page_address(page);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -180,7 +182,9 @@ void init_espfix_ap(void)
 	pmd_p = pmd_offset(&pud, addr);
 	pmd = *pmd_p;
 	if (!pmd_present(pmd)) {
-		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pte_p = (pte_t *)page_address(page);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -188,7 +192,7 @@ void init_espfix_ap(void)
 	}
 
 	pte_p = pte_offset_kernel(&pmd, addr);
-	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -199,7 +203,7 @@ void init_espfix_ap(void)
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
 done:
-	this_cpu_write(espfix_stack, addr);
-	this_cpu_write(espfix_waddr, (unsigned long)stack_page
-		       + (addr & ~PAGE_MASK));
+	per_cpu(espfix_stack, cpu) = addr;
+	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+				     + (addr & ~PAGE_MASK);
 }
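
Since init_espfix_ap() is now called from the boot CPU on behalf of the CPU
being brought up, this_cpu accessors (which always target the executing CPU)
would touch the wrong per-cpu instance; per_cpu(var, cpu) names the target
explicitly, and cpu_to_node(cpu) steers the allocations to the target's
memory node. Roughly (a sketch with a hypothetical per-cpu variable):

	DEFINE_PER_CPU(unsigned long, demo_val);	/* hypothetical */

	static void set_for_cpu(int cpu, unsigned long v)
	{
		per_cpu(demo_val, cpu) = v;	/* instance belonging to 'cpu' */
	}

	static void set_for_self(unsigned long v)
	{
		this_cpu_write(demo_val, v);	/* executing CPU's instance */
	}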

--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c

@@ -161,11 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
 
-	kasan_map_early_shadow(early_level4_pgt);
-
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
+	clear_page(init_level4_pgt);
+	kasan_early_init();
+
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
 		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
@@ -177,12 +178,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
 
-	kasan_map_early_shadow(init_level4_pgt);
-
 	x86_64_start_reservations(real_mode_data);
 }

--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S

@@ -516,38 +516,9 @@ ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
-#ifdef CONFIG_KASAN
-#define FILL(VAL, COUNT)			\
-	.rept	(COUNT) ;			\
-	.quad	(VAL) ;				\
-	.endr
-
-NEXT_PAGE(kasan_zero_pte)
-	FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pmd)
-	FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pud)
-	FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512)
-
-#undef FILL
-#endif
-
 #include "../../x86/xen/xen-head.S"
 
 	__PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
 
-#ifdef CONFIG_KASAN
-/*
- * This page used as early shadow. We don't use empty_zero_page
- * at early stages, stack instrumentation could write some garbage
- * to this page.
- * Latter we reuse it as zero shadow for large ranges of memory
- * that allowed to access, but not instrumented by kasan
- * (vmalloc/vmemmap ...).
- */
-NEXT_PAGE(kasan_zero_page)
-	.skip PAGE_SIZE
-#endif

--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c

@@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void)
 		if (!desc)
 			continue;
 
+		/*
+		 * Protect against concurrent action removal,
+		 * affinity changes etc.
+		 */
+		raw_spin_lock(&desc->lock);
 		data = irq_desc_get_irq_data(desc);
 		cpumask_copy(&affinity_new, data->affinity);
 		cpumask_clear_cpu(this_cpu, &affinity_new);
 
 		/* Do not count inactive or per-cpu irqs. */
-		if (!irq_has_action(irq) || irqd_is_per_cpu(data))
+		if (!irq_has_action(irq) || irqd_is_per_cpu(data)) {
+			raw_spin_unlock(&desc->lock);
 			continue;
+		}
 
+		raw_spin_unlock(&desc->lock);
 		/*
 		 * A single irq may be mapped to multiple
 		 * cpu's vector_irq[] (for example IOAPIC cluster
@@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void)
 	 * vector. If the vector is marked in the used vectors
 	 * bitmap or an irq is assigned to it, we don't count
 	 * it as available.
+	 *
+	 * As this is an inaccurate snapshot anyway, we can do
+	 * this w/o holding vector_lock.
 	 */
 	for (vector = FIRST_EXTERNAL_VECTOR;
 	     vector < first_system_vector; vector++) {
@@ -486,6 +497,11 @@ void fixup_irqs(void)
 	 */
 	mdelay(1);
 
+	/*
+	 * We can walk the vector array of this cpu without holding
+	 * vector_lock because the cpu is already marked !online, so
+	 * nothing else will touch it.
+	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 		unsigned int irr;
 
@@ -497,9 +513,9 @@ void fixup_irqs(void)
 		irq = __this_cpu_read(vector_irq[vector]);
 
 		desc = irq_to_desc(irq);
+		raw_spin_lock(&desc->lock);
 		data = irq_desc_get_irq_data(desc);
 		chip = irq_data_get_irq_chip(data);
-		raw_spin_lock(&desc->lock);
 		if (chip->irq_retrigger) {
 			chip->irq_retrigger(data);
 			__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
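
Both irq.c fixes enforce the same rule: irq_data and its chip can be torn
down or replaced by a concurrent affinity update or action removal, so they
must be fetched and used under desc->lock. Distilled into a sketch (a
hypothetical helper, not kernel code):

	static void poke_irq_locked(struct irq_desc *desc)	/* hypothetical */
	{
		struct irq_data *data;
		struct irq_chip *chip;

		raw_spin_lock(&desc->lock);
		data = irq_desc_get_irq_data(desc);	/* fetch under the lock ... */
		chip = irq_data_get_irq_chip(data);	/* ... and use it there too */
		/* ... operate on data/chip ... */
		raw_spin_unlock(&desc->lock);
	}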

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -170,11 +170,6 @@ static void smp_callin(void)
 	 */
 	apic_ap_setup();
 
-	/*
-	 * Need to setup vector mappings before we enable interrupts.
-	 */
-	setup_vector_irq(smp_processor_id());
-
 	/*
 	 * Save our processor parameters. Note: this information
 	 * is needed for clock calibration.
@@ -239,18 +234,13 @@ static void notrace start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
-	 * Enable the espfix hack for this CPU
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	init_espfix_ap();
-#endif
-
-	/*
-	 * We need to hold vector_lock so there the set of online cpus
-	 * does not change while we are assigning vectors to cpus. Holding
-	 * this lock ensures we don't half assign or remove an irq from a cpu.
+	 * Lock vector_lock and initialize the vectors on this cpu
+	 * before setting the cpu online. We must set it online with
+	 * vector_lock held to prevent a concurrent setup/teardown
+	 * from seeing a half valid vector space.
 	 */
 	lock_vector_lock();
+	setup_vector_irq(smp_processor_id());
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
 	cpu_set_state_online(smp_processor_id());
@@ -854,6 +844,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
+	/*
+	 * Enable the espfix hack for this CPU
+	 */
+#ifdef CONFIG_X86_ESPFIX64
+	init_espfix_ap(cpu);
+#endif
+
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
 
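
Moving init_espfix_ap() here addresses the might_sleep() item from the
summary: the espfix pages are allocated with GFP_KERNEL, which may block, and
do_boot_cpu() runs on the controlling CPU in a fully preemptible context,
whereas start_secondary() executes on the half-initialized AP with interrupts
disabled. The class of bug being fixed looks roughly like this (hypothetical
illustration, not the actual call chain):

	/* Hypothetical: a sleeping allocation from atomic context. A
	 * GFP_KERNEL allocation may sleep, which is illegal once the
	 * CPU cannot schedule (e.g. irqs off during early AP bringup). */
	static void *bad_alloc_in_atomic(void)
	{
		void *p;

		local_irq_disable();	/* atomic context from here on */
		p = (void *)__get_free_page(GFP_KERNEL); /* might_sleep() splat */
		local_irq_enable();
		return p;
	}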

--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c

@@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void)
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and fail fast if the error will
+			 * never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return 0;
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 
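
The arithmetic behind the fail-fast check: x >> 11 is x/2048, i.e. just under
500 ppm of x. After the first iteration the loop can at best accumulate a TSC
delta of roughly delta * MAX_QUICK_PIT_ITERATIONS, so if the uncertainty
d1 + d2 already exceeds 500 ppm of that upper bound, no number of further
iterations can push the relative error under the threshold, and burning the
remaining ~50 ms on a high-latency (e.g. virtualized) PIT is pointless. A
standalone illustration with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		/* Illustrative numbers, not measurements. */
		unsigned long delta = 50000;	/* TSC ticks in iteration 1 */
		unsigned long iters = 233;	/* ballpark MAX_QUICK_PIT_ITERATIONS */
		unsigned long best_total = delta * iters;

		/* The 500 ppm bound is implemented as >> 11 (1/2048). */
		printf("error budget = %lu ticks\n", best_total >> 11);
		/* If d1 + d2 already exceeds this budget after iteration
		 * one, quick_pit_calibrate() now returns 0 immediately. */
		return 0;
	}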

--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c

@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
@@ -11,7 +12,19 @@
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
 
-extern unsigned char kasan_zero_page[PAGE_SIZE];
+static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+/*
+ * This page used as early shadow. We don't use empty_zero_page
+ * at early stages, stack instrumentation could write some garbage
+ * to this page.
+ * Latter we reuse it as zero shadow for large ranges of memory
+ * that allowed to access, but not instrumented by kasan
+ * (vmalloc/vmemmap ...).
+ */
+static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
 static int __init map_range(struct range *range)
 {
@@ -36,7 +49,7 @@ static void __init clear_pgds(unsigned long start,
 		pgd_clear(pgd_offset_k(start));
 }
 
-void __init kasan_map_early_shadow(pgd_t *pgd)
+static void __init kasan_map_early_shadow(pgd_t *pgd)
 {
 	int i;
 	unsigned long start = KASAN_SHADOW_START;
@@ -73,7 +86,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr,
 	while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) {
 		WARN_ON(!pmd_none(*pmd));
 		set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PMD_SIZE;
 		pmd = pmd_offset(pud, addr);
 	}
@@ -99,7 +112,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
 	while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) {
 		WARN_ON(!pud_none(*pud));
 		set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PUD_SIZE;
 		pud = pud_offset(pgd, addr);
 	}
@@ -124,7 +137,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end)
 	while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) {
 		WARN_ON(!pgd_none(*pgd));
 		set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PGDIR_SIZE;
 		pgd = pgd_offset_k(addr);
 	}
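
All three populate helpers make the same substitution in the non-leaf
entries. Paraphrasing the x86 pgtable flag definitions (worth double-checking
against the headers; the summary below is an approximation):

	/* Paraphrased flag sets:
	 * _KERNPG_TABLE    = PRESENT | RW | ACCESSED | DIRTY
	 * __PAGE_KERNEL_RO = PRESENT | ACCESSED | DIRTY | GLOBAL | NX
	 * Non-leaf (table) entries want table semantics; dropping the
	 * leaf-oriented bits from them is what the series' "Fix boot
	 * crash on AMD processors" change amounts to. */
	set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE));
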
@@ -166,6 +179,26 @@ static struct notifier_block kasan_die_notifier = {
 };
 #endif
 
+void __init kasan_early_init(void)
+{
+	int i;
+	pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
+	pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
+	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		kasan_zero_pte[i] = __pte(pte_val);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		kasan_zero_pmd[i] = __pmd(pmd_val);
+
+	for (i = 0; i < PTRS_PER_PUD; i++)
+		kasan_zero_pud[i] = __pud(pud_val);
+
+	kasan_map_early_shadow(early_level4_pgt);
+	kasan_map_early_shadow(init_level4_pgt);
+}
+
 void __init kasan_init(void)
 {
 	int i;
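
The point of the three static tables plus the zero page: the entire shadow
region can be backed by just four pages. Back-of-envelope (a sketch):

	/* With 4 KiB pages and 512 eight-byte entries per table:
	 *   kasan_zero_pte -> 512 x 4 KiB = 2 MiB, all aliasing one page
	 *   kasan_zero_pmd -> 512 x 2 MiB = 1 GiB
	 *   kasan_zero_pud -> 512 x 1 GiB = 512 GiB
	 * Pointing multiple pgd entries at kasan_zero_pud covers the
	 * whole multi-terabyte shadow with these four pages, populated
	 * once in kasan_early_init() above. */
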
@@ -176,6 +209,7 @@ void __init kasan_init(void)
 
 	memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
 	load_cr3(early_level4_pgt);
+	__flush_tlb_all();
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -202,5 +236,8 @@ void __init kasan_init(void)
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 
 	load_cr3(init_level4_pgt);
+	__flush_tlb_all();
 	init_task.kasan_depth = 0;
+
+	pr_info("Kernel address sanitizer initialized\n");
 }
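
On the two __flush_tlb_all() additions: writing CR3 flushes only non-global
TLB entries, and shadow mappings installed with the GLOBAL bit set could
survive the page-table switch as stale translations. __flush_tlb_all() forces
a full flush (toggling CR4.PGE when global pages are enabled). The pattern,
as a sketch:

	/* Sketch: a CR3 write alone is not a full TLB flush. */
	load_cr3(init_level4_pgt);	/* flushes non-global entries only */
	__flush_tlb_all();		/* also drops global entries */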

--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan

@@ -18,10 +18,6 @@ config KASAN
 	  For better error detection enable CONFIG_STACKTRACE,
 	  and add slub_debug=U to boot cmdline.
 
-config KASAN_SHADOW_OFFSET
-	hex
-	default 0xdffffc0000000000 if X86_64
-
 choice
 	prompt "Instrumentation type"
 	depends on KASAN