mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions
- Untangle the somewhat incestous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include <asm/page.h> /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include <asm/vmalloc.h> #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Rik van Riel <riel@redhat.com> Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
1f059dfdf5
commit
186525bd6b
|
@ -6,6 +6,7 @@
|
|||
#include <linux/percpu-defs.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel_ds.h>
|
||||
#include <asm/pgtable_areas.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
|
@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
|
|||
extern void setup_cpu_entry_areas(void);
|
||||
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
||||
|
||||
/* Single page reserved for the readonly IDT mapping: */
|
||||
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
|
||||
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
|
||||
|
||||
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
|
||||
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE \
|
||||
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
|
||||
|
||||
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
|
||||
|
||||
static inline struct entry_stack *cpu_entry_stack(int cpu)
|
||||
|
|
|
@ -69,14 +69,6 @@ struct ldt_struct {
|
|||
int slot;
|
||||
};
|
||||
|
||||
/* This is a multiple of PAGE_SIZE. */
|
||||
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
|
||||
|
||||
static inline void *ldt_slot_va(int slot)
|
||||
{
|
||||
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used for LDT copy/destruction.
|
||||
*/
|
||||
|
@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
|
|||
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
extern void load_mm_ldt(struct mm_struct *mm);
|
||||
extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
|
||||
#else
|
||||
static inline void load_mm_ldt(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
/* READ_ONCE synchronizes with smp_store_release */
|
||||
ldt = READ_ONCE(mm->context.ldt);
|
||||
|
||||
/*
|
||||
* Any change to mm->context.ldt is followed by an IPI to all
|
||||
* CPUs with the mm active. The LDT will not be freed until
|
||||
* after the IPI is handled by all such CPUs. This means that,
|
||||
* if the ldt_struct changes before we return, the values we see
|
||||
* will be safe, and the new values will be loaded before we run
|
||||
* any user code.
|
||||
*
|
||||
* NB: don't try to convert this to use RCU without extreme care.
|
||||
* We would still need IRQs off, because we don't want to change
|
||||
* the local LDT after an IPI loaded a newer value than the one
|
||||
* that we can see.
|
||||
*/
|
||||
|
||||
if (unlikely(ldt)) {
|
||||
if (static_cpu_has(X86_FEATURE_PTI)) {
|
||||
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
|
||||
/*
|
||||
* Whoops -- either the new LDT isn't mapped
|
||||
* (if slot == -1) or is mapped into a bogus
|
||||
* slot (if slot > 1).
|
||||
*/
|
||||
clear_LDT();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If page table isolation is enabled, ldt->entries
|
||||
* will not be mapped in the userspace pagetables.
|
||||
* Tell the CPU to access the LDT through the alias
|
||||
* at ldt_slot_va(ldt->slot).
|
||||
*/
|
||||
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
|
||||
} else {
|
||||
set_ldt(ldt->entries, ldt->nr_entries);
|
||||
}
|
||||
} else {
|
||||
clear_LDT();
|
||||
}
|
||||
#else
|
||||
clear_LDT();
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
|
||||
{
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
/*
|
||||
* Load the LDT if either the old or new mm had an LDT.
|
||||
*
|
||||
* An mm will never go from having an LDT to not having an LDT. Two
|
||||
* mms never share an LDT, so we don't gain anything by checking to
|
||||
* see whether the LDT changed. There's also no guarantee that
|
||||
* prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
|
||||
* then prev->context.ldt will also be non-NULL.
|
||||
*
|
||||
* If we really cared, we could optimize the case where prev == next
|
||||
* and we're exiting lazy mode. Most of the time, if this happens,
|
||||
* we don't actually need to reload LDTR, but modify_ldt() is mostly
|
||||
* used by legacy code and emulators where we don't need this level of
|
||||
* performance.
|
||||
*
|
||||
* This uses | instead of || because it generates better code.
|
||||
*/
|
||||
if (unlikely((unsigned long)prev->context.ldt |
|
||||
(unsigned long)next->context.ldt))
|
||||
load_mm_ldt(next);
|
||||
#endif
|
||||
|
||||
DEBUG_LOCKS_WARN_ON(preemptible());
|
||||
}
|
||||
#endif
|
||||
|
||||
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
|
||||
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
|
||||
|
||||
/*
|
||||
* Init a new mm. Used on mm copies, like at fork()
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
#ifndef _ASM_X86_PGTABLE_32_AREAS_H
|
||||
#define _ASM_X86_PGTABLE_32_AREAS_H
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
|
||||
/*
|
||||
* Just any arbitrary offset to the start of the vmalloc VM area: the
|
||||
* current 8MB value just means that there will be a 8MB "hole" after the
|
||||
* physical memory until the kernel virtual memory starts. That means that
|
||||
* any out-of-bounds memory accesses will hopefully be caught.
|
||||
* The vmalloc() routines leaves a hole of 4kB between each vmalloced
|
||||
* area for the same reason. ;)
|
||||
*/
|
||||
#define VMALLOC_OFFSET (8 * 1024 * 1024)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
extern bool __vmalloc_start_set; /* set once high_memory is set */
|
||||
#endif
|
||||
|
||||
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
|
||||
#ifdef CONFIG_X86_PAE
|
||||
#define LAST_PKMAP 512
|
||||
#else
|
||||
#define LAST_PKMAP 1024
|
||||
#endif
|
||||
|
||||
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
|
||||
|
||||
/* The +1 is for the readonly IDT page: */
|
||||
#define CPU_ENTRY_AREA_BASE \
|
||||
((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
|
||||
|
||||
#define LDT_BASE_ADDR \
|
||||
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
|
||||
|
||||
#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
|
||||
|
||||
#define PKMAP_BASE \
|
||||
((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
|
||||
#else
|
||||
# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
#define MODULES_VADDR VMALLOC_START
|
||||
#define MODULES_END VMALLOC_END
|
||||
#define MODULES_LEN (MODULES_VADDR - MODULES_END)
|
||||
|
||||
#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
|
|
@ -1,6 +1,6 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_X86_PGTABLE_32_DEFS_H
|
||||
#define _ASM_X86_PGTABLE_32_DEFS_H
|
||||
#ifndef _ASM_X86_PGTABLE_32_TYPES_H
|
||||
#define _ASM_X86_PGTABLE_32_TYPES_H
|
||||
|
||||
/*
|
||||
* The Linux x86 paging architecture is 'compile-time dual-mode', it
|
||||
|
@ -20,55 +20,4 @@
|
|||
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
||||
|
||||
/* Just any arbitrary offset to the start of the vmalloc VM area: the
|
||||
* current 8MB value just means that there will be a 8MB "hole" after the
|
||||
* physical memory until the kernel virtual memory starts. That means that
|
||||
* any out-of-bounds memory accesses will hopefully be caught.
|
||||
* The vmalloc() routines leaves a hole of 4kB between each vmalloced
|
||||
* area for the same reason. ;)
|
||||
*/
|
||||
#define VMALLOC_OFFSET (8 * 1024 * 1024)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
extern bool __vmalloc_start_set; /* set once high_memory is set */
|
||||
#endif
|
||||
|
||||
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
|
||||
#ifdef CONFIG_X86_PAE
|
||||
#define LAST_PKMAP 512
|
||||
#else
|
||||
#define LAST_PKMAP 1024
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
|
||||
* Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
|
||||
* to avoid include recursion hell.
|
||||
*/
|
||||
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
|
||||
|
||||
/* The +1 is for the readonly IDT page: */
|
||||
#define CPU_ENTRY_AREA_BASE \
|
||||
((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
|
||||
|
||||
#define LDT_BASE_ADDR \
|
||||
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
|
||||
|
||||
#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
|
||||
|
||||
#define PKMAP_BASE \
|
||||
((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
|
||||
#else
|
||||
# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
#define MODULES_VADDR VMALLOC_START
|
||||
#define MODULES_END VMALLOC_END
|
||||
#define MODULES_LEN (MODULES_VADDR - MODULES_END)
|
||||
|
||||
#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
|
||||
#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef _ASM_X86_PGTABLE_AREAS_H
|
||||
#define _ASM_X86_PGTABLE_AREAS_H
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
# include <asm/pgtable_32_areas.h>
|
||||
#endif
|
||||
|
||||
/* Single page reserved for the readonly IDT mapping: */
|
||||
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
|
||||
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
|
||||
|
||||
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
|
||||
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_AREAS_H */
|
|
@ -1,4 +1,6 @@
|
|||
#ifndef _ASM_X86_VMALLOC_H
|
||||
#define _ASM_X86_VMALLOC_H
|
||||
|
||||
#include <asm/pgtable_areas.h>
|
||||
|
||||
#endif /* _ASM_X86_VMALLOC_H */
|
||||
|
|
|
@ -28,6 +28,89 @@
|
|||
#include <asm/desc.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/syscalls.h>
|
||||
#include <asm/pgtable_areas.h>
|
||||
|
||||
/* This is a multiple of PAGE_SIZE. */
|
||||
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
|
||||
|
||||
static inline void *ldt_slot_va(int slot)
|
||||
{
|
||||
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
|
||||
}
|
||||
|
||||
void load_mm_ldt(struct mm_struct *mm)
|
||||
{
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
/* READ_ONCE synchronizes with smp_store_release */
|
||||
ldt = READ_ONCE(mm->context.ldt);
|
||||
|
||||
/*
|
||||
* Any change to mm->context.ldt is followed by an IPI to all
|
||||
* CPUs with the mm active. The LDT will not be freed until
|
||||
* after the IPI is handled by all such CPUs. This means that,
|
||||
* if the ldt_struct changes before we return, the values we see
|
||||
* will be safe, and the new values will be loaded before we run
|
||||
* any user code.
|
||||
*
|
||||
* NB: don't try to convert this to use RCU without extreme care.
|
||||
* We would still need IRQs off, because we don't want to change
|
||||
* the local LDT after an IPI loaded a newer value than the one
|
||||
* that we can see.
|
||||
*/
|
||||
|
||||
if (unlikely(ldt)) {
|
||||
if (static_cpu_has(X86_FEATURE_PTI)) {
|
||||
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
|
||||
/*
|
||||
* Whoops -- either the new LDT isn't mapped
|
||||
* (if slot == -1) or is mapped into a bogus
|
||||
* slot (if slot > 1).
|
||||
*/
|
||||
clear_LDT();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If page table isolation is enabled, ldt->entries
|
||||
* will not be mapped in the userspace pagetables.
|
||||
* Tell the CPU to access the LDT through the alias
|
||||
* at ldt_slot_va(ldt->slot).
|
||||
*/
|
||||
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
|
||||
} else {
|
||||
set_ldt(ldt->entries, ldt->nr_entries);
|
||||
}
|
||||
} else {
|
||||
clear_LDT();
|
||||
}
|
||||
}
|
||||
|
||||
void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
|
||||
{
|
||||
/*
|
||||
* Load the LDT if either the old or new mm had an LDT.
|
||||
*
|
||||
* An mm will never go from having an LDT to not having an LDT. Two
|
||||
* mms never share an LDT, so we don't gain anything by checking to
|
||||
* see whether the LDT changed. There's also no guarantee that
|
||||
* prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
|
||||
* then prev->context.ldt will also be non-NULL.
|
||||
*
|
||||
* If we really cared, we could optimize the case where prev == next
|
||||
* and we're exiting lazy mode. Most of the time, if this happens,
|
||||
* we don't actually need to reload LDTR, but modify_ldt() is mostly
|
||||
* used by legacy code and emulators where we don't need this level of
|
||||
* performance.
|
||||
*
|
||||
* This uses | instead of || because it generates better code.
|
||||
*/
|
||||
if (unlikely((unsigned long)prev->context.ldt |
|
||||
(unsigned long)next->context.ldt))
|
||||
load_mm_ldt(next);
|
||||
|
||||
DEBUG_LOCKS_WARN_ON(preemptible());
|
||||
}
|
||||
|
||||
static void refresh_ldt_segments(void)
|
||||
{
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include <asm/proto.h>
|
||||
#include <asm/unwind.h>
|
||||
#include <asm/vsyscall.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
/*
|
||||
* max_low_pfn_mapped: highest directly mapped pfn < 4 GB
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
|
||||
#include <asm/desc.h> /* store_idt(), ... */
|
||||
#include <asm/cpu_entry_area.h> /* exception stack */
|
||||
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/exceptions.h>
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include <asm/page_types.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/init.h>
|
||||
#include <asm/pgtable_areas.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/tlb.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/io.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
unsigned int __VMALLOC_RESERVE = 128 << 20;
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include "physaddr.h"
|
||||
|
||||
|
|
|
@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
|
|||
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
|
||||
* is no special casing required.
|
||||
*/
|
||||
static inline bool is_vmalloc_addr(const void *x)
|
||||
{
|
||||
#ifdef CONFIG_MMU
|
||||
unsigned long addr = (unsigned long)x;
|
||||
|
||||
return addr >= VMALLOC_START && addr < VMALLOC_END;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef is_ioremap_addr
|
||||
#define is_ioremap_addr(x) is_vmalloc_addr(x)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern bool is_vmalloc_addr(const void *x);
|
||||
extern int is_vmalloc_or_module_addr(const void *x);
|
||||
#else
|
||||
static inline bool is_vmalloc_addr(const void *x)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline int is_vmalloc_or_module_addr(const void *x)
|
||||
{
|
||||
return 0;
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/kgdb.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
|
||||
DEFINE_PER_CPU(int, __kmap_atomic_idx);
|
||||
|
|
|
@ -41,6 +41,14 @@
|
|||
|
||||
#include "internal.h"
|
||||
|
||||
bool is_vmalloc_addr(const void *x)
|
||||
{
|
||||
unsigned long addr = (unsigned long)x;
|
||||
|
||||
return addr >= VMALLOC_START && addr < VMALLOC_END;
|
||||
}
|
||||
EXPORT_SYMBOL(is_vmalloc_addr);
|
||||
|
||||
struct vfree_deferred {
|
||||
struct llist_head list;
|
||||
struct work_struct wq;
|
||||
|
|
Loading…
Reference in New Issue