Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
"The main changes in this cycle were:
- make the debugfs 'kernel_page_tables' file read-only, as it only has read ops. (Borislav Petkov)
- micro-optimize clflush_cache_range() (Chris Wilson)
- swiotlb enhancements, which fix certain KVM emulated devices (Igor Mammedov)
- fix an LDT related debug message (Jan Beulich)
- modularize CONFIG_X86_PTDUMP (Kees Cook)
- tone down an overly alarming warning (Laura Abbott)
- Mark variable __initdata (Rasmus Villemoes)
- PAT additions (Toshi Kani)"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Micro-optimise clflush_cache_range()
  x86/mm/pat: Change free_memtype() to support shrinking case
  x86/mm/pat: Add untrack_pfn_moved for mremap
  x86/mm: Drop WARN from multi-BAR check
  x86/LDT: Print the real LDT base address
  x86/mm/64: Enable SWIOTLB if system has SRAT memory regions above MAX_DMA32_PFN
  x86/mm: Introduce max_possible_pfn
  x86/mm/ptdump: Make (debugfs)/kernel_page_tables read-only
  x86/mm/mtrr: Mark the 'range_new' static variable in mtrr_calc_range_state() as __initdata
  x86/mm: Turn CONFIG_X86_PTDUMP into a module
This commit is contained in:
commit
0ffedcda63
|
@ -69,7 +69,7 @@ config X86_PTDUMP_CORE
|
|||
def_bool n
|
||||
|
||||
config X86_PTDUMP
|
||||
bool "Export kernel pagetable layout to userspace via debugfs"
|
||||
tristate "Export kernel pagetable layout to userspace via debugfs"
|
||||
depends on DEBUG_KERNEL
|
||||
select DEBUG_FS
|
||||
select X86_PTDUMP_CORE
|
||||
|
|
|
@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
|
|||
unsigned long x_remove_base,
|
||||
unsigned long x_remove_size, int i)
|
||||
{
|
||||
static struct range range_new[RANGE_NUM];
|
||||
/*
|
||||
* range_new should really be an automatic variable, but
|
||||
* putting 4096 bytes on the stack is frowned upon, to put it
|
||||
* mildly. It is safe to make it a static __initdata variable,
|
||||
* since mtrr_calc_range_state is only called during init and
|
||||
* there's no way it will call itself recursively.
|
||||
*/
|
||||
static struct range range_new[RANGE_NUM] __initdata;
|
||||
unsigned long range_sums_new;
|
||||
static int nr_range_new;
|
||||
int nr_range_new;
|
||||
int num_reg;
|
||||
|
||||
/* Convert ranges to var ranges state: */
|
||||
|
|
|
@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void)
|
|||
{
|
||||
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
|
||||
#ifdef CONFIG_X86_64
|
||||
if (!no_iommu && max_pfn > MAX_DMA32_PFN)
|
||||
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
|
||||
swiotlb = 1;
|
||||
#endif
|
||||
return swiotlb;
|
||||
|
|
|
@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task)
|
|||
if (dead_task->mm->context.ldt) {
|
||||
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
|
||||
dead_task->comm,
|
||||
dead_task->mm->context.ldt,
|
||||
dead_task->mm->context.ldt->entries,
|
||||
dead_task->mm->context.ldt->size);
|
||||
BUG();
|
||||
}
|
||||
|
|
|
@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p)
|
|||
if (mtrr_trim_uncached_memory(max_pfn))
|
||||
max_pfn = e820_end_of_ram_pfn();
|
||||
|
||||
max_possible_pfn = max_pfn;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* max_low_pfn get updated here */
|
||||
find_low_pfn_range();
|
||||
|
|
|
@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
|
|||
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
|
||||
obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
|
||||
|
||||
obj-$(CONFIG_HIGHMEM) += highmem_32.o
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
#include <linux/debugfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
static int ptdump_show(struct seq_file *m, void *v)
|
||||
{
|
||||
ptdump_walk_pgd_level(m, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptdump_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, ptdump_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations ptdump_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ptdump_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static struct dentry *pe;
|
||||
|
||||
static int __init pt_dump_debug_init(void)
|
||||
{
|
||||
pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
|
||||
&ptdump_fops);
|
||||
if (!pe)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit pt_dump_debug_exit(void)
|
||||
{
|
||||
debugfs_remove_recursive(pe);
|
||||
}
|
||||
|
||||
module_init(pt_dump_debug_init);
|
||||
module_exit(pt_dump_debug_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
|
||||
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
|
|
@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
|
|||
{
|
||||
ptdump_walk_pgd_level_core(m, pgd, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
|
||||
|
||||
void ptdump_walk_pgd_level_checkwx(void)
|
||||
{
|
||||
ptdump_walk_pgd_level_core(NULL, NULL, true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
static int ptdump_show(struct seq_file *m, void *v)
|
||||
static int __init pt_dump_init(void)
|
||||
{
|
||||
ptdump_walk_pgd_level(m, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ptdump_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, ptdump_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations ptdump_fops = {
|
||||
.open = ptdump_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
#endif
|
||||
|
||||
static int pt_dump_init(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
struct dentry *pe;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Not a compile-time constant on x86-32 */
|
||||
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
|
||||
|
@ -468,13 +445,6 @@ static int pt_dump_init(void)
|
|||
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
|
||||
&ptdump_fops);
|
||||
if (!pe)
|
||||
return -ENOMEM;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
|
|||
* Check if the request spans more than any BAR in the iomem resource
|
||||
* tree.
|
||||
*/
|
||||
WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
|
||||
KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
|
||||
if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
|
||||
pr_warn("caller %pS mapping multiple BARs\n", caller);
|
||||
|
||||
return ret_addr;
|
||||
err_free_area:
|
||||
|
|
|
@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
|
|||
*/
|
||||
void clflush_cache_range(void *vaddr, unsigned int size)
|
||||
{
|
||||
unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
|
||||
const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
|
||||
void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
|
||||
void *vend = vaddr + size;
|
||||
void *p;
|
||||
|
||||
if (p >= vend)
|
||||
return;
|
||||
|
||||
mb();
|
||||
|
||||
for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
|
||||
p < vend; p += boot_cpu_data.x86_clflush_size)
|
||||
for (; p < vend; p += clflush_size)
|
||||
clflushopt(p);
|
||||
|
||||
mb();
|
||||
|
|
|
@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end)
|
|||
entry = rbt_memtype_erase(start, end);
|
||||
spin_unlock(&memtype_lock);
|
||||
|
||||
if (!entry) {
|
||||
if (IS_ERR(entry)) {
|
||||
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
|
||||
current->comm, current->pid, start, end - 1);
|
||||
return -EINVAL;
|
||||
|
@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
|
|||
vma->vm_flags &= ~VM_PAT;
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
|
||||
* with the old vma after its pfnmap page table has been removed. The new
|
||||
* vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
|
||||
*/
|
||||
void untrack_pfn_moved(struct vm_area_struct *vma)
|
||||
{
|
||||
vma->vm_flags &= ~VM_PAT;
|
||||
}
|
||||
|
||||
pgprot_t pgprot_writecombine(pgprot_t prot)
|
||||
{
|
||||
return __pgprot(pgprot_val(prot) |
|
||||
|
|
|
@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
|
|||
return last_lower; /* Returns NULL if there is no overlap */
|
||||
}
|
||||
|
||||
static struct memtype *memtype_rb_exact_match(struct rb_root *root,
|
||||
u64 start, u64 end)
|
||||
enum {
|
||||
MEMTYPE_EXACT_MATCH = 0,
|
||||
MEMTYPE_END_MATCH = 1
|
||||
};
|
||||
|
||||
static struct memtype *memtype_rb_match(struct rb_root *root,
|
||||
u64 start, u64 end, int match_type)
|
||||
{
|
||||
struct memtype *match;
|
||||
|
||||
|
@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
|
|||
while (match != NULL && match->start < end) {
|
||||
struct rb_node *node;
|
||||
|
||||
if (match->start == start && match->end == end)
|
||||
if ((match_type == MEMTYPE_EXACT_MATCH) &&
|
||||
(match->start == start) && (match->end == end))
|
||||
return match;
|
||||
|
||||
if ((match_type == MEMTYPE_END_MATCH) &&
|
||||
(match->start < start) && (match->end == end))
|
||||
return match;
|
||||
|
||||
node = rb_next(&match->rb);
|
||||
|
@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
|
|||
match = NULL;
|
||||
}
|
||||
|
||||
return NULL; /* Returns NULL if there is no exact match */
|
||||
return NULL; /* Returns NULL if there is no match */
|
||||
}
|
||||
|
||||
static int memtype_rb_check_conflict(struct rb_root *root,
|
||||
|
@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
|
|||
{
|
||||
struct memtype *data;
|
||||
|
||||
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
|
||||
if (!data)
|
||||
goto out;
|
||||
/*
|
||||
* Since the memtype_rbroot tree allows overlapping ranges,
|
||||
* rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
|
||||
* a whole node for the munmap case. If no such entry is found,
|
||||
* it then checks with END_MATCH, i.e. shrink the size of a node
|
||||
* from the end for the mremap case.
|
||||
*/
|
||||
data = memtype_rb_match(&memtype_rbroot, start, end,
|
||||
MEMTYPE_EXACT_MATCH);
|
||||
if (!data) {
|
||||
data = memtype_rb_match(&memtype_rbroot, start, end,
|
||||
MEMTYPE_END_MATCH);
|
||||
if (!data)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (data->start == start) {
|
||||
/* munmap: erase this node */
|
||||
rb_erase_augmented(&data->rb, &memtype_rbroot,
|
||||
&memtype_rb_augment_cb);
|
||||
} else {
|
||||
/* mremap: update the end value of this node */
|
||||
rb_erase_augmented(&data->rb, &memtype_rbroot,
|
||||
&memtype_rb_augment_cb);
|
||||
data->end = start;
|
||||
data->subtree_max_end = data->end;
|
||||
memtype_rb_insert(&memtype_rbroot, data);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
|
||||
out:
|
||||
return data;
|
||||
}
|
||||
|
||||
|
|
|
@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
|
|||
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
|
||||
(unsigned long long)start, (unsigned long long)end - 1);
|
||||
|
||||
max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
|
||||
|
||||
return 0;
|
||||
out_err_bad_srat:
|
||||
bad_srat();
|
||||
|
|
|
@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
|
|||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_vma is called while unmapping a pfnmap for a region.
|
||||
* untrack_pfn is called while unmapping a pfnmap for a region.
|
||||
* untrack can be called for a specific region indicated by pfn and size or
|
||||
* can be for the entire vma (in which case pfn, size are zero).
|
||||
*/
|
||||
|
@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
|
|||
unsigned long pfn, unsigned long size)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_moved is called while mremapping a pfnmap for a new region.
|
||||
*/
|
||||
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
#else
|
||||
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
|
||||
unsigned long pfn, unsigned long addr,
|
||||
|
@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
|
|||
extern int track_pfn_copy(struct vm_area_struct *vma);
|
||||
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
|
||||
unsigned long size);
|
||||
extern void untrack_pfn_moved(struct vm_area_struct *vma);
|
||||
#endif
|
||||
|
||||
#ifdef __HAVE_COLOR_ZERO_PAGE
|
||||
|
|
|
@ -19,6 +19,10 @@ extern unsigned long min_low_pfn;
|
|||
* highest page
|
||||
*/
|
||||
extern unsigned long max_pfn;
|
||||
/*
|
||||
* highest possible page
|
||||
*/
|
||||
extern unsigned long long max_possible_pfn;
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
/*
|
||||
|
|
|
@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data);
|
|||
unsigned long max_low_pfn;
|
||||
unsigned long min_low_pfn;
|
||||
unsigned long max_pfn;
|
||||
unsigned long long max_possible_pfn;
|
||||
|
||||
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
|
||||
|
||||
|
|
|
@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
|||
hiwater_vm = mm->hiwater_vm;
|
||||
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
|
||||
|
||||
/* Tell pfnmap has moved from this vma */
|
||||
if (unlikely(vma->vm_flags & VM_PFNMAP))
|
||||
untrack_pfn_moved(vma);
|
||||
|
||||
if (do_munmap(mm, old_addr, old_len) < 0) {
|
||||
/* OOM: unable to split vma, just get accounts right */
|
||||
vm_unacct_memory(excess >> PAGE_SHIFT);
|
||||
|
|
|
@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data);
|
|||
unsigned long max_low_pfn;
|
||||
unsigned long min_low_pfn;
|
||||
unsigned long max_pfn;
|
||||
unsigned long long max_possible_pfn;
|
||||
|
||||
static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
|
||||
u64 goal, u64 limit)
|
||||
|
|
Loading…
Reference in New Issue