Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - make the debugfs 'kernel_page_tables' file read-only, as it only
     has read ops.  (Borislav Petkov)

   - micro-optimize clflush_cache_range() (Chris Wilson)

   - swiotlb enhancements, which fixes certain KVM emulated devices
     (Igor Mammedov)

   - fix an LDT related debug message (Jan Beulich)

   - modularize CONFIG_X86_PTDUMP (Kees Cook)

   - tone down an overly alarming warning (Laura Abbott)

   - Mark variable __initdata (Rasmus Villemoes)

   - PAT additions (Toshi Kani)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Micro-optimise clflush_cache_range()
  x86/mm/pat: Change free_memtype() to support shrinking case
  x86/mm/pat: Add untrack_pfn_moved for mremap
  x86/mm: Drop WARN from multi-BAR check
  x86/LDT: Print the real LDT base address
  x86/mm/64: Enable SWIOTLB if system has SRAT memory regions above MAX_DMA32_PFN
  x86/mm: Introduce max_possible_pfn
  x86/mm/ptdump: Make (debugfs)/kernel_page_tables read-only
  x86/mm/mtrr: Mark the 'range_new' static variable in mtrr_calc_range_state() as __initdata
  x86/mm: Turn CONFIG_X86_PTDUMP into a module
This commit is contained in:
Linus Torvalds 2016-01-11 17:16:01 -08:00
commit 0ffedcda63
18 changed files with 146 additions and 54 deletions

View File

@ -69,7 +69,7 @@ config X86_PTDUMP_CORE
def_bool n
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
tristate "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
select X86_PTDUMP_CORE

View File

@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base,
unsigned long x_remove_size, int i)
{
static struct range range_new[RANGE_NUM];
/*
* range_new should really be an automatic variable, but
* putting 4096 bytes on the stack is frowned upon, to put it
* mildly. It is safe to make it a static __initdata variable,
* since mtrr_calc_range_state is only called during init and
* there's no way it will call itself recursively.
*/
static struct range range_new[RANGE_NUM] __initdata;
unsigned long range_sums_new;
static int nr_range_new;
int nr_range_new;
int num_reg;
/* Convert ranges to var ranges state: */

View File

@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
if (!no_iommu && max_pfn > MAX_DMA32_PFN)
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
return swiotlb;

View File

@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task)
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
dead_task->mm->context.ldt->entries,
dead_task->mm->context.ldt->size);
BUG();
}

View File

@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p)
if (mtrr_trim_uncached_memory(max_pfn))
max_pfn = e820_end_of_ram_pfn();
max_possible_pfn = max_pfn;
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();

View File

@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o

View File

@ -0,0 +1,46 @@
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk_pgd_level(m, NULL);
return 0;
}
static int ptdump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show, NULL);
}
static const struct file_operations ptdump_fops = {
.owner = THIS_MODULE,
.open = ptdump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *pe;
static int __init pt_dump_debug_init(void)
{
pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
return 0;
}
static void __exit pt_dump_debug_exit(void)
{
debugfs_remove_recursive(pe);
}
module_init(pt_dump_debug_init);
module_exit(pt_dump_debug_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");

View File

@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
}
#ifdef CONFIG_X86_PTDUMP
static int ptdump_show(struct seq_file *m, void *v)
static int __init pt_dump_init(void)
{
ptdump_walk_pgd_level(m, NULL);
return 0;
}
static int ptdump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show, NULL);
}
static const struct file_operations ptdump_fops = {
.open = ptdump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif
static int pt_dump_init(void)
{
#ifdef CONFIG_X86_PTDUMP
struct dentry *pe;
#endif
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
@ -468,13 +445,6 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
#ifdef CONFIG_X86_PTDUMP
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
#endif
return 0;
}

View File

@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:

View File

@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
void *vend = vaddr + size;
void *p;
if (p >= vend)
return;
mb();
for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
p < vend; p += boot_cpu_data.x86_clflush_size)
for (; p < vend; p += clflush_size)
clflushopt(p);
mb();

View File

@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end)
entry = rbt_memtype_erase(start, end);
spin_unlock(&memtype_lock);
if (!entry) {
if (IS_ERR(entry)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
vma->vm_flags &= ~VM_PAT;
}
/*
* untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
* with the old vma after its pfnmap page table has been removed. The new
* vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
*/
void untrack_pfn_moved(struct vm_area_struct *vma)
{
vma->vm_flags &= ~VM_PAT;
}
pgprot_t pgprot_writecombine(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) |

View File

@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
return last_lower; /* Returns NULL if there is no overlap */
}
static struct memtype *memtype_rb_exact_match(struct rb_root *root,
u64 start, u64 end)
enum {
MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};
static struct memtype *memtype_rb_match(struct rb_root *root,
u64 start, u64 end, int match_type)
{
struct memtype *match;
@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
while (match != NULL && match->start < end) {
struct rb_node *node;
if (match->start == start && match->end == end)
if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
if ((match_type == MEMTYPE_END_MATCH) &&
(match->start < start) && (match->end == end))
return match;
node = rb_next(&match->rb);
@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
match = NULL;
}
return NULL; /* Returns NULL if there is no exact match */
return NULL; /* Returns NULL if there is no match */
}
static int memtype_rb_check_conflict(struct rb_root *root,
@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
if (!data)
goto out;
/*
* Since the memtype_rbroot tree allows overlapping ranges,
* rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
data = memtype_rb_match(&memtype_rbroot, start, end,
MEMTYPE_EXACT_MATCH);
if (!data) {
data = memtype_rb_match(&memtype_rbroot, start, end,
MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}
if (data->start == start) {
/* munmap: erase this node */
rb_erase_augmented(&data->rb, &memtype_rbroot,
&memtype_rb_augment_cb);
} else {
/* mremap: update the end value of this node */
rb_erase_augmented(&data->rb, &memtype_rbroot,
&memtype_rb_augment_cb);
data->end = start;
data->subtree_max_end = data->end;
memtype_rb_insert(&memtype_rbroot, data);
return NULL;
}
rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
out:
return data;
}

View File

@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1);
max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
return 0;
out_err_bad_srat:
bad_srat();

View File

@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
}
/*
* untrack_pfn_vma is called while unmapping a pfnmap for a region.
* untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero).
*/
@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
unsigned long pfn, unsigned long size)
{
}
/*
* untrack_pfn_moved is called while mremapping a pfnmap for a new region.
*/
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr,
@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
#ifdef __HAVE_COLOR_ZERO_PAGE

View File

@ -19,6 +19,10 @@ extern unsigned long min_low_pfn;
* highest page
*/
extern unsigned long max_pfn;
/*
* highest possible page
*/
extern unsigned long long max_possible_pfn;
#ifndef CONFIG_NO_BOOTMEM
/*

View File

@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

View File

@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);

View File

@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;
static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)