mirror of https://gitee.com/openkylin/linux.git
Merge branch 'akpm' (patches from Andrew)
Merge misc mm fixes from Andrew Morton:
 "15 patches.

  VM subsystems affected by this patch series: userfaultfd, kfence,
  highmem, pagealloc, memblock, pagecache, secretmem, pagemap, and
  hugetlbfs"

* akpm:
  hugetlbfs: fix mount mode command line processing
  mm: fix the deadlock in finish_fault()
  mm: mmap_lock: fix disabling preemption directly
  mm/secretmem: wire up ->set_page_dirty
  writeback, cgroup: do not reparent dax inodes
  writeback, cgroup: remove wb from offline list before releasing refcnt
  memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions
  mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction
  mm: use kmap_local_page in memzero_page
  mm: call flush_dcache_page() in memcpy_to_page() and memzero_page()
  kfence: skip all GFP_ZONEMASK allocations
  kfence: move the size check to the beginning of __kfence_alloc()
  kfence: defer kfence_test_init to ensure that kunit debugfs is created
  selftest: use mmap instead of posix_memalign to allocate memory
  userfaultfd: do not untag user pointers
This commit is contained in:
commit
bca1d4de39
|
@ -45,8 +45,9 @@ how the user addresses are used by the kernel:
|
|||
|
||||
1. User addresses not accessed by the kernel but used for address space
|
||||
management (e.g. ``mprotect()``, ``madvise()``). The use of valid
|
||||
tagged pointers in this context is allowed with the exception of
|
||||
``brk()``, ``mmap()`` and the ``new_address`` argument to
|
||||
tagged pointers in this context is allowed with these exceptions:
|
||||
|
||||
- ``brk()``, ``mmap()`` and the ``new_address`` argument to
|
||||
``mremap()`` as these have the potential to alias with existing
|
||||
user addresses.
|
||||
|
||||
|
@ -54,6 +55,15 @@ how the user addresses are used by the kernel:
|
|||
incorrectly accept valid tagged pointers for the ``brk()``,
|
||||
``mmap()`` and ``mremap()`` system calls.
|
||||
|
||||
- The ``range.start``, ``start`` and ``dst`` arguments to the
|
||||
``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
|
||||
``userfaultfd()``, as fault addresses subsequently obtained by reading
|
||||
the file descriptor will be untagged, which may otherwise confuse
|
||||
tag-unaware programs.
|
||||
|
||||
NOTE: This behaviour changed in v5.14 and so some earlier kernels may
|
||||
incorrectly accept valid tagged pointers for this system call.
|
||||
|
||||
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
||||
relaxation is disabled by default and the application thread needs to
|
||||
explicitly enable it via ``prctl()`` as follows:
|
||||
|
|
|
@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
|
|||
*/
|
||||
smp_mb();
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return false;
|
||||
|
||||
/* while holding I_WB_SWITCH, no one else can update the association */
|
||||
spin_lock(&inode->i_lock);
|
||||
if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
|
||||
|
|
|
@ -77,7 +77,7 @@ enum hugetlb_param {
|
|||
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
|
||||
fsparam_u32 ("gid", Opt_gid),
|
||||
fsparam_string("min_size", Opt_min_size),
|
||||
fsparam_u32 ("mode", Opt_mode),
|
||||
fsparam_u32oct("mode", Opt_mode),
|
||||
fsparam_string("nr_inodes", Opt_nr_inodes),
|
||||
fsparam_string("pagesize", Opt_pagesize),
|
||||
fsparam_string("size", Opt_size),
|
||||
|
|
|
@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
|
|||
}
|
||||
|
||||
static __always_inline int validate_range(struct mm_struct *mm,
|
||||
__u64 *start, __u64 len)
|
||||
__u64 start, __u64 len)
|
||||
{
|
||||
__u64 task_size = mm->task_size;
|
||||
|
||||
*start = untagged_addr(*start);
|
||||
|
||||
if (*start & ~PAGE_MASK)
|
||||
if (start & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
if (len & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
if (*start < mmap_min_addr)
|
||||
if (start < mmap_min_addr)
|
||||
return -EINVAL;
|
||||
if (*start >= task_size)
|
||||
if (start >= task_size)
|
||||
return -EINVAL;
|
||||
if (len > task_size - *start)
|
||||
if (len > task_size - start)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
|||
vm_flags |= VM_UFFD_MINOR;
|
||||
}
|
||||
|
||||
ret = validate_range(mm, &uffdio_register.range.start,
|
||||
ret = validate_range(mm, uffdio_register.range.start,
|
||||
uffdio_register.range.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
|||
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
|
||||
goto out;
|
||||
|
||||
ret = validate_range(mm, &uffdio_unregister.start,
|
||||
ret = validate_range(mm, uffdio_unregister.start,
|
||||
uffdio_unregister.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
|
|||
if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
|
||||
goto out;
|
||||
|
||||
ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
|
||||
ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
|
|||
sizeof(uffdio_copy)-sizeof(__s64)))
|
||||
goto out;
|
||||
|
||||
ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
|
||||
ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
/*
|
||||
|
@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
|
|||
sizeof(uffdio_zeropage)-sizeof(__s64)))
|
||||
goto out;
|
||||
|
||||
ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
|
||||
ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
|
||||
uffdio_zeropage.range.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
|
|||
sizeof(struct uffdio_writeprotect)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = validate_range(ctx->mm, &uffdio_wp.range.start,
|
||||
ret = validate_range(ctx->mm, uffdio_wp.range.start,
|
||||
uffdio_wp.range.len);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
|
|||
sizeof(uffdio_continue) - (sizeof(__s64))))
|
||||
goto out;
|
||||
|
||||
ret = validate_range(ctx->mm, &uffdio_continue.range.start,
|
||||
ret = validate_range(ctx->mm, uffdio_continue.range.start,
|
||||
uffdio_continue.range.len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
|
|
@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
|
|||
|
||||
VM_BUG_ON(offset + len > PAGE_SIZE);
|
||||
memcpy(to + offset, from, len);
|
||||
flush_dcache_page(page);
|
||||
kunmap_local(to);
|
||||
}
|
||||
|
||||
static inline void memzero_page(struct page *page, size_t offset, size_t len)
|
||||
{
|
||||
char *addr = kmap_atomic(page);
|
||||
char *addr = kmap_local_page(page);
|
||||
memset(addr + offset, 0, len);
|
||||
kunmap_atomic(addr);
|
||||
flush_dcache_page(page);
|
||||
kunmap_local(addr);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_HIGHMEM_H */
|
||||
|
|
|
@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
|
|||
*/
|
||||
#define for_each_mem_range(i, p_start, p_end) \
|
||||
__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \
|
||||
MEMBLOCK_NONE, p_start, p_end, NULL)
|
||||
MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
|
||||
|
||||
/**
|
||||
* for_each_mem_range_rev - reverse iterate through memblock areas from
|
||||
|
@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
|
|||
*/
|
||||
#define for_each_mem_range_rev(i, p_start, p_end) \
|
||||
__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
|
||||
MEMBLOCK_NONE, p_start, p_end, NULL)
|
||||
MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
|
||||
|
||||
/**
|
||||
* for_each_reserved_mem_range - iterate over all reserved memblock areas
|
||||
|
|
|
@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work)
|
|||
blkcg_unpin_online(blkcg);
|
||||
|
||||
fprop_local_destroy_percpu(&wb->memcg_completions);
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
|
||||
spin_lock_irq(&cgwb_lock);
|
||||
list_del(&wb->offline_node);
|
||||
spin_unlock_irq(&cgwb_lock);
|
||||
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
wb_exit(wb);
|
||||
WARN_ON_ONCE(!list_empty(&wb->b_attached));
|
||||
kfree_rcu(wb, rcu);
|
||||
|
|
|
@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s)
|
|||
|
||||
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
|
||||
{
|
||||
/*
|
||||
* Perform size check before switching kfence_allocation_gate, so that
|
||||
* we don't disable KFENCE without making an allocation.
|
||||
*/
|
||||
if (size > PAGE_SIZE)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Skip allocations from non-default zones, including DMA. We cannot
|
||||
* guarantee that pages in the KFENCE pool will have the requested
|
||||
* properties (e.g. reside in DMAable memory).
|
||||
*/
|
||||
if ((flags & GFP_ZONEMASK) ||
|
||||
(s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* allocation_gate only needs to become non-zero, so it doesn't make
|
||||
* sense to continue writing to it and pay the associated contention
|
||||
|
@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
|
|||
if (!READ_ONCE(kfence_enabled))
|
||||
return NULL;
|
||||
|
||||
if (size > PAGE_SIZE)
|
||||
return NULL;
|
||||
|
||||
return kfence_guarded_alloc(s, size, flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -852,7 +852,7 @@ static void kfence_test_exit(void)
|
|||
tracepoint_synchronize_unregister();
|
||||
}
|
||||
|
||||
late_initcall(kfence_test_init);
|
||||
late_initcall_sync(kfence_test_init);
|
||||
module_exit(kfence_test_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
|
|
@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type,
|
|||
return true;
|
||||
|
||||
/* skip hotpluggable memory regions if needed */
|
||||
if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
|
||||
if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
|
||||
!(flags & MEMBLOCK_HOTPLUG))
|
||||
return true;
|
||||
|
||||
/* if we want mirror memory skip non-mirror memory regions */
|
||||
|
|
11
mm/memory.c
11
mm/memory.c
|
@ -4026,9 +4026,18 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
|
|||
return ret;
|
||||
}
|
||||
|
||||
if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
|
||||
if (vmf->prealloc_pte) {
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (likely(pmd_none(*vmf->pmd))) {
|
||||
mm_inc_nr_ptes(vma->vm_mm);
|
||||
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
|
||||
vmf->prealloc_pte = NULL;
|
||||
}
|
||||
spin_unlock(vmf->ptl);
|
||||
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
|
||||
return VM_FAULT_OOM;
|
||||
}
|
||||
}
|
||||
|
||||
/* See comment in handle_pte_fault() */
|
||||
if (pmd_devmap_trans_unstable(vmf->pmd))
|
||||
|
|
|
@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void)
|
|||
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
|
||||
do { \
|
||||
const char *memcg_path; \
|
||||
preempt_disable(); \
|
||||
local_lock(&memcg_paths.lock); \
|
||||
memcg_path = get_mm_memcg_path(mm); \
|
||||
trace_mmap_lock_##type(mm, \
|
||||
memcg_path != NULL ? memcg_path : "", \
|
||||
##__VA_ARGS__); \
|
||||
if (likely(memcg_path != NULL)) \
|
||||
put_memcg_path_buf(); \
|
||||
preempt_enable(); \
|
||||
local_unlock(&memcg_paths.lock); \
|
||||
} while (0)
|
||||
|
||||
#else /* !CONFIG_MEMCG */
|
||||
|
|
|
@ -840,20 +840,23 @@ void init_mem_debugging_and_hardening(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (_init_on_alloc_enabled_early) {
|
||||
if (page_poisoning_requested)
|
||||
if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
|
||||
page_poisoning_requested) {
|
||||
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
|
||||
"will take precedence over init_on_alloc\n");
|
||||
else
|
||||
"will take precedence over init_on_alloc and init_on_free\n");
|
||||
_init_on_alloc_enabled_early = false;
|
||||
_init_on_free_enabled_early = false;
|
||||
}
|
||||
|
||||
if (_init_on_alloc_enabled_early)
|
||||
static_branch_enable(&init_on_alloc);
|
||||
}
|
||||
if (_init_on_free_enabled_early) {
|
||||
if (page_poisoning_requested)
|
||||
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
|
||||
"will take precedence over init_on_free\n");
|
||||
else
|
||||
static_branch_disable(&init_on_alloc);
|
||||
|
||||
if (_init_on_free_enabled_early)
|
||||
static_branch_enable(&init_on_free);
|
||||
}
|
||||
else
|
||||
static_branch_disable(&init_on_free);
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||
if (!debug_pagealloc_enabled())
|
||||
|
|
|
@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page)
|
|||
}
|
||||
|
||||
const struct address_space_operations secretmem_aops = {
|
||||
.set_page_dirty = __set_page_dirty_no_writeback,
|
||||
.freepage = secretmem_freepage,
|
||||
.migratepage = secretmem_migratepage,
|
||||
.isolate_page = secretmem_isolate_page,
|
||||
|
|
|
@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area)
|
|||
|
||||
static void anon_allocate_area(void **alloc_area)
|
||||
{
|
||||
if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
|
||||
err("posix_memalign() failed");
|
||||
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (*alloc_area == MAP_FAILED)
|
||||
err("mmap of anonymous memory failed");
|
||||
}
|
||||
|
||||
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
|
||||
|
|
Loading…
Reference in New Issue