mirror of https://gitee.com/openkylin/linux.git
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "31 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits)
  ocfs2: fix potential use after free
  mm/khugepaged: fix the xas_create_range() error path
  mm/khugepaged: collapse_shmem() do not crash on Compound
  mm/khugepaged: collapse_shmem() without freezing new_page
  mm/khugepaged: minor reorderings in collapse_shmem()
  mm/khugepaged: collapse_shmem() remember to clear holes
  mm/khugepaged: fix crashes due to misaccounted holes
  mm/khugepaged: collapse_shmem() stop if punched or truncated
  mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
  mm/huge_memory: splitting set mapping+index before unfreeze
  mm/huge_memory: rename freeze_page() to unmap_page()
  initramfs: clean old path before creating a hardlink
  kernel/kcov.c: mark funcs in __sanitizer_cov_trace_pc() as notrace
  psi: make disabling/enabling easier for vendor kernels
  proc: fixup map_files test on arm
  debugobjects: avoid recursive calls with kmemleak
  userfaultfd: shmem: UFFDIO_COPY: set the page dirty if VM_WRITE is not set
  userfaultfd: shmem: add i_size checks
  userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas
  userfaultfd: shmem: allocate anonymous memory for MAP_PRIVATE shmem
  ...
commit d8f190ee83
@@ -3505,6 +3505,10 @@
			before loading.
			See Documentation/blockdev/ramdisk.txt.

	psi=		[KNL] Enable or disable pressure stall information
			tracking.
			Format: <bool>

	psmouse.proto=	[HW,MOUSE] Highest PS2 mouse protocol extension to
			probe for; one of (bare|imps|exps|lifebook|any).
	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
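Usage note (illustrative, not part of the patch): the psi= switch above only overrides the build-time default set by the CONFIG_PSI_DEFAULT_DISABLED option introduced later in this series. On a kernel built with CONFIG_PSI_DEFAULT_DISABLED=y, booting with

    psi=1

on the kernel command line turns pressure stall tracking back on, after which reads of /proc/pressure/cpu, /proc/pressure/io and /proc/pressure/memory succeed instead of failing with EOPNOTSUPP (see the psi_show() hunk further down); psi=0 does the opposite on kernels where tracking defaults to enabled.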
MAINTAINERS (13 changed lines)
@@ -2491,7 +2491,7 @@ F: drivers/net/wireless/ath/*
ATHEROS ATH5K WIRELESS DRIVER
M: Jiri Slaby <jirislaby@gmail.com>
M: Nick Kossifidis <mickflemm@gmail.com>
M: "Luis R. Rodriguez" <mcgrof@do-not-panic.com>
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-wireless@vger.kernel.org
W: http://wireless.kernel.org/en/users/Drivers/ath5k
S: Maintained

@@ -5835,7 +5835,7 @@ F: include/uapi/linux/firewire*.h
F: tools/firewire/

FIRMWARE LOADER (request_firmware)
M: Luis R. Rodriguez <mcgrof@kernel.org>
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/firmware_class/

@@ -8135,7 +8135,7 @@ F: tools/testing/selftests/
F: Documentation/dev-tools/kselftest*

KERNEL USERMODE HELPER
M: "Luis R. Rodriguez" <mcgrof@kernel.org>
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: kernel/umh.c

@@ -8311,7 +8311,7 @@ F: mm/kmemleak.c
F: mm/kmemleak-test.c

KMOD KERNEL MODULE LOADER - USERMODE HELPER
M: "Luis R. Rodriguez" <mcgrof@kernel.org>
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: kernel/kmod.c

@@ -12061,7 +12061,7 @@ F: kernel/printk/
F: include/linux/printk.h

PRISM54 WIRELESS DRIVER
M: "Luis R. Rodriguez" <mcgrof@gmail.com>
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-wireless@vger.kernel.org
W: http://wireless.kernel.org/en/users/Drivers/p54
S: Obsolete

@@ -12075,9 +12075,10 @@ S: Maintained
F: fs/proc/
F: include/linux/proc_fs.h
F: tools/testing/selftests/proc/
F: Documentation/filesystems/proc.txt

PROC SYSCTL
M: "Luis R. Rodriguez" <mcgrof@kernel.org>
M: Luis Chamberlain <mcgrof@kernel.org>
M: Kees Cook <keescook@chromium.org>
L: linux-kernel@vger.kernel.org
L: linux-fsdevel@vger.kernel.org
@@ -338,13 +338,14 @@ void hfs_bmap_free(struct hfs_bnode *node)

nidx -= len * 8;
i = node->next;
hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. bmap not found!\n",
node->this);
hfs_bnode_put(node);
return;
}
hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
@@ -466,14 +466,15 @@ void hfs_bmap_free(struct hfs_bnode *node)

nidx -= len * 8;
i = node->next;
hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. "
"bmap not found!\n",
node->this);
hfs_bnode_put(node);
return;
}
hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
@@ -125,10 +125,10 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,

check_gen:
if (handle->ih_generation != inode->i_generation) {
iput(inode);
trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
handle->ih_generation,
inode->i_generation);
iput(inode);
result = ERR_PTR(-ESTALE);
goto bail;
}
@@ -157,18 +157,14 @@ static int __ocfs2_move_extent(handle_t *handle,
}

/*
* lock allocators, and reserving appropriate number of bits for
* meta blocks and data clusters.
*
* in some cases, we don't need to reserve clusters, just let data_ac
* be NULL.
* lock allocator, and reserve appropriate number of bits for
* meta blocks.
*/
static int ocfs2_lock_allocators_move_extents(struct inode *inode,
static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{

@@ -193,13 +189,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}

if (data_ac) {
ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
if (ret) {
mlog_errno(ret);
goto out;
}
}

*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);

@@ -259,10 +248,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}

ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
&context->meta_ac,
&context->data_ac,
extra_blocks, &credits);
ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
*len, 1,
&context->meta_ac,
extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;

@@ -285,6 +274,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}

/*
* Make sure ocfs2_reserve_cluster is called after
* __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
*
* If ocfs2_reserve_cluster is called
* before __ocfs2_flush_truncate_log, dead lock on global bitmap
* may happen.
*
*/
ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
if (ret) {
mlog_errno(ret);
goto out_unlock_mutex;
}

handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);

@@ -617,9 +621,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}

ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
&context->meta_ac,
NULL, extra_blocks, &credits);
ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
len, 1,
&context->meta_ac,
extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = -EINVAL;
if (!vma_can_userfault(cur))
goto out_unlock;

/*
* UFFDIO_COPY will fill file holes even without
* PROT_WRITE. This check enforces that if this is a
* MAP_SHARED, the process has write permission to the backing
* file. If VM_MAYWRITE is set it also enforces that on a
* MAP_SHARED vma: there is no F_WRITE_SEAL and no further
* F_WRITE_SEAL can be taken until the vma is destroyed.
*/
ret = -EPERM;
if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
goto out_unlock;

/*
* If this vma contains ending address, and huge pages
* check alignment.

@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
BUG_ON(!vma_can_userfault(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

/*
* Nothing to do: this vma is already registered into this

@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
cond_resched();

BUG_ON(!vma_can_userfault(vma));
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

/*
* Nothing to do: this vma is already registered into this
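The following is a userspace-side sketch, not part of the patch, illustrating the rule the registration hunks above enforce: registering a MAP_SHARED shmem range with userfaultfd is expected to fail with EPERM unless the mapping was created with VM_MAYWRITE (here achieved by mapping through a read-write file descriptor). The /dev/shm path, file name and length are arbitrary choices for the example.

/*
 * Sketch: UFFDIO_REGISTER on shmem is expected to require VM_MAYWRITE
 * after this series. Assumptions: tmpfs mounted at /dev/shm, kernel with
 * userfaultfd and shmem uffd support.
 */
#include <errno.h>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static int try_register(int uffd, void *addr, size_t len)
{
	struct uffdio_register reg = {
		.range = { .start = (unsigned long)addr, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	return ioctl(uffd, UFFDIO_REGISTER, &reg) ? -errno : 0;
}

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	struct uffdio_api api = { .api = UFFD_API };
	int wfd, rfd;
	void *rw, *ro;

	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
		return 1;

	/* tmpfs (shmem) file, opened once read-write and once read-only */
	wfd = open("/dev/shm/uffd-maywrite-demo", O_CREAT | O_RDWR, 0600);
	ftruncate(wfd, len);
	rfd = open("/dev/shm/uffd-maywrite-demo", O_RDONLY);

	/* writable fd + MAP_SHARED: vma keeps VM_MAYWRITE, register succeeds */
	rw = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, wfd, 0);
	printf("writable mapping:  %s\n", strerror(-try_register(uffd, rw, len)));

	/* read-only fd + MAP_SHARED: VM_MAYWRITE is clear, expect EPERM */
	ro = mmap(NULL, len, PROT_READ, MAP_SHARED, rfd, 0);
	printf("read-only mapping: %s\n", strerror(-try_register(uffd, ro, len)));

	unlink("/dev/shm/uffd-maywrite-demo");
	return 0;
}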
@@ -1,6 +1,7 @@
#ifndef _LINUX_PSI_H
#define _LINUX_PSI_H

#include <linux/jump_label.h>
#include <linux/psi_types.h>
#include <linux/sched.h>

@@ -9,7 +10,7 @@ struct css_set;

#ifdef CONFIG_PSI

extern bool psi_disabled;
extern struct static_key_false psi_disabled;

void psi_init(void);
@@ -509,6 +509,15 @@ config PSI

	  Say N if unsure.

config PSI_DEFAULT_DISABLED
	bool "Require boot parameter to enable pressure stall information tracking"
	default n
	depends on PSI
	help
	  If set, pressure stall information tracking will be disabled
	  per default but can be enabled through passing psi=1
	  on the kernel commandline during boot.

endmenu # "CPU/Task time and stats accounting"

config CPU_ISOLATION
@@ -291,16 +291,6 @@ static int __init do_reset(void)
return 1;
}

static int __init maybe_link(void)
{
if (nlink >= 2) {
char *old = find_link(major, minor, ino, mode, collected);
if (old)
return (ksys_link(old, collected) < 0) ? -1 : 1;
}
return 0;
}

static void __init clean_path(char *path, umode_t fmode)
{
struct kstat st;

@@ -313,6 +303,18 @@ static void __init clean_path(char *path, umode_t fmode)
}
}

static int __init maybe_link(void)
{
if (nlink >= 2) {
char *old = find_link(major, minor, ino, mode, collected);
if (old) {
clean_path(collected, 0);
return (ksys_link(old, collected) < 0) ? -1 : 1;
}
}
return 0;
}

static __initdata int wfd;

static int __init do_name(void)
@@ -56,7 +56,7 @@ struct kcov {
struct task_struct *t;
};

static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
{
unsigned int mode;

@@ -78,7 +78,7 @@ static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
return mode == needed_mode;
}

static unsigned long canonicalize_ip(unsigned long ip)
static notrace unsigned long canonicalize_ip(unsigned long ip)
{
#ifdef CONFIG_RANDOMIZE_BASE
ip -= kaslr_offset();
@@ -136,8 +136,18 @@

static int psi_bug __read_mostly;

bool psi_disabled __read_mostly;
core_param(psi_disabled, psi_disabled, bool, 0644);
DEFINE_STATIC_KEY_FALSE(psi_disabled);

#ifdef CONFIG_PSI_DEFAULT_DISABLED
bool psi_enable;
#else
bool psi_enable = true;
#endif
static int __init setup_psi(char *str)
{
return kstrtobool(str, &psi_enable) == 0;
}
__setup("psi=", setup_psi);

/* Running averages - we need to be higher-res than loadavg */
#define PSI_FREQ (2*HZ+1) /* 2 sec intervals */

@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)

void __init psi_init(void)
{
if (psi_disabled)
if (!psi_enable) {
static_branch_enable(&psi_disabled);
return;
}

psi_period = jiffies_to_nsecs(PSI_FREQ);
group_init(&psi_system);

@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
struct rq_flags rf;
struct rq *rq;

if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

*flags = current->flags & PF_MEMSTALL;

@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
struct rq_flags rf;
struct rq *rq;

if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

if (*flags)

@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
#ifdef CONFIG_CGROUPS
int psi_cgroup_alloc(struct cgroup *cgroup)
{
if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return 0;

cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);

@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)

void psi_cgroup_free(struct cgroup *cgroup)
{
if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

cancel_delayed_work_sync(&cgroup->psi.clock_work);

@@ -637,7 +649,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
struct rq_flags rf;
struct rq *rq;

if (psi_disabled) {
if (static_branch_likely(&psi_disabled)) {
/*
* Lame to do this here, but the scheduler cannot be locked
* from the outside, so we move cgroups from inside sched/.

@@ -673,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
{
int full;

if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return -EOPNOTSUPP;

update_stats(group);
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
{
int clear = 0, set = TSK_RUNNING;

if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

if (!wakeup || p->sched_psi_wake_requeue) {

@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
{
int clear = TSK_RUNNING, set = 0;

if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

if (!sleep) {

@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)

static inline void psi_ttwu_dequeue(struct task_struct *p)
{
if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;
/*
* Is the task being migrated during a wakeup? Make sure to

@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)

static inline void psi_task_tick(struct rq *rq)
{
if (psi_disabled)
if (static_branch_likely(&psi_disabled))
return;

if (unlikely(rq->curr->flags & PF_MEMSTALL))
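Taken together, the psi.h, Kconfig, psi.c and stats.h hunks above convert a runtime bool into a jump label so the scheduler fast paths pay almost nothing when PSI is compiled in but disabled. Below is a condensed sketch of that pattern, not a verbatim copy of the patch; the example_* names are made up for illustration, while "psi=" and CONFIG_PSI_DEFAULT_DISABLED are the real parameter and option.

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>	/* kstrtobool() on kernels of this vintage */

/* Key defaults to false, i.e. the feature starts enabled. */
DEFINE_STATIC_KEY_FALSE(example_disabled);

static bool example_enable = !IS_ENABLED(CONFIG_PSI_DEFAULT_DISABLED);

static int __init example_setup(char *str)
{
	return kstrtobool(str, &example_enable) == 0;	/* "psi=0" / "psi=1" */
}
__setup("psi=", example_setup);

void __init example_init(void)
{
	/* Flip the key once at boot; every branch site gets patched. */
	if (!example_enable)
		static_branch_enable(&example_disabled);
}

static inline void example_fast_path(void)
{
	/*
	 * On architectures with jump-label support this is a patched
	 * branch rather than a load-and-test of a variable.
	 */
	if (static_branch_likely(&example_disabled))
		return;
	/* ... accounting work, done only while the feature is enabled ... */
}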
@@ -135,7 +135,6 @@ static void fill_pool(void)
if (!new)
return;

kmemleak_ignore(new);
raw_spin_lock_irqsave(&pool_lock, flags);
hlist_add_head(&new->node, &obj_pool);
debug_objects_allocated++;

@@ -1128,7 +1127,6 @@ static int __init debug_objects_replace_static_objects(void)
obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL);
if (!obj)
goto free;
kmemleak_ignore(obj);
hlist_add_head(&obj->node, &objects);
}

@@ -1184,7 +1182,8 @@ void __init debug_objects_mem_init(void)

obj_cache = kmem_cache_create("debug_objects_cache",
sizeof (struct debug_obj), 0,
SLAB_DEBUG_OBJECTS, NULL);
SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
NULL);

if (!obj_cache || debug_objects_replace_static_objects()) {
debug_objects_enabled = 0;
@@ -1214,7 +1214,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev)

dev_info(test_dev->dev, "removing interface\n");
misc_deregister(&test_dev->misc_dev);
kfree(&test_dev->misc_dev.name);

mutex_unlock(&test_dev->config_mutex);
mutex_unlock(&test_dev->trigger_mutex);
mm/gup.c (3 changed lines)
@@ -702,12 +702,11 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!vma || start >= vma->vm_end) {
vma = find_extend_vma(mm, start);
if (!vma && in_gate_area(mm, start)) {
int ret;
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
pages ? &pages[i] : NULL);
if (ret)
return i ? : ret;
goto out;
ctx.page_mask = 0;
goto next_page;
}
@@ -2350,7 +2350,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
}
}

static void freeze_page(struct page *page)
static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;

@@ -2365,7 +2365,7 @@ static void freeze_page(struct page *page)
VM_BUG_ON_PAGE(!unmap_success, page);
}

static void unfreeze_page(struct page *page)
static void remap_page(struct page *page)
{
int i;
if (PageTransHuge(page)) {

@@ -2402,6 +2402,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
(1L << PG_unevictable) |
(1L << PG_dirty)));

/* ->mapping in first tail page is compound_mapcount */
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
page_tail);
page_tail->mapping = head->mapping;
page_tail->index = head->index + tail;

/* Page flags must be visible before we make the page non-compound. */
smp_wmb();

@@ -2422,12 +2428,6 @@
if (page_is_idle(head))
set_page_idle(page_tail);

/* ->mapping in first tail page is compound_mapcount */
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
page_tail);
page_tail->mapping = head->mapping;

page_tail->index = head->index + tail;
page_cpupid_xchg_last(page_tail, page_cpupid_last(head));

/*

@@ -2439,12 +2439,11 @@
}

static void __split_huge_page(struct page *page, struct list_head *list,
unsigned long flags)
pgoff_t end, unsigned long flags)
{
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
struct lruvec *lruvec;
pgoff_t end = -1;
int i;

lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);

@@ -2452,9 +2451,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);

if (!PageAnon(page))
end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);

for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */

@@ -2483,7 +2479,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,

spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);

unfreeze_page(head);
remap_page(head);

for (i = 0; i < HPAGE_PMD_NR; i++) {
struct page *subpage = head + i;

@@ -2626,6 +2622,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
int count, mapcount, extra_pins, ret;
bool mlocked;
unsigned long flags;
pgoff_t end;

VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);

@@ -2648,6 +2645,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
ret = -EBUSY;
goto out;
}
end = -1;
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {

@@ -2661,10 +2659,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

anon_vma = NULL;
i_mmap_lock_read(mapping);

/*
* __split_huge_page() may need to trim off pages beyond EOF:
* but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
* which cannot be nested inside the page tree lock. So note
* end now: i_size itself may be changed at any moment, but
* head page lock is good enough to serialize the trimming.
*/
end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
}

/*
* Racy check if we can split the page, before freeze_page() will
* Racy check if we can split the page, before unmap_page() will
* split PMDs
*/
if (!can_split_huge_page(head, &extra_pins)) {

@@ -2673,7 +2680,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}

mlocked = PageMlocked(page);
freeze_page(head);
unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);

/* Make sure the page is not on per-CPU pagevec as it takes pin */

@@ -2707,7 +2714,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping)
__dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
__split_huge_page(page, list, flags);
__split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };

@@ -2727,7 +2734,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
unfreeze_page(head);
remap_page(head);
ret = -EBUSY;
}
@@ -4080,7 +4080,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,

/* fallback to copy_from_user outside mmap_sem */
if (unlikely(ret)) {
ret = -EFAULT;
ret = -ENOENT;
*pagep = page;
/* don't free the page */
goto out;
mm/khugepaged.c (142 changed lines)
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* collapse_shmem - collapse small tmpfs/shmem pages into huge one.
*
* Basic scheme is simple, details are more complex:
* - allocate and freeze a new huge page;
* - allocate and lock a new huge page;
* - scan page cache replacing old pages with the new one
* + swap in pages if necessary;
* + fill in gaps;

@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* - if replacing succeeds:
* + copy data over;
* + free old pages;
* + unfreeze huge page;
* + unlock huge page;
* - if replacing failed;
* + put all pages back and unfreeze them;
* + restore gaps in the page cache;
* + free huge page;
* + unlock and free huge page;
*/
static void collapse_shmem(struct mm_struct *mm,
struct address_space *mapping, pgoff_t start,

@@ -1329,19 +1329,6 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}

new_page->index = start;
new_page->mapping = mapping;
__SetPageSwapBacked(new_page);
__SetPageLocked(new_page);
BUG_ON(!page_ref_freeze(new_page, 1));

/*
* At this point the new_page is 'frozen' (page_count() is zero),
* locked and not up-to-date. It's safe to insert it into the page
* cache, because nobody would be able to map it or use it in other
* way until we unfreeze it.
*/

/* This will be less messy when we use multi-index entries */
do {
xas_lock_irq(&xas);

@@ -1349,19 +1336,44 @@ static void collapse_shmem(struct mm_struct *mm,
if (!xas_error(&xas))
break;
xas_unlock_irq(&xas);
if (!xas_nomem(&xas, GFP_KERNEL))
if (!xas_nomem(&xas, GFP_KERNEL)) {
mem_cgroup_cancel_charge(new_page, memcg, true);
result = SCAN_FAIL;
goto out;
}
} while (1);

__SetPageLocked(new_page);
__SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;

/*
* At this point the new_page is locked and not up-to-date.
* It's safe to insert it into the page cache, because nobody would
* be able to map it or use it in another way until we unlock it.
*/

xas_set(&xas, start);
for (index = start; index < end; index++) {
struct page *page = xas_next(&xas);

VM_BUG_ON(index != xas.xa_index);
if (!page) {
/*
* Stop if extent has been truncated or hole-punched,
* and is now completely empty.
*/
if (index == start) {
if (!xas_next_entry(&xas, end - 1)) {
result = SCAN_TRUNCATED;
goto xa_locked;
}
xas_set(&xas, index);
}
if (!shmem_charge(mapping->host, 1)) {
result = SCAN_FAIL;
break;
goto xa_locked;
}
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
nr_none++;

@@ -1376,13 +1388,12 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_FAIL;
goto xa_unlocked;
}
xas_lock_irq(&xas);
xas_set(&xas, index);
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
} else {
result = SCAN_PAGE_LOCK;
break;
goto xa_locked;
}

/*

@@ -1391,17 +1402,24 @@ static void collapse_shmem(struct mm_struct *mm,
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageUptodate(page), page);
VM_BUG_ON_PAGE(PageTransCompound(page), page);

/*
* If file was truncated then extended, or hole-punched, before
* we locked the first page, then a THP might be there already.
*/
if (PageTransCompound(page)) {
result = SCAN_PAGE_COMPOUND;
goto out_unlock;
}

if (page_mapping(page) != mapping) {
result = SCAN_TRUNCATED;
goto out_unlock;
}
xas_unlock_irq(&xas);

if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
goto out_isolate_failed;
goto out_unlock;
}

if (page_mapped(page))

@@ -1421,7 +1439,9 @@ static void collapse_shmem(struct mm_struct *mm,
*/
if (!page_ref_freeze(page, 3)) {
result = SCAN_PAGE_COUNT;
goto out_lru;
xas_unlock_irq(&xas);
putback_lru_page(page);
goto out_unlock;
}

/*

@@ -1433,71 +1453,74 @@ static void collapse_shmem(struct mm_struct *mm,
/* Finally, replace with the new page. */
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
continue;
out_lru:
xas_unlock_irq(&xas);
putback_lru_page(page);
out_isolate_failed:
unlock_page(page);
put_page(page);
goto xa_unlocked;
out_unlock:
unlock_page(page);
put_page(page);
break;
goto xa_unlocked;
}
xas_unlock_irq(&xas);

__inc_node_page_state(new_page, NR_SHMEM_THPS);
if (nr_none) {
struct zone *zone = page_zone(new_page);

__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
}

xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:

if (result == SCAN_SUCCEED) {
struct page *page, *tmp;
struct zone *zone = page_zone(new_page);

/*
* Replacing old pages with new one has succeeded, now we
* need to copy the content and free the old pages.
*/
index = start;
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
while (index < page->index) {
clear_highpage(new_page + (index % HPAGE_PMD_NR));
index++;
}
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
page);
list_del(&page->lru);
unlock_page(page);
page_ref_unfreeze(page, 1);
page->mapping = NULL;
page_ref_unfreeze(page, 1);
ClearPageActive(page);
ClearPageUnevictable(page);
unlock_page(page);
put_page(page);
index++;
}
while (index < end) {
clear_highpage(new_page + (index % HPAGE_PMD_NR));
index++;
}

local_irq_disable();
__inc_node_page_state(new_page, NR_SHMEM_THPS);
if (nr_none) {
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
}
local_irq_enable();

/*
* Remove pte page tables, so we can re-fault
* the page as huge.
*/
retract_page_tables(mapping, start);

/* Everything is ready, let's unfreeze the new_page */
set_page_dirty(new_page);
SetPageUptodate(new_page);
page_ref_unfreeze(new_page, HPAGE_PMD_NR);
page_ref_add(new_page, HPAGE_PMD_NR - 1);
set_page_dirty(new_page);
mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_anon(new_page);
unlock_page(new_page);

/*
* Remove pte page tables, so we can re-fault the page as huge.
*/
retract_page_tables(mapping, start);
*hpage = NULL;

khugepaged_pages_collapsed++;
} else {
struct page *page;

/* Something went wrong: roll back page cache changes */
shmem_uncharge(mapping->host, nr_none);
xas_lock_irq(&xas);
mapping->nrpages -= nr_none;
shmem_uncharge(mapping->host, nr_none);

xas_set(&xas, start);
xas_for_each(&xas, page, end - 1) {
page = list_first_entry_or_null(&pagelist,

@@ -1519,19 +1542,18 @@ static void collapse_shmem(struct mm_struct *mm,
xas_store(&xas, page);
xas_pause(&xas);
xas_unlock_irq(&xas);
putback_lru_page(page);
unlock_page(page);
putback_lru_page(page);
xas_lock_irq(&xas);
}
VM_BUG_ON(nr_none);
xas_unlock_irq(&xas);

/* Unfreeze new_page, caller would take care about freeing it */
page_ref_unfreeze(new_page, 1);
mem_cgroup_cancel_charge(new_page, memcg, true);
unlock_page(new_page);
new_page->mapping = NULL;
}

unlock_page(new_page);
out:
VM_BUG_ON(!list_empty(&pagelist));
/* TODO: tracepoints */
@@ -5813,8 +5813,10 @@ void __meminit init_currently_empty_zone(struct zone *zone,
unsigned long size)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int zone_idx = zone_idx(zone) + 1;

pgdat->nr_zones = zone_idx(zone) + 1;
if (zone_idx > pgdat->nr_zones)
pgdat->nr_zones = zone_idx;

zone->zone_start_pfn = zone_start_pfn;
mm/rmap.c (13 changed lines)
@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
address + PAGE_SIZE);
} else {
/*
* We should not need to notify here as we reach this
* case only from freeze_page() itself only call from
* split_huge_page_to_list() so everything below must
* be true:
* - page is not anonymous
* - page is locked
*
* So as it is a locked file back page thus it can not
* be remove from the page cache and replace by a new
* page before mmu_notifier_invalidate_range_end so no
* This is a locked file-backed page, thus it cannot
* be removed from the page cache and replaced by a new
* page before mmu_notifier_invalidate_range_end, so no
* concurrent thread might update its page table to
* point at new page while a device still is using this
* page.
mm/shmem.c (43 changed lines)
@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, long pages)
if (!shmem_inode_acct_block(inode, pages))
return false;

/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
inode->i_mapping->nrpages += pages;

spin_lock_irqsave(&info->lock, flags);
info->alloced += pages;
inode->i_blocks += pages * BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
inode->i_mapping->nrpages += pages;

return true;
}

@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode, long pages)
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long flags;

/* nrpages adjustment done by __delete_from_page_cache() or caller */

spin_lock_irqsave(&info->lock, flags);
info->alloced -= pages;
inode->i_blocks -= pages * BLOCKS_PER_PAGE;

@@ -1509,11 +1513,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
{
struct page *oldpage, *newpage;
struct address_space *swap_mapping;
swp_entry_t entry;
pgoff_t swap_index;
int error;

oldpage = *pagep;
swap_index = page_private(oldpage);
entry.val = page_private(oldpage);
swap_index = swp_offset(entry);
swap_mapping = page_mapping(oldpage);

/*

@@ -1532,7 +1538,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
__SetPageLocked(newpage);
__SetPageSwapBacked(newpage);
SetPageUptodate(newpage);
set_page_private(newpage, swap_index);
set_page_private(newpage, entry.val);
SetPageSwapCache(newpage);

/*

@@ -2214,6 +2220,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
struct page *page;
pte_t _dst_pte, *dst_pte;
int ret;
pgoff_t offset, max_off;

ret = -ENOMEM;
if (!shmem_inode_acct_block(inode, 1))

@@ -2236,7 +2243,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
*pagep = page;
shmem_inode_unacct_blocks(inode, 1);
/* don't free the page */
return -EFAULT;
return -ENOENT;
}
} else { /* mfill_zeropage_atomic */
clear_highpage(page);

@@ -2251,6 +2258,12 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
__SetPageSwapBacked(page);
__SetPageUptodate(page);

ret = -EFAULT;
offset = linear_page_index(dst_vma, dst_addr);
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(offset >= max_off))
goto out_release;

ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
if (ret)
goto out_release;

@@ -2265,9 +2278,25 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
else {
/*
* We don't set the pte dirty if the vma has no
* VM_WRITE permission, so mark the page dirty or it
* could be freed from under us. We could do it
* unconditionally before unlock_page(), but doing it
* only if VM_WRITE is not set is faster.
*/
set_page_dirty(page);
}

dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);

ret = -EFAULT;
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(offset >= max_off))
goto out_release_uncharge_unlock;

ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
if (!pte_none(*dst_pte))
goto out_release_uncharge_unlock;

@@ -2285,13 +2314,15 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,

/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
unlock_page(page);
pte_unmap_unlock(dst_pte, ptl);
unlock_page(page);
ret = 0;
out:
return ret;
out_release_uncharge_unlock:
pte_unmap_unlock(dst_pte, ptl);
ClearPageDirty(page);
delete_from_page_cache(page);
out_release_uncharge:
mem_cgroup_cancel_charge(page, memcg, false);
out_release:
@@ -517,9 +517,13 @@ void truncate_inode_pages_final(struct address_space *mapping)
*/
xa_lock_irq(&mapping->i_pages);
xa_unlock_irq(&mapping->i_pages);

truncate_inode_pages(mapping, 0);
}

/*
* Cleancache needs notification even if there are no pages or shadow
* entries.
*/
truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
@@ -33,6 +33,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
void *page_kaddr;
int ret;
struct page *page;
pgoff_t offset, max_off;
struct inode *inode;

if (!*pagep) {
ret = -ENOMEM;

@@ -48,7 +50,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,

/* fallback to copy_from_user outside mmap_sem */
if (unlikely(ret)) {
ret = -EFAULT;
ret = -ENOENT;
*pagep = page;
/* don't free the page */
goto out;

@@ -73,8 +75,17 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
if (dst_vma->vm_file) {
/* the shmem MAP_PRIVATE case requires checking the i_size */
inode = dst_vma->vm_file->f_inode;
offset = linear_page_index(dst_vma, dst_addr);
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
ret = -EFAULT;
if (unlikely(offset >= max_off))
goto out_release_uncharge_unlock;
}
ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_release_uncharge_unlock;

@@ -108,11 +119,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm,
pte_t _dst_pte, *dst_pte;
spinlock_t *ptl;
int ret;
pgoff_t offset, max_off;
struct inode *inode;

_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
dst_vma->vm_page_prot));
ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
if (dst_vma->vm_file) {
/* the shmem MAP_PRIVATE case requires checking the i_size */
inode = dst_vma->vm_file->f_inode;
offset = linear_page_index(dst_vma, dst_addr);
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
ret = -EFAULT;
if (unlikely(offset >= max_off))
goto out_unlock;
}
ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_unlock;
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

@@ -205,8 +227,9 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
goto out_unlock;
/*
* Only allow __mcopy_atomic_hugetlb on userfaultfd
* registered ranges.
* Check the vma is registered in uffd, this is
* required to enforce the VM_MAYWRITE check done at
* uffd registration time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;

@@ -274,7 +297,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,

cond_resched();

if (unlikely(err == -EFAULT)) {
if (unlikely(err == -ENOENT)) {
up_read(&dst_mm->mmap_sem);
BUG_ON(!page);

@@ -380,7 +403,17 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
{
ssize_t err;

if (vma_is_anonymous(dst_vma)) {
/*
* The normal page fault path for a shmem will invoke the
* fault, fill the hole in the file and COW it right away. The
* result generates plain anonymous memory. So when we are
* asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
* generate anonymous memory directly without actually filling
* the hole. For the MAP_PRIVATE case the robustness check
* only happens in the pagetable (to verify it's still none)
* and not in the radix tree.
*/
if (!(dst_vma->vm_flags & VM_SHARED)) {
if (!zeropage)
err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
dst_addr, src_addr, page);

@@ -449,13 +482,9 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
if (!dst_vma)
goto out_unlock;
/*
* Be strict and only allow __mcopy_atomic on userfaultfd
* registered ranges to prevent userland errors going
* unnoticed. As far as the VM consistency is concerned, it
* would be perfectly safe to remove this check, but there's
* no useful usage for __mcopy_atomic ouside of userfaultfd
* registered ranges. This is after all why these are ioctls
* belonging to the userfaultfd and not syscalls.
* Check the vma is registered in uffd, this is required to
* enforce the VM_MAYWRITE check done at uffd registration
* time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;

@@ -489,7 +518,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
* dst_vma.
*/
err = -ENOMEM;
if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
if (!(dst_vma->vm_flags & VM_SHARED) &&
unlikely(anon_vma_prepare(dst_vma)))
goto out_unlock;

while (src_addr < src_start + len) {

@@ -530,7 +560,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
src_addr, &page, zeropage);
cond_resched();

if (unlikely(err == -EFAULT)) {
if (unlikely(err == -ENOENT)) {
void *page_kaddr;

up_read(&dst_mm->mmap_sem);
@@ -13,7 +13,7 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Test readlink /proc/self/map_files/... with address 0. */
/* Test readlink /proc/self/map_files/... with minimum address. */
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>

@@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
int main(void)
{
const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
#ifdef __arm__
unsigned long va = 2 * PAGE_SIZE;
#else
unsigned long va = 0;
#endif
void *p;
int fd;
unsigned long a, b;

@@ -55,7 +60,7 @@ int main(void)
if (fd == -1)
return 1;

p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
if (p == MAP_FAILED) {
if (errno == EPERM)
return 2;