mm/gup: rename "nonblocking" to "locked" where proper
Patch series "mm: Page fault enhancements", v6. This series contains cleanups and enhancements to current page fault logic. The whole idea comes from the discussion between Andrea and Linus on the bug reported by syzbot here: https://lkml.org/lkml/2017/11/2/833 Basically it does two things: (a) Allows the page fault logic to be more interactive on not only SIGKILL, but also the rest of userspace signals, and, (b) Allows the page fault retry (VM_FAULT_RETRY) to happen for more than once. For (a): with the changes we should be able to react faster when page faults are working in parallel with userspace signals like SIGSTOP and SIGCONT (and more), and with that we can remove the buggy part in userfaultfd and benefit the whole page fault mechanism on faster signal processing to reach the userspace. For (b), we should be able to allow the page fault handler to loop for even more than twice. Some context: for now since we have FAULT_FLAG_ALLOW_RETRY we can allow to retry the page fault once with the same interrupt context, however never more than twice. This can be not only a potential cleanup to remove this assumption since AFAIU the code itself doesn't really have this twice-only limitation (though that should be a protective approach in the past), at the same time it'll greatly simplify future works like userfaultfd write-protect where it's possible to retry for more than twice (please have a look at [1] below for a possible user that might require the page fault to be handled for a third time; if we can remove the retry limitation we can simply drop that patch and those complexity). This patch (of 16): There's plenty of places around __get_user_pages() that has a parameter "nonblocking" which does not really mean that "it won't block" (because it can really block) but instead it shows whether the mmap_sem is released by up_read() during the page fault handling mostly when VM_FAULT_RETRY is returned. We have the correct naming in e.g. get_user_pages_locked() or get_user_pages_remote() as "locked", however there're still many places that are using the "nonblocking" as name. Renaming the places to "locked" where proper to better suite the functionality of the variable. While at it, fixing up some of the comments accordingly. Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Brian Geffon <bgeffon@google.com> Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Reviewed-by: Jerome Glisse <jglisse@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Martin Cracauer <cracauer@cons.org> Cc: "Kirill A . Shutemov" <kirill@shutemov.name> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Cc: Bobby Powers <bobbypowers@gmail.com> Cc: Maya Gokhale <gokhale2@llnl.gov> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Marty McFadden <mcfadden8@llnl.gov> Cc: Mel Gorman <mgorman@suse.de> Cc: Hugh Dickins <hughd@google.com> Cc: Denis Plotnikov <dplotnikov@virtuozzo.com> Cc: Pavel Emelyanov <xemul@openvz.org> Link: http://lkml.kernel.org/r/20200220155353.8676-2-peterx@redhat.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
767e5ee54e
commit
4f6da93411
44
mm/gup.c
44
mm/gup.c
|
@ -846,12 +846,12 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mmap_sem must be held on entry. If @nonblocking != NULL and
|
* mmap_sem must be held on entry. If @locked != NULL and *@flags
|
||||||
* *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
|
* does not include FOLL_NOWAIT, the mmap_sem may be released. If it
|
||||||
* If it is, *@nonblocking will be set to 0 and -EBUSY returned.
|
* is, *@locked will be set to 0 and -EBUSY returned.
|
||||||
*/
|
*/
|
||||||
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
||||||
unsigned long address, unsigned int *flags, int *nonblocking)
|
unsigned long address, unsigned int *flags, int *locked)
|
||||||
{
|
{
|
||||||
unsigned int fault_flags = 0;
|
unsigned int fault_flags = 0;
|
||||||
vm_fault_t ret;
|
vm_fault_t ret;
|
||||||
|
@ -863,7 +863,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
||||||
fault_flags |= FAULT_FLAG_WRITE;
|
fault_flags |= FAULT_FLAG_WRITE;
|
||||||
if (*flags & FOLL_REMOTE)
|
if (*flags & FOLL_REMOTE)
|
||||||
fault_flags |= FAULT_FLAG_REMOTE;
|
fault_flags |= FAULT_FLAG_REMOTE;
|
||||||
if (nonblocking)
|
if (locked)
|
||||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||||
if (*flags & FOLL_NOWAIT)
|
if (*flags & FOLL_NOWAIT)
|
||||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
|
||||||
|
@ -889,8 +889,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret & VM_FAULT_RETRY) {
|
if (ret & VM_FAULT_RETRY) {
|
||||||
if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
|
if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
|
||||||
*nonblocking = 0;
|
*locked = 0;
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -967,7 +967,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
|
||||||
* only intends to ensure the pages are faulted in.
|
* only intends to ensure the pages are faulted in.
|
||||||
* @vmas: array of pointers to vmas corresponding to each page.
|
* @vmas: array of pointers to vmas corresponding to each page.
|
||||||
* Or NULL if the caller does not require them.
|
* Or NULL if the caller does not require them.
|
||||||
* @nonblocking: whether waiting for disk IO or mmap_sem contention
|
* @locked: whether we're still with the mmap_sem held
|
||||||
*
|
*
|
||||||
* Returns either number of pages pinned (which may be less than the
|
* Returns either number of pages pinned (which may be less than the
|
||||||
* number requested), or an error. Details about the return value:
|
* number requested), or an error. Details about the return value:
|
||||||
|
@ -1002,13 +1002,11 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
|
||||||
* appropriate) must be called after the page is finished with, and
|
* appropriate) must be called after the page is finished with, and
|
||||||
* before put_page is called.
|
* before put_page is called.
|
||||||
*
|
*
|
||||||
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO
|
* If @locked != NULL, *@locked will be set to 0 when mmap_sem is
|
||||||
* or mmap_sem contention, and if waiting is needed to pin all pages,
|
* released by an up_read(). That can happen if @gup_flags does not
|
||||||
* *@nonblocking will be set to 0. Further, if @gup_flags does not
|
* have FOLL_NOWAIT.
|
||||||
* include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
|
|
||||||
* this case.
|
|
||||||
*
|
*
|
||||||
* A caller using such a combination of @nonblocking and @gup_flags
|
* A caller using such a combination of @locked and @gup_flags
|
||||||
* must therefore hold the mmap_sem for reading only, and recognize
|
* must therefore hold the mmap_sem for reading only, and recognize
|
||||||
* when it's been released. Otherwise, it must be held for either
|
* when it's been released. Otherwise, it must be held for either
|
||||||
* reading or writing and will not be released.
|
* reading or writing and will not be released.
|
||||||
|
@ -1020,7 +1018,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
|
||||||
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
unsigned long start, unsigned long nr_pages,
|
unsigned long start, unsigned long nr_pages,
|
||||||
unsigned int gup_flags, struct page **pages,
|
unsigned int gup_flags, struct page **pages,
|
||||||
struct vm_area_struct **vmas, int *nonblocking)
|
struct vm_area_struct **vmas, int *locked)
|
||||||
{
|
{
|
||||||
long ret = 0, i = 0;
|
long ret = 0, i = 0;
|
||||||
struct vm_area_struct *vma = NULL;
|
struct vm_area_struct *vma = NULL;
|
||||||
|
@ -1066,7 +1064,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
if (is_vm_hugetlb_page(vma)) {
|
if (is_vm_hugetlb_page(vma)) {
|
||||||
i = follow_hugetlb_page(mm, vma, pages, vmas,
|
i = follow_hugetlb_page(mm, vma, pages, vmas,
|
||||||
&start, &nr_pages, i,
|
&start, &nr_pages, i,
|
||||||
gup_flags, nonblocking);
|
gup_flags, locked);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1084,7 +1082,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
page = follow_page_mask(vma, start, foll_flags, &ctx);
|
page = follow_page_mask(vma, start, foll_flags, &ctx);
|
||||||
if (!page) {
|
if (!page) {
|
||||||
ret = faultin_page(tsk, vma, start, &foll_flags,
|
ret = faultin_page(tsk, vma, start, &foll_flags,
|
||||||
nonblocking);
|
locked);
|
||||||
switch (ret) {
|
switch (ret) {
|
||||||
case 0:
|
case 0:
|
||||||
goto retry;
|
goto retry;
|
||||||
|
@ -1345,7 +1343,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
|
||||||
* @vma: target vma
|
* @vma: target vma
|
||||||
* @start: start address
|
* @start: start address
|
||||||
* @end: end address
|
* @end: end address
|
||||||
* @nonblocking:
|
* @locked: whether the mmap_sem is still held
|
||||||
*
|
*
|
||||||
* This takes care of mlocking the pages too if VM_LOCKED is set.
|
* This takes care of mlocking the pages too if VM_LOCKED is set.
|
||||||
*
|
*
|
||||||
|
@ -1353,14 +1351,14 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
|
||||||
*
|
*
|
||||||
* vma->vm_mm->mmap_sem must be held.
|
* vma->vm_mm->mmap_sem must be held.
|
||||||
*
|
*
|
||||||
* If @nonblocking is NULL, it may be held for read or write and will
|
* If @locked is NULL, it may be held for read or write and will
|
||||||
* be unperturbed.
|
* be unperturbed.
|
||||||
*
|
*
|
||||||
* If @nonblocking is non-NULL, it must held for read only and may be
|
* If @locked is non-NULL, it must held for read only and may be
|
||||||
* released. If it's released, *@nonblocking will be set to 0.
|
* released. If it's released, *@locked will be set to 0.
|
||||||
*/
|
*/
|
||||||
long populate_vma_page_range(struct vm_area_struct *vma,
|
long populate_vma_page_range(struct vm_area_struct *vma,
|
||||||
unsigned long start, unsigned long end, int *nonblocking)
|
unsigned long start, unsigned long end, int *locked)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
unsigned long nr_pages = (end - start) / PAGE_SIZE;
|
unsigned long nr_pages = (end - start) / PAGE_SIZE;
|
||||||
|
@ -1395,7 +1393,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
|
||||||
* not result in a stack expansion that recurses back here.
|
* not result in a stack expansion that recurses back here.
|
||||||
*/
|
*/
|
||||||
return __get_user_pages(current, mm, start, nr_pages, gup_flags,
|
return __get_user_pages(current, mm, start, nr_pages, gup_flags,
|
||||||
NULL, NULL, nonblocking);
|
NULL, NULL, locked);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -4272,7 +4272,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||||
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
struct page **pages, struct vm_area_struct **vmas,
|
struct page **pages, struct vm_area_struct **vmas,
|
||||||
unsigned long *position, unsigned long *nr_pages,
|
unsigned long *position, unsigned long *nr_pages,
|
||||||
long i, unsigned int flags, int *nonblocking)
|
long i, unsigned int flags, int *locked)
|
||||||
{
|
{
|
||||||
unsigned long pfn_offset;
|
unsigned long pfn_offset;
|
||||||
unsigned long vaddr = *position;
|
unsigned long vaddr = *position;
|
||||||
|
@ -4343,7 +4343,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
spin_unlock(ptl);
|
spin_unlock(ptl);
|
||||||
if (flags & FOLL_WRITE)
|
if (flags & FOLL_WRITE)
|
||||||
fault_flags |= FAULT_FLAG_WRITE;
|
fault_flags |= FAULT_FLAG_WRITE;
|
||||||
if (nonblocking)
|
if (locked)
|
||||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||||
if (flags & FOLL_NOWAIT)
|
if (flags & FOLL_NOWAIT)
|
||||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY |
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY |
|
||||||
|
@ -4360,9 +4360,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ret & VM_FAULT_RETRY) {
|
if (ret & VM_FAULT_RETRY) {
|
||||||
if (nonblocking &&
|
if (locked &&
|
||||||
!(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
|
!(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
|
||||||
*nonblocking = 0;
|
*locked = 0;
|
||||||
*nr_pages = 0;
|
*nr_pages = 0;
|
||||||
/*
|
/*
|
||||||
* VM_FAULT_RETRY must not return an
|
* VM_FAULT_RETRY must not return an
|
||||||
|
|
Loading…
Reference in New Issue