mm: introduce VM_POPULATE flag to better deal with racy userspace programs

The vm_populate() code populates user mappings without constantly
holding the mmap_sem.  This makes it susceptible to racy userspace
programs: the user mappings may change while vm_populate() is running,
and in this case vm_populate() may end up populating the new mapping
instead of the old one.

In order to reduce the possibility of userspace getting surprised by
this behavior, this change introduces the VM_POPULATE vma flag which
gets set on vmas we want vm_populate() to work on.  This way
vm_populate() may still end up populating the new mapping after such a
race, but only if the new mapping is also one that the user has
requested (using MAP_POPULATE, MAP_LOCKED or mlock) to be populated.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Michel Lespinasse 2013-02-22 16:32:46 -08:00 committed by Linus Torvalds
parent cea10a19b7
commit 1869305009
5 changed files with 25 additions and 15 deletions

View File

@ -87,6 +87,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
#define VM_POPULATE 0x00001000
#define VM_LOCKED 0x00002000 #define VM_LOCKED 0x00002000
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ #define VM_IO 0x00004000 /* Memory mapped I/O or similar */

View File

@ -79,6 +79,8 @@ calc_vm_flag_bits(unsigned long flags)
{ {
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) |
(((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ?
VM_POPULATE : 0);
} }
#endif /* _LINUX_MMAN_H */ #endif /* _LINUX_MMAN_H */

View File

@ -204,8 +204,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
unsigned long addr; unsigned long addr;
struct file *file = get_file(vma->vm_file); struct file *file = get_file(vma->vm_file);
addr = mmap_region(file, start, size, vm_flags = vma->vm_flags;
vma->vm_flags, pgoff); if (!(flags & MAP_NONBLOCK))
vm_flags |= VM_POPULATE;
addr = mmap_region(file, start, size, vm_flags, pgoff);
fput(file); fput(file);
if (IS_ERR_VALUE(addr)) { if (IS_ERR_VALUE(addr)) {
err = addr; err = addr;
@ -224,6 +226,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
mutex_unlock(&mapping->i_mmap_mutex); mutex_unlock(&mapping->i_mmap_mutex);
} }
if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) {
if (!has_write_lock)
goto get_write_lock;
vma->vm_flags |= VM_POPULATE;
}
if (vma->vm_flags & VM_LOCKED) { if (vma->vm_flags & VM_LOCKED) {
/* /*
* drop PG_Mlocked flag for over-mapped range * drop PG_Mlocked flag for over-mapped range

View File

@ -340,9 +340,9 @@ static int do_mlock(unsigned long start, size_t len, int on)
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
newflags = vma->vm_flags | VM_LOCKED; newflags = vma->vm_flags & ~VM_LOCKED;
if (!on) if (on)
newflags &= ~VM_LOCKED; newflags |= VM_LOCKED | VM_POPULATE;
tmp = vma->vm_end; tmp = vma->vm_end;
if (tmp > end) if (tmp > end)
@ -402,7 +402,8 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
* range with the first VMA. Also, skip undesirable VMA types. * range with the first VMA. Also, skip undesirable VMA types.
*/ */
nend = min(end, vma->vm_end); nend = min(end, vma->vm_end);
if (vma->vm_flags & (VM_IO | VM_PFNMAP)) if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) !=
VM_POPULATE)
continue; continue;
if (nstart < vma->vm_start) if (nstart < vma->vm_start)
nstart = vma->vm_start; nstart = vma->vm_start;
@ -475,18 +476,18 @@ static int do_mlockall(int flags)
struct vm_area_struct * vma, * prev = NULL; struct vm_area_struct * vma, * prev = NULL;
if (flags & MCL_FUTURE) if (flags & MCL_FUTURE)
current->mm->def_flags |= VM_LOCKED; current->mm->def_flags |= VM_LOCKED | VM_POPULATE;
else else
current->mm->def_flags &= ~VM_LOCKED; current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE);
if (flags == MCL_FUTURE) if (flags == MCL_FUTURE)
goto out; goto out;
for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
vm_flags_t newflags; vm_flags_t newflags;
newflags = vma->vm_flags | VM_LOCKED; newflags = vma->vm_flags & ~VM_LOCKED;
if (!(flags & MCL_CURRENT)) if (flags & MCL_CURRENT)
newflags &= ~VM_LOCKED; newflags |= VM_LOCKED | VM_POPULATE;
/* Ignore errors */ /* Ignore errors */
mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);

View File

@ -1306,9 +1306,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
} }
addr = mmap_region(file, addr, len, vm_flags, pgoff); addr = mmap_region(file, addr, len, vm_flags, pgoff);
if (!IS_ERR_VALUE(addr) && if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE))
((vm_flags & VM_LOCKED) ||
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
*populate = true; *populate = true;
return addr; return addr;
} }