Merge branch 'akpm' (patches from Andrew)

Mergr misc fixes from Andrew Morton:
 "28 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits)
  fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate()
  mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine
  autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored"
  autofs: revert "autofs: take more care to not update last_used on path walk"
  fs/fat/inode.c: fix sb_rdonly() change
  mm, memcg: fix mem_cgroup_swapout() for THPs
  mm: migrate: fix an incorrect call of prep_transhuge_page()
  kmemleak: add scheduling point to kmemleak_scan()
  scripts/bloat-o-meter: don't fail with division by 0
  fs/mbcache.c: make count_objects() more robust
  Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical"
  mm/madvise.c: fix madvise() infinite loop under special circumstances
  exec: avoid RLIMIT_STACK races with prlimit()
  IB/core: disable memory registration of filesystem-dax vmas
  v4l2: disable filesystem-dax mapping support
  mm: fail get_vaddr_frames() for filesystem-dax mappings
  mm: introduce get_user_pages_longterm
  device-dax: implement ->split() to catch invalid munmap attempts
  mm, hugetlbfs: introduce ->split() to vm_operations_struct
  scripts/faddr2line: extend usage on generic arch
  ...
This commit is contained in:
Linus Torvalds 2017-11-29 19:12:44 -08:00
commit a0908a1b7d
40 changed files with 238 additions and 101 deletions

View File

@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
value lower than this limit will be ignored and the old configuration will be
retained.
Note: the value of dirty_bytes also must be set greater than
dirty_background_bytes or the amount of memory corresponding to
dirty_background_ratio.
==============================================================
dirty_expire_centisecs
@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data.
The total available memory is not equal to total system memory.
Note: dirty_ratio must be set greater than dirty_background_ratio or
ratio corresponding to dirty_background_bytes.
==============================================================
dirty_writeback_centisecs

View File

@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
}
#define __HAVE_ARCH_PTE_SPECIAL
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))

View File

@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd)
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))

View File

@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_WRITE);

View File

@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif /* CONFIG_NUMA_BALANCING */
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))

View File

@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud)
return pud;
}
#define pud_write pud_write
static inline int pud_write(pud_t pud)
{
return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
}
static inline pud_t pud_mkclean(pud_t pud)
{
if (pud_large(pud)) {

View File

@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return pte_pfn(pte);
}
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write pmd_write
static inline unsigned long pmd_write(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));

View File

@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
if (!(pmd_val(pmd) & _PAGE_VALID))
return 0;
if (write && !pmd_write(pmd))
if (!pmd_access_permitted(pmd, write))
return 0;
refs = 0;
@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
if (!(pud_val(pud) & _PAGE_VALID))
return 0;
if (write && !pud_write(pud))
if (!pud_access_permitted(pud, write))
return 0;
refs = 0;

View File

@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp)
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
#define __HAVE_ARCH_PMD_WRITE
#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))

View File

@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_RW;
@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
}
#define pud_write pud_write
static inline int pud_write(pud_t pud)
{
return pud_flags(pud) & _PAGE_RW;
}
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*

View File

@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf)
return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
}
static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
{
struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
if (!IS_ALIGNED(addr, dax_region->align))
return -EINVAL;
return 0;
}
static const struct vm_operations_struct dax_vm_ops = {
.fault = dev_dax_fault,
.huge_fault = dev_dax_huge_fault,
.split = dev_dax_split,
};
static int dax_mmap(struct file *filp, struct vm_area_struct *vma)

View File

@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
sg_list_start = umem->sg_head.sgl;
while (npages) {
ret = get_user_pages(cur_base,
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);

View File

@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
data, size, dma->nr_pages);
err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
flags, dma->pages, NULL);
if (err != dma->nr_pages) {
dma->nr_pages = (err >= 0) ? err : 0;
dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages);
dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
dma->nr_pages);
return err < 0 ? err : -EINVAL;
}
return 0;

View File

@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
ino->last_used = jiffies;
}
ino->last_used = jiffies;
return status;
}
@ -321,22 +321,17 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
if (new == dentry)
dput(new);
else {
struct autofs_info *ino;
ino = autofs4_dentry_ino(new);
ino->last_used = jiffies;
dput(path->dentry);
path->dentry = new;
}
}
return path->dentry;
}

View File

@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
if (pfn != pmd_pfn(*pmdp))
goto unlock_pmd;
if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
if (!pmd_dirty(*pmdp)
&& !pmd_access_permitted(*pmdp, WRITE))
goto unlock_pmd;
flush_cache_page(vma, address, pfn);

View File

@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm)
* avoid bad behavior from the prior rlimits. This has to
* happen before arch_pick_mmap_layout(), which examines
* RLIMIT_STACK, but after the point of no return to avoid
* needing to clean up the change on failure.
* races from other threads changing the limits. This also
* must be protected from races with prlimit() calls.
*/
task_lock(current->group_leader);
if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
task_unlock(current->group_leader);
}
arch_pick_mmap_layout(current->mm);

View File

@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void)
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
int new_rdonly;
bool new_rdonly;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);

View File

@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* page_put due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
* page_put due to reference from alloc_huge_page()
*/
put_page(page);
unlock_page(page);
put_page(page);
}
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)

View File

@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
struct mb_cache *cache = container_of(shrink, struct mb_cache,
c_shrink);
/* Unlikely, but not impossible */
if (unlikely(cache->c_entry_count < 0))
return 0;
return cache->c_entry_count;
}

View File

@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE |
LOOKUP_AUTOMOUNT))) {
/* Positive dentry that isn't meant to trigger an
* automount, EISDIR will allow it to be used,
* otherwise there's no mount here "now" so return
* ENOENT.
*/
if (path->dentry->d_inode)
LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
path->dentry->d_inode)
return -EISDIR;
else
return -ENOENT;
}
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;

View File

@ -805,15 +805,23 @@ static inline int pmd_trans_huge(pmd_t pmd)
{
return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
BUG();
return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* pmd_write */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifndef pud_write
static inline int pud_write(pud_t pud)
{
BUG();
return 0;
}
#endif /* pud_write */
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))

View File

@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat)
static inline int vfs_fstatat(int dfd, const char __user *filename,
struct kstat *stat, int flags)
{
return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
stat, STATX_BASIC_STATS);
}
static inline int vfs_fstat(int fd, struct kstat *stat)
{
@ -3194,6 +3195,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
static inline bool vma_is_fsdax(struct vm_area_struct *vma)
{
struct inode *inode;
if (!vma->vm_file)
return false;
if (!vma_is_dax(vma))
return false;
inode = file_inode(vma->vm_file);
if (inode->i_mode == S_IFCHR)
return false; /* device-dax */
return true;
}
static inline int iocb_flags(struct file *file)
{
int res = 0;

View File

@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd)
}
#endif
#ifndef pud_write
static inline int pud_write(pud_t pud)
{
BUG();
return 0;
}
#endif
#define HUGETLB_ANON_FILE "anon_hugepage"
enum {

View File

@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page,
new_page = __alloc_pages_nodemask(gfp_mask, order,
preferred_nid, nodemask);
if (new_page && PageTransHuge(page))
if (new_page && PageTransHuge(new_page))
prep_transhuge_page(new_page);
return new_page;

View File

@ -377,6 +377,7 @@ enum page_entry_size {
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*split)(struct vm_area_struct * area, unsigned long addr);
int (*mremap)(struct vm_area_struct * area);
int (*fault)(struct vm_fault *vmf);
int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
@ -1379,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
#ifdef CONFIG_FS_DAX
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
#else
static inline long get_user_pages_longterm(unsigned long start,
unsigned long nr_pages, unsigned int gup_flags,
struct page **pages, struct vm_area_struct **vmas)
{
return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
}
#endif /* CONFIG_FS_DAX */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);

View File

@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
ret = -EFAULT;
goto out;
}
/*
* While get_vaddr_frames() could be used for transient (kernel
* controlled lifetime) pinning of memory pages all current
* users establish long term (userspace controlled lifetime)
* page pinning. Treat get_vaddr_frames() like
* get_user_pages_longterm() and disallow it for filesystem-dax
* mappings.
*/
if (vma_is_fsdax(vma))
return -EOPNOTSUPP;
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
vec->got_ref = true;
vec->is_pfns = false;

View File

@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
return pte_write(pte) ||
return pte_access_permitted(pte, WRITE) ||
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);
#ifdef CONFIG_FS_DAX
/*
* This is the same as get_user_pages() in that it assumes we are
* operating on the current task's mm, but it goes further to validate
* that the vmas associated with the address range are suitable for
* longterm elevated page reference counts. For example, filesystem-dax
* mappings are subject to the lifetime enforced by the filesystem and
* we need guarantees that longterm users like RDMA and V4L2 only
* establish mappings that have a kernel enforced revocation mechanism.
*
* "longterm" == userspace controlled elevated page count lifetime.
* Contrast this to iov_iter_get_pages() usages which are transient.
*/
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas_arg)
{
struct vm_area_struct **vmas = vmas_arg;
struct vm_area_struct *vma_prev = NULL;
long rc, i;
if (!pages)
return -EINVAL;
if (!vmas) {
vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!vmas)
return -ENOMEM;
}
rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
for (i = 0; i < rc; i++) {
struct vm_area_struct *vma = vmas[i];
if (vma == vma_prev)
continue;
vma_prev = vma;
if (vma_is_fsdax(vma))
break;
}
/*
* Either get_user_pages() failed, or the vma validation
* succeeded, in either case we don't need to put_page() before
* returning.
*/
if (i >= rc)
goto out;
for (i = 0; i < rc; i++)
put_page(pages[i]);
rc = -EOPNOTSUPP;
out:
if (vmas != vmas_arg)
kfree(vmas);
return rc;
}
EXPORT_SYMBOL(get_user_pages_longterm);
#endif /* CONFIG_FS_DAX */
/**
* populate_vma_page_range() - populate a range of pages in the vma.
* @vma: target vma

View File

@ -391,11 +391,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
if (pmd_protnone(pmd))
return hmm_vma_walk_clear(start, end, walk);
if (write_fault && !pmd_write(pmd))
if (!pmd_access_permitted(pmd, write_fault))
return hmm_vma_walk_clear(start, end, walk);
pfn = pmd_pfn(pmd) + pte_index(addr);
flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
return 0;
@ -456,11 +456,11 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
continue;
}
if (write_fault && !pte_write(pte))
if (!pte_access_permitted(pte, write_fault))
goto fault;
pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
continue;
fault:

View File

@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
*/
WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
if (flags & FOLL_WRITE && !pmd_write(*pmd))
if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
return NULL;
if (pmd_present(*pmd) && pmd_devmap(*pmd))
@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pud_lockptr(mm, pud));
if (flags & FOLL_WRITE && !pud_write(*pud))
if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
return NULL;
if (pud_present(*pud) && pud_devmap(*pud))
@ -1386,7 +1386,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
*/
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
return pmd_write(pmd) ||
return pmd_access_permitted(pmd, WRITE) ||
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
}

View File

@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
}
}
static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
return 0;
}
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
.fault = hugetlb_vm_op_fault,
.open = hugetlb_vm_op_open,
.close = hugetlb_vm_op_close,
.split = hugetlb_vm_op_split,
};
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@ -4627,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
p4d = p4d_offset(pgd, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (pud) {
if (sz == PUD_SIZE) {

View File

@ -1523,6 +1523,8 @@ static void kmemleak_scan(void)
if (page_count(page) == 0)
continue;
scan_block(page, page + 1, NULL);
if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
cond_resched();
}
}
put_online_mems();

View File

@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma,
{
struct file *file = vma->vm_file;
*prev = vma;
#ifdef CONFIG_SWAP
if (!file) {
*prev = vma;
force_swapin_readahead(vma, start, end);
return 0;
}
if (shmem_mapping(file->f_mapping)) {
*prev = vma;
force_shm_swapin_readahead(vma, start, end,
file->f_mapping);
return 0;
@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma,
return 0;
}
*prev = vma;
start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (end > vma->vm_end)
end = vma->vm_end;

View File

@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
memcg_check_events(memcg, page);
if (!mem_cgroup_is_root(memcg))
css_put(&memcg->css);
css_put_many(&memcg->css, nr_entries);
}
/**

View File

@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
if (unlikely(!pte_same(*vmf->pte, entry)))
goto unlock;
if (vmf->flags & FAULT_FLAG_WRITE) {
if (!pte_write(entry))
if (!pte_access_permitted(entry, WRITE))
return do_wp_page(vmf);
entry = pte_mkdirty(entry);
}
@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* NUMA case for anonymous PUDs would go here */
if (dirty && !pud_write(orig_pud)) {
if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
ret = wp_huge_pud(&vmf, orig_pud);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&vmf, orig_pmd);
if (dirty && !pmd_write(orig_pmd)) {
if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
ret = wp_huge_pmd(&vmf, orig_pmd);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma,
goto out;
pte = *ptep;
if ((flags & FOLL_WRITE) && !pte_write(pte))
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
goto unlock;
*prot = pgprot_val(pte_pgprot(pte));

View File

@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *new;
int err;
if (is_vm_hugetlb_page(vma) && (addr &
~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
if (vma->vm_ops && vma->vm_ops->split) {
err = vma->vm_ops->split(vma, addr);
if (err)
return err;
}
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)

View File

@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
*/
set_bit(MMF_UNSTABLE, &mm->flags);
tlb_gather_mmu(&tlb, mm, 0, -1);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_dontneed_vma(vma))
continue;
@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
NULL);
tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
}
}
tlb_finish_mmu(&tlb, 0, -1);
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),

View File

@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
else
bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
if (unlikely(bg_thresh >= thresh)) {
pr_warn("vm direct limit must be set greater than background limit.\n");
if (bg_thresh >= thresh)
bg_thresh = thresh / 2;
}
tsk = current;
if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;

View File

@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone)
if (WARN_ON_ONCE(!mm_percpu_wq))
return;
/* Workqueues cannot recurse */
if (current->flags & PF_WQ_WORKER)
return;
/*
* Do not drain if one is already in progress unless it's specific to
* a zone. Such callers are primarily CMA and memory hotplug and need
@ -7656,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end,
/*
* In case of -EBUSY, we'd like to know which page causes problem.
* So, just fall through. We will check it in test_pages_isolated().
* So, just fall through. test_pages_isolated() has a tracepoint
* which will report the busy page.
*
* It is possible that busy pages could become available before
* the call to test_pages_isolated, and the range will actually be
* allocated. So, if we fall through be sure to clear ret so that
* -EBUSY is not accidentally used or returned to caller.
*/
ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret && ret != -EBUSY)
goto done;
ret =0;
/*
* Pages from [start, end) are within a MAX_ORDER_NR_PAGES

View File

@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc):
for d, n in delta:
if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d))
print("Total: Before=%d, After=%d, chg %+.2f%%" % \
(otot, ntot, (ntot - otot)*100.0/otot))
if otot:
percent = (ntot - otot) * 100.0 / otot
else:
percent = 0
print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent))
if sys.argv[1] == "-c":
print_result("Function", "tT", 3)

View File

@ -44,9 +44,16 @@
set -o errexit
set -o nounset
READELF="${CROSS_COMPILE}readelf"
ADDR2LINE="${CROSS_COMPILE}addr2line"
SIZE="${CROSS_COMPILE}size"
NM="${CROSS_COMPILE}nm"
command -v awk >/dev/null 2>&1 || die "awk isn't installed"
command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed"
command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed"
command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
usage() {
echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
@ -69,10 +76,10 @@ die() {
find_dir_prefix() {
local objfile=$1
local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
[[ -z $start_kernel_addr ]] && return
local file_line=$(addr2line -e $objfile $start_kernel_addr)
local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
[[ -z $file_line ]] && return
local prefix=${file_line%init/main.c:*}
@ -104,7 +111,7 @@ __faddr2line() {
# Go through each of the object's symbols which match the func name.
# In rare cases there might be duplicates.
file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}')
file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}')
while read symbol; do
local fields=($symbol)
local sym_base=0x${fields[0]}
@ -156,10 +163,10 @@ __faddr2line() {
# pass real address to addr2line
echo "$func+$offset/$sym_size:"
addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
DONE=1
done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
}
[[ $# -lt 2 ]] && usage