Merge branch 'akpm' (patches from Andrew Morton)

Merge a bunch of fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  fs/proc/task_mmu.c: fix buffer overflow in add_page_map()
  arch: *: Kconfig: add "kernel/Kconfig.freezer" to "arch/*/Kconfig"
  ocfs2: fix null pointer dereference in ocfs2_dir_foreach_blk_id()
  x86 get_unmapped_area(): use proper mmap base for bottom-up direction
  ocfs2: fix NULL pointer dereference in ocfs2_duplicate_clusters_by_page
  ocfs2: Revert 40bd62e to avoid regression in extended allocation
  drivers/rtc/rtc-stmp3xxx.c: provide timeout for potentially endless loop polling a HW bit
  hugetlb: fix lockdep splat caused by pmd sharing
  aoe: adjust ref of head for compound page tails
  microblaze: fix clone syscall
  mm: save soft-dirty bits on file pages
  mm: save soft-dirty bits on swapped pages
  memcg: don't initialize kmem-cache destroying work for root caches
This commit is contained in:
Linus Torvalds 2013-08-14 10:04:43 -07:00
commit f1d6e17f54
32 changed files with 283 additions and 106 deletions

View File

@ -407,6 +407,12 @@ config CLONE_BACKWARDS2
help
Architecture has the first two arguments of clone(2) swapped.
config CLONE_BACKWARDS3
bool
help
Architecture has tls passed as the 3rd argument of clone(2),
not the 5th one.
config ODD_RT_SIGACTION
bool
help

View File

@ -158,6 +158,7 @@ source "kernel/Kconfig.hz"
endmenu
source "init/Kconfig"
source "kernel/Kconfig.freezer"
source "drivers/Kconfig"
source "fs/Kconfig"

View File

@ -28,7 +28,7 @@ config MICROBLAZE
select GENERIC_CLOCKEVENTS
select GENERIC_IDLE_POLL_SETUP
select MODULES_USE_ELF_RELA
select CLONE_BACKWARDS
select CLONE_BACKWARDS3
config SWAP
def_bool n

View File

@ -55,6 +55,7 @@ config GENERIC_CSUM
source "init/Kconfig"
source "kernel/Kconfig.freezer"
menu "Processor type and features"

View File

@ -87,6 +87,8 @@ config STACKTRACE_SUPPORT
source "init/Kconfig"
source "kernel/Kconfig.freezer"
config MMU
def_bool y

View File

@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
* _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
* into this range.
*/
#define PTE_FILE_MAX_BITS 28
#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
#define pte_to_pgoff(pte) \
((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
& ((1U << PTE_FILE_BITS1) - 1))) \
+ ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
& ((1U << PTE_FILE_BITS2) - 1)) \
<< (PTE_FILE_BITS1)) \
+ ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
& ((1U << PTE_FILE_BITS3) - 1)) \
<< (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
+ ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
<< (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
#define pgoff_to_pte(off) \
((pte_t) { .pte_low = \
((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
+ ((((off) >> PTE_FILE_BITS1) \
& ((1U << PTE_FILE_BITS2) - 1)) \
<< PTE_FILE_SHIFT2) \
+ ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
& ((1U << PTE_FILE_BITS3) - 1)) \
<< PTE_FILE_SHIFT3) \
+ ((((off) >> \
(PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
<< PTE_FILE_SHIFT4) \
+ _PAGE_FILE })
#else /* CONFIG_MEM_SOFT_DIRTY */
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
* split up the 29 bits of offset into this range:
* split up the 29 bits of offset into this range.
*/
#define PTE_FILE_MAX_BITS 29
#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
<< PTE_FILE_SHIFT3) \
+ _PAGE_FILE })
#endif /* CONFIG_MEM_SOFT_DIRTY */
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)

View File

@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
/*
* Bits 0, 6 and 7 are taken in the low part of the pte,
* put the 32 bits of offset into the high part.
*
* For soft-dirty tracking 11 bit is taken from
* the low part of pte as well.
*/
#define pte_to_pgoff(pte) ((pte).pte_high)
#define pgoff_to_pte(off) \

View File

@ -314,6 +314,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
static inline int pte_swp_soft_dirty(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
}
static inline pte_t pte_file_mksoft_dirty(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
}
static inline int pte_file_soft_dirty(pte_t pte)
{
return pte_flags(pte) & _PAGE_SOFT_DIRTY;
}
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.

View File

@ -61,12 +61,27 @@
* they do not conflict with each other.
*/
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
#else
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
* with swap entry format. On x86 bits 6 and 7 are *not* involved
* into swap entry computation, but bit 6 is used for nonlinear
* file mapping, so we borrow bit 7 for soft dirty tracking.
*/
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
#else
#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else

View File

@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
*begin = new_begin;
}
} else {
*begin = TASK_UNMAPPED_BASE;
*begin = mmap_legacy_base();
*end = TASK_SIZE;
}
}

View File

@ -98,7 +98,7 @@ static unsigned long mmap_base(void)
* Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
* does, but not when emulating X86_32
*/
static unsigned long mmap_legacy_base(void)
unsigned long mmap_legacy_base(void)
{
if (mmap_is_ia32())
return TASK_UNMAPPED_BASE;

View File

@ -906,16 +906,10 @@ bio_pageinc(struct bio *bio)
int i;
bio_for_each_segment(bv, bio, i) {
page = bv->bv_page;
/* Non-zero page count for non-head members of
* compound pages is no longer allowed by the kernel,
* but this has never been seen here.
* compound pages is no longer allowed by the kernel.
*/
if (unlikely(PageCompound(page)))
if (compound_trans_head(page) != page) {
pr_crit("page tail used for block I/O\n");
BUG();
}
page = compound_trans_head(bv->bv_page);
atomic_inc(&page->_count);
}
}
@ -924,10 +918,13 @@ static void
bio_pagedec(struct bio *bio)
{
struct bio_vec *bv;
struct page *page;
int i;
bio_for_each_segment(bv, bio, i)
atomic_dec(&bv->bv_page->_count);
bio_for_each_segment(bv, bio, i) {
page = compound_trans_head(bv->bv_page);
atomic_dec(&page->_count);
}
}
static void

View File

@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/rtc.h>
#include <linux/slab.h>
#include <linux/of_device.h>
@ -119,24 +120,39 @@ static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
}
#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
{
int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */
/*
* The datasheet doesn't say which way round the
* NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0,
* 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS
* The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010
* states:
* | The order in which registers are updated is
* | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds.
* | (This list is in bitfield order, from LSB to MSB, as they would
* | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT
* | register. For example, the Seconds register corresponds to
* | STALE_REGS or NEW_REGS containing 0x80.)
*/
while (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT))
cpu_relax();
do {
if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) &
(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)))
return 0;
udelay(1);
} while (--timeout > 0);
return (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? -ETIME : 0;
}
/* Time read/write */
static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
{
int ret;
struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
stmp3xxx_wait_time(rtc_data);
ret = stmp3xxx_wait_time(rtc_data);
if (ret)
return ret;
rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm);
return 0;
}
@ -146,8 +162,7 @@ static int stmp3xxx_rtc_set_mmss(struct device *dev, unsigned long t)
struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS);
stmp3xxx_wait_time(rtc_data);
return 0;
return stmp3xxx_wait_time(rtc_data);
}
/* interrupt(s) handler */

View File

@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
return inode;
}
/*
* Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
* be taken from reclaim -- unlike regular filesystems. This needs an
* annotation because huge_pmd_share() does an allocation under
* i_mmap_mutex.
*/
struct lock_class_key hugetlbfs_i_mmap_mutex_key;
static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct inode *dir,
umode_t mode, dev_t dev)
@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info;
inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
&hugetlbfs_i_mmap_mutex_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

View File

@ -1757,7 +1757,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
goto out;
} else if (ret == 1) {
clusters_need = wc->w_clen;
ret = ocfs2_refcount_cow(inode, filp, di_bh,
ret = ocfs2_refcount_cow(inode, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) {
mlog_errno(ret);

View File

@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode)
{
int ret;
struct ocfs2_empty_dir_priv priv = {
.ctx.actor = ocfs2_empty_dir_filldir
.ctx.actor = ocfs2_empty_dir_filldir,
};
memset(&priv, 0, sizeof(priv));
if (ocfs2_dir_indexed(inode)) {
ret = ocfs2_empty_dir_dx(inode, &priv);
if (ret)

View File

@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
goto out;
return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
out:
return status;
@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
zero_clusters = last_cpos - zero_cpos;
if (needs_cow) {
rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
zero_clusters, UINT_MAX);
if (rc) {
mlog_errno(rc);
@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
*meta_level = 1;
ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
if (ret)
mlog_errno(ret);
out:

View File

@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
ocfs2_quota_trans_credits(sb) + bits_wanted;
ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)

View File

@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle,
u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
p_cpos, new_p_cpos, len);
if (ret) {
mlog_errno(ret);

View File

@ -49,7 +49,6 @@
struct ocfs2_cow_context {
struct inode *inode;
struct file *file;
u32 cow_start;
u32 cow_len;
struct ocfs2_extent_tree data_et;
@ -66,7 +65,7 @@ struct ocfs2_cow_context {
u32 *num_clusters,
unsigned int *extent_flags);
int (*cow_duplicate_clusters)(handle_t *handle,
struct file *file,
struct inode *inode,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len);
};
@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
}
int ocfs2_duplicate_clusters_by_page(handle_t *handle,
struct file *file,
struct inode *inode,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len)
{
int ret = 0, partial;
struct inode *inode = file_inode(file);
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct super_block *sb = inode->i_sb;
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
struct page *page;
pgoff_t page_index;
@ -2978,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page));
if (PageReadahead(page)) {
page_cache_async_readahead(mapping,
&file->f_ra, file,
page, page_index,
readahead_pages);
}
if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
if (ret) {
@ -3004,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
}
}
ocfs2_map_and_dirty_page(inode, handle, from, to,
ocfs2_map_and_dirty_page(inode,
handle, from, to,
page, 0, &new_block);
mark_page_accessed(page);
unlock:
@ -3020,12 +3011,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
}
int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
struct file *file,
struct inode *inode,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len)
{
int ret = 0;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@ -3150,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
/*If the old clusters is unwritten, no need to duplicate. */
if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
ret = context->cow_duplicate_clusters(handle, context->file,
ret = context->cow_duplicate_clusters(handle, context->inode,
cpos, old, new, len);
if (ret) {
mlog_errno(ret);
@ -3428,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
return ret;
}
static void ocfs2_readahead_for_cow(struct inode *inode,
struct file *file,
u32 start, u32 len)
{
struct address_space *mapping;
pgoff_t index;
unsigned long num_pages;
int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
if (!file)
return;
mapping = file->f_mapping;
num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
if (!num_pages)
num_pages = 1;
index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
page_cache_sync_readahead(mapping, &file->f_ra, file,
index, num_pages);
}
/*
* Starting at cpos, try to CoW write_len clusters. Don't CoW
* past max_cpos. This will stop when it runs into a hole or an
* unrefcounted extent.
*/
static int ocfs2_refcount_cow_hunk(struct inode *inode,
struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@ -3485,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
BUG_ON(cow_len == 0);
ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
if (!context) {
ret = -ENOMEM;
@ -3508,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
context->ref_root_bh = ref_root_bh;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
context->get_clusters = ocfs2_di_get_clusters;
context->file = file;
ocfs2_init_dinode_extent_tree(&context->data_et,
INODE_CACHE(inode), di_bh);
@ -3537,7 +3501,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
* clusters between cpos and cpos+write_len are safe to modify.
*/
int ocfs2_refcount_cow(struct inode *inode,
struct file *file,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{
@ -3557,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode,
num_clusters = write_len;
if (ext_flags & OCFS2_EXT_REFCOUNTED) {
ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
num_clusters, max_cpos);
if (ret) {
mlog_errno(ret);

View File

@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
int *credits,
int *ref_blocks);
int ocfs2_refcount_cow(struct inode *inode,
struct file *filep, struct buffer_head *di_bh,
struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos);
typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
u32 cpos, u32 write_len,
struct ocfs2_post_refcount *post);
int ocfs2_duplicate_clusters_by_page(handle_t *handle,
struct file *file,
struct inode *inode,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len);
int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
struct file *file,
struct inode *inode,
u32 cpos, u32 old_cluster,
u32 new_cluster, u32 new_len);
int ocfs2_cow_sync_writeback(struct super_block *sb,

View File

@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
ptent = pte_wrprotect(ptent);
ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
if (pte_present(ptent)) {
ptent = pte_wrprotect(ptent);
ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
} else if (pte_file(ptent)) {
ptent = pte_file_clear_soft_dirty(ptent);
}
set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
if (!pte_present(ptent))
continue;
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
continue;
}
if (!pte_present(ptent))
continue;
page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
@ -859,7 +868,7 @@ typedef struct {
} pagemap_entry_t;
struct pagemapread {
int pos, len;
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool v2;
};
@ -867,7 +876,7 @@ struct pagemapread {
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
#define PM_ENTRY_BYTES sizeof(u64)
#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
flags = PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
} else if (is_swap_pte(pte)) {
swp_entry_t entry = pte_to_swp_entry(pte);
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
flags2 |= __PM_SOFT_DIRTY;
entry = pte_to_swp_entry(pte);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags = PM_SWAP;
@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
goto out_task;
pm.v2 = soft_dirty_cleared;
pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
goto out_task;

View File

@ -417,6 +417,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
return pmd;
}
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
return pte;
}
static inline int pte_swp_soft_dirty(pte_t pte)
{
return 0;
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return pte;
}
static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
{
return pte;
}
static inline pte_t pte_file_mksoft_dirty(pte_t pte)
{
return pte;
}
static inline int pte_file_soft_dirty(pte_t pte)
{
return 0;
}
#endif
#ifndef __HAVE_PFNMAP_TRACKING

View File

@ -314,6 +314,7 @@ struct nsproxy;
struct user_namespace;
#ifdef CONFIG_MMU
extern unsigned long mmap_legacy_base(void);
extern void arch_pick_mmap_layout(struct mm_struct *mm);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,

View File

@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
swp_entry_t arch_entry;
BUG_ON(pte_file(pte));
if (pte_swp_soft_dirty(pte))
pte = pte_swp_clear_soft_dirty(pte);
arch_entry = __pte_to_swp_entry(pte);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
}

View File

@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void);
asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
int __user *);
#else
#ifdef CONFIG_CLONE_BACKWARDS3
asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
int __user *, int);
#else
asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
int __user *, int);
#endif
#endif
asmlinkage long sys_execve(const char __user *filename,
const char __user *const __user *argv,

View File

@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
int, stack_size,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,

View File

@ -57,17 +57,22 @@ static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
int err = -ENOMEM;
pte_t *pte;
pte_t *pte, ptfile;
spinlock_t *ptl;
pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
goto out;
if (!pte_none(*pte))
zap_pte(mm, vma, addr, pte);
ptfile = pgoff_to_pte(pgoff);
set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
if (!pte_none(*pte)) {
if (pte_present(*pte) && pte_soft_dirty(*pte))
pte_file_mksoft_dirty(ptfile);
zap_pte(mm, vma, addr, pte);
}
set_pte_at(mm, addr, pte, ptfile);
/*
* We don't need to run update_mmu_cache() here because the "file pte"
* being installed by install_file_pte() is not a real pte - it's a

View File

@ -3195,11 +3195,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
if (!s->memcg_params)
return -ENOMEM;
INIT_WORK(&s->memcg_params->destroy,
kmem_cache_destroy_work_func);
if (memcg) {
s->memcg_params->memcg = memcg;
s->memcg_params->root_cache = root_cache;
INIT_WORK(&s->memcg_params->destroy,
kmem_cache_destroy_work_func);
} else
s->memcg_params->is_root_cache = true;

View File

@ -1141,9 +1141,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
addr) != page->index)
set_pte_at(mm, addr, pte,
pgoff_to_pte(page->index));
addr) != page->index) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(ptent))
pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, addr, pte, ptfile);
}
if (PageAnon(page))
rss[MM_ANONPAGES]--;
else {
@ -3115,6 +3118,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
exclusive = 1;
}
flush_icache_page(vma, page);
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
set_pte_at(mm, address, page_table, pte);
if (page == swapcache)
do_page_add_anon_rmap(page, vma, address, exclusive);
@ -3408,6 +3413,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
entry = mk_pte(page, vma->vm_page_prot);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte))
pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);

View File

@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
swp_entry_to_pte(make_hwpoison_entry(page)));
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
pte_t swp_pte;
if (PageSwapCache(page)) {
/*
@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
entry = make_migration_entry(page, pte_write(pteval));
}
set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
set_pte_at(mm, address, pte, swp_pte);
BUG_ON(pte_file(*pte));
} else if (IS_ENABLED(CONFIG_MIGRATION) &&
(TTU_ACTION(flags) == TTU_MIGRATION)) {
@ -1401,8 +1405,12 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
pteval = ptep_clear_flush(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address))
set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
if (page->index != linear_page_index(vma, address)) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(pteval))
pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, address, pte, ptfile);
}
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))

View File

@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free)
}
#endif /* CONFIG_HIBERNATION */
static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
{
#ifdef CONFIG_MEM_SOFT_DIRTY
/*
* When pte keeps soft dirty bit the pte generated
* from swap entry does not has it, still it's same
* pte from logical point of view.
*/
pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
#else
return pte_same(pte, swp_pte);
#endif
}
/*
* No need to decide whether this PTE shares the swap entry with others,
* just let do_wp_page work it out if a write is requested later - to
@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
}
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
mem_cgroup_cancel_charge_swapin(memcg);
ret = 0;
goto out;
@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* swapoff spends a _lot_ of time in this loop!
* Test inline before going to call unuse_pte.
*/
if (unlikely(pte_same(*pte, swp_pte))) {
if (unlikely(maybe_same_pte(*pte, swp_pte))) {
pte_unmap(pte);
ret = unuse_pte(vma, pmd, addr, entry, page);
if (ret)