Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton: "4 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: thp: calculate the mapcount correctly for THP pages during WP faults
  ksm: fix conflict between mmput and scan_get_next_rmap_item
  ocfs2: fix posix_acl_create deadlock
  ocfs2: revert using ocfs2_acl_chmod to avoid inode cluster lock hang
commit a2ccb68b1e
fs/ocfs2/acl.c

@@ -322,3 +322,90 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
         brelse(di_bh);
         return acl;
 }
+
+int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
+{
+        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+        struct posix_acl *acl;
+        int ret;
+
+        if (S_ISLNK(inode->i_mode))
+                return -EOPNOTSUPP;
+
+        if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+                return 0;
+
+        acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+        if (IS_ERR(acl) || !acl)
+                return PTR_ERR(acl);
+        ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+        if (ret)
+                return ret;
+        ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+                            acl, NULL, NULL);
+        posix_acl_release(acl);
+        return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+                   struct inode *inode,
+                   struct inode *dir,
+                   struct buffer_head *di_bh,
+                   struct buffer_head *dir_bh,
+                   struct ocfs2_alloc_context *meta_ac,
+                   struct ocfs2_alloc_context *data_ac)
+{
+        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+        struct posix_acl *acl = NULL;
+        int ret = 0, ret2;
+        umode_t mode;
+
+        if (!S_ISLNK(inode->i_mode)) {
+                if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+                        acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+                                                   dir_bh);
+                        if (IS_ERR(acl))
+                                return PTR_ERR(acl);
+                }
+                if (!acl) {
+                        mode = inode->i_mode & ~current_umask();
+                        ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+                        if (ret) {
+                                mlog_errno(ret);
+                                goto cleanup;
+                        }
+                }
+        }
+        if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+                if (S_ISDIR(inode->i_mode)) {
+                        ret = ocfs2_set_acl(handle, inode, di_bh,
+                                            ACL_TYPE_DEFAULT, acl,
+                                            meta_ac, data_ac);
+                        if (ret)
+                                goto cleanup;
+                }
+                mode = inode->i_mode;
+                ret = __posix_acl_create(&acl, GFP_NOFS, &mode);
+                if (ret < 0)
+                        return ret;
+
+                ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+                if (ret2) {
+                        mlog_errno(ret2);
+                        ret = ret2;
+                        goto cleanup;
+                }
+                if (ret > 0) {
+                        ret = ocfs2_set_acl(handle, inode,
+                                            di_bh, ACL_TYPE_ACCESS,
+                                            acl, meta_ac, data_ac);
+                }
+        }
+cleanup:
+        posix_acl_release(acl);
+        return ret;
+}
fs/ocfs2/acl.h

@@ -35,5 +35,10 @@ int ocfs2_set_acl(handle_t *handle,
                   struct posix_acl *acl,
                   struct ocfs2_alloc_context *meta_ac,
                   struct ocfs2_alloc_context *data_ac);
+extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+                          struct buffer_head *, struct buffer_head *,
+                          struct ocfs2_alloc_context *,
+                          struct ocfs2_alloc_context *);
 
 #endif /* OCFS2_ACL_H */
fs/ocfs2/file.c

@@ -1268,20 +1268,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
         if (size_change)
                 ocfs2_rw_unlock(inode, 1);
 bail:
-        brelse(bh);
 
         /* Release quota pointers in case we acquired them */
         for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
                 dqput(transfer_to[qtype]);
 
         if (!status && attr->ia_valid & ATTR_MODE) {
-                status = posix_acl_chmod(inode, inode->i_mode);
+                status = ocfs2_acl_chmod(inode, bh);
                 if (status < 0)
                         mlog_errno(status);
         }
         if (inode_locked)
                 ocfs2_inode_unlock(inode, 1);
 
+        brelse(bh);
         return status;
 }
 
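Why this hunk both moves brelse(bh) and swaps posix_acl_chmod() for ocfs2_acl_chmod(): at this point ocfs2_setattr() still holds the inode cluster lock, and the generic posix_acl_chmod() calls back into ocfs2_iop_get_acl(), which takes that same lock again and can hang the node. ocfs2_acl_chmod() instead reuses the dinode buffer_head the caller already read under the lock, so bh must stay referenced until after the call. The sketch below is a userspace analogy only, not ocfs2 code (cluster_lock, get_acl_locked() and get_acl_nolock() are invented names); a non-recursive pthread mutex stands in for the cluster lock. Build with cc -pthread.

/* Analogy only: re-acquiring a non-recursive lock from a callee deadlocks;
 * passing already-acquired state down (ocfs2's bh) avoids the second take. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cluster_lock = PTHREAD_MUTEX_INITIALIZER;

/* Generic-layer style: the helper acquires the lock itself. */
static void get_acl_locked(void)
{
        pthread_mutex_lock(&cluster_lock);
        puts("ACL read under our own lock");
        pthread_mutex_unlock(&cluster_lock);
}

/* Fixed style: the caller passes proof (here a string, in ocfs2 the
 * buffer_head) that the lock is already held. */
static void get_acl_nolock(const char *held)
{
        printf("ACL read using %s\n", held);
}

int main(void)
{
        pthread_mutex_lock(&cluster_lock);          /* like ocfs2_setattr() */
        get_acl_nolock("the caller's buffer_head"); /* safe while locked */
        /* Calling get_acl_locked() here would block forever: the mutex is
         * not recursive, just like the cluster lock held by this node. */
        pthread_mutex_unlock(&cluster_lock);

        get_acl_locked();       /* fine once the lock is free */
        return 0;
}

The same rule explains why ocfs2_init_acl() and ocfs2_get_acl_nolock() in the acl.c hunk above take buffer_head arguments instead of locking on their own.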
fs/ocfs2/namei.c

@@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir,
         struct ocfs2_dir_lookup_result lookup = { NULL, };
         sigset_t oldset;
         int did_block_signals = 0;
-        struct posix_acl *default_acl = NULL, *acl = NULL;
         struct ocfs2_dentry_lock *dl = NULL;
 
         trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
@@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir,
                 goto leave;
         }
 
-        status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
-        if (status) {
-                mlog_errno(status);
-                goto leave;
-        }
-
         handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
                                                             S_ISDIR(mode),
                                                             xattr_credits));
@@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir,
                 inc_nlink(dir);
         }
 
-        if (default_acl) {
-                status = ocfs2_set_acl(handle, inode, new_fe_bh,
-                                       ACL_TYPE_DEFAULT, default_acl,
-                                       meta_ac, data_ac);
-        }
-        if (!status && acl) {
-                status = ocfs2_set_acl(handle, inode, new_fe_bh,
-                                       ACL_TYPE_ACCESS, acl,
-                                       meta_ac, data_ac);
-        }
-
+        status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+                                meta_ac, data_ac);
         if (status < 0) {
                 mlog_errno(status);
@@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir,
         d_instantiate(dentry, inode);
         status = 0;
 leave:
-        if (default_acl)
-                posix_acl_release(default_acl);
-        if (acl)
-                posix_acl_release(acl);
         if (status < 0 && did_quota_inode)
                 dquot_free_inode(inode);
         if (handle)
fs/ocfs2/refcounttree.c

@@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
         struct inode *inode = d_inode(old_dentry);
         struct buffer_head *old_bh = NULL;
         struct inode *new_orphan_inode = NULL;
-        struct posix_acl *default_acl, *acl;
-        umode_t mode;
 
         if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
                 return -EOPNOTSUPP;
 
-        mode = inode->i_mode;
-        error = posix_acl_create(dir, &mode, &default_acl, &acl);
-        if (error) {
-                mlog_errno(error);
-                return error;
-        }
-
-        error = ocfs2_create_inode_in_orphan(dir, mode,
+        error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
                                              &new_orphan_inode);
         if (error) {
                 mlog_errno(error);
@@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
         /* If the security isn't preserved, we need to re-initialize them. */
         if (!preserve) {
                 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
-                                                    &new_dentry->d_name,
-                                                    default_acl, acl);
+                                                    &new_dentry->d_name);
                 if (error)
                         mlog_errno(error);
         }
 out:
-        if (default_acl)
-                posix_acl_release(default_acl);
-        if (acl)
-                posix_acl_release(acl);
         if (!error) {
                 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
                                                        new_dentry);
fs/ocfs2/xattr.c

@@ -7216,12 +7216,10 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
  */
 int ocfs2_init_security_and_acl(struct inode *dir,
                                 struct inode *inode,
-                                const struct qstr *qstr,
-                                struct posix_acl *default_acl,
-                                struct posix_acl *acl)
+                                const struct qstr *qstr)
 {
-        struct buffer_head *dir_bh = NULL;
         int ret = 0;
+        struct buffer_head *dir_bh = NULL;
 
         ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
         if (ret) {
@@ -7234,11 +7232,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
                 mlog_errno(ret);
                 goto leave;
         }
-
-        if (!ret && default_acl)
-                ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
-        if (!ret && acl)
-                ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
+        ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+        if (ret)
+                mlog_errno(ret);
 
         ocfs2_inode_unlock(dir, 0);
         brelse(dir_bh);
fs/ocfs2/xattr.h

@@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
                          bool preserve_security);
 int ocfs2_init_security_and_acl(struct inode *dir,
                                 struct inode *inode,
-                                const struct qstr *qstr,
-                                struct posix_acl *default_acl,
-                                struct posix_acl *acl);
+                                const struct qstr *qstr);
 #endif /* OCFS2_XATTR_H */
include/linux/mm.h

@@ -500,11 +500,20 @@ static inline int page_mapcount(struct page *page)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int total_mapcount(struct page *page);
+int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
 #else
 static inline int total_mapcount(struct page *page)
 {
         return page_mapcount(page);
 }
+static inline int page_trans_huge_mapcount(struct page *page,
+                                           int *total_mapcount)
+{
+        int mapcount = page_mapcount(page);
+        if (total_mapcount)
+                *total_mapcount = mapcount;
+        return mapcount;
+}
 #endif
 
 static inline struct page *virt_to_head_page(const void *x)
include/linux/swap.h

@@ -418,7 +418,7 @@ extern sector_t swapdev_block(int, pgoff_t);
 extern int page_swapcount(struct page *);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
-extern int reuse_swap_page(struct page *);
+extern bool reuse_swap_page(struct page *, int *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
@@ -513,8 +513,8 @@ static inline int swp_swapcount(swp_entry_t entry)
         return 0;
 }
 
-#define reuse_swap_page(page) \
-        (!PageTransCompound(page) && page_mapcount(page) == 1)
+#define reuse_swap_page(page, total_mapcount) \
+        (page_trans_huge_mapcount(page, total_mapcount) == 1)
 
 static inline int try_to_free_swap(struct page *page)
 {
mm/huge_memory.c

@@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
         /*
          * We can only reuse the page if nobody else maps the huge page or it's
-         * part. We can do it by checking page_mapcount() on each sub-page, but
-         * it's expensive.
-         * The cheaper way is to check page_count() to be equal 1: every
-         * mapcount takes page reference reference, so this way we can
-         * guarantee, that the PMD is the only mapping.
-         * This can give false negative if somebody pinned the page, but that's
-         * fine.
+         * part.
          */
-        if (page_mapcount(page) == 1 && page_count(page) == 1) {
+        if (page_trans_huge_mapcount(page, NULL) == 1) {
                 pmd_t entry;
                 entry = pmd_mkyoung(orig_pmd);
                 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -2079,7 +2073,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 if (pte_write(pteval)) {
                         writable = true;
                 } else {
-                        if (PageSwapCache(page) && !reuse_swap_page(page)) {
+                        if (PageSwapCache(page) &&
+                            !reuse_swap_page(page, NULL)) {
                                 unlock_page(page);
                                 result = SCAN_SWAP_CACHE_PAGE;
                                 goto out;
@@ -3222,6 +3217,64 @@ int total_mapcount(struct page *page)
         return ret;
 }
 
+/*
+ * This calculates accurately how many mappings a transparent hugepage
+ * has (unlike page_mapcount() which isn't fully accurate). This full
+ * accuracy is primarily needed to know if copy-on-write faults can
+ * reuse the page and change the mapping to read-write instead of
+ * copying them. At the same time this returns the total_mapcount too.
+ *
+ * The function returns the highest mapcount any one of the subpages
+ * has. If the return value is one, even if different processes are
+ * mapping different subpages of the transparent hugepage, they can
+ * all reuse it, because each process is reusing a different subpage.
+ *
+ * The total_mapcount is instead counting all virtual mappings of the
+ * subpages. If the total_mapcount is equal to "one", it tells the
+ * caller all mappings belong to the same "mm" and in turn the
+ * anon_vma of the transparent hugepage can become the vma->anon_vma
+ * local one as no other process may be mapping any of the subpages.
+ *
+ * It would be more accurate to replace page_mapcount() with
+ * page_trans_huge_mapcount(), however we only use
+ * page_trans_huge_mapcount() in the copy-on-write faults where we
+ * need full accuracy to avoid breaking page pinning, because
+ * page_trans_huge_mapcount() is slower than page_mapcount().
+ */
+int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
+{
+        int i, ret, _total_mapcount, mapcount;
+
+        /* hugetlbfs shouldn't call it */
+        VM_BUG_ON_PAGE(PageHuge(page), page);
+
+        if (likely(!PageTransCompound(page))) {
+                mapcount = atomic_read(&page->_mapcount) + 1;
+                if (total_mapcount)
+                        *total_mapcount = mapcount;
+                return mapcount;
+        }
+
+        page = compound_head(page);
+
+        _total_mapcount = ret = 0;
+        for (i = 0; i < HPAGE_PMD_NR; i++) {
+                mapcount = atomic_read(&page[i]._mapcount) + 1;
+                ret = max(ret, mapcount);
+                _total_mapcount += mapcount;
+        }
+        if (PageDoubleMap(page)) {
+                ret -= 1;
+                _total_mapcount -= HPAGE_PMD_NR;
+        }
+        mapcount = compound_mapcount(page);
+        ret += mapcount;
+        _total_mapcount += mapcount;
+        if (total_mapcount)
+                *total_mapcount = _total_mapcount;
+        return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
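The comment block above is the heart of the THP fix: copy-on-write reuse is decided by the highest per-subpage mapcount plus the compound mapcount, with PageDoubleMap() undoing the extra reference that PTE-mapping a PMD-mapped hugepage adds to every subpage. Below is a minimal, runnable userspace model of just that arithmetic; it is illustrative only (plain ints replace struct page fields, HPAGE_PMD_NR is shrunk to 8 for the demo, and trans_huge_mapcount() is an invented name).

/* Standalone model of the counting rule in page_trans_huge_mapcount(). */
#include <stdio.h>

#define HPAGE_PMD_NR 8  /* tiny hugepage for the demo; 512 on x86-64 */

static int max(int a, int b) { return a > b ? a : b; }

static int trans_huge_mapcount(const int subpage_mapcount[HPAGE_PMD_NR],
                               int compound_mapcount, int double_map,
                               int *total_mapcount)
{
        int i, ret = 0, total = 0;

        for (i = 0; i < HPAGE_PMD_NR; i++) {
                ret = max(ret, subpage_mapcount[i]);
                total += subpage_mapcount[i];
        }
        if (double_map) {       /* PTE maps shadow the PMD map */
                ret -= 1;
                total -= HPAGE_PMD_NR;
        }
        ret += compound_mapcount;
        total += compound_mapcount;
        if (total_mapcount)
                *total_mapcount = total;
        return ret;
}

int main(void)
{
        /* Two processes each PTE-map a different subpage. */
        int sub[HPAGE_PMD_NR] = { 1, 1, 0, 0, 0, 0, 0, 0 };
        int total;
        int ret = trans_huge_mapcount(sub, 0, 0, &total);

        printf("max=%d total=%d reuse=%s\n", ret, total,
               ret == 1 ? "yes" : "no");
        return 0;
}

It prints max=1 total=2: each process is reusing a different subpage, so reuse is safe even though the hugepage as a whole has two mappings, while any maximum greater than one forces a copy.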
mm/ksm.c | 15

@@ -783,6 +783,7 @@ static int unmerge_and_remove_all_rmap_items(void)
                 }
 
                 remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
+                up_read(&mm->mmap_sem);
 
                 spin_lock(&ksm_mmlist_lock);
                 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -794,12 +795,9 @@ static int unmerge_and_remove_all_rmap_items(void)
 
                         free_mm_slot(mm_slot);
                         clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-                        up_read(&mm->mmap_sem);
                         mmdrop(mm);
-                } else {
+                } else
                         spin_unlock(&ksm_mmlist_lock);
-                        up_read(&mm->mmap_sem);
-                }
         }
 
         /* Clean up stable nodes, but don't worry if some are still busy */
@@ -1663,8 +1661,15 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
                 up_read(&mm->mmap_sem);
                 mmdrop(mm);
         } else {
-                spin_unlock(&ksm_mmlist_lock);
                 up_read(&mm->mmap_sem);
+                /*
+                 * up_read(&mm->mmap_sem) first because after
+                 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
+                 * already have been freed under us by __ksm_exit()
+                 * because the "mm_slot" is still hashed and
+                 * ksm_scan.mm_slot doesn't point to it anymore.
+                 */
+                spin_unlock(&ksm_mmlist_lock);
         }
 
         /* Repeat until we've completed scanning the whole list */
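The comment added above states the invariant the KSM fix enforces: mm->mmap_sem is embedded in the mm, and dropping ksm_mmlist_lock is what makes the mm freeable by __ksm_exit() (the mm_slot is still hashed but ksm_scan no longer points at it), so the semaphore must be released before the spinlock. Below is a userspace analogy of that release-before-publish ordering, illustrative only (mm_like and list_lock are invented names; build with cc -pthread).

/* Analogy only: the rwlock lives inside an object another thread may
 * free as soon as list_lock is dropped, so it must be unlocked first. */
#include <pthread.h>
#include <stdlib.h>

struct mm_like {
        pthread_rwlock_t sem;   /* stands in for mm->mmap_sem */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void scanner(struct mm_like *mm)
{
        pthread_rwlock_rdlock(&mm->sem);
        pthread_mutex_lock(&list_lock);
        /* ... unlink mm from the scan list ... */
        pthread_rwlock_unlock(&mm->sem);   /* must come first */
        pthread_mutex_unlock(&list_lock);  /* only now may mm be freed */
}

int main(void)
{
        struct mm_like *mm = malloc(sizeof(*mm));

        if (!mm)
                return 1;
        pthread_rwlock_init(&mm->sem, NULL);
        scanner(mm);
        pthread_rwlock_destroy(&mm->sem);
        free(mm);
        return 0;
}

Unlocking in the opposite order inside scanner() would be the bug being fixed: once list_lock drops, freeing mm could race with the still-pending rwlock unlock.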
mm/memory.c | 18

@@ -2373,6 +2373,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
          * not dirty accountable.
          */
         if (PageAnon(old_page) && !PageKsm(old_page)) {
+                int total_mapcount;
                 if (!trylock_page(old_page)) {
                         get_page(old_page);
                         pte_unmap_unlock(page_table, ptl);
@@ -2387,13 +2388,18 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         }
                         put_page(old_page);
                 }
-                if (reuse_swap_page(old_page)) {
-                        /*
-                         * The page is all ours. Move it to our anon_vma so
-                         * the rmap code will not search our parent or siblings.
-                         * Protected against the rmap code by the page lock.
-                         */
-                        page_move_anon_rmap(old_page, vma, address);
+                if (reuse_swap_page(old_page, &total_mapcount)) {
+                        if (total_mapcount == 1) {
+                                /*
+                                 * The page is all ours. Move it to
+                                 * our anon_vma so the rmap code will
+                                 * not search our parent or siblings.
+                                 * Protected against the rmap code by
+                                 * the page lock.
+                                 */
+                                page_move_anon_rmap(compound_head(old_page),
+                                                    vma, address);
+                        }
                         unlock_page(old_page);
                         return wp_page_reuse(mm, vma, address, page_table, ptl,
                                              orig_pte, old_page, 0, 0);
@@ -2617,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         inc_mm_counter_fast(mm, MM_ANONPAGES);
         dec_mm_counter_fast(mm, MM_SWAPENTS);
         pte = mk_pte(page, vma->vm_page_prot);
-        if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+        if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
                 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                 flags &= ~FAULT_FLAG_WRITE;
                 ret |= VM_FAULT_WRITE;
mm/swapfile.c

@@ -922,18 +922,19 @@ int swp_swapcount(swp_entry_t entry)
  * to it. And as a side-effect, free up its swap: because the old content
  * on disk will never be read, and seeking back there to write new content
  * later would only waste time away from clustering.
+ *
+ * NOTE: total_mapcount should not be relied upon by the caller if
+ * reuse_swap_page() returns false, but it may be always overwritten
+ * (see the other implementation for CONFIG_SWAP=n).
  */
-int reuse_swap_page(struct page *page)
+bool reuse_swap_page(struct page *page, int *total_mapcount)
 {
         int count;
 
         VM_BUG_ON_PAGE(!PageLocked(page), page);
         if (unlikely(PageKsm(page)))
-                return 0;
-        /* The page is part of THP and cannot be reused */
-        if (PageTransCompound(page))
-                return 0;
-        count = page_mapcount(page);
+                return false;
+        count = page_trans_huge_mapcount(page, total_mapcount);
         if (count <= 1 && PageSwapCache(page)) {
                 count += page_swapcount(page);
                 if (count == 1 && !PageWriteback(page)) {