mirror of https://gitee.com/openkylin/linux.git
cgroup: saner refcounting for cgroup_root
* make the reference from superblock to cgroup_root counting - do cgroup_put() in cgroup_kill_sb() whether we'd done percpu_ref_kill() or not; matching grab is done when we allocate a new root. That gives the same refcounting rules for all callers of cgroup_do_mount() - a reference to cgroup_root has been grabbed by caller and it either is transferred to new superblock or dropped.

* have cgroup_kill_sb() treat an already killed refcount as "just don't bother killing it, then".

* after successful cgroup_do_mount() have cgroup1_mount() recheck if we'd raced with mount/umount from somebody else and cgroup_root got killed. In that case we drop the superblock and bugger off with -ERESTARTSYS, same as if we'd found it in the list already dying.

* don't bother with delayed initialization of refcount - it's unreliable and not needed. No need to prevent attempts to bump the refcount if we find cgroup_root of another mount in progress - sget will reuse an existing superblock just fine and if the other sb manages to die before we get there, we'll catch that immediately after cgroup_do_mount().

* don't bother with kernfs_pin_sb() - no need for doing that either.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
399504e21a
commit
35ac118424
|
@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
|
||||||
|
|
||||||
void cgroup_free_root(struct cgroup_root *root);
|
void cgroup_free_root(struct cgroup_root *root);
|
||||||
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
|
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
|
||||||
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
|
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
|
||||||
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
|
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
|
||||||
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
|
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
|
||||||
struct cgroup_root *root, unsigned long magic,
|
struct cgroup_root *root, unsigned long magic,
|
||||||
|
|
|
@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
|
||||||
void *data, unsigned long magic,
|
void *data, unsigned long magic,
|
||||||
struct cgroup_namespace *ns)
|
struct cgroup_namespace *ns)
|
||||||
{
|
{
|
||||||
struct super_block *pinned_sb = NULL;
|
|
||||||
struct cgroup_sb_opts opts;
|
struct cgroup_sb_opts opts;
|
||||||
struct cgroup_root *root;
|
struct cgroup_root *root;
|
||||||
struct cgroup_subsys *ss;
|
struct cgroup_subsys *ss;
|
||||||
struct dentry *dentry;
|
struct dentry *dentry;
|
||||||
int i, ret;
|
int i, ret;
|
||||||
bool new_root = false;
|
|
||||||
|
|
||||||
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
|
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
|
||||||
|
|
||||||
|
@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
|
||||||
if (root->flags ^ opts.flags)
|
if (root->flags ^ opts.flags)
|
||||||
pr_warn("new mount options do not match the existing superblock, will be ignored\n");
|
pr_warn("new mount options do not match the existing superblock, will be ignored\n");
|
||||||
|
|
||||||
/*
|
|
||||||
* We want to reuse @root whose lifetime is governed by its
|
|
||||||
* ->cgrp. Let's check whether @root is alive and keep it
|
|
||||||
* that way. As cgroup_kill_sb() can happen anytime, we
|
|
||||||
* want to block it by pinning the sb so that @root doesn't
|
|
||||||
* get killed before mount is complete.
|
|
||||||
*
|
|
||||||
* With the sb pinned, tryget_live can reliably indicate
|
|
||||||
* whether @root can be reused. If it's being killed,
|
|
||||||
* drain it. We can use wait_queue for the wait but this
|
|
||||||
* path is super cold. Let's just sleep a bit and retry.
|
|
||||||
*/
|
|
||||||
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
|
|
||||||
if (IS_ERR(pinned_sb) ||
|
|
||||||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
|
|
||||||
mutex_unlock(&cgroup_mutex);
|
|
||||||
if (!IS_ERR_OR_NULL(pinned_sb))
|
|
||||||
deactivate_super(pinned_sb);
|
|
||||||
msleep(10);
|
|
||||||
ret = restart_syscall();
|
|
||||||
goto out_free;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
new_root = true;
|
|
||||||
|
|
||||||
init_cgroup_root(root, &opts);
|
init_cgroup_root(root, &opts);
|
||||||
|
|
||||||
ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
|
ret = cgroup_setup_root(root, opts.subsys_mask);
|
||||||
if (ret)
|
if (ret)
|
||||||
cgroup_free_root(root);
|
cgroup_free_root(root);
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
|
if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
msleep(10);
|
||||||
|
ret = restart_syscall();
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
mutex_unlock(&cgroup_mutex);
|
mutex_unlock(&cgroup_mutex);
|
||||||
out_free:
|
out_free:
|
||||||
kfree(opts.release_agent);
|
kfree(opts.release_agent);
|
||||||
|
@ -1252,25 +1232,13 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
|
||||||
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
|
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
|
||||||
CGROUP_SUPER_MAGIC, ns);
|
CGROUP_SUPER_MAGIC, ns);
|
||||||
|
|
||||||
/*
|
if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
|
||||||
* There's a race window after we release cgroup_mutex and before
|
struct super_block *sb = dentry->d_sb;
|
||||||
* allocating a superblock. Make sure a concurrent process won't
|
dput(dentry);
|
||||||
* be able to re-use the root during this window by delaying the
|
deactivate_locked_super(sb);
|
||||||
* initialization of root refcnt.
|
msleep(10);
|
||||||
*/
|
dentry = ERR_PTR(restart_syscall());
|
||||||
if (new_root) {
|
|
||||||
mutex_lock(&cgroup_mutex);
|
|
||||||
percpu_ref_reinit(&root->cgrp.self.refcnt);
|
|
||||||
mutex_unlock(&cgroup_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If @pinned_sb, we're reusing an existing root and holding an
|
|
||||||
* extra ref on its sb. Mount is complete. Put the extra ref.
|
|
||||||
*/
|
|
||||||
if (pinned_sb)
|
|
||||||
deactivate_super(pinned_sb);
|
|
||||||
|
|
||||||
return dentry;
|
return dentry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
|
||||||
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
|
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
|
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
|
||||||
{
|
{
|
||||||
LIST_HEAD(tmp_links);
|
LIST_HEAD(tmp_links);
|
||||||
struct cgroup *root_cgrp = &root->cgrp;
|
struct cgroup *root_cgrp = &root->cgrp;
|
||||||
|
@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
|
||||||
root_cgrp->ancestor_ids[0] = ret;
|
root_cgrp->ancestor_ids[0] = ret;
|
||||||
|
|
||||||
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
|
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
|
||||||
ref_flags, GFP_KERNEL);
|
0, GFP_KERNEL);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@ -2121,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb)
|
||||||
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
|
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If @root doesn't have any mounts or children, start killing it.
|
* If @root doesn't have any children, start killing it.
|
||||||
* This prevents new mounts by disabling percpu_ref_tryget_live().
|
* This prevents new mounts by disabling percpu_ref_tryget_live().
|
||||||
* cgroup_mount() may wait for @root's release.
|
* cgroup_mount() may wait for @root's release.
|
||||||
*
|
*
|
||||||
* And don't kill the default root.
|
* And don't kill the default root.
|
||||||
*/
|
*/
|
||||||
if (!list_empty(&root->cgrp.self.children) ||
|
if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
|
||||||
root == &cgrp_dfl_root)
|
!percpu_ref_is_dying(&root->cgrp.self.refcnt))
|
||||||
cgroup_put(&root->cgrp);
|
|
||||||
else
|
|
||||||
percpu_ref_kill(&root->cgrp.self.refcnt);
|
percpu_ref_kill(&root->cgrp.self.refcnt);
|
||||||
|
cgroup_put(&root->cgrp);
|
||||||
kernfs_kill_sb(sb);
|
kernfs_kill_sb(sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5402,7 +5400,7 @@ int __init cgroup_init(void)
|
||||||
hash_add(css_set_table, &init_css_set.hlist,
|
hash_add(css_set_table, &init_css_set.hlist,
|
||||||
css_set_hash(init_css_set.subsys));
|
css_set_hash(init_css_set.subsys));
|
||||||
|
|
||||||
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
|
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
|
||||||
|
|
||||||
mutex_unlock(&cgroup_mutex);
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue