linux/kernel/cgroup/debug.c

359 lines
7.6 KiB
C
Raw Normal View History

/*
* Debug controller
*
* WARNING: This controller is for cgroup core debugging only.
* Its interfaces are unstable and subject to changes at any time.
*/
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include "cgroup-internal.h"
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
if (!css)
return ERR_PTR(-ENOMEM);
return css;
}
static void debug_css_free(struct cgroup_subsys_state *css)
{
kfree(css);
}
/*
* debug_taskcount_read - return the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*/
static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return cgroup_task_count(css->cgroup);
}
static int current_css_set_read(struct seq_file *seq, void *v)
{
struct kernfs_open_file *of = seq->private;
struct css_set *cset;
struct cgroup_subsys *ss;
struct cgroup_subsys_state *css;
int i, refcnt;
if (!cgroup_kn_lock_live(of->kn, false))
return -ENODEV;
spin_lock_irq(&css_set_lock);
rcu_read_lock();
cset = rcu_dereference(current->cgroups);
refcnt = refcount_read(&cset->refcount);
seq_printf(seq, "css_set %pK %d", cset, refcnt);
if (refcnt > cset->nr_tasks)
seq_printf(seq, " +%d", refcnt - cset->nr_tasks);
seq_puts(seq, "\n");
/*
* Print the css'es stored in the current css_set.
*/
for_each_subsys(ss, i) {
css = cset->subsys[ss->id];
if (!css)
continue;
seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
(unsigned long)css, css->id);
}
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
cgroup_kn_unlock(of->kn);
return 0;
}
static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
u64 count;
rcu_read_lock();
count = refcount_read(&task_css_set(current)->refcount);
rcu_read_unlock();
return count;
}
static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
struct cgrp_cset_link *link;
struct css_set *cset;
char *name_buf;
name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
if (!name_buf)
return -ENOMEM;
spin_lock_irq(&css_set_lock);
rcu_read_lock();
cset = rcu_dereference(current->cgroups);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
cgroup_name(c, name_buf, NAME_MAX + 1);
seq_printf(seq, "Root %d group %s\n",
c->root->hierarchy_id, name_buf);
}
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
kfree(name_buf);
return 0;
}
#define MAX_TASKS_SHOWN_PER_CSS 25
static int cgroup_css_links_read(struct seq_file *seq, void *v)
{
struct cgroup_subsys_state *css = seq_css(seq);
struct cgrp_cset_link *link;
int dead_cnt = 0, extra_refs = 0;
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
struct css_set *cset = link->cset;
struct task_struct *task;
int count = 0;
int refcnt = refcount_read(&cset->refcount);
seq_printf(seq, " %d", refcnt);
if (refcnt - cset->nr_tasks > 0) {
int extra = refcnt - cset->nr_tasks;
seq_printf(seq, " +%d", extra);
/*
* Take out the one additional reference in
* init_css_set.
*/
if (cset == &init_css_set)
extra--;
extra_refs += extra;
}
seq_puts(seq, "\n");
list_for_each_entry(task, &cset->tasks, cg_list) {
if (count++ <= MAX_TASKS_SHOWN_PER_CSS)
seq_printf(seq, " task %d\n",
task_pid_vnr(task));
}
list_for_each_entry(task, &cset->mg_tasks, cg_list) {
if (count++ <= MAX_TASKS_SHOWN_PER_CSS)
seq_printf(seq, " task %d\n",
task_pid_vnr(task));
}
/* show # of overflowed tasks */
if (count > MAX_TASKS_SHOWN_PER_CSS)
seq_printf(seq, " ... (%d)\n",
count - MAX_TASKS_SHOWN_PER_CSS);
if (cset->dead) {
seq_puts(seq, " [dead]\n");
dead_cnt++;
}
WARN_ON(count != cset->nr_tasks);
}
spin_unlock_irq(&css_set_lock);
if (!dead_cnt && !extra_refs)
return 0;
seq_puts(seq, "\n");
if (extra_refs)
seq_printf(seq, "extra references = %d\n", extra_refs);
if (dead_cnt)
seq_printf(seq, "dead css_sets = %d\n", dead_cnt);
return 0;
}
static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
{
struct kernfs_open_file *of = seq->private;
struct cgroup *cgrp;
struct cgroup_subsys *ss;
struct cgroup_subsys_state *css;
char pbuf[16];
int i;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;
for_each_subsys(ss, i) {
css = rcu_dereference_check(cgrp->subsys[ss->id], true);
if (!css)
continue;
pbuf[0] = '\0';
/* Show the parent CSS if applicable*/
if (css->parent)
snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
css->parent->id);
seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
(unsigned long)css, css->id,
atomic_read(&css->online_cnt), pbuf);
}
cgroup_kn_unlock(of->kn);
return 0;
}
static void cgroup_masks_read_one(struct seq_file *seq, const char *name,
u16 mask)
{
struct cgroup_subsys *ss;
int ssid;
bool first = true;
seq_printf(seq, "%-17s: ", name);
for_each_subsys(ss, ssid) {
if (!(mask & (1 << ssid)))
continue;
if (!first)
seq_puts(seq, ", ");
seq_puts(seq, ss->name);
first = false;
}
seq_putc(seq, '\n');
}
static int cgroup_masks_read(struct seq_file *seq, void *v)
{
struct kernfs_open_file *of = seq->private;
struct cgroup *cgrp;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENODEV;
cgroup_masks_read_one(seq, "subtree_control", cgrp->subtree_control);
cgroup_masks_read_one(seq, "subtree_ss_mask", cgrp->subtree_ss_mask);
cgroup_kn_unlock(of->kn);
return 0;
}
static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
return (!cgroup_is_populated(css->cgroup) &&
!css_has_online_children(&css->cgroup->self));
}
static struct cftype debug_legacy_files[] = {
{
.name = "taskcount",
.read_u64 = debug_taskcount_read,
},
{
.name = "current_css_set",
.seq_show = current_css_set_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "current_css_set_refcount",
.read_u64 = current_css_set_refcount_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "current_css_set_cg_links",
.seq_show = current_css_set_cg_links_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "cgroup_css_links",
.seq_show = cgroup_css_links_read,
},
{
.name = "cgroup_subsys_states",
.seq_show = cgroup_subsys_states_read,
},
{
.name = "cgroup_masks",
.seq_show = cgroup_masks_read,
},
{
.name = "releasable",
.read_u64 = releasable_read,
},
{ } /* terminate */
};
static struct cftype debug_files[] = {
{
.name = "taskcount",
.read_u64 = debug_taskcount_read,
},
{
.name = "current_css_set",
.seq_show = current_css_set_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "current_css_set_refcount",
.read_u64 = current_css_set_refcount_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "current_css_set_cg_links",
.seq_show = current_css_set_cg_links_read,
.flags = CFTYPE_ONLY_ON_ROOT,
},
{
.name = "css_links",
.seq_show = cgroup_css_links_read,
},
{
.name = "csses",
.seq_show = cgroup_subsys_states_read,
},
{
.name = "masks",
.seq_show = cgroup_masks_read,
},
{ } /* terminate */
};
struct cgroup_subsys debug_cgrp_subsys = {
.css_alloc = debug_css_alloc,
.css_free = debug_css_free,
.legacy_cftypes = debug_legacy_files,
};
/*
* On v2, debug is an implicit controller enabled by "cgroup_debug" boot
* parameter.
*/
static int __init enable_cgroup_debug(char *str)
{
debug_cgrp_subsys.dfl_cftypes = debug_files;
debug_cgrp_subsys.implicit_on_dfl = true;
cgroup: implement cgroup v2 thread support This patch implements cgroup v2 thread support. The goal of the thread mode is supporting hierarchical accounting and control at thread granularity while staying inside the resource domain model which allows coordination across different resource controllers and handling of anonymous resource consumptions. A cgroup is always created as a domain and can be made threaded by writing to the "cgroup.type" file. When a cgroup becomes threaded, it becomes a member of a threaded subtree which is anchored at the closest ancestor which isn't threaded. The threads of the processes which are in a threaded subtree can be placed anywhere without being restricted by process granularity or no-internal-process constraint. Note that the threads aren't allowed to escape to a different threaded subtree. To be used inside a threaded subtree, a controller should explicitly support threaded mode and be able to handle internal competition in the way which is appropriate for the resource. The root of a threaded subtree, the nearest ancestor which isn't threaded, is called the threaded domain and serves as the resource domain for the whole subtree. This is the last cgroup where domain controllers are operational and where all the domain-level resource consumptions in the subtree are accounted. This allows threaded controllers to operate at thread granularity when requested while staying inside the scope of system-level resource distribution. As the root cgroup is exempt from the no-internal-process constraint, it can serve as both a threaded domain and a parent to normal cgroups, so, unlike non-root cgroups, the root cgroup can have both domain and threaded children. Internally, in a threaded subtree, each css_set has its ->dom_cset pointing to a matching css_set which belongs to the threaded domain. This ensures that thread root level cgroup_subsys_state for all threaded controllers are readily accessible for domain-level operations. This patch enables threaded mode for the pids and perf_events controllers. Neither has to worry about domain-level resource consumptions and it's enough to simply set the flag. For more details on the interface and behavior of the thread mode, please refer to the section 2-2-2 in Documentation/cgroup-v2.txt added by this patch. v5: - Dropped silly no-op ->dom_cgrp init from cgroup_create(). Spotted by Waiman. - Documentation updated as suggested by Waiman. - cgroup.type content slightly reformatted. - Mark the debug controller threaded. v4: - Updated to the general idea of marking specific cgroups domain/threaded as suggested by PeterZ. v3: - Dropped "join" and always make mixed children join the parent's threaded subtree. v2: - After discussions with Waiman, support for mixed thread mode is added. This should address the issue that Peter pointed out where any nesting should be avoided for thread subtrees while coexisting with other domain cgroups. - Enabling / disabling thread mode now piggy backs on the existing control mask update mechanism. - Bug fixes and cleanup. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Waiman Long <longman@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org>
2017-07-21 23:14:51 +08:00
debug_cgrp_subsys.threaded = true;
return 1;
}
__setup("cgroup_debug", enable_cgroup_debug);