perf: Add PERF_RECORD_NAMESPACES to include namespaces related info
With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event for recording namespaces related info. By recording info for every namespace, it is left to userspace to take a call on the definition of a container and trace containers by updating the perf tool accordingly. Each namespace has a combination of device and inode numbers. Though every namespace has the same device number currently, that may change in the future to avoid the need for a namespace of namespaces. Considering such a possibility, record both device and inode numbers separately for each namespace. Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <ast@fb.com> Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
3ef5b4023c
commit
e422267322
|
@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
|
||||||
|
|
||||||
extern void perf_event_exec(void);
|
extern void perf_event_exec(void);
|
||||||
extern void perf_event_comm(struct task_struct *tsk, bool exec);
|
extern void perf_event_comm(struct task_struct *tsk, bool exec);
|
||||||
|
extern void perf_event_namespaces(struct task_struct *tsk);
|
||||||
extern void perf_event_fork(struct task_struct *tsk);
|
extern void perf_event_fork(struct task_struct *tsk);
|
||||||
|
|
||||||
/* Callchains */
|
/* Callchains */
|
||||||
|
@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
|
||||||
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
|
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
|
||||||
static inline void perf_event_exec(void) { }
|
static inline void perf_event_exec(void) { }
|
||||||
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
|
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
|
||||||
|
static inline void perf_event_namespaces(struct task_struct *tsk) { }
|
||||||
static inline void perf_event_fork(struct task_struct *tsk) { }
|
static inline void perf_event_fork(struct task_struct *tsk) { }
|
||||||
static inline void perf_event_init(void) { }
|
static inline void perf_event_init(void) { }
|
||||||
static inline int perf_swevent_get_recursion_context(void) { return -1; }
|
static inline int perf_swevent_get_recursion_context(void) { return -1; }
|
||||||
|
|
|
@ -344,7 +344,8 @@ struct perf_event_attr {
|
||||||
use_clockid : 1, /* use @clockid for time fields */
|
use_clockid : 1, /* use @clockid for time fields */
|
||||||
context_switch : 1, /* context switch data */
|
context_switch : 1, /* context switch data */
|
||||||
write_backward : 1, /* Write ring buffer from end to beginning */
|
write_backward : 1, /* Write ring buffer from end to beginning */
|
||||||
__reserved_1 : 36;
|
namespaces : 1, /* include namespaces data */
|
||||||
|
__reserved_1 : 35;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
__u32 wakeup_events; /* wakeup every n events */
|
__u32 wakeup_events; /* wakeup every n events */
|
||||||
|
@ -610,6 +611,23 @@ struct perf_event_header {
|
||||||
__u16 size;
|
__u16 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct perf_ns_link_info {
|
||||||
|
__u64 dev;
|
||||||
|
__u64 ino;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Slot assigned to each namespace type in the per-record array of
 * struct perf_ns_link_info entries. The values are spelled out
 * explicitly; NR_NAMESPACES is the resulting array length.
 */
enum {
	NET_NS_INDEX	= 0,	/* network namespace */
	UTS_NS_INDEX	= 1,	/* UTS namespace */
	IPC_NS_INDEX	= 2,	/* IPC namespace */
	PID_NS_INDEX	= 3,	/* PID namespace */
	USER_NS_INDEX	= 4,	/* user namespace */
	MNT_NS_INDEX	= 5,	/* mount namespace */
	CGROUP_NS_INDEX	= 6,	/* cgroup namespace */

	NR_NAMESPACES,		/* number of available namespaces */
};
|
||||||
|
|
||||||
enum perf_event_type {
|
enum perf_event_type {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -862,6 +880,18 @@ enum perf_event_type {
|
||||||
*/
|
*/
|
||||||
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* struct {
|
||||||
|
* struct perf_event_header header;
|
||||||
|
* u32 pid;
|
||||||
|
* u32 tid;
|
||||||
|
* u64 nr_namespaces;
|
||||||
|
* { u64 dev, inode; } [nr_namespaces];
|
||||||
|
* struct sample_id sample_id;
|
||||||
|
* };
|
||||||
|
*/
|
||||||
|
PERF_RECORD_NAMESPACES = 16,
|
||||||
|
|
||||||
PERF_RECORD_MAX, /* non-ABI */
|
PERF_RECORD_MAX, /* non-ABI */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,8 @@
|
||||||
#include <linux/parser.h>
|
#include <linux/parser.h>
|
||||||
#include <linux/sched/clock.h>
|
#include <linux/sched/clock.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/proc_ns.h>
|
||||||
|
#include <linux/mount.h>
|
||||||
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
|
@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||||
|
|
||||||
static atomic_t nr_mmap_events __read_mostly;
|
static atomic_t nr_mmap_events __read_mostly;
|
||||||
static atomic_t nr_comm_events __read_mostly;
|
static atomic_t nr_comm_events __read_mostly;
|
||||||
|
static atomic_t nr_namespaces_events __read_mostly;
|
||||||
static atomic_t nr_task_events __read_mostly;
|
static atomic_t nr_task_events __read_mostly;
|
||||||
static atomic_t nr_freq_events __read_mostly;
|
static atomic_t nr_freq_events __read_mostly;
|
||||||
static atomic_t nr_switch_events __read_mostly;
|
static atomic_t nr_switch_events __read_mostly;
|
||||||
|
@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
|
||||||
atomic_dec(&nr_mmap_events);
|
atomic_dec(&nr_mmap_events);
|
||||||
if (event->attr.comm)
|
if (event->attr.comm)
|
||||||
atomic_dec(&nr_comm_events);
|
atomic_dec(&nr_comm_events);
|
||||||
|
if (event->attr.namespaces)
|
||||||
|
atomic_dec(&nr_namespaces_events);
|
||||||
if (event->attr.task)
|
if (event->attr.task)
|
||||||
atomic_dec(&nr_task_events);
|
atomic_dec(&nr_task_events);
|
||||||
if (event->attr.freq)
|
if (event->attr.freq)
|
||||||
|
@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
|
||||||
void perf_event_fork(struct task_struct *task)
|
void perf_event_fork(struct task_struct *task)
|
||||||
{
|
{
|
||||||
perf_event_task(task, NULL, 1);
|
perf_event_task(task, NULL, 1);
|
||||||
|
perf_event_namespaces(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
|
||||||
perf_event_comm_event(&comm_event);
|
perf_event_comm_event(&comm_event);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* namespaces tracking
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct perf_namespaces_event {
|
||||||
|
struct task_struct *task;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
struct perf_event_header header;
|
||||||
|
|
||||||
|
u32 pid;
|
||||||
|
u32 tid;
|
||||||
|
u64 nr_namespaces;
|
||||||
|
struct perf_ns_link_info link_info[NR_NAMESPACES];
|
||||||
|
} event_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int perf_event_namespaces_match(struct perf_event *event)
|
||||||
|
{
|
||||||
|
return event->attr.namespaces;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void perf_event_namespaces_output(struct perf_event *event,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
struct perf_namespaces_event *namespaces_event = data;
|
||||||
|
struct perf_output_handle handle;
|
||||||
|
struct perf_sample_data sample;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!perf_event_namespaces_match(event))
|
||||||
|
return;
|
||||||
|
|
||||||
|
perf_event_header__init_id(&namespaces_event->event_id.header,
|
||||||
|
&sample, event);
|
||||||
|
ret = perf_output_begin(&handle, event,
|
||||||
|
namespaces_event->event_id.header.size);
|
||||||
|
if (ret)
|
||||||
|
return;
|
||||||
|
|
||||||
|
namespaces_event->event_id.pid = perf_event_pid(event,
|
||||||
|
namespaces_event->task);
|
||||||
|
namespaces_event->event_id.tid = perf_event_tid(event,
|
||||||
|
namespaces_event->task);
|
||||||
|
|
||||||
|
perf_output_put(&handle, namespaces_event->event_id);
|
||||||
|
|
||||||
|
perf_event__output_id_sample(event, &handle, &sample);
|
||||||
|
|
||||||
|
perf_output_end(&handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
|
||||||
|
struct task_struct *task,
|
||||||
|
const struct proc_ns_operations *ns_ops)
|
||||||
|
{
|
||||||
|
struct path ns_path;
|
||||||
|
struct inode *ns_inode;
|
||||||
|
void *error;
|
||||||
|
|
||||||
|
error = ns_get_path(&ns_path, task, ns_ops);
|
||||||
|
if (!error) {
|
||||||
|
ns_inode = ns_path.dentry->d_inode;
|
||||||
|
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
|
||||||
|
ns_link_info->ino = ns_inode->i_ino;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void perf_event_namespaces(struct task_struct *task)
|
||||||
|
{
|
||||||
|
struct perf_namespaces_event namespaces_event;
|
||||||
|
struct perf_ns_link_info *ns_link_info;
|
||||||
|
|
||||||
|
if (!atomic_read(&nr_namespaces_events))
|
||||||
|
return;
|
||||||
|
|
||||||
|
namespaces_event = (struct perf_namespaces_event){
|
||||||
|
.task = task,
|
||||||
|
.event_id = {
|
||||||
|
.header = {
|
||||||
|
.type = PERF_RECORD_NAMESPACES,
|
||||||
|
.misc = 0,
|
||||||
|
.size = sizeof(namespaces_event.event_id),
|
||||||
|
},
|
||||||
|
/* .pid */
|
||||||
|
/* .tid */
|
||||||
|
.nr_namespaces = NR_NAMESPACES,
|
||||||
|
/* .link_info[NR_NAMESPACES] */
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
ns_link_info = namespaces_event.event_id.link_info;
|
||||||
|
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
|
||||||
|
task, &mntns_operations);
|
||||||
|
|
||||||
|
#ifdef CONFIG_USER_NS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
|
||||||
|
task, &userns_operations);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_NET_NS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
|
||||||
|
task, &netns_operations);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_UTS_NS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
|
||||||
|
task, &utsns_operations);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_IPC_NS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
|
||||||
|
task, &ipcns_operations);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_PID_NS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
|
||||||
|
task, &pidns_operations);
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_CGROUPS
|
||||||
|
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
|
||||||
|
task, &cgroupns_operations);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
perf_iterate_sb(perf_event_namespaces_output,
|
||||||
|
&namespaces_event,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mmap tracking
|
* mmap tracking
|
||||||
*/
|
*/
|
||||||
|
@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
|
||||||
atomic_inc(&nr_mmap_events);
|
atomic_inc(&nr_mmap_events);
|
||||||
if (event->attr.comm)
|
if (event->attr.comm)
|
||||||
atomic_inc(&nr_comm_events);
|
atomic_inc(&nr_comm_events);
|
||||||
|
if (event->attr.namespaces)
|
||||||
|
atomic_inc(&nr_namespaces_events);
|
||||||
if (event->attr.task)
|
if (event->attr.task)
|
||||||
atomic_inc(&nr_task_events);
|
atomic_inc(&nr_task_events);
|
||||||
if (event->attr.freq)
|
if (event->attr.freq)
|
||||||
|
@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||||
return -EACCES;
|
return -EACCES;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (attr.namespaces) {
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EACCES;
|
||||||
|
}
|
||||||
|
|
||||||
if (attr.freq) {
|
if (attr.freq) {
|
||||||
if (attr.sample_freq > sysctl_perf_event_sample_rate)
|
if (attr.sample_freq > sysctl_perf_event_sample_rate)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
|
@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
perf_event_namespaces(current);
|
||||||
|
|
||||||
bad_unshare_cleanup_cred:
|
bad_unshare_cleanup_cred:
|
||||||
if (new_cred)
|
if (new_cred)
|
||||||
put_cred(new_cred);
|
put_cred(new_cred);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/cgroup.h>
|
#include <linux/cgroup.h>
|
||||||
|
#include <linux/perf_event.h>
|
||||||
|
|
||||||
static struct kmem_cache *nsproxy_cachep;
|
static struct kmem_cache *nsproxy_cachep;
|
||||||
|
|
||||||
|
@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
switch_task_namespaces(tsk, new_nsproxy);
|
switch_task_namespaces(tsk, new_nsproxy);
|
||||||
|
|
||||||
|
perf_event_namespaces(tsk);
|
||||||
out:
|
out:
|
||||||
fput(file);
|
fput(file);
|
||||||
return err;
|
return err;
|
||||||
|
|
Loading…
Reference in New Issue