ns: proc files for namespace naming policy.
Create files under /proc/<pid>/ns/ to allow controlling the namespaces of a process. This addresses three specific problems that can make namespaces hard to work with. - Namespaces require a dedicated process to pin them in memory. - It is not possible to use a namespace unless you are the child of the original creator. - Namespaces don't have names that userspace can use to talk about them. The namespace files under /proc/<pid>/ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive. A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else, for example: mount --bind /proc/self/ns/net /some/filesystem/path mount --bind /proc/self/fd/<N> /some/filesystem/path This allows namespaces to be named with userspace policy. It requires additional support to make use of these file descriptors, and that will be coming in the following patches. Acked-by: Daniel Lezcano <daniel.lezcano@free.fr> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
This commit is contained in:
parent
0ee5623f9a
commit
6b4e306aa3
|
@ -20,6 +20,7 @@ proc-y += stat.o
|
||||||
proc-y += uptime.o
|
proc-y += uptime.o
|
||||||
proc-y += version.o
|
proc-y += version.o
|
||||||
proc-y += softirqs.o
|
proc-y += softirqs.o
|
||||||
|
proc-y += namespaces.o
|
||||||
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
|
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
|
||||||
proc-$(CONFIG_NET) += proc_net.o
|
proc-$(CONFIG_NET) += proc_net.o
|
||||||
proc-$(CONFIG_PROC_KCORE) += kcore.o
|
proc-$(CONFIG_PROC_KCORE) += kcore.o
|
||||||
|
|
|
@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
|
||||||
return allowed;
|
return allowed;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int proc_setattr(struct dentry *dentry, struct iattr *attr)
|
int proc_setattr(struct dentry *dentry, struct iattr *attr)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
struct inode *inode = dentry->d_inode;
|
struct inode *inode = dentry->d_inode;
|
||||||
|
@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
|
||||||
static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
|
|
||||||
{
|
{
|
||||||
struct inode * inode;
|
struct inode * inode;
|
||||||
struct proc_inode *ei;
|
struct proc_inode *ei;
|
||||||
|
@ -1779,7 +1778,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
|
int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
|
||||||
{
|
{
|
||||||
struct inode *inode = dentry->d_inode;
|
struct inode *inode = dentry->d_inode;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
|
||||||
* made this apply to all per process world readable and executable
|
* made this apply to all per process world readable and executable
|
||||||
* directories.
|
* directories.
|
||||||
*/
|
*/
|
||||||
static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
|
int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
|
||||||
{
|
{
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
|
||||||
return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
|
return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct dentry_operations pid_dentry_operations =
|
const struct dentry_operations pid_dentry_operations =
|
||||||
{
|
{
|
||||||
.d_revalidate = pid_revalidate,
|
.d_revalidate = pid_revalidate,
|
||||||
.d_delete = pid_delete_dentry,
|
.d_delete = pid_delete_dentry,
|
||||||
|
@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
|
||||||
|
|
||||||
/* Lookups */
|
/* Lookups */
|
||||||
|
|
||||||
typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
|
|
||||||
struct task_struct *, const void *);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fill a directory entry.
|
* Fill a directory entry.
|
||||||
*
|
*
|
||||||
|
@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
|
||||||
* reported by readdir in sync with the inode numbers reported
|
* reported by readdir in sync with the inode numbers reported
|
||||||
* by stat.
|
* by stat.
|
||||||
*/
|
*/
|
||||||
static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
|
int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
|
||||||
char *name, int len,
|
const char *name, int len,
|
||||||
instantiate_t instantiate, struct task_struct *task, const void *ptr)
|
instantiate_t instantiate, struct task_struct *task, const void *ptr)
|
||||||
{
|
{
|
||||||
struct dentry *child, *dir = filp->f_path.dentry;
|
struct dentry *child, *dir = filp->f_path.dentry;
|
||||||
|
@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
|
||||||
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
|
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
|
||||||
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
||||||
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
||||||
|
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
|
||||||
#ifdef CONFIG_NET
|
#ifdef CONFIG_NET
|
||||||
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
|
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
|
||||||
#endif
|
#endif
|
||||||
|
@ -3168,6 +3165,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
||||||
static const struct pid_entry tid_base_stuff[] = {
|
static const struct pid_entry tid_base_stuff[] = {
|
||||||
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
||||||
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
||||||
|
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
|
||||||
REG("environ", S_IRUSR, proc_environ_operations),
|
REG("environ", S_IRUSR, proc_environ_operations),
|
||||||
INF("auxv", S_IRUSR, proc_pid_auxv),
|
INF("auxv", S_IRUSR, proc_pid_auxv),
|
||||||
ONE("status", S_IRUGO, proc_pid_status),
|
ONE("status", S_IRUGO, proc_pid_status),
|
||||||
|
|
|
@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct proc_dir_entry *de;
|
struct proc_dir_entry *de;
|
||||||
struct ctl_table_header *head;
|
struct ctl_table_header *head;
|
||||||
|
const struct proc_ns_operations *ns_ops;
|
||||||
|
|
||||||
truncate_inode_pages(&inode->i_data, 0);
|
truncate_inode_pages(&inode->i_data, 0);
|
||||||
end_writeback(inode);
|
end_writeback(inode);
|
||||||
|
@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
|
||||||
rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
|
rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
|
||||||
sysctl_head_put(head);
|
sysctl_head_put(head);
|
||||||
}
|
}
|
||||||
|
/* Release any associated namespace */
|
||||||
|
ns_ops = PROC_I(inode)->ns_ops;
|
||||||
|
if (ns_ops && ns_ops->put)
|
||||||
|
ns_ops->put(PROC_I(inode)->ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct kmem_cache * proc_inode_cachep;
|
static struct kmem_cache * proc_inode_cachep;
|
||||||
|
@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
|
||||||
ei->pde = NULL;
|
ei->pde = NULL;
|
||||||
ei->sysctl = NULL;
|
ei->sysctl = NULL;
|
||||||
ei->sysctl_entry = NULL;
|
ei->sysctl_entry = NULL;
|
||||||
|
ei->ns = NULL;
|
||||||
|
ei->ns_ops = NULL;
|
||||||
inode = &ei->vfs_inode;
|
inode = &ei->vfs_inode;
|
||||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||||
return inode;
|
return inode;
|
||||||
|
|
|
@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
|
||||||
*/
|
*/
|
||||||
int proc_readdir(struct file *, void *, filldir_t);
|
int proc_readdir(struct file *, void *, filldir_t);
|
||||||
struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
|
struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Lookups */
|
||||||
|
typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
|
||||||
|
struct task_struct *, const void *);
|
||||||
|
int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
|
||||||
|
const char *name, int len,
|
||||||
|
instantiate_t instantiate, struct task_struct *task, const void *ptr);
|
||||||
|
int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
|
||||||
|
struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
|
||||||
|
extern const struct dentry_operations pid_dentry_operations;
|
||||||
|
int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
|
||||||
|
int proc_setattr(struct dentry *dentry, struct iattr *attr);
|
||||||
|
|
||||||
|
extern const struct inode_operations proc_ns_dir_inode_operations;
|
||||||
|
extern const struct file_operations proc_ns_dir_operations;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
#include <linux/proc_fs.h>
|
||||||
|
#include <linux/nsproxy.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
#include <linux/ptrace.h>
|
||||||
|
#include <linux/fs_struct.h>
|
||||||
|
#include <linux/mount.h>
|
||||||
|
#include <linux/path.h>
|
||||||
|
#include <linux/namei.h>
|
||||||
|
#include <linux/file.h>
|
||||||
|
#include <linux/utsname.h>
|
||||||
|
#include <net/net_namespace.h>
|
||||||
|
#include <linux/mnt_namespace.h>
|
||||||
|
#include <linux/ipc_namespace.h>
|
||||||
|
#include <linux/pid_namespace.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Table of the namespace types exposed as files in the "ns" directory.
 * It has no entries here; proc_ns_dir_readdir() and proc_ns_dir_lookup()
 * iterate over whatever entries it contains.
 */
static const struct proc_ns_operations *ns_entries[] = {
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * File operations installed on every namespace file.  Only llseek is set
 * (to no_llseek).  proc_ns_fget() compares a file's f_op against the address
 * of this table to recognise namespace files.
 */
static const struct file_operations ns_file_operations = {
|
||||||
|
.llseek = no_llseek,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct dentry *proc_ns_instantiate(struct inode *dir,
|
||||||
|
struct dentry *dentry, struct task_struct *task, const void *ptr)
|
||||||
|
{
|
||||||
|
const struct proc_ns_operations *ns_ops = ptr;
|
||||||
|
struct inode *inode;
|
||||||
|
struct proc_inode *ei;
|
||||||
|
struct dentry *error = ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
|
inode = proc_pid_make_inode(dir->i_sb, task);
|
||||||
|
if (!inode)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ei = PROC_I(inode);
|
||||||
|
inode->i_mode = S_IFREG|S_IRUSR;
|
||||||
|
inode->i_fop = &ns_file_operations;
|
||||||
|
ei->ns_ops = ns_ops;
|
||||||
|
ei->ns = ns_ops->get(task);
|
||||||
|
if (!ei->ns)
|
||||||
|
goto out_iput;
|
||||||
|
|
||||||
|
dentry->d_op = &pid_dentry_operations;
|
||||||
|
d_add(dentry, inode);
|
||||||
|
/* Close the race of the process dying before we return the dentry */
|
||||||
|
if (pid_revalidate(dentry, NULL))
|
||||||
|
error = NULL;
|
||||||
|
out:
|
||||||
|
return error;
|
||||||
|
out_iput:
|
||||||
|
iput(inode);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int proc_ns_fill_cache(struct file *filp, void *dirent,
|
||||||
|
filldir_t filldir, struct task_struct *task,
|
||||||
|
const struct proc_ns_operations *ops)
|
||||||
|
{
|
||||||
|
return proc_fill_cache(filp, dirent, filldir,
|
||||||
|
ops->name, strlen(ops->name),
|
||||||
|
proc_ns_instantiate, task, ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int proc_ns_dir_readdir(struct file *filp, void *dirent,
|
||||||
|
filldir_t filldir)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct dentry *dentry = filp->f_path.dentry;
|
||||||
|
struct inode *inode = dentry->d_inode;
|
||||||
|
struct task_struct *task = get_proc_task(inode);
|
||||||
|
const struct proc_ns_operations **entry, **last;
|
||||||
|
ino_t ino;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = -ENOENT;
|
||||||
|
if (!task)
|
||||||
|
goto out_no_task;
|
||||||
|
|
||||||
|
ret = -EPERM;
|
||||||
|
if (!ptrace_may_access(task, PTRACE_MODE_READ))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
i = filp->f_pos;
|
||||||
|
switch (i) {
|
||||||
|
case 0:
|
||||||
|
ino = inode->i_ino;
|
||||||
|
if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
|
||||||
|
goto out;
|
||||||
|
i++;
|
||||||
|
filp->f_pos++;
|
||||||
|
/* fall through */
|
||||||
|
case 1:
|
||||||
|
ino = parent_ino(dentry);
|
||||||
|
if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
|
||||||
|
goto out;
|
||||||
|
i++;
|
||||||
|
filp->f_pos++;
|
||||||
|
/* fall through */
|
||||||
|
default:
|
||||||
|
i -= 2;
|
||||||
|
if (i >= ARRAY_SIZE(ns_entries)) {
|
||||||
|
ret = 1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
entry = ns_entries + i;
|
||||||
|
last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
|
||||||
|
while (entry <= last) {
|
||||||
|
if (proc_ns_fill_cache(filp, dirent, filldir,
|
||||||
|
task, *entry) < 0)
|
||||||
|
goto out;
|
||||||
|
filp->f_pos++;
|
||||||
|
entry++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 1;
|
||||||
|
out:
|
||||||
|
put_task_struct(task);
|
||||||
|
out_no_task:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * File operations of the "ns" directory itself: reads go to
 * generic_read_dir and directory listing is done by proc_ns_dir_readdir().
 * Not static because the DIR("ns", ...) entries in the pid tables refer to it.
 */
const struct file_operations proc_ns_dir_operations = {
|
||||||
|
.read = generic_read_dir,
|
||||||
|
.readdir = proc_ns_dir_readdir,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
|
||||||
|
struct dentry *dentry, struct nameidata *nd)
|
||||||
|
{
|
||||||
|
struct dentry *error;
|
||||||
|
struct task_struct *task = get_proc_task(dir);
|
||||||
|
const struct proc_ns_operations **entry, **last;
|
||||||
|
unsigned int len = dentry->d_name.len;
|
||||||
|
|
||||||
|
error = ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
|
if (!task)
|
||||||
|
goto out_no_task;
|
||||||
|
|
||||||
|
error = ERR_PTR(-EPERM);
|
||||||
|
if (!ptrace_may_access(task, PTRACE_MODE_READ))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
|
||||||
|
for (entry = ns_entries; entry <= last; entry++) {
|
||||||
|
if (strlen((*entry)->name) != len)
|
||||||
|
continue;
|
||||||
|
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (entry > last)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
error = proc_ns_instantiate(dir, dentry, task, *entry);
|
||||||
|
out:
|
||||||
|
put_task_struct(task);
|
||||||
|
out_no_task:
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Inode operations of the "ns" directory: name lookup is handled by
 * proc_ns_dir_lookup(); getattr and setattr use pid_getattr and
 * proc_setattr.  Not static because the DIR("ns", ...) entries in the pid
 * tables refer to it.
 */
const struct inode_operations proc_ns_dir_inode_operations = {
|
||||||
|
.lookup = proc_ns_dir_lookup,
|
||||||
|
.getattr = pid_getattr,
|
||||||
|
.setattr = proc_setattr,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct file *proc_ns_fget(int fd)
|
||||||
|
{
|
||||||
|
struct file *file;
|
||||||
|
|
||||||
|
file = fget(fd);
|
||||||
|
if (!file)
|
||||||
|
return ERR_PTR(-EBADF);
|
||||||
|
|
||||||
|
if (file->f_op != &ns_file_operations)
|
||||||
|
goto out_invalid;
|
||||||
|
|
||||||
|
return file;
|
||||||
|
|
||||||
|
out_invalid:
|
||||||
|
fput(file);
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
}
|
||||||
|
|
|
@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
|
||||||
extern struct file *get_mm_exe_file(struct mm_struct *mm);
|
extern struct file *get_mm_exe_file(struct mm_struct *mm);
|
||||||
extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
|
extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
|
||||||
|
|
||||||
|
extern struct file *proc_ns_fget(int fd);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
|
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
|
||||||
|
@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm,
|
||||||
struct mm_struct *newmm)
|
struct mm_struct *newmm)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
/*
 * Inline stub of proc_ns_fget(): ignores fd and always returns
 * ERR_PTR(-EINVAL).
 * NOTE(review): appears to sit in the #else branch of CONFIG_PROC_FS,
 * i.e. used when procfs is not built - confirm against the full header.
 */
static inline struct file *proc_ns_fget(int fd)
|
||||||
|
{
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* CONFIG_PROC_FS */
|
#endif /* CONFIG_PROC_FS */
|
||||||
|
|
||||||
#if !defined(CONFIG_PROC_KCORE)
|
#if !defined(CONFIG_PROC_KCORE)
|
||||||
|
@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
|
||||||
extern void kclist_add(struct kcore_list *, void *, size_t, int type);
|
extern void kclist_add(struct kcore_list *, void *, size_t, int type);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct nsproxy;
|
||||||
|
/*
 * Operations describing one namespace type that is exposed as a file in a
 * task's "ns" directory.
 */
struct proc_ns_operations {
|
||||||
|
/* File name of the entry; used for directory listing and lookup. */
const char *name;
|
||||||
|
/* NOTE(review): not used by the code in this patch; presumably identifies the namespace kind - confirm. */
int type;
|
||||||
|
/* Returns the task's namespace, or NULL; the result is stored in proc_inode->ns. */
void *(*get)(struct task_struct *task);
|
||||||
|
/* Called with proc_inode->ns when the proc inode is evicted. */
void (*put)(void *ns);
|
||||||
|
/* NOTE(review): not called in this patch; presumably switches nsproxy to ns - confirm. */
int (*install)(struct nsproxy *nsproxy, void *ns);
|
||||||
|
};
|
||||||
|
|
||||||
union proc_op {
|
union proc_op {
|
||||||
int (*proc_get_link)(struct inode *, struct path *);
|
int (*proc_get_link)(struct inode *, struct path *);
|
||||||
int (*proc_read)(struct task_struct *task, char *page);
|
int (*proc_read)(struct task_struct *task, char *page);
|
||||||
|
@ -268,6 +284,8 @@ struct proc_inode {
|
||||||
struct proc_dir_entry *pde;
|
struct proc_dir_entry *pde;
|
||||||
struct ctl_table_header *sysctl;
|
struct ctl_table_header *sysctl;
|
||||||
struct ctl_table *sysctl_entry;
|
struct ctl_table *sysctl_entry;
|
||||||
|
void *ns;
|
||||||
|
const struct proc_ns_operations *ns_ops;
|
||||||
struct inode vfs_inode;
|
struct inode vfs_inode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue