linux/security/capability.c

1138 lines
25 KiB
C
Raw Normal View History

/*
* Capabilities Linux Security Module
*
* This is the default security module in case no other module is loaded.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/security.h>
static int cap_syslog(int type)
{
return 0;
}
static int cap_quotactl(int cmds, int type, int id, struct super_block *sb)
{
return 0;
}
static int cap_quota_on(struct dentry *dentry)
{
return 0;
}
static int cap_bprm_check_security(struct linux_binprm *bprm)
{
return 0;
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static void cap_bprm_committing_creds(struct linux_binprm *bprm)
{
}
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
static void cap_bprm_committed_creds(struct linux_binprm *bprm)
{
}
static int cap_sb_alloc_security(struct super_block *sb)
{
return 0;
}
static void cap_sb_free_security(struct super_block *sb)
{
}
static int cap_sb_copy_data(char *orig, char *copy)
{
return 0;
}
static int cap_sb_remount(struct super_block *sb, void *data)
{
return 0;
}
static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
return 0;
}
static int cap_sb_show_options(struct seq_file *m, struct super_block *sb)
{
return 0;
}
static int cap_sb_statfs(struct dentry *dentry)
{
return 0;
}
static int cap_sb_mount(const char *dev_name, struct path *path,
const char *type, unsigned long flags, void *data)
{
return 0;
}
static int cap_sb_umount(struct vfsmount *mnt, int flags)
{
return 0;
}
static int cap_sb_pivotroot(struct path *old_path, struct path *new_path)
{
return 0;
}
static int cap_sb_set_mnt_opts(struct super_block *sb,
struct security_mnt_opts *opts,
unsigned long kern_flags,
unsigned long *set_kern_flags)
{
if (unlikely(opts->num_mnt_opts))
return -EOPNOTSUPP;
return 0;
}
selinux: make security_sb_clone_mnt_opts return an error on context mismatch I had the following problem reported a while back. If you mount the same filesystem twice using NFSv4 with different contexts, then the second context= option is ignored. For instance: # mount server:/export /mnt/test1 # mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 # ls -dZ /mnt/test1 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1 # ls -dZ /mnt/test2 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2 When we call into SELinux to set the context of a "cloned" superblock, it will currently just bail out when it notices that we're reusing an existing superblock. Since the existing superblock is already set up and presumably in use, we can't go overwriting its context with the one from the "original" sb. Because of this, the second context= option in this case cannot take effect. This patch fixes this by turning security_sb_clone_mnt_opts into an int return operation. When it finds that the "new" superblock that it has been handed is already set up, it checks to see whether the contexts on the old superblock match it. If it does, then it will just return success, otherwise it'll return -EBUSY and emit a printk to tell the admin why the second mount failed. Note that this patch may cause casualties. The NFSv4 code relies on being able to walk down to an export from the pseudoroot. If you mount filesystems that are nested within one another with different contexts, then this patch will make those mounts fail in new and "exciting" ways. For instance, suppose that /export is a separate filesystem on the server: # mount server:/ /mnt/test1 # mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 mount.nfs: an incorrect mount option was specified ...with the printk in the ring buffer. Because we *might* eventually walk down to /mnt/test1/export, the mount is denied due to this patch. The second mount needs the pseudoroot superblock, but that's already present with the wrong context. OTOH, if we mount these in the reverse order, then both mounts work, because the pseudoroot superblock created when mounting /export is discarded once that mount is done. If we then however try to walk into that directory, the automount fails for the similar reasons: # cd /mnt/test1/scratch/ -bash: cd: /mnt/test1/scratch: Device or resource busy The story I've gotten from the SELinux folks that I've talked to is that this is desirable behavior. In SELinux-land, mounting the same data under different contexts is wrong -- there can be only one. Cc: Steve Dickson <steved@redhat.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Jeff Layton <jlayton@redhat.com> Acked-by: Eric Paris <eparis@redhat.com> Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 20:14:24 +08:00
static int cap_sb_clone_mnt_opts(const struct super_block *oldsb,
struct super_block *newsb)
{
selinux: make security_sb_clone_mnt_opts return an error on context mismatch I had the following problem reported a while back. If you mount the same filesystem twice using NFSv4 with different contexts, then the second context= option is ignored. For instance: # mount server:/export /mnt/test1 # mount server:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 # ls -dZ /mnt/test1 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test1 # ls -dZ /mnt/test2 drwxrwxrwt. root root system_u:object_r:nfs_t:s0 /mnt/test2 When we call into SELinux to set the context of a "cloned" superblock, it will currently just bail out when it notices that we're reusing an existing superblock. Since the existing superblock is already set up and presumably in use, we can't go overwriting its context with the one from the "original" sb. Because of this, the second context= option in this case cannot take effect. This patch fixes this by turning security_sb_clone_mnt_opts into an int return operation. When it finds that the "new" superblock that it has been handed is already set up, it checks to see whether the contexts on the old superblock match it. If it does, then it will just return success, otherwise it'll return -EBUSY and emit a printk to tell the admin why the second mount failed. Note that this patch may cause casualties. The NFSv4 code relies on being able to walk down to an export from the pseudoroot. If you mount filesystems that are nested within one another with different contexts, then this patch will make those mounts fail in new and "exciting" ways. For instance, suppose that /export is a separate filesystem on the server: # mount server:/ /mnt/test1 # mount salusa:/export /mnt/test2 -o context=system_u:object_r:tmp_t:s0 mount.nfs: an incorrect mount option was specified ...with the printk in the ring buffer. Because we *might* eventually walk down to /mnt/test1/export, the mount is denied due to this patch. The second mount needs the pseudoroot superblock, but that's already present with the wrong context. OTOH, if we mount these in the reverse order, then both mounts work, because the pseudoroot superblock created when mounting /export is discarded once that mount is done. If we then however try to walk into that directory, the automount fails for the similar reasons: # cd /mnt/test1/scratch/ -bash: cd: /mnt/test1/scratch: Device or resource busy The story I've gotten from the SELinux folks that I've talked to is that this is desirable behavior. In SELinux-land, mounting the same data under different contexts is wrong -- there can be only one. Cc: Steve Dickson <steved@redhat.com> Cc: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Jeff Layton <jlayton@redhat.com> Acked-by: Eric Paris <eparis@redhat.com> Signed-off-by: James Morris <james.l.morris@oracle.com>
2013-04-01 20:14:24 +08:00
return 0;
}
static int cap_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
{
return 0;
}
static int cap_dentry_init_security(struct dentry *dentry, int mode,
struct qstr *name, void **ctx,
u32 *ctxlen)
{
return -EOPNOTSUPP;
}
static int cap_inode_alloc_security(struct inode *inode)
{
return 0;
}
static void cap_inode_free_security(struct inode *inode)
{
}
static int cap_inode_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr, const char **name,
void **value, size_t *len)
{
return -EOPNOTSUPP;
}
static int cap_inode_create(struct inode *inode, struct dentry *dentry,
umode_t mask)
{
return 0;
}
static int cap_inode_link(struct dentry *old_dentry, struct inode *inode,
struct dentry *new_dentry)
{
return 0;
}
static int cap_inode_unlink(struct inode *inode, struct dentry *dentry)
{
return 0;
}
static int cap_inode_symlink(struct inode *inode, struct dentry *dentry,
const char *name)
{
return 0;
}
static int cap_inode_mkdir(struct inode *inode, struct dentry *dentry,
umode_t mask)
{
return 0;
}
static int cap_inode_rmdir(struct inode *inode, struct dentry *dentry)
{
return 0;
}
static int cap_inode_mknod(struct inode *inode, struct dentry *dentry,
umode_t mode, dev_t dev)
{
return 0;
}
static int cap_inode_rename(struct inode *old_inode, struct dentry *old_dentry,
struct inode *new_inode, struct dentry *new_dentry)
{
return 0;
}
static int cap_inode_readlink(struct dentry *dentry)
{
return 0;
}
static int cap_inode_follow_link(struct dentry *dentry,
struct nameidata *nameidata)
{
return 0;
}
static int cap_inode_permission(struct inode *inode, int mask)
{
return 0;
}
static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr)
{
return 0;
}
static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
{
return 0;
}
static void cap_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
}
static int cap_inode_getxattr(struct dentry *dentry, const char *name)
{
return 0;
}
static int cap_inode_listxattr(struct dentry *dentry)
{
return 0;
}
static int cap_inode_getsecurity(const struct inode *inode, const char *name,
void **buffer, bool alloc)
{
return -EOPNOTSUPP;
}
static int cap_inode_setsecurity(struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
return -EOPNOTSUPP;
}
static int cap_inode_listsecurity(struct inode *inode, char *buffer,
size_t buffer_size)
{
return 0;
}
static void cap_inode_getsecid(const struct inode *inode, u32 *secid)
{
*secid = 0;
}
#ifdef CONFIG_SECURITY_PATH
static int cap_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
unsigned int dev)
{
return 0;
}
static int cap_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode)
{
return 0;
}
static int cap_path_rmdir(struct path *dir, struct dentry *dentry)
{
return 0;
}
static int cap_path_unlink(struct path *dir, struct dentry *dentry)
{
return 0;
}
static int cap_path_symlink(struct path *dir, struct dentry *dentry,
const char *old_name)
{
return 0;
}
static int cap_path_link(struct dentry *old_dentry, struct path *new_dir,
struct dentry *new_dentry)
{
return 0;
}
static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
struct path *new_path, struct dentry *new_dentry)
{
return 0;
}
static int cap_path_truncate(struct path *path)
{
return 0;
}
static int cap_path_chmod(struct path *path, umode_t mode)
{
return 0;
}
static int cap_path_chown(struct path *path, kuid_t uid, kgid_t gid)
{
return 0;
}
static int cap_path_chroot(struct path *root)
{
return 0;
}
#endif
static int cap_file_permission(struct file *file, int mask)
{
return 0;
}
static int cap_file_alloc_security(struct file *file)
{
return 0;
}
static void cap_file_free_security(struct file *file)
{
}
static int cap_file_ioctl(struct file *file, unsigned int command,
unsigned long arg)
{
return 0;
}
static int cap_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
unsigned long prot)
{
return 0;
}
static int cap_file_lock(struct file *file, unsigned int cmd)
{
return 0;
}
static int cap_file_fcntl(struct file *file, unsigned int cmd,
unsigned long arg)
{
return 0;
}
static int cap_file_set_fowner(struct file *file)
{
return 0;
}
static int cap_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig)
{
return 0;
}
static int cap_file_receive(struct file *file)
{
return 0;
}
static int cap_file_open(struct file *file, const struct cred *cred)
{
return 0;
}
static int cap_task_create(unsigned long clone_flags)
{
return 0;
}
static void cap_task_free(struct task_struct *task)
{
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp)
{
return 0;
}
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static void cap_cred_free(struct cred *cred)
{
}
static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp)
{
return 0;
}
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
static void cap_cred_transfer(struct cred *new, const struct cred *old)
{
}
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
static int cap_kernel_act_as(struct cred *new, u32 secid)
{
return 0;
}
static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
{
return 0;
}
static int cap_kernel_fw_from_file(struct file *file, char *buf, size_t size)
{
return 0;
}
static int cap_kernel_module_request(char *kmod_name)
{
return 0;
}
static int cap_kernel_module_from_file(struct file *file)
{
return 0;
}
static int cap_task_setpgid(struct task_struct *p, pid_t pgid)
{
return 0;
}
static int cap_task_getpgid(struct task_struct *p)
{
return 0;
}
static int cap_task_getsid(struct task_struct *p)
{
return 0;
}
static void cap_task_getsecid(struct task_struct *p, u32 *secid)
{
*secid = 0;
}
static int cap_task_getioprio(struct task_struct *p)
{
return 0;
}
static int cap_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim)
{
return 0;
}
static int cap_task_getscheduler(struct task_struct *p)
{
return 0;
}
static int cap_task_movememory(struct task_struct *p)
{
return 0;
}
static int cap_task_wait(struct task_struct *p)
{
return 0;
}
static int cap_task_kill(struct task_struct *p, struct siginfo *info,
int sig, u32 secid)
{
return 0;
}
static void cap_task_to_inode(struct task_struct *p, struct inode *inode)
{
}
static int cap_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
{
return 0;
}
static void cap_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
{
*secid = 0;
}
static int cap_msg_msg_alloc_security(struct msg_msg *msg)
{
return 0;
}
static void cap_msg_msg_free_security(struct msg_msg *msg)
{
}
static int cap_msg_queue_alloc_security(struct msg_queue *msq)
{
return 0;
}
static void cap_msg_queue_free_security(struct msg_queue *msq)
{
}
static int cap_msg_queue_associate(struct msg_queue *msq, int msqflg)
{
return 0;
}
static int cap_msg_queue_msgctl(struct msg_queue *msq, int cmd)
{
return 0;
}
static int cap_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
int msgflg)
{
return 0;
}
static int cap_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
struct task_struct *target, long type, int mode)
{
return 0;
}
static int cap_shm_alloc_security(struct shmid_kernel *shp)
{
return 0;
}
static void cap_shm_free_security(struct shmid_kernel *shp)
{
}
static int cap_shm_associate(struct shmid_kernel *shp, int shmflg)
{
return 0;
}
static int cap_shm_shmctl(struct shmid_kernel *shp, int cmd)
{
return 0;
}
static int cap_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr,
int shmflg)
{
return 0;
}
static int cap_sem_alloc_security(struct sem_array *sma)
{
return 0;
}
static void cap_sem_free_security(struct sem_array *sma)
{
}
static int cap_sem_associate(struct sem_array *sma, int semflg)
{
return 0;
}
static int cap_sem_semctl(struct sem_array *sma, int cmd)
{
return 0;
}
static int cap_sem_semop(struct sem_array *sma, struct sembuf *sops,
unsigned nsops, int alter)
{
return 0;
}
#ifdef CONFIG_SECURITY_NETWORK
static int cap_unix_stream_connect(struct sock *sock, struct sock *other,
struct sock *newsk)
{
return 0;
}
static int cap_unix_may_send(struct socket *sock, struct socket *other)
{
return 0;
}
static int cap_socket_create(int family, int type, int protocol, int kern)
{
return 0;
}
static int cap_socket_post_create(struct socket *sock, int family, int type,
int protocol, int kern)
{
return 0;
}
static int cap_socket_bind(struct socket *sock, struct sockaddr *address,
int addrlen)
{
return 0;
}
static int cap_socket_connect(struct socket *sock, struct sockaddr *address,
int addrlen)
{
return 0;
}
static int cap_socket_listen(struct socket *sock, int backlog)
{
return 0;
}
static int cap_socket_accept(struct socket *sock, struct socket *newsock)
{
return 0;
}
static int cap_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
{
return 0;
}
static int cap_socket_recvmsg(struct socket *sock, struct msghdr *msg,
int size, int flags)
{
return 0;
}
static int cap_socket_getsockname(struct socket *sock)
{
return 0;
}
static int cap_socket_getpeername(struct socket *sock)
{
return 0;
}
static int cap_socket_setsockopt(struct socket *sock, int level, int optname)
{
return 0;
}
static int cap_socket_getsockopt(struct socket *sock, int level, int optname)
{
return 0;
}
static int cap_socket_shutdown(struct socket *sock, int how)
{
return 0;
}
static int cap_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
return 0;
}
static int cap_socket_getpeersec_stream(struct socket *sock,
char __user *optval,
int __user *optlen, unsigned len)
{
return -ENOPROTOOPT;
}
static int cap_socket_getpeersec_dgram(struct socket *sock,
struct sk_buff *skb, u32 *secid)
{
return -ENOPROTOOPT;
}
static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
{
return 0;
}
static void cap_sk_free_security(struct sock *sk)
{
}
static void cap_sk_clone_security(const struct sock *sk, struct sock *newsk)
{
}
static void cap_sk_getsecid(struct sock *sk, u32 *secid)
{
}
static void cap_sock_graft(struct sock *sk, struct socket *parent)
{
}
static int cap_inet_conn_request(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
return 0;
}
static void cap_inet_csk_clone(struct sock *newsk,
const struct request_sock *req)
{
}
static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb)
{
}
static int cap_secmark_relabel_packet(u32 secid)
{
return 0;
}
static void cap_secmark_refcount_inc(void)
{
}
static void cap_secmark_refcount_dec(void)
{
}
static void cap_req_classify_flow(const struct request_sock *req,
struct flowi *fl)
{
}
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
static int cap_tun_dev_alloc_security(void **security)
{
return 0;
}
static void cap_tun_dev_free_security(void *security)
{
}
static int cap_tun_dev_create(void)
{
return 0;
}
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
static int cap_tun_dev_attach_queue(void *security)
{
return 0;
}
static int cap_tun_dev_attach(struct sock *sk, void *security)
{
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
return 0;
}
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
static int cap_tun_dev_open(void *security)
{
return 0;
}
static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static int cap_xfrm_policy_alloc_security(struct xfrm_sec_ctx **ctxp,
selinux: add gfp argument to security_xfrm_policy_alloc and fix callers security_xfrm_policy_alloc can be called in atomic context so the allocation should be done with GFP_ATOMIC. Add an argument to let the callers choose the appropriate way. In order to do so a gfp argument needs to be added to the method xfrm_policy_alloc_security in struct security_operations and to the internal function selinux_xfrm_alloc_user. After that switch to GFP_ATOMIC in the atomic callers and leave GFP_KERNEL as before for the rest. The path that needed the gfp argument addition is: security_xfrm_policy_alloc -> security_ops.xfrm_policy_alloc_security -> all users of xfrm_policy_alloc_security (e.g. selinux_xfrm_policy_alloc) -> selinux_xfrm_alloc_user (here the allocation used to be GFP_KERNEL only) Now adding a gfp argument to selinux_xfrm_alloc_user requires us to also add it to security_context_to_sid which is used inside and prior to this patch did only GFP_KERNEL allocation. So add gfp argument to security_context_to_sid and adjust all of its callers as well. CC: Paul Moore <paul@paul-moore.com> CC: Dave Jones <davej@redhat.com> CC: Steffen Klassert <steffen.klassert@secunet.com> CC: Fan Du <fan.du@windriver.com> CC: David S. Miller <davem@davemloft.net> CC: LSM list <linux-security-module@vger.kernel.org> CC: SELinux list <selinux@tycho.nsa.gov> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Acked-by: Paul Moore <paul@paul-moore.com> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2014-03-07 19:44:19 +08:00
struct xfrm_user_sec_ctx *sec_ctx,
gfp_t gfp)
{
return 0;
}
static int cap_xfrm_policy_clone_security(struct xfrm_sec_ctx *old_ctx,
struct xfrm_sec_ctx **new_ctxp)
{
return 0;
}
static void cap_xfrm_policy_free_security(struct xfrm_sec_ctx *ctx)
{
}
static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx)
{
return 0;
}
static int cap_xfrm_state_alloc(struct xfrm_state *x,
struct xfrm_user_sec_ctx *sec_ctx)
{
return 0;
}
static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x,
struct xfrm_sec_ctx *polsec,
u32 secid)
{
return 0;
}
static void cap_xfrm_state_free_security(struct xfrm_state *x)
{
}
static int cap_xfrm_state_delete_security(struct xfrm_state *x)
{
return 0;
}
static int cap_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 sk_sid, u8 dir)
{
return 0;
}
static int cap_xfrm_state_pol_flow_match(struct xfrm_state *x,
struct xfrm_policy *xp,
const struct flowi *fl)
{
return 1;
}
static int cap_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall)
{
return 0;
}
#endif /* CONFIG_SECURITY_NETWORK_XFRM */
static void cap_d_instantiate(struct dentry *dentry, struct inode *inode)
{
}
static int cap_getprocattr(struct task_struct *p, char *name, char **value)
{
return -EINVAL;
}
static int cap_setprocattr(struct task_struct *p, char *name, void *value,
size_t size)
{
return -EINVAL;
}
static int cap_ismaclabel(const char *name)
{
return 0;
}
static int cap_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
{
return -EOPNOTSUPP;
}
static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid)
{
*secid = 0;
return 0;
}
static void cap_release_secctx(char *secdata, u32 seclen)
{
}
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
{
return 0;
}
static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
return 0;
}
static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
{
return -EOPNOTSUPP;
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
}
#ifdef CONFIG_KEYS
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static int cap_key_alloc(struct key *key, const struct cred *cred,
unsigned long flags)
{
return 0;
}
static void cap_key_free(struct key *key)
{
}
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
static int cap_key_permission(key_ref_t key_ref, const struct cred *cred,
unsigned perm)
{
return 0;
}
static int cap_key_getsecurity(struct key *key, char **_buffer)
{
*_buffer = NULL;
return 0;
}
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
static int cap_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule)
{
return 0;
}
static int cap_audit_rule_known(struct audit_krule *krule)
{
return 0;
}
static int cap_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule,
struct audit_context *actx)
{
return 0;
}
static void cap_audit_rule_free(void *lsmrule)
{
}
#endif /* CONFIG_AUDIT */
#define set_to_cap_if_null(ops, function) \
do { \
if (!ops->function) { \
ops->function = cap_##function; \
pr_debug("Had to override the " #function \
" security operation with the default.\n");\
} \
} while (0)
void __init security_fixup_ops(struct security_operations *ops)
{
set_to_cap_if_null(ops, ptrace_access_check);
security: Fix setting of PF_SUPERPRIV by __capable() Fix the setting of PF_SUPERPRIV by __capable() as it could corrupt the flags the target process if that is not the current process and it is trying to change its own flags in a different way at the same time. __capable() is using neither atomic ops nor locking to protect t->flags. This patch removes __capable() and introduces has_capability() that doesn't set PF_SUPERPRIV on the process being queried. This patch further splits security_ptrace() in two: (1) security_ptrace_may_access(). This passes judgement on whether one process may access another only (PTRACE_MODE_ATTACH for ptrace() and PTRACE_MODE_READ for /proc), and takes a pointer to the child process. current is the parent. (2) security_ptrace_traceme(). This passes judgement on PTRACE_TRACEME only, and takes only a pointer to the parent process. current is the child. In Smack and commoncap, this uses has_capability() to determine whether the parent will be permitted to use PTRACE_ATTACH if normal checks fail. This does not set PF_SUPERPRIV. Two of the instances of __capable() actually only act on current, and so have been changed to calls to capable(). Of the places that were using __capable(): (1) The OOM killer calls __capable() thrice when weighing the killability of a process. All of these now use has_capability(). (2) cap_ptrace() and smack_ptrace() were using __capable() to check to see whether the parent was allowed to trace any process. As mentioned above, these have been split. For PTRACE_ATTACH and /proc, capable() is now used, and for PTRACE_TRACEME, has_capability() is used. (3) cap_safe_nice() only ever saw current, so now uses capable(). (4) smack_setprocattr() rejected accesses to tasks other than current just after calling __capable(), so the order of these two tests have been switched and capable() is used instead. (5) In smack_file_send_sigiotask(), we need to allow privileged processes to receive SIGIO on files they're manipulating. (6) In smack_task_wait(), we let a process wait for a privileged process, whether or not the process doing the waiting is privileged. I've tested this with the LTP SELinux and syscalls testscripts. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Serge Hallyn <serue@us.ibm.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Andrew G. Morgan <morgan@kernel.org> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: James Morris <jmorris@namei.org>
2008-08-14 18:37:28 +08:00
set_to_cap_if_null(ops, ptrace_traceme);
set_to_cap_if_null(ops, capget);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
set_to_cap_if_null(ops, capset);
set_to_cap_if_null(ops, capable);
set_to_cap_if_null(ops, quotactl);
set_to_cap_if_null(ops, quota_on);
set_to_cap_if_null(ops, syslog);
set_to_cap_if_null(ops, settime);
set_to_cap_if_null(ops, vm_enough_memory);
CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:24 +08:00
set_to_cap_if_null(ops, bprm_set_creds);
set_to_cap_if_null(ops, bprm_committing_creds);
set_to_cap_if_null(ops, bprm_committed_creds);
set_to_cap_if_null(ops, bprm_check_security);
set_to_cap_if_null(ops, bprm_secureexec);
set_to_cap_if_null(ops, sb_alloc_security);
set_to_cap_if_null(ops, sb_free_security);
set_to_cap_if_null(ops, sb_copy_data);
set_to_cap_if_null(ops, sb_remount);
set_to_cap_if_null(ops, sb_kern_mount);
set_to_cap_if_null(ops, sb_show_options);
set_to_cap_if_null(ops, sb_statfs);
set_to_cap_if_null(ops, sb_mount);
set_to_cap_if_null(ops, sb_umount);
set_to_cap_if_null(ops, sb_pivotroot);
set_to_cap_if_null(ops, sb_set_mnt_opts);
set_to_cap_if_null(ops, sb_clone_mnt_opts);
set_to_cap_if_null(ops, sb_parse_opts_str);
set_to_cap_if_null(ops, dentry_init_security);
set_to_cap_if_null(ops, inode_alloc_security);
set_to_cap_if_null(ops, inode_free_security);
set_to_cap_if_null(ops, inode_init_security);
set_to_cap_if_null(ops, inode_create);
set_to_cap_if_null(ops, inode_link);
set_to_cap_if_null(ops, inode_unlink);
set_to_cap_if_null(ops, inode_symlink);
set_to_cap_if_null(ops, inode_mkdir);
set_to_cap_if_null(ops, inode_rmdir);
set_to_cap_if_null(ops, inode_mknod);
set_to_cap_if_null(ops, inode_rename);
set_to_cap_if_null(ops, inode_readlink);
set_to_cap_if_null(ops, inode_follow_link);
set_to_cap_if_null(ops, inode_permission);
set_to_cap_if_null(ops, inode_setattr);
set_to_cap_if_null(ops, inode_getattr);
set_to_cap_if_null(ops, inode_setxattr);
set_to_cap_if_null(ops, inode_post_setxattr);
set_to_cap_if_null(ops, inode_getxattr);
set_to_cap_if_null(ops, inode_listxattr);
set_to_cap_if_null(ops, inode_removexattr);
set_to_cap_if_null(ops, inode_need_killpriv);
set_to_cap_if_null(ops, inode_killpriv);
set_to_cap_if_null(ops, inode_getsecurity);
set_to_cap_if_null(ops, inode_setsecurity);
set_to_cap_if_null(ops, inode_listsecurity);
set_to_cap_if_null(ops, inode_getsecid);
#ifdef CONFIG_SECURITY_PATH
set_to_cap_if_null(ops, path_mknod);
set_to_cap_if_null(ops, path_mkdir);
set_to_cap_if_null(ops, path_rmdir);
set_to_cap_if_null(ops, path_unlink);
set_to_cap_if_null(ops, path_symlink);
set_to_cap_if_null(ops, path_link);
set_to_cap_if_null(ops, path_rename);
set_to_cap_if_null(ops, path_truncate);
set_to_cap_if_null(ops, path_chmod);
set_to_cap_if_null(ops, path_chown);
set_to_cap_if_null(ops, path_chroot);
#endif
set_to_cap_if_null(ops, file_permission);
set_to_cap_if_null(ops, file_alloc_security);
set_to_cap_if_null(ops, file_free_security);
set_to_cap_if_null(ops, file_ioctl);
set_to_cap_if_null(ops, mmap_addr);
set_to_cap_if_null(ops, mmap_file);
set_to_cap_if_null(ops, file_mprotect);
set_to_cap_if_null(ops, file_lock);
set_to_cap_if_null(ops, file_fcntl);
set_to_cap_if_null(ops, file_set_fowner);
set_to_cap_if_null(ops, file_send_sigiotask);
set_to_cap_if_null(ops, file_receive);
set_to_cap_if_null(ops, file_open);
set_to_cap_if_null(ops, task_create);
set_to_cap_if_null(ops, task_free);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
set_to_cap_if_null(ops, cred_alloc_blank);
set_to_cap_if_null(ops, cred_free);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
set_to_cap_if_null(ops, cred_prepare);
KEYS: Add a keyctl to install a process's session keyring on its parent [try #6] Add a keyctl to install a process's session keyring onto its parent. This replaces the parent's session keyring. Because the COW credential code does not permit one process to change another process's credentials directly, the change is deferred until userspace next starts executing again. Normally this will be after a wait*() syscall. To support this, three new security hooks have been provided: cred_alloc_blank() to allocate unset security creds, cred_transfer() to fill in the blank security creds and key_session_to_parent() - which asks the LSM if the process may replace its parent's session keyring. The replacement may only happen if the process has the same ownership details as its parent, and the process has LINK permission on the session keyring, and the session keyring is owned by the process, and the LSM permits it. Note that this requires alteration to each architecture's notify_resume path. This has been done for all arches barring blackfin, m68k* and xtensa, all of which need assembly alteration to support TIF_NOTIFY_RESUME. This allows the replacement to be performed at the point the parent process resumes userspace execution. This allows the userspace AFS pioctl emulation to fully emulate newpag() and the VIOCSETTOK and VIOCSETTOK2 pioctls, all of which require the ability to alter the parent process's PAG membership. However, since kAFS doesn't use PAGs per se, but rather dumps the keys into the session keyring, the session keyring of the parent must be replaced if, for example, VIOCSETTOK is passed the newpag flag. This can be tested with the following program: #include <stdio.h> #include <stdlib.h> #include <keyutils.h> #define KEYCTL_SESSION_TO_PARENT 18 #define OSERROR(X, S) do { if ((long)(X) == -1) { perror(S); exit(1); } } while(0) int main(int argc, char **argv) { key_serial_t keyring, key; long ret; keyring = keyctl_join_session_keyring(argv[1]); OSERROR(keyring, "keyctl_join_session_keyring"); key = add_key("user", "a", "b", 1, keyring); OSERROR(key, "add_key"); ret = keyctl(KEYCTL_SESSION_TO_PARENT); OSERROR(ret, "KEYCTL_SESSION_TO_PARENT"); return 0; } Compiled and linked with -lkeyutils, you should see something like: [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 355907932 --alswrv 4043 -1 \_ keyring: _uid.4043 [dhowells@andromeda ~]$ /tmp/newpag [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: _ses 1055658746 --alswrv 4043 4043 \_ user: a [dhowells@andromeda ~]$ /tmp/newpag hello [dhowells@andromeda ~]$ keyctl show Session Keyring -3 --alswrv 4043 4043 keyring: hello 340417692 --alswrv 4043 4043 \_ user: a Where the test program creates a new session keyring, sticks a user key named 'a' into it and then installs it on its parent. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-02 16:14:21 +08:00
set_to_cap_if_null(ops, cred_transfer);
CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes] Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:28 +08:00
set_to_cap_if_null(ops, kernel_act_as);
set_to_cap_if_null(ops, kernel_create_files_as);
set_to_cap_if_null(ops, kernel_fw_from_file);
set_to_cap_if_null(ops, kernel_module_request);
set_to_cap_if_null(ops, kernel_module_from_file);
CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 07:39:23 +08:00
set_to_cap_if_null(ops, task_fix_setuid);
set_to_cap_if_null(ops, task_setpgid);
set_to_cap_if_null(ops, task_getpgid);
set_to_cap_if_null(ops, task_getsid);
set_to_cap_if_null(ops, task_getsecid);
set_to_cap_if_null(ops, task_setnice);
set_to_cap_if_null(ops, task_setioprio);
set_to_cap_if_null(ops, task_getioprio);
set_to_cap_if_null(ops, task_setrlimit);
set_to_cap_if_null(ops, task_setscheduler);
set_to_cap_if_null(ops, task_getscheduler);
set_to_cap_if_null(ops, task_movememory);
set_to_cap_if_null(ops, task_wait);
set_to_cap_if_null(ops, task_kill);
set_to_cap_if_null(ops, task_prctl);
set_to_cap_if_null(ops, task_to_inode);
set_to_cap_if_null(ops, ipc_permission);
set_to_cap_if_null(ops, ipc_getsecid);
set_to_cap_if_null(ops, msg_msg_alloc_security);
set_to_cap_if_null(ops, msg_msg_free_security);
set_to_cap_if_null(ops, msg_queue_alloc_security);
set_to_cap_if_null(ops, msg_queue_free_security);
set_to_cap_if_null(ops, msg_queue_associate);
set_to_cap_if_null(ops, msg_queue_msgctl);
set_to_cap_if_null(ops, msg_queue_msgsnd);
set_to_cap_if_null(ops, msg_queue_msgrcv);
set_to_cap_if_null(ops, shm_alloc_security);
set_to_cap_if_null(ops, shm_free_security);
set_to_cap_if_null(ops, shm_associate);
set_to_cap_if_null(ops, shm_shmctl);
set_to_cap_if_null(ops, shm_shmat);
set_to_cap_if_null(ops, sem_alloc_security);
set_to_cap_if_null(ops, sem_free_security);
set_to_cap_if_null(ops, sem_associate);
set_to_cap_if_null(ops, sem_semctl);
set_to_cap_if_null(ops, sem_semop);
set_to_cap_if_null(ops, netlink_send);
set_to_cap_if_null(ops, d_instantiate);
set_to_cap_if_null(ops, getprocattr);
set_to_cap_if_null(ops, setprocattr);
set_to_cap_if_null(ops, ismaclabel);
set_to_cap_if_null(ops, secid_to_secctx);
set_to_cap_if_null(ops, secctx_to_secid);
set_to_cap_if_null(ops, release_secctx);
LSM/SELinux: inode_{get,set,notify}secctx hooks to access LSM security context information. This patch introduces three new hooks. The inode_getsecctx hook is used to get all relevant information from an LSM about an inode. The inode_setsecctx is used to set both the in-core and on-disk state for the inode based on a context derived from inode_getsecctx.The final hook inode_notifysecctx will notify the LSM of a change for the in-core state of the inode in question. These hooks are for use in the labeled NFS code and addresses concerns of how to set security on an inode in a multi-xattr LSM. For historical reasons Stephen Smalley's explanation of the reason for these hooks is pasted below. Quote Stephen Smalley inode_setsecctx: Change the security context of an inode. Updates the in core security context managed by the security module and invokes the fs code as needed (via __vfs_setxattr_noperm) to update any backing xattrs that represent the context. Example usage: NFS server invokes this hook to change the security context in its incore inode and on the backing file system to a value provided by the client on a SETATTR operation. inode_notifysecctx: Notify the security module of what the security context of an inode should be. Initializes the incore security context managed by the security module for this inode. Example usage: NFS client invokes this hook to initialize the security context in its incore inode to the value provided by the server for the file when the server returned the file's attributes to the client. Signed-off-by: David P. Quigley <dpquigl@tycho.nsa.gov> Acked-by: Serge Hallyn <serue@us.ibm.com> Signed-off-by: James Morris <jmorris@namei.org>
2009-09-04 02:25:57 +08:00
set_to_cap_if_null(ops, inode_notifysecctx);
set_to_cap_if_null(ops, inode_setsecctx);
set_to_cap_if_null(ops, inode_getsecctx);
#ifdef CONFIG_SECURITY_NETWORK
set_to_cap_if_null(ops, unix_stream_connect);
set_to_cap_if_null(ops, unix_may_send);
set_to_cap_if_null(ops, socket_create);
set_to_cap_if_null(ops, socket_post_create);
set_to_cap_if_null(ops, socket_bind);
set_to_cap_if_null(ops, socket_connect);
set_to_cap_if_null(ops, socket_listen);
set_to_cap_if_null(ops, socket_accept);
set_to_cap_if_null(ops, socket_sendmsg);
set_to_cap_if_null(ops, socket_recvmsg);
set_to_cap_if_null(ops, socket_getsockname);
set_to_cap_if_null(ops, socket_getpeername);
set_to_cap_if_null(ops, socket_setsockopt);
set_to_cap_if_null(ops, socket_getsockopt);
set_to_cap_if_null(ops, socket_shutdown);
set_to_cap_if_null(ops, socket_sock_rcv_skb);
set_to_cap_if_null(ops, socket_getpeersec_stream);
set_to_cap_if_null(ops, socket_getpeersec_dgram);
set_to_cap_if_null(ops, sk_alloc_security);
set_to_cap_if_null(ops, sk_free_security);
set_to_cap_if_null(ops, sk_clone_security);
set_to_cap_if_null(ops, sk_getsecid);
set_to_cap_if_null(ops, sock_graft);
set_to_cap_if_null(ops, inet_conn_request);
set_to_cap_if_null(ops, inet_csk_clone);
set_to_cap_if_null(ops, inet_conn_established);
set_to_cap_if_null(ops, secmark_relabel_packet);
set_to_cap_if_null(ops, secmark_refcount_inc);
set_to_cap_if_null(ops, secmark_refcount_dec);
set_to_cap_if_null(ops, req_classify_flow);
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
set_to_cap_if_null(ops, tun_dev_alloc_security);
set_to_cap_if_null(ops, tun_dev_free_security);
set_to_cap_if_null(ops, tun_dev_create);
tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore <pmoore@redhat.com> Acked-by: Eric Paris <eparis@parisplace.org> Tested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-01-14 15:12:19 +08:00
set_to_cap_if_null(ops, tun_dev_open);
set_to_cap_if_null(ops, tun_dev_attach_queue);
set_to_cap_if_null(ops, tun_dev_attach);
set_to_cap_if_null(ops, skb_owned_by);
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
set_to_cap_if_null(ops, xfrm_policy_alloc_security);
set_to_cap_if_null(ops, xfrm_policy_clone_security);
set_to_cap_if_null(ops, xfrm_policy_free_security);
set_to_cap_if_null(ops, xfrm_policy_delete_security);
set_to_cap_if_null(ops, xfrm_state_alloc);
set_to_cap_if_null(ops, xfrm_state_alloc_acquire);
set_to_cap_if_null(ops, xfrm_state_free_security);
set_to_cap_if_null(ops, xfrm_state_delete_security);
set_to_cap_if_null(ops, xfrm_policy_lookup);
set_to_cap_if_null(ops, xfrm_state_pol_flow_match);
set_to_cap_if_null(ops, xfrm_decode_session);
#endif /* CONFIG_SECURITY_NETWORK_XFRM */
#ifdef CONFIG_KEYS
set_to_cap_if_null(ops, key_alloc);
set_to_cap_if_null(ops, key_free);
set_to_cap_if_null(ops, key_permission);
set_to_cap_if_null(ops, key_getsecurity);
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
set_to_cap_if_null(ops, audit_rule_init);
set_to_cap_if_null(ops, audit_rule_known);
set_to_cap_if_null(ops, audit_rule_match);
set_to_cap_if_null(ops, audit_rule_free);
#endif
}