binfmt_misc for-linus on 20160727

First off, the intention of this pull is to declare that I'll be the
 binfmt_misc maintainer (mainly on the grounds of you touched it last,
 it's yours).  There's no MAINTAINERS entry, but get_maintainer.pl
 will now finger me.
 
 The update itself is to allow architecture emulation containers to
 function such that the emulation binary can be housed outside the
 container itself.  The container and fs parts both have acks from
 relevant experts.
 
 The change is user visible. To use the new feature you have to add an
 F option to your binfmt_misc configuration.  However, the existing
 tools, like systemd-binfmt, work with this without modification.
 
 Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABAgAGBQJXmW5WAAoJEAVr7HOZEZN4K1QQAKgx5MPkoTU3QKKgzaMBBnWH
 pSMdoN8BhVSwENE/YJGMEyLaRa0zmrHVtFcnH2CHQE/GoXNnaej9l3LtBIwJ9K2P
 nrv4Rlhla5BxjhDkg8IWf3iG7iKDDHGZoyuVPx4dwxHFK1yCNH4SDeHaJCKK5qsC
 aLltMJMRnjsgJvBUC01dCUlp8srkWywHcyk9M9ic/Fr5vJ6JzdUr6/Md29eHmAXe
 NgCGwkVgSDiKfnTGZjIMsAtpwPsJ6RqBWQTcTdM/mkIpqwrMiVuaVOHqu2cmMU2i
 j4cQE6rQpy3sedDKZbHBQMOfYJNT4QYgYGuvyIWce9EPkIpOWHzQ7kYPJ/A/jZCE
 lN37TeyodbUDCnyuKk1YOrTBjJ0qdtc4FXJ1aq5s92GkgDs+LtxMdGzKDf3yUGiU
 W0TsE/wVy4rmEaeiyut33661ud4vivP4WklWK1Y+bklQcIcKQKKWnOCnDFDR5vuz
 CbL5ykVcJb3F28YhGYHvGLeXl0YcR3SwngWnnPCDPtBCeSirohuKb1SEe21C/RaB
 rm9S27d+LcKCXJyCqKh8BGsqroZ0iSZQI0Lbdqt+BCuuBw2rQhGStDeccDDUp9jg
 MOwpQwabjEseK0n75+hZ2SFS5Q+TQ6pccMlUJIDiBKWmRly8NpKlSKKWvBX8obIe
 0Gq6hgX1IwQnXI1O8QMC
 =6OjN
 -----END PGP SIGNATURE-----

Merge tag 'binfmt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/binfmt_misc

Pull binfmt_misc update from James Bottomley:
 "This update is to allow architecture emulation containers to function
  such that the emulation binary can be housed outside the container
  itself.  The container and fs parts both have acks from relevant
  experts.

  To use the new feature you have to add an F option to your binfmt_misc
  configuration"

From the docs:
 "The usual behaviour of binfmt_misc is to spawn the binary lazily when
  the misc format file is invoked.  However, this doesn't work very well
  in the face of mount namespaces and changeroots, so the F mode opens
  the binary as soon as the emulation is installed and uses the opened
  image to spawn the emulator, meaning it is always available once
  installed, regardless of how the environment changes"

* tag 'binfmt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/binfmt_misc:
  binfmt_misc: add F option description to documentation
  binfmt_misc: add persistent opened binary handler for containers
  fs: add filp_clone_open API
This commit is contained in:
Linus Torvalds 2016-08-07 10:13:14 -04:00
commit e9d488c311
4 changed files with 67 additions and 2 deletions

View File

@ -66,6 +66,13 @@ Here is what the fields mean:
This feature should be used with care as the interpreter
will run with root permissions when a setuid binary owned by root
is run with binfmt_misc.
'F' - fix binary. The usual behaviour of binfmt_misc is to spawn the
binary lazily when the misc format file is invoked. However,
this doesn't work very well in the face of mount namespaces and
changeroots, so the F mode opens the binary as soon as the
emulation is installed and uses the opened image to spawn the
emulator, meaning it is always available once installed,
regardless of how the environment changes.
There are some restrictions:

View File

@ -26,6 +26,8 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "internal.h"
#ifdef DEBUG #ifdef DEBUG
# define USE_DEBUG 1 # define USE_DEBUG 1
#else #else
@ -43,6 +45,7 @@ enum {Enabled, Magic};
#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) #define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
#define MISC_FMT_OPEN_BINARY (1 << 30) #define MISC_FMT_OPEN_BINARY (1 << 30)
#define MISC_FMT_CREDENTIALS (1 << 29) #define MISC_FMT_CREDENTIALS (1 << 29)
#define MISC_FMT_OPEN_FILE (1 << 28)
typedef struct { typedef struct {
struct list_head list; struct list_head list;
@ -54,6 +57,7 @@ typedef struct {
char *interpreter; /* filename of interpreter */ char *interpreter; /* filename of interpreter */
char *name; char *name;
struct dentry *dentry; struct dentry *dentry;
struct file *interp_file;
} Node; } Node;
static DEFINE_RWLOCK(entries_lock); static DEFINE_RWLOCK(entries_lock);
@ -201,7 +205,13 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (retval < 0) if (retval < 0)
goto error; goto error;
interp_file = open_exec(iname); if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
interp_file = filp_clone_open(fmt->interp_file);
if (!IS_ERR(interp_file))
deny_write_access(interp_file);
} else {
interp_file = open_exec(iname);
}
retval = PTR_ERR(interp_file); retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file)) if (IS_ERR(interp_file))
goto error; goto error;
@ -285,6 +295,11 @@ static char *check_special_flags(char *sfs, Node *e)
e->flags |= (MISC_FMT_CREDENTIALS | e->flags |= (MISC_FMT_CREDENTIALS |
MISC_FMT_OPEN_BINARY); MISC_FMT_OPEN_BINARY);
break; break;
case 'F':
pr_debug("register: flag: F: open interpreter file now\n");
p++;
e->flags |= MISC_FMT_OPEN_FILE;
break;
default: default:
cont = 0; cont = 0;
} }
@ -543,6 +558,8 @@ static void entry_status(Node *e, char *page)
*dp++ = 'O'; *dp++ = 'O';
if (e->flags & MISC_FMT_CREDENTIALS) if (e->flags & MISC_FMT_CREDENTIALS)
*dp++ = 'C'; *dp++ = 'C';
if (e->flags & MISC_FMT_OPEN_FILE)
*dp++ = 'F';
*dp++ = '\n'; *dp++ = '\n';
if (!test_bit(Magic, &e->flags)) { if (!test_bit(Magic, &e->flags)) {
@ -590,6 +607,11 @@ static void kill_node(Node *e)
} }
write_unlock(&entries_lock); write_unlock(&entries_lock);
if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
filp_close(e->interp_file, NULL);
e->interp_file = NULL;
}
if (dentry) { if (dentry) {
drop_nlink(d_inode(dentry)); drop_nlink(d_inode(dentry));
d_drop(dentry); d_drop(dentry);
@ -696,6 +718,21 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
goto out2; goto out2;
} }
if (e->flags & MISC_FMT_OPEN_FILE) {
struct file *f;
f = open_exec(e->interpreter);
if (IS_ERR(f)) {
err = PTR_ERR(f);
pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
simple_release_fs(&bm_mnt, &entry_count);
iput(inode);
inode = NULL;
goto out2;
}
e->interp_file = f;
}
e->dentry = dget(dentry); e->dentry = dget(dentry);
inode->i_private = e; inode->i_private = e;
inode->i_fop = &bm_entry_operations; inode->i_fop = &bm_entry_operations;
@ -713,7 +750,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
if (err) { if (err) {
kfree(e); kfree(e);
return -EINVAL; return err;
} }
return count; return count;
} }

View File

@ -111,6 +111,7 @@ extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag); struct file_handle __user *ufh, int open_flag);
extern int open_check_o_direct(struct file *f); extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *); extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
/* /*
* inode.c * inode.c

View File

@ -998,6 +998,26 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
} }
EXPORT_SYMBOL(file_open_root); EXPORT_SYMBOL(file_open_root);
/*
 * filp_clone_open - open a new struct file on the path of an existing one
 * @oldfile: open file whose f_path, f_flags and f_cred are reused
 *
 * Obtains an empty struct file, copies the open flags from @oldfile and
 * opens it with vfs_open() on @oldfile's path using @oldfile's credentials.
 *
 * Returns the newly opened file on success.  On failure returns an ERR_PTR:
 * either the one handed back by get_empty_filp() or the vfs_open() error.
 */
struct file *filp_clone_open(struct file *oldfile)
{
	struct file *clone = get_empty_filp();
	int err;

	if (IS_ERR(clone))
		return clone;

	clone->f_flags = oldfile->f_flags;
	err = vfs_open(&oldfile->f_path, clone, oldfile->f_cred);
	if (!err)
		return clone;

	/* vfs_open() failed: hand the struct file obtained above to put_filp(). */
	put_filp(clone);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(filp_clone_open);
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{ {
struct open_flags op; struct open_flags op;