From 1bd6152ae23549032ef4aca0d3d350512f012f05 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Tue, 28 Feb 2017 21:13:12 +0100 Subject: [PATCH 1/6] seccomp: changing from whitelist to blacklist This patch changes the default behavior of the seccomp filter from whitelist to blacklist. By default now all system calls are allowed and a small black list of definitely forbidden ones was created. Signed-off-by: Eduardo Otubo --- include/sysemu/seccomp.h | 2 + qemu-seccomp.c | 260 +++++---------------------------------- vl.c | 1 - 3 files changed, 30 insertions(+), 233 deletions(-) diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index cfc06008cb..23b9c3c789 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -15,6 +15,8 @@ #ifndef QEMU_SECCOMP_H #define QEMU_SECCOMP_H +#define QEMU_SECCOMP_SET_DEFAULT (1 << 0) + #include int seccomp_start(void); diff --git a/qemu-seccomp.c b/qemu-seccomp.c index df75d9c471..f66613fc71 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -28,232 +28,33 @@ struct QemuSeccompSyscall { int32_t num; - uint8_t priority; + uint8_t set; }; -static const struct QemuSeccompSyscall seccomp_whitelist[] = { - { SCMP_SYS(timer_settime), 255 }, - { SCMP_SYS(timer_gettime), 254 }, - { SCMP_SYS(futex), 253 }, - { SCMP_SYS(select), 252 }, - { SCMP_SYS(recvfrom), 251 }, - { SCMP_SYS(sendto), 250 }, - { SCMP_SYS(socketcall), 250 }, - { SCMP_SYS(read), 249 }, - { SCMP_SYS(io_submit), 249 }, - { SCMP_SYS(brk), 248 }, - { SCMP_SYS(clone), 247 }, - { SCMP_SYS(mmap), 247 }, - { SCMP_SYS(mprotect), 246 }, - { SCMP_SYS(execve), 245 }, - { SCMP_SYS(open), 245 }, - { SCMP_SYS(ioctl), 245 }, - { SCMP_SYS(socket), 245 }, - { SCMP_SYS(setsockopt), 245 }, - { SCMP_SYS(recvmsg), 245 }, - { SCMP_SYS(sendmsg), 245 }, - { SCMP_SYS(accept), 245 }, - { SCMP_SYS(connect), 245 }, - { SCMP_SYS(socketpair), 245 }, - { SCMP_SYS(bind), 245 }, - { SCMP_SYS(listen), 245 }, - { SCMP_SYS(semget), 245 }, - { SCMP_SYS(ipc), 245 }, - { 
SCMP_SYS(gettimeofday), 245 }, - { SCMP_SYS(readlink), 245 }, - { SCMP_SYS(access), 245 }, - { SCMP_SYS(prctl), 245 }, - { SCMP_SYS(signalfd), 245 }, - { SCMP_SYS(getrlimit), 245 }, - { SCMP_SYS(getrusage), 245 }, - { SCMP_SYS(set_tid_address), 245 }, - { SCMP_SYS(statfs), 245 }, - { SCMP_SYS(unlink), 245 }, - { SCMP_SYS(wait4), 245 }, - { SCMP_SYS(fcntl64), 245 }, - { SCMP_SYS(fstat64), 245 }, - { SCMP_SYS(stat64), 245 }, - { SCMP_SYS(getgid32), 245 }, - { SCMP_SYS(getegid32), 245 }, - { SCMP_SYS(getuid32), 245 }, - { SCMP_SYS(geteuid32), 245 }, - { SCMP_SYS(sigreturn), 245 }, - { SCMP_SYS(_newselect), 245 }, - { SCMP_SYS(_llseek), 245 }, - { SCMP_SYS(mmap2), 245 }, - { SCMP_SYS(sigprocmask), 245 }, - { SCMP_SYS(sched_getparam), 245 }, - { SCMP_SYS(sched_getscheduler), 245 }, - { SCMP_SYS(fstat), 245 }, - { SCMP_SYS(clock_getres), 245 }, - { SCMP_SYS(sched_get_priority_min), 245 }, - { SCMP_SYS(sched_get_priority_max), 245 }, - { SCMP_SYS(stat), 245 }, - { SCMP_SYS(uname), 245 }, - { SCMP_SYS(eventfd2), 245 }, - { SCMP_SYS(io_getevents), 245 }, - { SCMP_SYS(dup), 245 }, - { SCMP_SYS(dup2), 245 }, - { SCMP_SYS(dup3), 245 }, - { SCMP_SYS(gettid), 245 }, - { SCMP_SYS(getgid), 245 }, - { SCMP_SYS(getegid), 245 }, - { SCMP_SYS(getuid), 245 }, - { SCMP_SYS(geteuid), 245 }, - { SCMP_SYS(timer_create), 245 }, - { SCMP_SYS(times), 245 }, - { SCMP_SYS(exit), 245 }, - { SCMP_SYS(clock_gettime), 245 }, - { SCMP_SYS(time), 245 }, - { SCMP_SYS(restart_syscall), 245 }, - { SCMP_SYS(pwrite64), 245 }, - { SCMP_SYS(nanosleep), 245 }, - { SCMP_SYS(chown), 245 }, - { SCMP_SYS(openat), 245 }, - { SCMP_SYS(getdents), 245 }, - { SCMP_SYS(timer_delete), 245 }, - { SCMP_SYS(exit_group), 245 }, - { SCMP_SYS(rt_sigreturn), 245 }, - { SCMP_SYS(sync), 245 }, - { SCMP_SYS(pread64), 245 }, - { SCMP_SYS(madvise), 245 }, - { SCMP_SYS(set_robust_list), 245 }, - { SCMP_SYS(lseek), 245 }, - { SCMP_SYS(pselect6), 245 }, - { SCMP_SYS(fork), 245 }, - { SCMP_SYS(rt_sigprocmask), 245 }, - { 
SCMP_SYS(write), 244 }, - { SCMP_SYS(fcntl), 243 }, - { SCMP_SYS(tgkill), 242 }, - { SCMP_SYS(kill), 242 }, - { SCMP_SYS(rt_sigaction), 242 }, - { SCMP_SYS(pipe2), 242 }, - { SCMP_SYS(munmap), 242 }, - { SCMP_SYS(mremap), 242 }, - { SCMP_SYS(fdatasync), 242 }, - { SCMP_SYS(close), 242 }, - { SCMP_SYS(rt_sigpending), 242 }, - { SCMP_SYS(rt_sigtimedwait), 242 }, - { SCMP_SYS(readv), 242 }, - { SCMP_SYS(writev), 242 }, - { SCMP_SYS(preadv), 242 }, - { SCMP_SYS(pwritev), 242 }, - { SCMP_SYS(setrlimit), 242 }, - { SCMP_SYS(ftruncate), 242 }, - { SCMP_SYS(lstat), 242 }, - { SCMP_SYS(pipe), 242 }, - { SCMP_SYS(umask), 242 }, - { SCMP_SYS(chdir), 242 }, - { SCMP_SYS(setitimer), 242 }, - { SCMP_SYS(setsid), 242 }, - { SCMP_SYS(poll), 242 }, - { SCMP_SYS(epoll_create), 242 }, - { SCMP_SYS(epoll_ctl), 242 }, - { SCMP_SYS(epoll_wait), 242 }, - { SCMP_SYS(waitpid), 242 }, - { SCMP_SYS(getsockname), 242 }, - { SCMP_SYS(getpeername), 242 }, - { SCMP_SYS(accept4), 242 }, - { SCMP_SYS(timerfd_settime), 242 }, - { SCMP_SYS(newfstatat), 241 }, - { SCMP_SYS(shutdown), 241 }, - { SCMP_SYS(getsockopt), 241 }, - { SCMP_SYS(semop), 241 }, - { SCMP_SYS(semtimedop), 241 }, - { SCMP_SYS(epoll_ctl_old), 241 }, - { SCMP_SYS(epoll_wait_old), 241 }, - { SCMP_SYS(epoll_pwait), 241 }, - { SCMP_SYS(epoll_create1), 241 }, - { SCMP_SYS(ppoll), 241 }, - { SCMP_SYS(creat), 241 }, - { SCMP_SYS(link), 241 }, - { SCMP_SYS(getpid), 241 }, - { SCMP_SYS(getppid), 241 }, - { SCMP_SYS(getpgrp), 241 }, - { SCMP_SYS(getpgid), 241 }, - { SCMP_SYS(getsid), 241 }, - { SCMP_SYS(getdents64), 241 }, - { SCMP_SYS(getresuid), 241 }, - { SCMP_SYS(getresgid), 241 }, - { SCMP_SYS(getgroups), 241 }, - { SCMP_SYS(getresuid32), 241 }, - { SCMP_SYS(getresgid32), 241 }, - { SCMP_SYS(getgroups32), 241 }, - { SCMP_SYS(signal), 241 }, - { SCMP_SYS(sigaction), 241 }, - { SCMP_SYS(sigsuspend), 241 }, - { SCMP_SYS(sigpending), 241 }, - { SCMP_SYS(truncate64), 241 }, - { SCMP_SYS(ftruncate64), 241 }, - { SCMP_SYS(fchown32), 241 }, - { 
SCMP_SYS(chown32), 241 }, - { SCMP_SYS(lchown32), 241 }, - { SCMP_SYS(statfs64), 241 }, - { SCMP_SYS(fstatfs64), 241 }, - { SCMP_SYS(fstatat64), 241 }, - { SCMP_SYS(lstat64), 241 }, - { SCMP_SYS(sendfile64), 241 }, - { SCMP_SYS(ugetrlimit), 241 }, - { SCMP_SYS(alarm), 241 }, - { SCMP_SYS(rt_sigsuspend), 241 }, - { SCMP_SYS(rt_sigqueueinfo), 241 }, - { SCMP_SYS(rt_tgsigqueueinfo), 241 }, - { SCMP_SYS(sigaltstack), 241 }, - { SCMP_SYS(signalfd4), 241 }, - { SCMP_SYS(truncate), 241 }, - { SCMP_SYS(fchown), 241 }, - { SCMP_SYS(lchown), 241 }, - { SCMP_SYS(fchownat), 241 }, - { SCMP_SYS(fstatfs), 241 }, - { SCMP_SYS(getitimer), 241 }, - { SCMP_SYS(syncfs), 241 }, - { SCMP_SYS(fsync), 241 }, - { SCMP_SYS(fchdir), 241 }, - { SCMP_SYS(msync), 241 }, - { SCMP_SYS(sched_setparam), 241 }, - { SCMP_SYS(sched_setscheduler), 241 }, - { SCMP_SYS(sched_yield), 241 }, - { SCMP_SYS(sched_rr_get_interval), 241 }, - { SCMP_SYS(sched_setaffinity), 241 }, - { SCMP_SYS(sched_getaffinity), 241 }, - { SCMP_SYS(readahead), 241 }, - { SCMP_SYS(timer_getoverrun), 241 }, - { SCMP_SYS(unlinkat), 241 }, - { SCMP_SYS(readlinkat), 241 }, - { SCMP_SYS(faccessat), 241 }, - { SCMP_SYS(get_robust_list), 241 }, - { SCMP_SYS(splice), 241 }, - { SCMP_SYS(vmsplice), 241 }, - { SCMP_SYS(getcpu), 241 }, - { SCMP_SYS(sendmmsg), 241 }, - { SCMP_SYS(recvmmsg), 241 }, - { SCMP_SYS(prlimit64), 241 }, - { SCMP_SYS(waitid), 241 }, - { SCMP_SYS(io_cancel), 241 }, - { SCMP_SYS(io_setup), 241 }, - { SCMP_SYS(io_destroy), 241 }, - { SCMP_SYS(arch_prctl), 240 }, - { SCMP_SYS(mkdir), 240 }, - { SCMP_SYS(fchmod), 240 }, - { SCMP_SYS(shmget), 240 }, - { SCMP_SYS(shmat), 240 }, - { SCMP_SYS(shmdt), 240 }, - { SCMP_SYS(timerfd_create), 240 }, - { SCMP_SYS(shmctl), 240 }, - { SCMP_SYS(mlockall), 240 }, - { SCMP_SYS(mlock), 240 }, - { SCMP_SYS(munlock), 240 }, - { SCMP_SYS(semctl), 240 }, - { SCMP_SYS(fallocate), 240 }, - { SCMP_SYS(fadvise64), 240 }, - { SCMP_SYS(inotify_init1), 240 }, - { SCMP_SYS(inotify_add_watch), 240 }, 
- { SCMP_SYS(mbind), 240 }, - { SCMP_SYS(memfd_create), 240 }, -#ifdef HAVE_CACHEFLUSH - { SCMP_SYS(cacheflush), 240 }, -#endif - { SCMP_SYS(sysinfo), 240 }, +static const struct QemuSeccompSyscall blacklist[] = { + /* default set of syscalls to blacklist */ + { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT }, + { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT }, }; int seccomp_start(void) @@ -262,19 +63,14 @@ int seccomp_start(void) unsigned int i = 0; scmp_filter_ctx ctx; - ctx = seccomp_init(SCMP_ACT_KILL); + ctx = seccomp_init(SCMP_ACT_ALLOW); if (ctx == NULL) { rc = -1; goto seccomp_return; } - for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) { - rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0); - if (rc < 0) { - goto seccomp_return; - } - rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num, - seccomp_whitelist[i].priority); + for (i = 0; i < ARRAY_SIZE(blacklist); i++) { + rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0); if (rc < 0) { 
goto seccomp_return; } diff --git a/vl.c b/vl.c index fb1f05b937..76e0b3a946 100644 --- a/vl.c +++ b/vl.c @@ -1032,7 +1032,6 @@ static int bt_parse(const char *opt) static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) { - /* FIXME: change this to true for 1.3 */ if (qemu_opt_get_bool(opts, "enable", false)) { #ifdef CONFIG_SECCOMP if (seccomp_start() < 0) { From 2b716fa6d63a183a42b789595c3944f53c0ded7c Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Wed, 1 Mar 2017 23:17:29 +0100 Subject: [PATCH 2/6] seccomp: add obsolete argument to command line This patch introduces the argument [,obsolete=allow] to the `-sandbox on' option. It allows QEMU to run safely on old systems that still rely on old system calls. Signed-off-by: Eduardo Otubo --- include/sysemu/seccomp.h | 3 ++- qemu-options.hx | 12 ++++++++++-- qemu-seccomp.c | 19 ++++++++++++++++++- vl.c | 24 +++++++++++++++++++++++- 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 23b9c3c789..215138a372 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -16,8 +16,9 @@ #define QEMU_SECCOMP_H #define QEMU_SECCOMP_SET_DEFAULT (1 << 0) +#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) #include -int seccomp_start(void); +int seccomp_start(uint32_t seccomp_opts); #endif diff --git a/qemu-options.hx b/qemu-options.hx index 9f6e2adfff..72150c6b84 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4017,13 +4017,21 @@ Old param mode (ARM only). 
ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ - "-sandbox Enable seccomp mode 2 system call filter (default 'off').\n", + "-sandbox on[,obsolete=allow|deny]\n" \ + " Enable seccomp mode 2 system call filter (default 'off').\n" \ + " use 'obsolete' to allow obsolete system calls that are provided\n" \ + " by the kernel, but typically no longer used by modern\n" \ + " C library implementations.\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg} +@item -sandbox @var{arg}[,obsolete=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. +@table @option +@item obsolete=@var{string} +Enable Obsolete system calls +@end table ETEXI DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig, diff --git a/qemu-seccomp.c b/qemu-seccomp.c index f66613fc71..8a5fbd2ff1 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -55,9 +55,22 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT }, { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT }, { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT }, + /* obsolete */ + { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, + { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, }; -int seccomp_start(void) + +int seccomp_start(uint32_t seccomp_opts) { int rc = 0; unsigned int i = 0; @@ -70,6 +83,10 @@ int seccomp_start(void) } for (i = 0; i < ARRAY_SIZE(blacklist); i++) { + if (!(seccomp_opts & blacklist[i].set)) { + continue; + } + 
rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0); if (rc < 0) { goto seccomp_return; diff --git a/vl.c b/vl.c index 76e0b3a946..57c5e93c1a 100644 --- a/vl.c +++ b/vl.c @@ -271,6 +271,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "enable", .type = QEMU_OPT_BOOL, }, + { + .name = "obsolete", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1034,7 +1038,25 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) { if (qemu_opt_get_bool(opts, "enable", false)) { #ifdef CONFIG_SECCOMP - if (seccomp_start() < 0) { + uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT + | QEMU_SECCOMP_SET_OBSOLETE; + const char *value = NULL; + + value = qemu_opt_get(opts, "obsolete"); + if (value) { + if (g_str_equal(value, "allow")) { + seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; + } else if (g_str_equal(value, "deny")) { + /* this is the default option, this if is here + * to provide a little bit of consistency for + * the command line */ + } else { + error_report("invalid argument for obsolete"); + return -1; + } + } + + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel"); return -1; From 73a1e647256b09734ce64ef7a6001a0db03f7106 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Mon, 13 Mar 2017 22:13:27 +0100 Subject: [PATCH 3/6] seccomp: add elevateprivileges argument to command line This patch introduces the new argument [,elevateprivileges=allow|deny|children] to the `-sandbox on'. It allows or denies Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls. The 'children' option will let forks and execves run unprivileged. 
Signed-off-by: Eduardo Otubo --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 12 +++++++++--- qemu-seccomp.c | 11 +++++++++++ vl.c | 27 +++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 215138a372..4a9e63c7cd 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -17,6 +17,7 @@ #define QEMU_SECCOMP_SET_DEFAULT (1 << 0) #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) +#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) #include diff --git a/qemu-options.hx b/qemu-options.hx index 72150c6b84..5c1b163fb5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4017,20 +4017,26 @@ Old param mode (ARM only). ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ - "-sandbox on[,obsolete=allow|deny]\n" \ + "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \ " Enable seccomp mode 2 system call filter (default 'off').\n" \ " use 'obsolete' to allow obsolete system calls that are provided\n" \ " by the kernel, but typically no longer used by modern\n" \ - " C library implementations.\n", + " C library implementations.\n" \ + " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \ + " its privileges by blacklisting all set*uid|gid system calls.\n" \ + " The value 'children' will deny set*uid|gid system calls for\n" \ + " main QEMU process but will allow forks and execves to run unprivileged\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg}[,obsolete=@var{string}] +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. 
@table @option @item obsolete=@var{string} Enable Obsolete system calls +@item elevateprivileges=@var{string} +Disable set*uid|gid system calls @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 8a5fbd2ff1..978d66bd28 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -67,6 +67,17 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, + /* privileged */ + { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, + { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, }; diff --git a/vl.c b/vl.c index 57c5e93c1a..d59b560276 100644 --- a/vl.c +++ b/vl.c @@ -29,6 +29,7 @@ #ifdef CONFIG_SECCOMP #include "sysemu/seccomp.h" +#include "sys/prctl.h" #endif #if defined(CONFIG_VDE) @@ -275,6 +276,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "obsolete", .type = QEMU_OPT_STRING, }, + { + .name = "elevateprivileges", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1056,6 +1061,28 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts, "elevateprivileges"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + } else if (g_str_equal(value, "children")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + + /* calling prctl directly because we're + * not sure if host has CAP_SYS_ADMIN set*/ + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { + error_report("failed to set no_new_privs " + 
"aborting"); + return -1; + } + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_report("invalid argument for elevateprivileges"); + return -1; + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel"); From 995a226f880b807e05240e8752d6ce65679775be Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Mon, 13 Mar 2017 22:16:01 +0100 Subject: [PATCH 4/6] seccomp: add spawn argument to command line This patch adds [,spawn=deny] argument to `-sandbox on' option. It blacklists fork and execve system calls, avoiding Qemu to spawn new threads or processes. Signed-off-by: Eduardo Otubo --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 9 +++++++-- qemu-seccomp.c | 4 ++++ vl.c | 16 ++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 4a9e63c7cd..3ab5fc4f61 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -18,6 +18,7 @@ #define QEMU_SECCOMP_SET_DEFAULT (1 << 0) #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) #define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) +#define QEMU_SECCOMP_SET_SPAWN (1 << 3) #include diff --git a/qemu-options.hx b/qemu-options.hx index 5c1b163fb5..2b04b9f170 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4018,6 +4018,7 @@ ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \ + " [,spawn=allow|deny]\n" \ " Enable seccomp mode 2 system call filter (default 'off').\n" \ " use 'obsolete' to allow obsolete system calls that are provided\n" \ " by the kernel, but typically no longer used by modern\n" \ @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \ " its privileges by blacklisting all set*uid|gid system calls.\n" \ " The value 'children' will deny set*uid|gid system calls for\n" \ - " 
main QEMU process but will allow forks and execves to run unprivileged\n", + " main QEMU process but will allow forks and execves to run unprivileged\n" \ + " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \ + " blacklisting *fork and execve\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}] +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. @@ -4037,6 +4040,8 @@ disable it. The default is 'off'. Enable Obsolete system calls @item elevateprivileges=@var{string} Disable set*uid|gid system calls +@item spawn=@var{string} +Disable *fork and execve @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 978d66bd28..f3878a5e29 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -78,6 +78,10 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, + /* spawn */ + { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN }, + { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN }, + { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN }, }; diff --git a/vl.c b/vl.c index d59b560276..984db0c399 100644 --- a/vl.c +++ b/vl.c @@ -280,6 +280,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "elevateprivileges", .type = QEMU_OPT_STRING, }, + { + .name = "spawn", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1083,6 +1087,18 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts, "spawn"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_report("invalid argument for spawn"); + 
return -1; + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel"); From 24f8cdc5722476e12d8e39d71f66311b4fa971c1 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Mon, 13 Mar 2017 22:18:51 +0100 Subject: [PATCH 5/6] seccomp: add resourcecontrol argument to command line This patch adds [,resourcecontrol=deny] to `-sandbox on' option. It blacklists all process affinity and scheduler priority system calls, so that the QEMU process can neither query nor change its CPU affinity or scheduling priority. Signed-off-by: Eduardo Otubo --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 9 ++++++--- qemu-seccomp.c | 11 +++++++++++ vl.c | 16 ++++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 3ab5fc4f61..e67c2dc840 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -19,6 +19,7 @@ #define QEMU_SECCOMP_SET_OBSOLETE (1 << 1) #define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2) #define QEMU_SECCOMP_SET_SPAWN (1 << 3) +#define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4) #include diff --git a/qemu-options.hx b/qemu-options.hx index 2b04b9f170..600614f6e5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4018,7 +4018,7 @@ ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \ - " [,spawn=allow|deny]\n" \ + " [,spawn=allow|deny][,resourcecontrol=allow|deny]\n" \ " Enable seccomp mode 2 system call filter (default 'off').\n" \ " use 'obsolete' to allow obsolete system calls that are provided\n" \ " by the kernel, but typically no longer used by modern\n" \ @@ -4028,10 +4028,11 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ " The value 'children' will deny set*uid|gid system calls for\n" \ " main QEMU process but will allow forks and execves to run unprivileged\n" \ " use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \ - " blacklisting *fork and execve\n", + " blacklisting *fork and 
execve\n" \ + " use 'resourcecontrol' to disable process affinity and schedular priority\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}] +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}][,resourcecontrol=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. @@ -4042,6 +4043,8 @@ Enable Obsolete system calls Disable set*uid|gid system calls @item spawn=@var{string} Disable *fork and execve +@item resourcecontrol=@var{string} +Disable process affinity and scheduler priority @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index f3878a5e29..b770a77d33 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -82,6 +82,17 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN }, { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN }, { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN }, + /* resource control */ + { SCMP_SYS(getpriority), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_getparam), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_getscheduler), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_getaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_get_priority_max), QEMU_SECCOMP_SET_RESOURCECTL }, + { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, }; diff --git a/vl.c b/vl.c index 984db0c399..9e62e92aea 100644 --- a/vl.c +++ b/vl.c @@ -284,6 +284,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "spawn", .type = QEMU_OPT_STRING, }, + { + .name = "resourcecontrol", + .type = QEMU_OPT_STRING, + }, { /* 
end of list */ } }, }; @@ -1099,6 +1103,18 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts, "resourcecontrol"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_report("invalid argument for resourcecontrol"); + return -1; + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel"); From c3883e1f935bf11ef0d2b8157b0022ace3d0e77d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 7 Sep 2017 16:53:16 +0800 Subject: [PATCH 6/6] buildsys: Move seccomp cflags/libs to per object Like many other libraries, libseccomp cflags and libs should only apply to the building of necessary objects. Do so in the usual way with the help of per object variables. Signed-off-by: Fam Zheng --- Makefile.objs | 2 ++ configure | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile.objs b/Makefile.objs index 24a4ea08b8..d9cf7ad791 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -70,6 +70,8 @@ common-obj-y += backends/ common-obj-y += chardev/ common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o +qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS) +qemu-seccomp.o-libs := $(SECCOMP_LIBS) common-obj-$(CONFIG_FDT) += device_tree.o diff --git a/configure b/configure index 9ee4559b54..94db2d103e 100755 --- a/configure +++ b/configure @@ -2035,8 +2035,8 @@ if test "$seccomp" != "no" ; then if test "$libseccomp_minver" != "" && $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then - libs_softmmu="$libs_softmmu $($pkg_config --libs libseccomp)" - QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags libseccomp)" + seccomp_cflags="$($pkg_config --cflags libseccomp)" + seccomp_libs="$($pkg_config --libs libseccomp)" seccomp="yes" else if test "$seccomp" = "yes" ; then @@ -5829,6 +5829,8 @@ fi if test "$seccomp" = "yes"; then echo 
"CONFIG_SECCOMP=y" >> $config_host_mak + echo "SECCOMP_CFLAGS=$seccomp_cflags" >> $config_host_mak + echo "SECCOMP_LIBS=$seccomp_libs" >> $config_host_mak fi # XXX: suppress that