2019-03-13 17:32:51 +08:00
|
|
|
/*
 * Seccomp sandboxing for virtiofsd
 *
 * Copyright (C) 2019 Red Hat, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
|
|
|
|
|
|
|
|
#include "qemu/osdep.h"
|
2020-02-05 16:45:39 +08:00
|
|
|
#include "passthrough_seccomp.h"
|
2019-03-13 17:32:51 +08:00
|
|
|
#include "fuse_i.h"
|
|
|
|
#include "fuse_log.h"
|
|
|
|
#include <errno.h>
|
|
|
|
#include <glib.h>
|
|
|
|
#include <seccomp.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
/*
 * Bodge for libseccomp 2.4.2 which broke ppoll.
 *
 * When the library headers provide __SNR_brk but no __SNR_ppoll, supply
 * __SNR_ppoll ourselves: use the kernel's __NR_ppoll when it is defined,
 * otherwise fall back to __PNR_ppoll.
 */
#if defined(__SNR_brk) && !defined(__SNR_ppoll)
#if defined(__NR_ppoll)
#define __SNR_ppoll __NR_ppoll
#else
#define __SNR_ppoll __PNR_ppoll
#endif
#endif
|
|
|
|
|
|
|
|
static const int syscall_whitelist[] = {
|
|
|
|
/* TODO ireg sem*() syscalls */
|
|
|
|
SCMP_SYS(brk),
|
|
|
|
SCMP_SYS(capget), /* For CAP_FSETID */
|
|
|
|
SCMP_SYS(capset),
|
|
|
|
SCMP_SYS(clock_gettime),
|
|
|
|
SCMP_SYS(clone),
|
|
|
|
#ifdef __NR_clone3
|
|
|
|
SCMP_SYS(clone3),
|
|
|
|
#endif
|
|
|
|
SCMP_SYS(close),
|
|
|
|
SCMP_SYS(copy_file_range),
|
|
|
|
SCMP_SYS(dup),
|
|
|
|
SCMP_SYS(eventfd2),
|
|
|
|
SCMP_SYS(exit),
|
|
|
|
SCMP_SYS(exit_group),
|
|
|
|
SCMP_SYS(fallocate),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(fchdir),
|
virtiofsd: Whitelist fchmod
lo_setattr() invokes fchmod() in a rarely used code path, so it should
be whitelisted or virtiofsd will crash with EBADSYS.
Said code path can be triggered for example as follows:
On the host, in the shared directory, create a file with the sticky bit
set and a security.capability xattr:
(1) # touch foo
(2) # chmod u+s foo
(3) # setcap '' foo
Then in the guest let some process truncate that file after it has
dropped all of its capabilities (at least CAP_FSETID):
int main(int argc, char *argv[])
{
capng_setpid(getpid());
capng_clear(CAPNG_SELECT_BOTH);
capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, 0);
capng_apply(CAPNG_SELECT_BOTH);
ftruncate(open(argv[1], O_RDWR), 0);
}
This will cause the guest kernel to drop the sticky bit (i.e. perform a
mode change) as part of the truncate (where FATTR_FH is set), and that
will cause virtiofsd to invoke fchmod() instead of fchmodat().
(A similar configuration exists further below with futimens() vs.
utimensat(), but the former is not a syscall but just a wrapper for the
latter, so no further whitelisting is required.)
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1842667
Reported-by: Qian Cai <caiqian@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200608093111.14942-1-mreitz@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2020-06-08 17:31:11 +08:00
|
|
|
SCMP_SYS(fchmod),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(fchmodat),
|
|
|
|
SCMP_SYS(fchownat),
|
|
|
|
SCMP_SYS(fcntl),
|
|
|
|
SCMP_SYS(fdatasync),
|
|
|
|
SCMP_SYS(fgetxattr),
|
|
|
|
SCMP_SYS(flistxattr),
|
|
|
|
SCMP_SYS(flock),
|
|
|
|
SCMP_SYS(fremovexattr),
|
|
|
|
SCMP_SYS(fsetxattr),
|
|
|
|
SCMP_SYS(fstat),
|
|
|
|
SCMP_SYS(fstatfs),
|
|
|
|
SCMP_SYS(fsync),
|
|
|
|
SCMP_SYS(ftruncate),
|
|
|
|
SCMP_SYS(futex),
|
|
|
|
SCMP_SYS(getdents),
|
|
|
|
SCMP_SYS(getdents64),
|
|
|
|
SCMP_SYS(getegid),
|
|
|
|
SCMP_SYS(geteuid),
|
|
|
|
SCMP_SYS(getpid),
|
|
|
|
SCMP_SYS(gettid),
|
|
|
|
SCMP_SYS(gettimeofday),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(getxattr),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(linkat),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(listxattr),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(lseek),
|
|
|
|
SCMP_SYS(madvise),
|
|
|
|
SCMP_SYS(mkdirat),
|
|
|
|
SCMP_SYS(mknodat),
|
|
|
|
SCMP_SYS(mmap),
|
|
|
|
SCMP_SYS(mprotect),
|
|
|
|
SCMP_SYS(mremap),
|
|
|
|
SCMP_SYS(munmap),
|
|
|
|
SCMP_SYS(newfstatat),
|
|
|
|
SCMP_SYS(open),
|
|
|
|
SCMP_SYS(openat),
|
|
|
|
SCMP_SYS(ppoll),
|
|
|
|
SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
|
|
|
|
SCMP_SYS(preadv),
|
|
|
|
SCMP_SYS(pread64),
|
|
|
|
SCMP_SYS(pwritev),
|
|
|
|
SCMP_SYS(pwrite64),
|
|
|
|
SCMP_SYS(read),
|
|
|
|
SCMP_SYS(readlinkat),
|
|
|
|
SCMP_SYS(recvmsg),
|
|
|
|
SCMP_SYS(renameat),
|
|
|
|
SCMP_SYS(renameat2),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(removexattr),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(rt_sigaction),
|
|
|
|
SCMP_SYS(rt_sigprocmask),
|
|
|
|
SCMP_SYS(rt_sigreturn),
|
2020-09-22 05:32:16 +08:00
|
|
|
SCMP_SYS(sched_getattr),
|
|
|
|
SCMP_SYS(sched_setattr),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(sendmsg),
|
|
|
|
SCMP_SYS(setresgid),
|
|
|
|
SCMP_SYS(setresuid),
|
|
|
|
#ifdef __NR_setresgid32
|
|
|
|
SCMP_SYS(setresgid32),
|
|
|
|
#endif
|
|
|
|
#ifdef __NR_setresuid32
|
|
|
|
SCMP_SYS(setresuid32),
|
|
|
|
#endif
|
|
|
|
SCMP_SYS(set_robust_list),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(setxattr),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(symlinkat),
|
|
|
|
SCMP_SYS(time), /* Rarely needed, except on static builds */
|
|
|
|
SCMP_SYS(tgkill),
|
|
|
|
SCMP_SYS(unlinkat),
|
2020-02-27 13:59:27 +08:00
|
|
|
SCMP_SYS(unshare),
|
2019-03-13 17:32:51 +08:00
|
|
|
SCMP_SYS(utimensat),
|
|
|
|
SCMP_SYS(write),
|
|
|
|
SCMP_SYS(writev),
|
|
|
|
};
|
|
|
|
|
2019-06-26 17:25:54 +08:00
|
|
|
/* Syscalls used when --syslog is enabled */
|
|
|
|
static const int syscall_whitelist_syslog[] = {
|
2020-11-02 23:07:50 +08:00
|
|
|
SCMP_SYS(send),
|
2019-06-26 17:25:54 +08:00
|
|
|
SCMP_SYS(sendto),
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Install an SCMP_ACT_ALLOW rule in @ctx for each of the @len syscall
 * numbers in @syscalls.
 *
 * A failure to add any rule is fatal: the failing syscall number is logged
 * and the process exits with status 1.
 */
static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
{
    size_t idx;

    for (idx = 0; idx != len; ++idx) {
        const int nr = syscalls[idx];

        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, nr, 0) == 0) {
            continue;
        }

        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", nr);
        exit(1);
    }
}
|
|
|
|
|
|
|
|
void setup_seccomp(bool enable_syslog)
|
|
|
|
{
|
|
|
|
scmp_filter_ctx ctx;
|
|
|
|
|
2019-03-13 17:32:51 +08:00
|
|
|
#ifdef SCMP_ACT_KILL_PROCESS
|
|
|
|
ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
|
|
|
|
/* Handle a newer libseccomp but an older kernel */
|
|
|
|
if (!ctx && errno == EOPNOTSUPP) {
|
|
|
|
ctx = seccomp_init(SCMP_ACT_TRAP);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
ctx = seccomp_init(SCMP_ACT_TRAP);
|
|
|
|
#endif
|
|
|
|
if (!ctx) {
|
|
|
|
fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2019-06-26 17:25:54 +08:00
|
|
|
add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
|
|
|
|
if (enable_syslog) {
|
|
|
|
add_whitelist(ctx, syscall_whitelist_syslog,
|
|
|
|
G_N_ELEMENTS(syscall_whitelist_syslog));
|
2019-03-13 17:32:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* libvhost-user calls this for post-copy migration, we don't need it */
|
|
|
|
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
|
|
|
|
SCMP_SYS(userfaultfd), 0) != 0) {
|
|
|
|
fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (seccomp_load(ctx) < 0) {
|
|
|
|
fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
seccomp_release(ctx);
|
|
|
|
}
|