diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 08c206a863e1..ba848b761cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -758,6 +758,8 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 diff --git a/net/core/filter.c b/net/core/filter.c index c6a37fe0285b..9dad3e7e2e10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3149,6 +3149,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } +static const struct bpf_func_proto * +sock_filter_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + /* inet and inet6 sockets are created in a process + * context so there is always a valid uid/gid + */ + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + default: + return bpf_base_func_proto(func_id); + } +} + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -3455,6 +3469,10 @@ static bool sock_filter_is_valid_access(int off, int size, switch (off) { case offsetof(struct bpf_sock, bound_dev_if): break; + case offsetof(struct bpf_sock, mark): + break; + case offsetof(struct bpf_sock, priority): + break; default: return false; } @@ -3958,6 +3976,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, offsetof(struct sock, sk_bound_dev_if)); break; + case offsetof(struct bpf_sock, mark): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + break; + + case offsetof(struct bpf_sock, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + break; + case offsetof(struct bpf_sock, family): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); @@ -4207,7 +4247,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = { }; const struct bpf_verifier_ops cg_sock_prog_ops = { - .get_func_proto = bpf_base_func_proto, + .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c index 533dd11a6baa..05dcdf8a4baa 100644 --- a/samples/bpf/sock_flags_kern.c +++ b/samples/bpf/sock_flags_kern.c @@ -9,8 +9,13 @@ SEC("cgroup/sock1") int bpf_prog1(struct bpf_sock *sk) { char fmt[] = "socket: family %d type %d protocol %d\n"; + char fmt2[] = "socket: uid %u gid %u\n"; + __u64 gid_uid = bpf_get_current_uid_gid(); + __u32 uid = gid_uid & 0xffffffff; + __u32 gid = gid_uid >> 32; bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid); /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets * ie., make ping6 fail diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index c3cfb23e23b5..e79594dd629b 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -19,68 +19,271 @@ #include #include #include +#include #include #include "libbpf.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static int prog_load(int idx) +static int prog_load(__u32 idx, __u32 mark, __u32 prio) { - struct bpf_insn prog[] = { + /* save pointer to context */ + struct bpf_insn prog_start[] = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_3, idx), - BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), + }; + struct bpf_insn prog_end[] = { BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + /* set sk_bound_dev_if on socket */ + struct bpf_insn prog_dev[] = { + BPF_MOV64_IMM(BPF_REG_3, idx), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), + }; + + /* set mark on socket */ + struct bpf_insn prog_mark[] = { + /* get uid of process */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark, else use the uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, mark), + + /* set the mark on the new socket */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)), + }; + + /* set priority on socket */ + struct bpf_insn prog_prio[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_3, prio), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), + }; + + struct bpf_insn *prog; + size_t insns_cnt; + void *p; + int ret; + + insns_cnt = sizeof(prog_start) + sizeof(prog_end); + if (idx) + insns_cnt += sizeof(prog_dev); + + if (mark) + insns_cnt += sizeof(prog_mark); + + if (prio) + insns_cnt += sizeof(prog_prio); + + p = prog = malloc(insns_cnt); + if (!prog) { + fprintf(stderr, "Failed to allocate memory for instructions\n"); + return EXIT_FAILURE; + } + + memcpy(p, prog_start, sizeof(prog_start)); + p += sizeof(prog_start); + + if (idx) { + memcpy(p, prog_dev, sizeof(prog_dev)); + p += sizeof(prog_dev); + } + + if (mark) { + memcpy(p, prog_mark, sizeof(prog_mark)); + p += sizeof(prog_mark); + } + + if (prio) { + memcpy(p, prog_prio, sizeof(prog_prio)); + p += sizeof(prog_prio); + } + + memcpy(p, prog_end, sizeof(prog_end)); + p += sizeof(prog_end); + + insns_cnt /= sizeof(struct bpf_insn); + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); + + free(prog); + + return ret; +} + +static int get_bind_to_device(int sd, char *name, size_t len) +{ + socklen_t optlen = len; + int rc; + + name[0] = '\0'; + rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); + if (rc < 0) + perror("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static unsigned int get_somark(int sd) +{ + unsigned int mark = 0; + socklen_t optlen = sizeof(mark); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen); + if (rc < 0) + perror("getsockopt(SO_MARK)"); + + return mark; +} + +static unsigned int get_priority(int sd) +{ + unsigned int prio = 0; + socklen_t optlen = sizeof(prio); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen); + if (rc < 0) + perror("getsockopt(SO_PRIORITY)"); + + return prio; +} + +static int show_sockopts(int family) +{ + unsigned int mark, prio; + char name[16]; + int sd; + + sd = socket(family, SOCK_DGRAM, 17); + if (sd < 0) { + perror("socket"); + return 1; + } + + if (get_bind_to_device(sd, name, sizeof(name)) < 0) + return 1; + + mark = get_somark(sd); + prio = get_priority(sd); + + close(sd); + + printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio); + + return 0; } static int usage(const char *argv0) { - printf("Usage: %s cg-path device-index\n", argv0); + printf("Usage:\n"); + printf(" Attach a program\n"); + printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0); + printf("\n"); + printf(" Detach a program\n"); + printf(" %s -d cg-path\n", argv0); + printf("\n"); + printf(" Show inherited socket settings (mark, priority, and device)\n"); + printf(" %s [-6]\n", argv0); return EXIT_FAILURE; } int main(int argc, char **argv) { + __u32 idx = 0, mark = 0, prio = 0; + const char *cgrp_path = NULL; int cg_fd, prog_fd, ret; - unsigned int idx; + int family = PF_INET; + int do_attach = 1; + int rc; - if (argc < 2) - return usage(argv[0]); + while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) { + switch (rc) { + case 'd': + do_attach = 0; + break; + case 'b': + idx = if_nametoindex(optarg); + if (!idx) { + idx = strtoumax(optarg, NULL, 0); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + } + break; + case 'm': + mark = strtoumax(optarg, NULL, 0); + break; + case 'p': + prio = strtoumax(optarg, NULL, 0); + break; + case '6': + family = PF_INET6; + break; + default: + return usage(argv[0]); + } + } - idx = if_nametoindex(argv[2]); - if (!idx) { - printf("Invalid device name\n"); + if (optind == argc) + return show_sockopts(family); + + cgrp_path = argv[optind]; + if (!cgrp_path) { + fprintf(stderr, "cgroup path not given\n"); return EXIT_FAILURE; } - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (do_attach && !idx && !mark && !prio) { + fprintf(stderr, + "One of device, mark or priority must be given\n"); + return EXIT_FAILURE; + } + + cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY); if (cg_fd < 0) { printf("Failed to open cgroup path: '%s'\n", strerror(errno)); return EXIT_FAILURE; } - prog_fd = prog_load(idx); - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + if (do_attach) { + prog_fd = prog_load(idx, mark, prio); + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + printf("Output from kernel verifier:\n%s\n-------\n", + bpf_log_buf); + return EXIT_FAILURE; + } - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + ret = bpf_prog_attach(prog_fd, cg_fd, + BPF_CGROUP_INET_SOCK_CREATE, 0); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + } else { + ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to detach prog from cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } } + close(cg_fd); return EXIT_SUCCESS; } diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh index 925fd467c7cc..a81f38eef417 100755 --- a/samples/bpf/test_cgrp2_sock.sh +++ b/samples/bpf/test_cgrp2_sock.sh @@ -1,47 +1,133 @@ -#!/bin/bash +#!/bin/sh -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth0b - ip link set veth0b up - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip link add foo type vrf table 1234 - ip link set foo up - ip addr add 172.16.1.101/24 dev veth0b - ip addr add 2401:db00::2/64 dev veth0b nodad - ip link set veth0b master foo +# Test various socket options that can be set by attaching programs to cgroups. + +CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock" + +################################################################################ +# +print_result() +{ + local rc=$1 + local status=" OK " + + [ $rc -ne 0 ] && status="FAIL" + + printf "%-50s [%4s]\n" "$2" "$status" } -function attach_bpf { - rm -rf /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2 - mount -t cgroup2 none /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2/foo - test_cgrp2_sock /tmp/cgroupv2/foo foo - echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +check_sock() +{ + out=$(test_cgrp2_sock) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv4: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv4: $2" + fi } -function cleanup { - set +ex - ip netns delete at_ns0 - ip link del veth0 - ip link del foo - umount /tmp/cgroupv2 - rm -rf /tmp/cgroupv2 - set -ex +check_sock6() +{ + out=$(test_cgrp2_sock -6) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv6: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv6: $2" + fi } -function do_test { - ping -c1 -w1 172.16.1.100 - ping6 -c1 -w1 2401:db00::1 +################################################################################ +# + +cleanup() +{ + echo $$ >> ${CGRP_MNT}/cgroup.procs + rmdir ${CGRP_MNT}/sockopts } +cleanup_and_exit() +{ + local rc=$1 + local msg="$2" + + [ -n "$msg" ] && echo "ERROR: $msg" + + ip li del cgrp2_sock + umount ${CGRP_MNT} + + exit $rc +} + + +################################################################################ +# main + +rc=0 + +ip li add cgrp2_sock type dummy 2>/dev/null + +set -e +mkdir -p ${CGRP_MNT} +mount -t cgroup2 none ${CGRP_MNT} +set +e + + +# make sure we have a known start point cleanup 2>/dev/null -config_device -attach_bpf -do_test -cleanup -echo "*** PASS ***" + +mkdir -p ${CGRP_MNT}/sockopts +[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy" + + +# set pid into cgroup +echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs + +# no bpf program attached, so socket should show no settings +check_sock "dev , mark 0, priority 0" "No programs attached" +check_sock6 "dev , mark 0, priority 0" "No programs attached" + +# verify device is set +# +test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device" +fi +check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set" +check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set" + +# verify mark is set +# +test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set mark" +fi +check_sock "dev , mark 666, priority 0" "Mark set" +check_sock6 "dev , mark 666, priority 0" "Mark set" + +# verify priority is set +# +test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set priority" +fi +check_sock "dev , mark 0, priority 123" "Priority set" +check_sock6 "dev , mark 0, priority 123" "Priority set" + +# all 3 at once +# +test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device, mark and priority" +fi +check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set" +check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set" + +cleanup_and_exit $rc