From ddd872bc3098f9d9abe1680a6b2013e59e3337f7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:34 -0800 Subject: [PATCH 1/6] bpf: verifier: add checks for BPF_ABS | BPF_IND instructions introduce program type BPF_PROG_TYPE_SOCKET_FILTER that is used for attaching programs to sockets where ctx == skb. add verifier checks for ABS/IND instructions which can only be seen in socket filters, therefore the check: if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a3d0f84f178..45da7ec7d274 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, }; /* flags for BPF_MAP_UPDATE_ELEM command */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6a1f7c14a67..a28e09c7825d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1172,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) return 0; } +/* verify safety of LD_ABS|LD_IND instructions: + * - they can only appear in the programs where ctx == skb + * - since they are wrappers of function calls, they scratch R1-R5 registers, + * preserve R6-R9, and store return value into R0 + * + * Implicit input: + * ctx == skb == R6 == CTX + * + * Explicit input: + * SRC == any register + * IMM == 32-bit immediate + * + * Output: + * R0 - 8/16/32-bit skb data converted to cpu endianness + */ +static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +{ + struct reg_state *regs = env->cur_state.regs; + u8 mode = BPF_MODE(insn->code); + struct reg_state *reg; + int i, err; + + if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); + return -EINVAL; + } + + if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || + (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { + verbose("BPF_LD_ABS uses reserved fields\n"); + return -EINVAL; + } + + /* check whether implicit source operand (register R6) is readable */ + err = check_reg_arg(regs, BPF_REG_6, SRC_OP); + if (err) + return err; + + if (regs[BPF_REG_6].type != PTR_TO_CTX) { + verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); + return -EINVAL; + } + + if (mode == BPF_IND) { + /* check explicit source operand */ + err = check_reg_arg(regs, insn->src_reg, SRC_OP); + if (err) + return err; + } + + /* reset caller saved regs to unreadable */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + reg = regs + caller_saved[i]; + reg->type = NOT_INIT; + reg->imm = 0; + } + + /* mark destination R0 register as readable, since it contains + * the value fetched from the packet + */ + regs[BPF_REG_0].type = UNKNOWN_VALUE; + return 0; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -1677,8 +1741,10 @@ static int do_check(struct verifier_env *env) u8 mode = BPF_MODE(insn->code); if (mode == BPF_ABS || mode == BPF_IND) { - verbose("LD_ABS is not supported yet\n"); - return -EINVAL; + err = check_ld_abs(env, insn); + if (err) + return err; + } else if (mode == BPF_IMM) { err = check_ld_imm(env, insn); if (err) From 89aa075832b0da4402acebd698d0411dcc82d03e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:35 -0800 Subject: [PATCH 2/6] net: sock: allow eBPF programs to be attached to sockets introduce new setsockopt() command: setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd)) where prog_fd was received from syscall bpf(BPF_PROG_LOAD, attr, ...) and attr->prog_type == BPF_PROG_TYPE_SOCKET_FILTER setsockopt() calls bpf_prog_get() which increments refcnt of the program, so it doesn't get unloaded while socket is using the program. The same eBPF program can be attached to multiple sockets. User task exit automatically closes socket which calls sk_filter_uncharge() which decrements refcnt of eBPF program Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 3 + arch/avr32/include/uapi/asm/socket.h | 3 + arch/cris/include/uapi/asm/socket.h | 3 + arch/frv/include/uapi/asm/socket.h | 3 + arch/ia64/include/uapi/asm/socket.h | 3 + arch/m32r/include/uapi/asm/socket.h | 3 + arch/mips/include/uapi/asm/socket.h | 3 + arch/mn10300/include/uapi/asm/socket.h | 3 + arch/parisc/include/uapi/asm/socket.h | 3 + arch/powerpc/include/uapi/asm/socket.h | 3 + arch/s390/include/uapi/asm/socket.h | 3 + arch/sparc/include/uapi/asm/socket.h | 3 + arch/xtensa/include/uapi/asm/socket.h | 3 + include/linux/bpf.h | 4 ++ include/linux/filter.h | 1 + include/uapi/asm-generic/socket.h | 3 + net/core/filter.c | 97 +++++++++++++++++++++++++- net/core/sock.c | 13 ++++ 18 files changed, 155 insertions(+), 2 deletions(-) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index e2fe0700b3b4..9a20821b111c 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -89,4 +89,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 92121b0f5b98..2b65ed6b277c 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -82,4 +82,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 60f60f5b9b35..e2503d9f1869 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -84,6 +84,9 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 2c6890209ea6..4823ad125578 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -82,5 +82,8 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 09a93fb566f6..59be3d87f86d 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index e8589819c274..7bc4cb273856 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -82,4 +82,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 2e9ee8c55a10..dec3c850f36b 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -100,4 +100,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index f3492e8c9f70..cab7d6d50051 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -82,4 +82,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 7984a1cab3da..a5cd40cd8ee1 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -81,4 +81,7 @@ #define SO_INCOMING_CPU 0x402A +#define SO_ATTACH_BPF 0x402B +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 3474e4ef166d..c046666038f8 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -89,4 +89,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 8457636c33e1..296942d56e6a 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -88,4 +88,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 4a8003a94163..e6a16c40be5f 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -78,6 +78,9 @@ #define SO_INCOMING_CPU 0x0033 +#define SO_ATTACH_BPF 0x0034 +#define SO_DETACH_BPF SO_DETACH_FILTER + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index c46f6a696849..4120af086160 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -93,4 +93,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75e94eaa228b..bbfceb756452 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -128,7 +128,11 @@ struct bpf_prog_aux { struct work_struct work; }; +#ifdef CONFIG_BPF_SYSCALL void bpf_prog_put(struct bpf_prog *prog); +#else +static inline void bpf_prog_put(struct bpf_prog *prog) {} +#endif struct bpf_prog *bpf_prog_get(u32 ufd); /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); diff --git a/include/linux/filter.h b/include/linux/filter.h index ca95abd2bed1..caac2087a4d5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -381,6 +381,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f541ccefd4ac..5c15c2a5c123 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -84,4 +84,7 @@ #define SO_INCOMING_CPU 49 +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 647b12265e18..8cc3c03078b3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -44,6 +44,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -813,8 +814,12 @@ static void bpf_release_orig_filter(struct bpf_prog *fp) static void __bpf_prog_release(struct bpf_prog *prog) { - bpf_release_orig_filter(prog); - bpf_prog_free(prog); + if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + bpf_prog_put(prog); + } else { + bpf_release_orig_filter(prog); + bpf_prog_free(prog); + } } static void __sk_filter_release(struct sk_filter *fp) @@ -1088,6 +1093,94 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); +#ifdef CONFIG_BPF_SYSCALL +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + struct bpf_prog *prog; + + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + + prog = bpf_prog_get(ufd); + if (!prog) + return -EINVAL; + + if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + /* valid fd, but invalid program type */ + bpf_prog_put(prog); + return -EINVAL; + } + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + bpf_prog_put(prog); + return -ENOMEM; + } + fp->prog = prog; + + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + __sk_filter_release(fp); + return -ENOMEM; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + +/* allow socket filters to call + * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() + */ +static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + default: + return NULL; + } +} + +static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +{ + /* skb fields cannot be accessed yet */ + return false; +} + +static struct bpf_verifier_ops sock_filter_ops = { + .get_func_proto = sock_filter_func_proto, + .is_valid_access = sock_filter_is_valid_access, +}; + +static struct bpf_prog_type_list tl = { + .ops = &sock_filter_ops, + .type = BPF_PROG_TYPE_SOCKET_FILTER, +}; + +static int __init register_sock_filter_ops(void) +{ + bpf_register_prog_type(&tl); + return 0; +} +late_initcall(register_sock_filter_ops); +#else +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + return -EOPNOTSUPP; +} +#endif int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/sock.c b/net/core/sock.c index 0725cf0cb685..9a56b2000c3f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -888,6 +888,19 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } break; + case SO_ATTACH_BPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_attach_bpf(ufd, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; From 03f4723ed7a52bd31da26eefe2cdde563ea0f468 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:36 -0800 Subject: [PATCH 3/6] samples: bpf: example of stateful socket filtering this socket filter example does: - creates arraymap in kernel with key 4 bytes and value 8 bytes - loads eBPF program which assumes that packet is IPv4 and loads one byte of IP->proto from the packet and uses it as a key in a map r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)]; *(u32*)(fp - 4) = r0; value = bpf_map_lookup_elem(map_fd, fp - 4); if (value) (*(u64*)value) += 1; - attaches this program to raw socket - every second user space reads map[IPPROTO_TCP], map[IPPROTO_UDP], map[IPPROTO_ICMP] to see how many packets of given protocol were seen on loopback interface Usage: $sudo samples/bpf/sock_example TCP 0 UDP 0 ICMP 0 packets TCP 187600 UDP 0 ICMP 4 packets TCP 376504 UDP 0 ICMP 8 packets TCP 563116 UDP 0 ICMP 12 packets TCP 753144 UDP 0 ICMP 16 packets Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 2 + samples/bpf/libbpf.c | 28 ++++++++++ samples/bpf/libbpf.h | 13 +++++ samples/bpf/sock_example.c | 101 +++++++++++++++++++++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 samples/bpf/sock_example.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0718d9ce4619..f46d3492d032 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -3,9 +3,11 @@ obj- := dummy.o # List of programs to build hostprogs-y := test_verifier test_maps +hostprogs-y += sock_example test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o +sock_example-objs := sock_example.o libbpf.o # Tell kbuild to always build the programs always := $(hostprogs-y) diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 17bb520eb57f..46d50b7ddf79 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -7,6 +7,10 @@ #include #include #include +#include +#include +#include +#include #include "libbpf.h" static __u64 ptr_to_u64(void *ptr) @@ -93,3 +97,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type, return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } + +int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index f8678e5f48bf..cc62ad4d95de 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -99,6 +99,16 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ @@ -169,4 +179,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = 0 }) +/* create RAW socket and bind to interface 'name' */ +int open_raw_sock(const char *name); + #endif diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c new file mode 100644 index 000000000000..c8ad0404416f --- /dev/null +++ b/samples/bpf/sock_example.c @@ -0,0 +1,101 @@ +/* eBPF example program: + * - creates arraymap in kernel with key 4 bytes and value 8 bytes + * + * - loads eBPF program: + * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)]; + * *(u32*)(fp - 4) = r0; + * // assuming packet is IPv4, lookup ip->proto in a map + * value = bpf_map_lookup_elem(map_fd, fp - 4); + * if (value) + * (*(u64*)value) += 1; + * + * - attaches this program to eth0 raw socket + * + * - every second user space reads map[tcp], map[udp], map[icmp] to see + * how many packets of given protocol were seen on eth0 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" + +static int test_sock(void) +{ + int sock = -1, map_fd, prog_fd, i, key; + long long value = 0, tcp_cnt, udp_cnt, icmp_cnt; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 256); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + goto cleanup; + } + + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ + BPF_EXIT_INSN(), + }; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), + "GPL"); + if (prog_fd < 0) { + printf("failed to load prog '%s'\n", strerror(errno)); + goto cleanup; + } + + sock = open_raw_sock("lo"); + + if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, + sizeof(prog_fd)) < 0) { + printf("setsockopt %s\n", strerror(errno)); + goto cleanup; + } + + for (i = 0; i < 10; i++) { + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld packets\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + +cleanup: + /* maps, programs, raw sockets will auto cleanup on process exit */ + return 0; +} + +int main(void) +{ + FILE *f; + + f = popen("ping -c5 localhost", "r"); + (void)f; + + return test_sock(); +} From 249b812d8005ec38e351ee763ceb85d56b155064 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:37 -0800 Subject: [PATCH 4/6] samples: bpf: elf_bpf file loader simple .o parser and loader using BPF syscall. .o is a standard ELF generated by LLVM backend It parses elf file compiled by llvm .c->.o - parses 'maps' section and creates maps via BPF syscall - parses 'license' section and passes it to syscall - parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD - loads eBPF programs via BPF syscall One ELF file can contain multiple BPF programs. int load_bpf_file(char *path); populates prog_fd[] and map_fd[] with FDs received from bpf syscall bpf_helpers.h - helper functions available to eBPF programs written in C Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/bpf_helpers.h | 40 ++++++++ samples/bpf/bpf_load.c | 203 ++++++++++++++++++++++++++++++++++++++ samples/bpf/bpf_load.h | 24 +++++ 3 files changed, 267 insertions(+) create mode 100644 samples/bpf/bpf_helpers.h create mode 100644 samples/bpf/bpf_load.c create mode 100644 samples/bpf/bpf_load.h diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h new file mode 100644 index 000000000000..ca0333146006 --- /dev/null +++ b/samples/bpf/bpf_helpers.h @@ -0,0 +1,40 @@ +#ifndef __BPF_HELPERS_H +#define __BPF_HELPERS_H + +/* helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +/* helper functions called from eBPF programs written in C */ +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static int (*bpf_map_update_elem)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_update_elem; +static int (*bpf_map_delete_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_delete_elem; + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +struct sk_buff; +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +/* a helper structure used by eBPF C program + * to describe map attributes to elf_bpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#endif diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c new file mode 100644 index 000000000000..1831d236382b --- /dev/null +++ b/samples/bpf/bpf_load.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_helpers.h" +#include "bpf_load.h" + +static char license[128]; +static bool processed_sec[128]; +int map_fd[MAX_MAPS]; +int prog_fd[MAX_PROGS]; +int prog_cnt; + +static int load_and_attach(const char *event, struct bpf_insn *prog, int size) +{ + int fd; + bool is_socket = strncmp(event, "socket", 6) == 0; + + if (!is_socket) + /* tracing events tbd */ + return -1; + + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, + prog, size, license); + + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + return 0; +} + +static int load_maps(struct bpf_map_def *maps, int len) +{ + int i; + + for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + + map_fd[i] = bpf_create_map(maps[i].type, + maps[i].key_size, + maps[i].value_size, + maps[i].max_entries); + if (map_fd[i] < 0) + return 1; + } + return 0; +} + +static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, + GElf_Shdr *shdr, Elf_Data **data) +{ + Elf_Scn *scn; + + scn = elf_getscn(elf, i); + if (!scn) + return 1; + + if (gelf_getshdr(scn, shdr) != shdr) + return 2; + + *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); + if (!*shname || !shdr->sh_size) + return 3; + + *data = elf_getdata(scn, 0); + if (!*data || elf_getdata(scn, *data) != NULL) + return 4; + + return 0; +} + +static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, + GElf_Shdr *shdr, struct bpf_insn *insn) +{ + int i, nrels; + + nrels = shdr->sh_size / shdr->sh_entsize; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + + gelf_getrel(data, i, &rel); + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + + gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); + + if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + printf("invalid relo for insn[%d].code 0x%x\n", + insn_idx, insn[insn_idx].code); + return 1; + } + insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; + } + + return 0; +} + +int load_bpf_file(char *path) +{ + int fd, i; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr, shdr_prog; + Elf_Data *data, *data_prog, *symbols = NULL; + char *shname, *shname_prog; + + if (elf_version(EV_CURRENT) == EV_NONE) + return 1; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) + return 1; + + elf = elf_begin(fd, ELF_C_READ, NULL); + + if (!elf) + return 1; + + if (gelf_getehdr(elf, &ehdr) != &ehdr) + return 1; + + /* scan over all elf sections to get license and map info */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (0) /* helpful for llvm debugging */ + printf("section %d:%s data %p size %zd link %d flags %d\n", + i, shname, data->d_buf, data->d_size, + shdr.sh_link, (int) shdr.sh_flags); + + if (strcmp(shname, "license") == 0) { + processed_sec[i] = true; + memcpy(license, data->d_buf, data->d_size); + } else if (strcmp(shname, "maps") == 0) { + processed_sec[i] = true; + if (load_maps(data->d_buf, data->d_size)) + return 1; + } else if (shdr.sh_type == SHT_SYMTAB) { + symbols = data; + } + } + + /* load programs that need map fixup (relocations) */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + if (shdr.sh_type == SHT_REL) { + struct bpf_insn *insns; + + if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, + &shdr_prog, &data_prog)) + continue; + + insns = (struct bpf_insn *) data_prog->d_buf; + + processed_sec[shdr.sh_info] = true; + processed_sec[i] = true; + + if (parse_relo_and_apply(data, symbols, &shdr, insns)) + continue; + + if (memcmp(shname_prog, "events/", 7) == 0 || + memcmp(shname_prog, "socket", 6) == 0) + load_and_attach(shname_prog, insns, data_prog->d_size); + } + } + + /* load programs that don't use maps */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (processed_sec[i]) + continue; + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (memcmp(shname, "events/", 7) == 0 || + memcmp(shname, "socket", 6) == 0) + load_and_attach(shname, data->d_buf, data->d_size); + } + + close(fd); + return 0; +} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h new file mode 100644 index 000000000000..27789a34f5e6 --- /dev/null +++ b/samples/bpf/bpf_load.h @@ -0,0 +1,24 @@ +#ifndef __BPF_LOAD_H +#define __BPF_LOAD_H + +#define MAX_MAPS 32 +#define MAX_PROGS 32 + +extern int map_fd[MAX_MAPS]; +extern int prog_fd[MAX_PROGS]; + +/* parses elf file compiled by llvm .c->.o + * . parses 'maps' section and creates maps via BPF syscall + * . parses 'license' section and passes it to syscall + * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by + * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD + * . loads eBPF programs via BPF syscall + * + * One ELF file can contain multiple BPF programs which will be loaded + * and their FDs stored stored in prog_fd array + * + * returns zero on success + */ +int load_bpf_file(char *path); + +#endif From a80857822b0c2ed608c93504bd3687b78f20c619 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:38 -0800 Subject: [PATCH 5/6] samples: bpf: trivial eBPF program in C this example does the same task as previous socket example in assembler, but this one does it in C. eBPF program in kernel does: /* assume that packet is IPv4, load one byte of IP->proto */ int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); long *value; value = bpf_map_lookup_elem(&my_map, &index); if (value) __sync_fetch_and_add(value, 1); Corresponding user space reads map[tcp], map[udp], map[icmp] and prints protocol stats every second Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 14 +++++++++++ samples/bpf/libbpf.h | 2 +- samples/bpf/sockex1_kern.c | 25 +++++++++++++++++++ samples/bpf/sockex1_user.c | 49 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 samples/bpf/sockex1_kern.c create mode 100644 samples/bpf/sockex1_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index f46d3492d032..770d145186c3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -4,12 +4,26 @@ obj- := dummy.o # List of programs to build hostprogs-y := test_verifier test_maps hostprogs-y += sock_example +hostprogs-y += sockex1 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o +sockex1-objs := bpf_load.o libbpf.o sockex1_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) +always += sockex1_kern.o HOSTCFLAGS += -I$(objtree)/usr/include + +HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTLOADLIBES_sockex1 += -lelf + +# point this to your LLVM backend with bpf support +LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc + +%.o: %.c + clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index cc62ad4d95de..58c5fe1bdba1 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -15,7 +15,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, const char *license); -#define LOG_BUF_SIZE 8192 +#define LOG_BUF_SIZE 65536 extern char bpf_log_buf[LOG_BUF_SIZE]; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c new file mode 100644 index 000000000000..066892662915 --- /dev/null +++ b/samples/bpf/sockex1_kern.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +SEC("socket1") +int bpf_prog1(struct sk_buff *skb) +{ + int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + long *value; + + value = bpf_map_lookup_elem(&my_map, &index); + if (value) + __sync_fetch_and_add(value, 1); + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c new file mode 100644 index 000000000000..34a443ff3831 --- /dev/null +++ b/samples/bpf/sockex1_user.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include + +int main(int ac, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + long long tcp_cnt, udp_cnt, icmp_cnt; + int key; + + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld packets\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + + return 0; +} From fbe3310840c65f3cf97dd90d23e177d061c376f2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:39 -0800 Subject: [PATCH 6/6] samples: bpf: large eBPF program in C sockex2_kern.c is purposefully large eBPF program in C. llvm compiles ~200 lines of C code into ~300 eBPF instructions. It's similar to __skb_flow_dissect() to demonstrate that complex packet parsing can be done by eBPF. Then it uses (struct flow_keys)->dst IP address (or hash of ipv6 dst) to keep stats of number of packets per IP. User space loads eBPF program, attaches it to loopback interface and prints dest_ip->#packets stats every second. Usage: $sudo samples/bpf/sockex2 ip 127.0.0.1 count 19 ip 127.0.0.1 count 178115 ip 127.0.0.1 count 369437 ip 127.0.0.1 count 559841 ip 127.0.0.1 count 750539 Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/sockex2_kern.c | 215 +++++++++++++++++++++++++++++++++++++ samples/bpf/sockex2_user.c | 44 ++++++++ 3 files changed, 263 insertions(+) create mode 100644 samples/bpf/sockex2_kern.c create mode 100644 samples/bpf/sockex2_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 770d145186c3..b5b3600dcdf5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -5,20 +5,24 @@ obj- := dummy.o hostprogs-y := test_verifier test_maps hostprogs-y += sock_example hostprogs-y += sockex1 +hostprogs-y += sockex2 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o +sockex2-objs := bpf_load.o libbpf.o sockex2_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o +always += sockex2_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_sockex1 += -lelf +HOSTLOADLIBES_sockex2 += -lelf # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c new file mode 100644 index 000000000000..6f0135f0f217 --- /dev/null +++ b/samples/bpf/sockex2_kern.c @@ -0,0 +1,215 @@ +#include +#include "bpf_helpers.h" +#include +#include +#include +#include +#include +#include +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u16 thoff; + __u8 ip_proto; +}; + +static inline int proto_ports_offset(__u64 proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: + return 4; + default: + return 0; + } +} + +static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff) +{ + return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) + & (IP_MF | IP_OFFSET); +} + +static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) +{ + __u64 w0 = load_word(ctx, off); + __u64 w1 = load_word(ctx, off + 4); + __u64 w2 = load_word(ctx, off + 8); + __u64 w3 = load_word(ctx, off + 12); + + return (__u32)(w0 ^ w1 ^ w2 ^ w3); +} + +static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + __u64 verlen; + + if (unlikely(ip_is_fragment(skb, nhoff))) + *ip_proto = 0; + else + *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + + if (*ip_proto != IPPROTO_GRE) { + flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); + flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); + } + + verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); + if (likely(verlen == 0x45)) + nhoff += 20; + else + nhoff += (verlen & 0xF) << 2; + + return nhoff; +} + +static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + *ip_proto = load_byte(skb, + nhoff + offsetof(struct ipv6hdr, nexthdr)); + flow->src = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, saddr)); + flow->dst = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, daddr)); + nhoff += sizeof(struct ipv6hdr); + + return nhoff; +} + +static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) +{ + __u64 nhoff = ETH_HLEN; + __u64 ip_proto; + __u64 proto = load_half(skb, 12); + int poff; + + if (proto == ETH_P_8021AD) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (likely(proto == ETH_P_IP)) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u64 gre_flags = load_half(skb, + nhoff + offsetof(struct gre_hdr, flags)); + __u64 gre_proto = load_half(skb, + nhoff + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION|GRE_ROUTING)) + break; + + proto = gre_proto; + nhoff += 4; + if (gre_flags & GRE_CSUM) + nhoff += 4; + if (gre_flags & GRE_KEY) + nhoff += 4; + if (gre_flags & GRE_SEQ) + nhoff += 4; + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, + nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_IP) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + break; + } + case IPPROTO_IPIP: + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + break; + case IPPROTO_IPV6: + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + break; + default: + break; + } + + flow->ip_proto = ip_proto; + poff = proto_ports_offset(ip_proto); + if (poff >= 0) { + nhoff += poff; + flow->ports = load_word(skb, nhoff); + } + + flow->thoff = (__u16) nhoff; + + return true; +} + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(long), + .max_entries = 1024, +}; + +SEC("socket2") +int bpf_prog2(struct sk_buff *skb) +{ + struct flow_keys flow; + long *value; + u32 key; + + if (!flow_dissector(skb, &flow)) + return 0; + + key = flow.dst; + value = bpf_map_lookup_elem(&hash_map, &key); + if (value) { + __sync_fetch_and_add(value, 1); + } else { + long val = 1; + + bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c new file mode 100644 index 000000000000..d2d5f5a790d3 --- /dev/null +++ b/samples/bpf/sockex2_user.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include + +int main(int ac, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + int key = 0, next_key; + long long value; + + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &value); + printf("ip %s count %lld\n", + inet_ntoa((struct in_addr){htonl(next_key)}), + value); + key = next_key; + } + sleep(1); + } + return 0; +}