mirror of https://gitee.com/openkylin/linux.git
627 lines
14 KiB
C
627 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Copyright (c) 2020 Facebook */
|
|
|
|
#include <stddef.h>
|
|
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/types.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_endian.h>
|
|
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
|
|
#include "test_tcp_hdr_options.h"
|
|
|
|
/* Size in bytes of MEMBER within TYPE, computed without an instance.
 * Guarded so that a definition already provided by an included header wins.
 */
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif
|
|
|
|
__u8 test_kind = TCPOPT_EXP;
|
|
__u16 test_magic = 0xeB9F;
|
|
__u32 inherit_cb_flags = 0;
|
|
|
|
struct bpf_test_option passive_synack_out = {};
|
|
struct bpf_test_option passive_fin_out = {};
|
|
|
|
struct bpf_test_option passive_estab_in = {};
|
|
struct bpf_test_option passive_fin_in = {};
|
|
|
|
struct bpf_test_option active_syn_out = {};
|
|
struct bpf_test_option active_fin_out = {};
|
|
|
|
struct bpf_test_option active_estab_in = {};
|
|
struct bpf_test_option active_fin_in = {};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
|
|
__uint(map_flags, BPF_F_NO_PREALLOC);
|
|
__type(key, int);
|
|
__type(value, struct hdr_stg);
|
|
} hdr_stg_map SEC(".maps");
|
|
|
|
static bool skops_want_cookie(const struct bpf_sock_ops *skops)
|
|
{
|
|
return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
|
|
}
|
|
|
|
static bool skops_current_mss(const struct bpf_sock_ops *skops)
|
|
{
|
|
return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
|
|
}
|
|
|
|
/* Number of header bytes needed to carry the test option described by
 * @flags, including the option's own kind/len (and magic for TCPOPT_EXP)
 * overhead.  Returns 0 when @flags is 0, i.e. there is nothing to write.
 */
static __u8 option_total_len(__u8 flags)
{
	__u8 i, len = 1; /* +1 for flags */

	if (!flags)
		return 0;

	/* RESEND bit does not use a byte */
	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
		len += !!TEST_OPTION_FLAGS(flags, i);

	if (test_kind == TCPOPT_EXP)
		return len + TCP_BPF_EXPOPT_BASE_LEN;
	else
		return len + 2; /* +1 kind, +1 kind-len */
}
|
|
|
|
static void write_test_option(const struct bpf_test_option *test_opt,
|
|
__u8 *data)
|
|
{
|
|
__u8 offset = 0;
|
|
|
|
data[offset++] = test_opt->flags;
|
|
if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
|
|
data[offset++] = test_opt->max_delack_ms;
|
|
|
|
if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
|
|
data[offset++] = test_opt->rand;
|
|
}
|
|
|
|
static int store_option(struct bpf_sock_ops *skops,
|
|
const struct bpf_test_option *test_opt)
|
|
{
|
|
union {
|
|
struct tcp_exprm_opt exprm;
|
|
struct tcp_opt regular;
|
|
} write_opt;
|
|
int err;
|
|
|
|
if (test_kind == TCPOPT_EXP) {
|
|
write_opt.exprm.kind = TCPOPT_EXP;
|
|
write_opt.exprm.len = option_total_len(test_opt->flags);
|
|
write_opt.exprm.magic = __bpf_htons(test_magic);
|
|
write_opt.exprm.data32 = 0;
|
|
write_test_option(test_opt, write_opt.exprm.data);
|
|
err = bpf_store_hdr_opt(skops, &write_opt.exprm,
|
|
sizeof(write_opt.exprm), 0);
|
|
} else {
|
|
write_opt.regular.kind = test_kind;
|
|
write_opt.regular.len = option_total_len(test_opt->flags);
|
|
write_opt.regular.data32 = 0;
|
|
write_test_option(test_opt, write_opt.regular.data);
|
|
err = bpf_store_hdr_opt(skops, &write_opt.regular,
|
|
sizeof(write_opt.regular), 0);
|
|
}
|
|
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
|
|
{
|
|
opt->flags = *start++;
|
|
|
|
if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
|
|
opt->max_delack_ms = *start++;
|
|
|
|
if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
|
|
opt->rand = *start++;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int load_option(struct bpf_sock_ops *skops,
|
|
struct bpf_test_option *test_opt, bool from_syn)
|
|
{
|
|
union {
|
|
struct tcp_exprm_opt exprm;
|
|
struct tcp_opt regular;
|
|
} search_opt;
|
|
int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
|
|
|
|
if (test_kind == TCPOPT_EXP) {
|
|
search_opt.exprm.kind = TCPOPT_EXP;
|
|
search_opt.exprm.len = 4;
|
|
search_opt.exprm.magic = __bpf_htons(test_magic);
|
|
search_opt.exprm.data32 = 0;
|
|
ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
|
|
sizeof(search_opt.exprm), load_flags);
|
|
if (ret < 0)
|
|
return ret;
|
|
return parse_test_option(test_opt, search_opt.exprm.data);
|
|
} else {
|
|
search_opt.regular.kind = test_kind;
|
|
search_opt.regular.len = 0;
|
|
search_opt.regular.data32 = 0;
|
|
ret = bpf_load_hdr_opt(skops, &search_opt.regular,
|
|
sizeof(search_opt.regular), load_flags);
|
|
if (ret < 0)
|
|
return ret;
|
|
return parse_test_option(test_opt, search_opt.regular.data);
|
|
}
|
|
}
|
|
|
|
static int synack_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
struct bpf_test_option test_opt = {};
|
|
__u8 optlen;
|
|
int err;
|
|
|
|
if (!passive_synack_out.flags)
|
|
return CG_OK;
|
|
|
|
err = load_option(skops, &test_opt, true);
|
|
|
|
/* bpf_test_option is not found */
|
|
if (err == -ENOMSG)
|
|
return CG_OK;
|
|
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
|
|
optlen = option_total_len(passive_synack_out.flags);
|
|
if (optlen) {
|
|
err = bpf_reserve_hdr_opt(skops, optlen, 0);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int write_synack_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
struct bpf_test_option opt;
|
|
|
|
if (!passive_synack_out.flags)
|
|
/* We should not even be called since no header
|
|
* space has been reserved.
|
|
*/
|
|
RET_CG_ERR(0);
|
|
|
|
opt = passive_synack_out;
|
|
if (skops_want_cookie(skops))
|
|
SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
|
|
|
|
return store_option(skops, &opt);
|
|
}
|
|
|
|
static int syn_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
__u8 optlen;
|
|
int err;
|
|
|
|
if (!active_syn_out.flags)
|
|
return CG_OK;
|
|
|
|
optlen = option_total_len(active_syn_out.flags);
|
|
if (optlen) {
|
|
err = bpf_reserve_hdr_opt(skops, optlen, 0);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int write_syn_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
if (!active_syn_out.flags)
|
|
RET_CG_ERR(0);
|
|
|
|
return store_option(skops, &active_syn_out);
|
|
}
|
|
|
|
static int fin_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
struct bpf_test_option *opt;
|
|
struct hdr_stg *hdr_stg;
|
|
__u8 optlen;
|
|
int err;
|
|
|
|
if (!skops->sk)
|
|
RET_CG_ERR(0);
|
|
|
|
hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
|
|
if (!hdr_stg)
|
|
RET_CG_ERR(0);
|
|
|
|
if (hdr_stg->active)
|
|
opt = &active_fin_out;
|
|
else
|
|
opt = &passive_fin_out;
|
|
|
|
optlen = option_total_len(opt->flags);
|
|
if (optlen) {
|
|
err = bpf_reserve_hdr_opt(skops, optlen, 0);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int write_fin_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
struct bpf_test_option *opt;
|
|
struct hdr_stg *hdr_stg;
|
|
|
|
if (!skops->sk)
|
|
RET_CG_ERR(0);
|
|
|
|
hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
|
|
if (!hdr_stg)
|
|
RET_CG_ERR(0);
|
|
|
|
if (hdr_stg->active)
|
|
opt = &active_fin_out;
|
|
else
|
|
opt = &passive_fin_out;
|
|
|
|
if (!opt->flags)
|
|
RET_CG_ERR(0);
|
|
|
|
return store_option(skops, opt);
|
|
}
|
|
|
|
static int resend_in_ack(struct bpf_sock_ops *skops)
|
|
{
|
|
struct hdr_stg *hdr_stg;
|
|
|
|
if (!skops->sk)
|
|
return -1;
|
|
|
|
hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
|
|
if (!hdr_stg)
|
|
return -1;
|
|
|
|
return !!hdr_stg->resend_syn;
|
|
}
|
|
|
|
static int nodata_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
int resend;
|
|
|
|
resend = resend_in_ack(skops);
|
|
if (resend < 0)
|
|
RET_CG_ERR(0);
|
|
|
|
if (resend)
|
|
return syn_opt_len(skops);
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int write_nodata_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
int resend;
|
|
|
|
resend = resend_in_ack(skops);
|
|
if (resend < 0)
|
|
RET_CG_ERR(0);
|
|
|
|
if (resend)
|
|
return write_syn_opt(skops);
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
/* Space reservation for packets that carry data. */
static int data_opt_len(struct bpf_sock_ops *skops)
{
	/* Same as the nodata version.  Mostly to show
	 * an example usage on skops->skb_len.
	 */
	return nodata_opt_len(skops);
}
|
|
|
|
/* Option writing for packets that carry data: same as the nodata version. */
static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}
|
|
|
|
static int current_mss_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
/* Reserve maximum that may be needed */
|
|
int err;
|
|
|
|
err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
|
|
{
|
|
__u8 tcp_flags = skops_tcp_flags(skops);
|
|
|
|
if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
|
|
return synack_opt_len(skops);
|
|
|
|
if (tcp_flags & TCPHDR_SYN)
|
|
return syn_opt_len(skops);
|
|
|
|
if (tcp_flags & TCPHDR_FIN)
|
|
return fin_opt_len(skops);
|
|
|
|
if (skops_current_mss(skops))
|
|
/* The kernel is calculating the MSS */
|
|
return current_mss_opt_len(skops);
|
|
|
|
if (skops->skb_len)
|
|
return data_opt_len(skops);
|
|
|
|
return nodata_opt_len(skops);
|
|
}
|
|
|
|
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
__u8 tcp_flags = skops_tcp_flags(skops);
|
|
struct tcphdr *th;
|
|
|
|
if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
|
|
return write_synack_opt(skops);
|
|
|
|
if (tcp_flags & TCPHDR_SYN)
|
|
return write_syn_opt(skops);
|
|
|
|
if (tcp_flags & TCPHDR_FIN)
|
|
return write_fin_opt(skops);
|
|
|
|
th = skops->skb_data;
|
|
if (th + 1 > skops->skb_data_end)
|
|
RET_CG_ERR(0);
|
|
|
|
if (skops->skb_len > tcp_hdrlen(th))
|
|
return write_data_opt(skops);
|
|
|
|
return write_nodata_opt(skops);
|
|
}
|
|
|
|
/* Set TCP_BPF_DELACK_MAX on the socket.  @max_delack_ms is in
 * milliseconds and is converted to microseconds for the sockopt.
 * Returns the bpf_setsockopt() result.
 */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}
|
|
|
|
/* Set TCP_BPF_RTO_MIN on the socket from the peer's max delayed-ack time.
 * @peer_max_delack_ms is in milliseconds and is converted to microseconds
 * for the sockopt.  Returns the bpf_setsockopt() result.
 */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
			      sizeof(min_rto_us));
}
|
|
|
|
static int handle_active_estab(struct bpf_sock_ops *skops)
|
|
{
|
|
struct hdr_stg init_stg = {
|
|
.active = true,
|
|
};
|
|
int err;
|
|
|
|
err = load_option(skops, &active_estab_in, false);
|
|
if (err && err != -ENOMSG)
|
|
RET_CG_ERR(err);
|
|
|
|
init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
|
|
OPTION_RESEND);
|
|
if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
|
|
&init_stg,
|
|
BPF_SK_STORAGE_GET_F_CREATE))
|
|
RET_CG_ERR(0);
|
|
|
|
if (init_stg.resend_syn)
|
|
/* Don't clear the write_hdr cb now because
|
|
* the ACK may get lost and retransmit may
|
|
* be needed.
|
|
*
|
|
* PARSE_ALL_HDR cb flag is set to learn if this
|
|
* resend_syn option has received by the peer.
|
|
*
|
|
* The header option will be resent until a valid
|
|
* packet is received at handle_parse_hdr()
|
|
* and all hdr cb flags will be cleared in
|
|
* handle_parse_hdr().
|
|
*/
|
|
set_parse_all_hdr_cb_flags(skops);
|
|
else if (!active_fin_out.flags)
|
|
/* No options will be written from now */
|
|
clear_hdr_cb_flags(skops);
|
|
|
|
if (active_syn_out.max_delack_ms) {
|
|
err = set_delack_max(skops, active_syn_out.max_delack_ms);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
if (active_estab_in.max_delack_ms) {
|
|
err = set_rto_min(skops, active_estab_in.max_delack_ms);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int handle_passive_estab(struct bpf_sock_ops *skops)
|
|
{
|
|
struct hdr_stg init_stg = {};
|
|
struct tcphdr *th;
|
|
int err;
|
|
|
|
inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
|
|
|
|
err = load_option(skops, &passive_estab_in, true);
|
|
if (err == -ENOENT) {
|
|
/* saved_syn is not found. It was in syncookie mode.
|
|
* We have asked the active side to resend the options
|
|
* in ACK, so try to find the bpf_test_option from ACK now.
|
|
*/
|
|
err = load_option(skops, &passive_estab_in, false);
|
|
init_stg.syncookie = true;
|
|
}
|
|
|
|
/* ENOMSG: The bpf_test_option is not found which is fine.
|
|
* Bail out now for all other errors.
|
|
*/
|
|
if (err && err != -ENOMSG)
|
|
RET_CG_ERR(err);
|
|
|
|
th = skops->skb_data;
|
|
if (th + 1 > skops->skb_data_end)
|
|
RET_CG_ERR(0);
|
|
|
|
if (th->syn) {
|
|
/* Fastopen */
|
|
|
|
/* Cannot clear cb_flags to stop write_hdr cb.
|
|
* synack is not sent yet for fast open.
|
|
* Even it was, the synack may need to be retransmitted.
|
|
*
|
|
* PARSE_ALL_HDR cb flag is set to learn
|
|
* if synack has reached the peer.
|
|
* All cb_flags will be cleared in handle_parse_hdr().
|
|
*/
|
|
set_parse_all_hdr_cb_flags(skops);
|
|
init_stg.fastopen = true;
|
|
} else if (!passive_fin_out.flags) {
|
|
/* No options will be written from now */
|
|
clear_hdr_cb_flags(skops);
|
|
}
|
|
|
|
if (!skops->sk ||
|
|
!bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
|
|
BPF_SK_STORAGE_GET_F_CREATE))
|
|
RET_CG_ERR(0);
|
|
|
|
if (passive_synack_out.max_delack_ms) {
|
|
err = set_delack_max(skops, passive_synack_out.max_delack_ms);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
if (passive_estab_in.max_delack_ms) {
|
|
err = set_rto_min(skops, passive_estab_in.max_delack_ms);
|
|
if (err)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
static int handle_parse_hdr(struct bpf_sock_ops *skops)
|
|
{
|
|
struct hdr_stg *hdr_stg;
|
|
struct tcphdr *th;
|
|
|
|
if (!skops->sk)
|
|
RET_CG_ERR(0);
|
|
|
|
th = skops->skb_data;
|
|
if (th + 1 > skops->skb_data_end)
|
|
RET_CG_ERR(0);
|
|
|
|
hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
|
|
if (!hdr_stg)
|
|
RET_CG_ERR(0);
|
|
|
|
if (hdr_stg->resend_syn || hdr_stg->fastopen)
|
|
/* The PARSE_ALL_HDR cb flag was turned on
|
|
* to ensure that the previously written
|
|
* options have reached the peer.
|
|
* Those previously written option includes:
|
|
* - Active side: resend_syn in ACK during syncookie
|
|
* or
|
|
* - Passive side: SYNACK during fastopen
|
|
*
|
|
* A valid packet has been received here after
|
|
* the 3WHS, so the PARSE_ALL_HDR cb flag
|
|
* can be cleared now.
|
|
*/
|
|
clear_parse_all_hdr_cb_flags(skops);
|
|
|
|
if (hdr_stg->resend_syn && !active_fin_out.flags)
|
|
/* Active side resent the syn option in ACK
|
|
* because the server was in syncookie mode.
|
|
* A valid packet has been received, so
|
|
* clear header cb flags if there is no
|
|
* more option to send.
|
|
*/
|
|
clear_hdr_cb_flags(skops);
|
|
|
|
if (hdr_stg->fastopen && !passive_fin_out.flags)
|
|
/* Passive side was in fastopen.
|
|
* A valid packet has been received, so
|
|
* the SYNACK has reached the peer.
|
|
* Clear header cb flags if there is no more
|
|
* option to send.
|
|
*/
|
|
clear_hdr_cb_flags(skops);
|
|
|
|
if (th->fin) {
|
|
struct bpf_test_option *fin_opt;
|
|
int err;
|
|
|
|
if (hdr_stg->active)
|
|
fin_opt = &active_fin_in;
|
|
else
|
|
fin_opt = &passive_fin_in;
|
|
|
|
err = load_option(skops, fin_opt, false);
|
|
if (err && err != -ENOMSG)
|
|
RET_CG_ERR(err);
|
|
}
|
|
|
|
return CG_OK;
|
|
}
|
|
|
|
/* sockops program entry point: dispatch on skops->op.
 *
 * LISTEN and CONNECT only enable the header callbacks; the remaining
 * handled ops are forwarded to their handle_*() routine and return its
 * result.  Unhandled ops return CG_OK.
 */
SEC("sockops/estab")
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		/* Enable TCP_SAVE_SYN on the listener; handle_passive_estab()
		 * later looks for the option in the saved SYN.  The return
		 * value is not checked.
		 */
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops, 0);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}
|
|
|
|
/* License string placed in the "license" ELF section of the BPF object. */
char _license[] SEC("license") = "GPL";
|