From 020e71a3cf7f50c0f2c54cf2444067b76fe6d785 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:24 -0700 Subject: [PATCH] ipv4: guard IP_MINTTL with a static key RFC 5082 IP_MINTTL option is rarely used on hosts. Add a static key to remove from TCP fast path useless code, and potential cache line miss to fetch inet_sk(sk)->min_ttl Note that once ip4_min_ttl static key has been enabled, it stays enabled until next boot. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 ++ net/ipv4/ip_sockglue.c | 6 ++++++ net/ipv4/tcp_ipv4.c | 20 ++++++++++++-------- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index cf229a531194..b71e88507c4a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -750,6 +751,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); +DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d5487c858067..38d29b175ca6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -886,6 +886,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, return ip_mc_leave_group(sk, &mreq); } +DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); + static int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1352,6 +1354,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, goto e_inval; if (val < 0 || val > 255) goto e_inval; + + if (val) + static_branch_enable(&ip4_min_ttl); + /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl * while we are changint it. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a9cbc8e6b796..13d868c43284 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -508,10 +508,12 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; - /* min_ttl can be changed concurrently from do_ip_setsockopt() */ - if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto out; + if (static_branch_unlikely(&ip4_min_ttl)) { + /* min_ttl can be changed concurrently from do_ip_setsockopt() */ + if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } } tp = tcp_sk(sk); @@ -2070,10 +2072,12 @@ int tcp_v4_rcv(struct sk_buff *skb) } } - /* min_ttl can be changed concurrently from do_ip_setsockopt() */ - if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { - __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); - goto discard_and_relse; + if (static_branch_unlikely(&ip4_min_ttl)) { + /* min_ttl can be changed concurrently from do_ip_setsockopt() */ + if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { + __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); + goto discard_and_relse; + } } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))