Merge branch 'net-mitigate-retpoline-overhead'
Paolo Abeni says:

====================
net: mitigate retpoline overhead

The spectre v2 counter-measures, aka retpolines, are a source of measurable
overhead [1]. We can partially address that when the function pointer refers to
a builtin symbol, by resorting to a list of tests vs well-known builtin
functions and direct calls.

Experimental results show that replacing a single indirect call via retpoline
with several branches and a direct call gives performance gains even when
multiple branches are added - 5 or more, as reported in [2].

This may lead to some uglification around the indirect calls. At netconf 2018
Eric Dumazet described a technique to hide the most relevant part of the needed
boilerplate with some macro help.

This series is a [re-]implementation of such idea, exposing the introduced
helpers in a new header file. They are later leveraged to avoid the indirect
call overhead in the GRO path, when possible.

Overall this gives a >10% performance improvement in the UDP GRO benchmark and
a smaller but measurable one under TCP syn flood.

The added infra can be used in follow-up patches to cope with retpoline
overhead in other points of the networking stack (e.g. at the qdisc layer) and
possibly even in other subsystems.

v2 -> v3:
 - fix build error with CONFIG_IPV6=m

v1 -> v2:
 - list explicitly the builtin function names in INDIRECT_CALL_*(), as
   suggested by Ed Cree
 - expand the recipients list

rfc -> v1:
 - use branch prediction hints, as suggested by Eric

[1] http://vger.kernel.org/netconf2018_files/PaoloAbeni_netconf2018.pdf
[2] https://linuxplumbersconf.org/event/2/contributions/99/attachments/98/117/lpc18_paper_af_xdp_perf-v2.pdf
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit bedf3b3320
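As an editorial illustration of the technique described above (not part of the
commit): with retpolines enabled, every call through a function pointer goes
through the retpoline thunk, while a successful pointer comparison lets the
compiler emit a cheap direct call. A minimal sketch, using the real
inet_gro_receive() symbol but a hypothetical dispatch_gro() helper:

/* Editorial sketch, not from the series: dispatch_gro() is hypothetical. */
#include <linux/compiler.h>
#include <linux/skbuff.h>

struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);

static struct sk_buff *dispatch_gro(struct sk_buff *(*cb)(struct list_head *,
                                                          struct sk_buff *),
                                    struct list_head *head,
                                    struct sk_buff *skb)
{
        /* One well-predicted compare plus a direct call for the common case... */
        if (likely(cb == inet_gro_receive))
                return inet_gro_receive(head, skb);
        /* ...and the retpolined indirect call only as a fallback. */
        return cb(head, skb);
}

The helpers added by the series below package exactly this pattern behind the
INDIRECT_CALL_*() macros.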
--- /dev/null
+++ b/include/linux/indirect_call_wrapper.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_INDIRECT_CALL_WRAPPER_H
+#define _LINUX_INDIRECT_CALL_WRAPPER_H
+
+#ifdef CONFIG_RETPOLINE
+
+/*
+ * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin
+ *  @f: function pointer
+ *  @f$NR: builtin function names, up to $NR of them
+ *  @__VA_ARGS__: arguments for @f
+ *
+ * Avoid retpoline overhead for known builtins, checking @f vs each of them and
+ * eventually invoking the builtin function directly. The functions are checked
+ * in the given order. Fall back to the indirect call otherwise.
+ */
+#define INDIRECT_CALL_1(f, f1, ...)                                     \
+        ({                                                              \
+                likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__);     \
+        })
+#define INDIRECT_CALL_2(f, f2, f1, ...)                                 \
+        ({                                                              \
+                likely(f == f2) ? f2(__VA_ARGS__) :                     \
+                                  INDIRECT_CALL_1(f, f1, __VA_ARGS__);  \
+        })
+
+#define INDIRECT_CALLABLE_DECLARE(f)    f
+#define INDIRECT_CALLABLE_SCOPE
+
+#else
+#define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__)
+#define INDIRECT_CALL_2(f, f2, f1, ...) f(__VA_ARGS__)
+#define INDIRECT_CALLABLE_DECLARE(f)
+#define INDIRECT_CALLABLE_SCOPE static
+#endif
+
+/*
+ * We can use INDIRECT_CALL_$NR for ipv6-related functions only if ipv6 is
+ * builtin; this macro simplifies dealing with indirect calls that have only
+ * ipv4/ipv6 alternatives.
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) \
+        INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#elif IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__)
+#endif
+
+#endif
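For orientation, a hedged usage sketch of the new helpers (not part of the
diff; foo_handler, bar_handler and run_handler are hypothetical names, but the
shape mirrors how the GRO call sites below use them):

#include <linux/skbuff.h>
#include <linux/indirect_call_wrapper.h>

/* With CONFIG_RETPOLINE=y the DECLARE emits the prototype and SCOPE is empty;
 * with CONFIG_RETPOLINE=n the DECLARE vanishes and SCOPE becomes "static". */
INDIRECT_CALLABLE_DECLARE(int foo_handler(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(int bar_handler(struct sk_buff *, int));

static int run_handler(int (*cb)(struct sk_buff *, int),
                       struct sk_buff *skb, int off)
{
        /* Under CONFIG_RETPOLINE this expands to
         * likely(cb == foo_handler) ? foo_handler(skb, off) :
         * likely(cb == bar_handler) ? bar_handler(skb, off) : cb(skb, off);
         * otherwise it is just cb(skb, off). */
        return INDIRECT_CALL_2(cb, foo_handler, bar_handler, skb, off);
}

INDIRECT_CALL_INET() follows the same idea but drops the IPv6 candidate when
IPv6 is not builtin, so only configurations that can actually reach the IPv6
callback pay for the extra comparison.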
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -2,6 +2,8 @@
 #ifndef _INET_COMMON_H
 #define _INET_COMMON_H
 
+#include <linux/indirect_call_wrapper.h>
+
 extern const struct proto_ops inet_stream_ops;
 extern const struct proto_ops inet_dgram_ops;
 
@@ -54,4 +56,11 @@ static inline void inet_ctl_sock_destroy(struct sock *sk)
 		sock_release(sk->sk_socket);
 }
 
+#define indirect_call_gro_receive(f2, f1, cb, head, skb)	\
+({								\
+	unlikely(gro_recursion_inc_test(skb)) ?			\
+		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
+		INDIRECT_CALL_2(cb, f2, f1, head, skb);		\
+})
+
 #endif
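An editorial note on the macro just added (not part of the diff): the middle
operand of the ternary is a comma expression, so NAPI_GRO_CB(skb)->flush |= 1,
NULL first marks the skb for flushing and then yields NULL as the value of the
whole statement expression; otherwise the dispatched gro_receive result is
returned. A hedged sketch of a caller (sketch_dispatch is hypothetical; the
real call site appears in the af_inet.c hunk later in this diff):

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <net/inet_common.h>

INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
                                                           struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
                                                           struct sk_buff *));

/* Hypothetical wrapper: honour the GRO recursion limit, then try the known
 * builtin receive callbacks before the indirect call. */
static struct sk_buff *sketch_dispatch(const struct net_offload *ops,
                                       struct list_head *head,
                                       struct sk_buff *skb)
{
        return indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
                                         ops->callbacks.gro_receive, head, skb);
}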
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
 #include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include "net-sysfs.h"
 
@@ -5338,6 +5339,8 @@ static void flush_all_backlogs(void)
 	put_online_cpus();
 }
 
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
 static int napi_gro_complete(struct sk_buff *skb)
 {
 	struct packet_offload *ptype;
@@ -5357,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
 		if (ptype->type != type || !ptype->callbacks.gro_complete)
 			continue;
 
-		err = ptype->callbacks.gro_complete(skb, 0);
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, 0);
 		break;
 	}
 	rcu_read_unlock();
@@ -5504,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
 	napi_gro_complete(oldest);
 }
 
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+							    struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+							    struct sk_buff *));
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5553,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(gro_head, skb);
+		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+					ipv6_gro_receive, inet_gro_receive,
+					gro_head, skb);
 		break;
 	}
 	rcu_read_unlock();
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1385,6 +1385,10 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
+							   struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+							   struct sk_buff *));
 struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	const struct net_offload *ops;
@@ -1494,7 +1498,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+				       ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -1556,6 +1561,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	return -EINVAL;
 }
 
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
 int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	__be16 newlen = htons(skb->len - nhoff);
@@ -1581,7 +1588,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	 * because any hdr with option will have been flushed in
 	 * inet_gro_receive().
 	 */
-	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+	err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+			      tcp4_gro_complete, udp4_gro_complete,
+			      skb, nhoff + sizeof(*iph));
 
 out_unlock:
 	rcu_read_unlock();
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -10,6 +10,7 @@
  *	TCPv4 GSO/GRO support
  */
 
+#include <linux/indirect_call_wrapper.h>
 #include <linux/skbuff.h>
 #include <net/tcp.h>
 #include <net/protocol.h>
@@ -305,7 +306,8 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
@@ -318,7 +320,7 @@ static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *
 	return tcp_gro_receive(head, skb);
 }
 
-static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -13,6 +13,7 @@
 #include <linux/skbuff.h>
 #include <net/udp.h>
 #include <net/protocol.h>
+#include <net/inet_common.h>
 
 static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	netdev_features_t features,
@@ -391,6 +392,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
 	return NULL;
 }
 
+INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+						__be16 sport, __be16 dport));
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -402,7 +405,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	struct sock *sk;
 
 	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
+	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
 	if (!sk)
 		goto out_unlock;
 
@@ -451,8 +455,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff *udp4_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
@@ -502,7 +506,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 	uh->len = newlen;
 
 	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
+	sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+				udp4_lib_lookup_skb, skb, uh->source, uh->dest);
 	if (sk && udp_sk(sk)->gro_enabled) {
 		err = udp_gro_complete_segment(skb);
 	} else if (sk && udp_sk(sk)->gro_complete) {
@@ -525,7 +530,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 }
 EXPORT_SYMBOL(udp_gro_complete);
 
-static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -20,6 +20,23 @@
 
 #include "ip6_offload.h"
 
+/* All GRO functions are always builtin, except UDP over ipv6, which lives in
+ * the ipv6 module, as it depends on the UDPv6 lookup function, so we need
+ * special care when ipv6 is built as a module.
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+
+#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb)	\
+({								\
+	unlikely(gro_recursion_inc_test(skb)) ?			\
+		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
+		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
+})
+
 static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 {
 	const struct net_offload *ops = NULL;
@@ -164,8 +181,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
 	return len;
 }
 
-static struct sk_buff *ipv6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
+							 struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+							 struct sk_buff *));
+INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
+							 struct sk_buff *skb)
 {
 	const struct net_offload *ops;
 	struct sk_buff *pp = NULL;
@@ -260,7 +281,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
 
 	skb_gro_postpull_rcsum(skb, iph, nlen);
 
-	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
+					  ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -301,7 +323,9 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
 	return inet_gro_receive(head, skb);
 }
 
-static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload *ops;
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
@@ -320,7 +344,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 		goto out_unlock;
 
-	err = ops->callbacks.gro_complete(skb, nhoff);
+	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
+			       udp6_gro_complete, skb, nhoff);
 
 out_unlock:
 	rcu_read_unlock();
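For clarity (editorial, not part of the diff): when IPv6 is built as a module,
udp6_gro_receive()/udp6_gro_complete() live in that module, so INDIRECT_CALL_L4
only short-circuits the TCP case and leaves UDP on the regular indirect call.
A hedged sketch of what the ipv6_gro_complete() call site above boils down to
under the two configurations (sketch_l4_complete is hypothetical,
CONFIG_RETPOLINE=y assumed):

#include <linux/skbuff.h>
#include <net/protocol.h>

static int sketch_l4_complete(const struct net_offload *ops,
                              struct sk_buff *skb, int nhoff)
{
#if IS_BUILTIN(CONFIG_IPV6)
        /* Both L4 handlers are builtin: test TCP first, then UDP. */
        return likely(ops->callbacks.gro_complete == tcp6_gro_complete) ?
                        tcp6_gro_complete(skb, nhoff) :
               likely(ops->callbacks.gro_complete == udp6_gro_complete) ?
                        udp6_gro_complete(skb, nhoff) :
                        ops->callbacks.gro_complete(skb, nhoff);
#else
        /* CONFIG_IPV6=m: udp6_gro_complete is in the module, so only the TCP
         * handler can be compared against; UDP stays an indirect call. */
        return likely(ops->callbacks.gro_complete == tcp6_gro_complete) ?
                        tcp6_gro_complete(skb, nhoff) :
                        ops->callbacks.gro_complete(skb, nhoff);
#endif
}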
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -9,14 +9,15 @@
  *
  *      TCPv6 GSO/GRO support
  */
+#include <linux/indirect_call_wrapper.h>
 #include <linux/skbuff.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff *tcp6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
@@ -29,7 +30,7 @@ static struct sk_buff *tcp6_gro_receive(struct list_head *head,
 	return tcp_gro_receive(head, skb);
 }
 
-static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -11,6 +11,7 @@
  */
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/indirect_call_wrapper.h>
 #include <net/protocol.h>
 #include <net/ipv6.h>
 #include <net/udp.h>
@@ -114,8 +115,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
-static struct sk_buff *udp6_gro_receive(struct list_head *head,
-					struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
@@ -142,7 +143,7 @@ static struct sk_buff *udp6_gro_receive(struct list_head *head,
 	return NULL;
 }
 
-static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);