From 428d2459cceb77357b81c242ca22462a6a904817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Sat, 30 May 2020 14:39:12 +0200 Subject: [PATCH 01/19] xfrm: introduce oseq-may-wrap flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 4303 in section 3.3.3 suggests to disable anti-replay for manually distributed ICVs in which case the sender does not need to monitor or reset the counter. However, the sender still increments the counter and when it reaches the maximum value, the counter rolls over back to zero. This patch introduces new extra_flag XFRM_SA_XFLAG_OSEQ_MAY_WRAP which allows sequence number to cycle in outbound packets if set. This flag is used only in legacy and bmp code, because esn should not be negotiated if anti-replay is disabled (see note in 3.3.3 section). Signed-off-by: Petr Vaněk Acked-by: Christophe Gouault Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + net/xfrm/xfrm_replay.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ff7cfdc6cb44..ffc6a5391bb7 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -387,6 +387,7 @@ struct xfrm_usersa_info { }; #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +#define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 struct xfrm_usersa_id { xfrm_address_t daddr; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 98943f8d01aa..c6a4338a0d08 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -89,7 +89,8 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; - if (unlikely(x->replay.oseq == 0)) { + if (unlikely(x->replay.oseq == 0) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -168,7 +169,8 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; - if (unlikely(replay_esn->oseq == 0)) { + if (unlikely(replay_esn->oseq == 0) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { replay_esn->oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -572,7 +574,8 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; - if (unlikely(oseq < x->replay.oseq)) { + if (unlikely(oseq < x->replay.oseq) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -611,7 +614,8 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(oseq < replay_esn->oseq) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; From 1475ee0ac9a16dd5df23ca8abe1039eb6086eb66 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:29 +0800 Subject: [PATCH 02/19] xfrm: add is_ipip to struct xfrm_input_afinfo This patch is to add a new member is_ipip to struct xfrm_input_afinfo, to allow another group family of callback functions to be registered with is_ipip set. This will be used for doing a callback for struct xfrm(6)_tunnel of ipip/ipv6 tunnels in xfrm_input() by calling xfrm_rcv_cb(), which is needed by ipip/ipv6 tunnels' support in ip(6)_vti and xfrm interface in the next patches. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_input.c | 24 +++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e20b2b27ec48..4666bc9e59ab 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -373,7 +373,8 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { - unsigned int family; + u8 family; + bool is_ipip; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index bd984ff17c2d..37456d022cfa 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -42,7 +42,7 @@ struct xfrm_trans_cb { #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); -static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; +static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[2][AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; @@ -53,14 +53,14 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo))) + if (WARN_ON(afinfo->family > AF_INET6)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_input_afinfo_lock); - if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) err = -EEXIST; else - rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + rcu_assign_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], afinfo); spin_unlock_bh(&xfrm_input_afinfo_lock); return err; } @@ -71,11 +71,11 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) int err = 0; spin_lock_bh(&xfrm_input_afinfo_lock); - if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { + if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) err = -EINVAL; else - RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); } spin_unlock_bh(&xfrm_input_afinfo_lock); synchronize_rcu(); @@ -83,15 +83,15 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_unregister_afinfo); -static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(u8 family, bool is_ipip) { const struct xfrm_input_afinfo *afinfo; - if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo))) + if (WARN_ON_ONCE(family > AF_INET6)) return NULL; rcu_read_lock(); - afinfo = rcu_dereference(xfrm_input_afinfo[family]); + afinfo = rcu_dereference(xfrm_input_afinfo[is_ipip][family]); if (unlikely(!afinfo)) rcu_read_unlock(); return afinfo; @@ -100,9 +100,11 @@ static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, int err) { + bool is_ipip = (protocol == IPPROTO_IPIP || protocol == IPPROTO_IPV6); + const struct xfrm_input_afinfo *afinfo; int ret; - const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + afinfo = xfrm_input_get_afinfo(family, is_ipip); if (!afinfo) return -EAFNOSUPPORT; From 6df2db5d37ba3df8c80d90c15f1e20480be43f75 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:30 +0800 Subject: [PATCH 03/19] tunnel4: add cb_handler to struct xfrm_tunnel This patch is to register a callback function tunnel4_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel4 and tunnel64. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel4_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/ipv4/tunnel4.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4666bc9e59ab..c1ec6294d773 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1416,6 +1416,7 @@ struct xfrm6_protocol { /* XFRM tunnel handlers. */ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, u32 info); struct xfrm_tunnel __rcu *next; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index c4b2ccbeba04..e44aaf41a138 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -110,6 +110,33 @@ static int tunnel4_rcv(struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int tunnel4_rcv_cb(struct sk_buff *skb, u8 proto, int err) +{ + struct xfrm_tunnel __rcu *head; + struct xfrm_tunnel *handler; + int ret; + + head = (proto == IPPROTO_IPIP) ? tunnel4_handlers : tunnel64_handlers; + + for_each_tunnel_rcu(head, handler) { + if (handler->cb_handler) { + ret = handler->cb_handler(skb, err); + if (ret <= 0) + return ret; + } + } + + return 0; +} + +static const struct xfrm_input_afinfo tunnel4_input_afinfo = { + .family = AF_INET, + .is_ipip = true, + .callback = tunnel4_rcv_cb, +}; +#endif + #if IS_ENABLED(CONFIG_IPV6) static int tunnel64_rcv(struct sk_buff *skb) { @@ -230,6 +257,18 @@ static int __init tunnel4_init(void) #endif goto err; } +#endif +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + if (xfrm_input_register_afinfo(&tunnel4_input_afinfo)) { + inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); +#if IS_ENABLED(CONFIG_IPV6) + inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6); +#endif +#if IS_ENABLED(CONFIG_MPLS) + inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS); +#endif + goto err; + } #endif return 0; @@ -240,6 +279,10 @@ static int __init tunnel4_init(void) static void __exit tunnel4_fini(void) { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + if (xfrm_input_unregister_afinfo(&tunnel4_input_afinfo)) + pr_err("tunnel4 close: can't remove input afinfo\n"); +#endif #if IS_ENABLED(CONFIG_MPLS) if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) pr_err("tunnelmpls4 close: can't remove protocol\n"); From 86afc7031826147407e96412668d343e0f1bd6fd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:31 +0800 Subject: [PATCH 04/19] tunnel6: add tunnel6_input_afinfo for ipip and ipv6 tunnels This patch is to register a callback function tunnel6_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel6 and tunnel46. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET6 and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel6_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/ipv6/tunnel6.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c1ec6294d773..83a532dda1bd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1425,6 +1425,7 @@ struct xfrm_tunnel { struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); struct xfrm6_tunnel __rcu *next; diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 06c02ebe6b9b..00e8d8b1c9a7 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -155,6 +155,33 @@ static int tunnel6_rcv(struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int tunnel6_rcv_cb(struct sk_buff *skb, u8 proto, int err) +{ + struct xfrm6_tunnel __rcu *head; + struct xfrm6_tunnel *handler; + int ret; + + head = (proto == IPPROTO_IPV6) ? tunnel6_handlers : tunnel46_handlers; + + for_each_tunnel_rcu(head, handler) { + if (handler->cb_handler) { + ret = handler->cb_handler(skb, err); + if (ret <= 0) + return ret; + } + } + + return 0; +} + +static const struct xfrm_input_afinfo tunnel6_input_afinfo = { + .family = AF_INET6, + .is_ipip = true, + .callback = tunnel6_rcv_cb, +}; +#endif + static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -245,11 +272,25 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); return -EAGAIN; } +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + if (xfrm_input_register_afinfo(&tunnel6_input_afinfo)) { + pr_err("%s: can't add input afinfo\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + if (xfrm6_tunnel_mpls_supported()) + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS); + return -EAGAIN; + } +#endif return 0; } static void __exit tunnel6_fini(void) { +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + if (xfrm_input_unregister_afinfo(&tunnel6_input_afinfo)) + pr_err("%s: can't remove input afinfo\n", __func__); +#endif if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP)) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) From 87e66b9682d7067eb7db08040dae36b608a4d971 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:32 +0800 Subject: [PATCH 05/19] ip_vti: support IPIP tunnel processing with .cb_handler With tunnel4_input_afinfo added, IPIP tunnel processing in ip_vti can be easily done with .cb_handler. So replace the processing by calling ip_tunnel_rcv() with it. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 51 +++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1d9c8cff5ac3..68177f065117 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -91,32 +91,6 @@ static int vti_rcv_proto(struct sk_buff *skb) return vti_rcv(skb, 0, false); } -static int vti_rcv_tunnel(struct sk_buff *skb) -{ - struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id); - const struct iphdr *iph = ip_hdr(skb); - struct ip_tunnel *tunnel; - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); - if (tunnel) { - struct tnl_ptk_info tpi = { - .proto = htons(ETH_P_IP), - }; - - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto, false)) - goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false); - } - - return -EINVAL; -drop: - kfree_skb(skb); - return 0; -} - static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -495,11 +469,22 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; -static struct xfrm_tunnel ipip_handler __read_mostly = { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int vti_rcv_tunnel(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr, 0, false); +} + +static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .handler = vti_rcv_tunnel, + .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 0, }; +#endif static int __net_init vti_init_net(struct net *net) { @@ -669,10 +654,12 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) msg = "ipip tunnel"; - err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_failed; +#endif msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); @@ -682,8 +669,10 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); xfrm_tunnel_failed: +#endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); @@ -699,7 +688,9 @@ static int __init vti_init(void) static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); +#endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); From e6ce64570f2451684b4f9bcbaee6c40c4a7dff82 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:33 +0800 Subject: [PATCH 06/19] ip_vti: support IPIP6 tunnel processing For IPIP6 tunnel processing, the functions called will be the same as that for IPIP tunnel's. So reuse it and register it with family == AF_INET6. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68177f065117..c0b97b8f6fbd 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -658,7 +658,12 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET); if (err < 0) - goto xfrm_tunnel_failed; + goto xfrm_tunnel_ipip_failed; +#if IS_ENABLED(CONFIG_IPV6) + err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipip6_failed; +#endif #endif msg = "netlink interface"; @@ -670,8 +675,12 @@ static int __init vti_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_ENABLED(CONFIG_IPV6) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); +xfrm_tunnel_ipip6_failed: +#endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); -xfrm_tunnel_failed: +xfrm_tunnel_ipip_failed: #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: @@ -689,6 +698,9 @@ static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_ENABLED(CONFIG_IPV6) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); +#endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); From 08622869ed3f167db9b2250ab1bb055f55293401 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:34 +0800 Subject: [PATCH 07/19] ip6_vti: support IP6IP6 tunnel processing with .cb_handler Similar to IPIP tunnel's processing, this patch is to support IP6IP6 tunnel processing with .cb_handler. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET6_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1147f647b9a0..39efe41f7b48 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1218,6 +1218,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int vti6_rcv_tunnel(struct sk_buff *skb) +{ + const xfrm_address_t *saddr; + __be32 spi; + + saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; + spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); + + return vti6_input_proto(skb, IPPROTO_IPV6, spi, 0); +} + +static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = { + .handler = vti6_rcv_tunnel, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 0, +}; +#endif + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1243,6 +1263,12 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + msg = "ipv6 tunnel"; + err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); + if (err < 0) + goto vti_tunnel_failed; +#endif msg = "netlink interface"; err = rtnl_link_register(&vti6_link_ops); @@ -1252,6 +1278,10 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); +vti_tunnel_failed: +#endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); @@ -1270,6 +1300,9 @@ static int __init vti6_tunnel_init(void) static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); +#endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); From 2ab110cbb0c0cb05c64f37f42b78f5bc11699b0e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:35 +0800 Subject: [PATCH 08/19] ip6_vti: support IP6IP tunnel processing For IP6IP tunnel processing, the functions called will be the same as that for IP6IP6 tunnel's. So reuse it and register it with family == AF_INET. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 39efe41f7b48..dfa93bc857d2 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1267,7 +1267,10 @@ static int __init vti6_tunnel_init(void) msg = "ipv6 tunnel"; err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) - goto vti_tunnel_failed; + goto vti_tunnel_ipv6_failed; + err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET); + if (err < 0) + goto vti_tunnel_ip6ip_failed; #endif msg = "netlink interface"; @@ -1279,8 +1282,10 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); +vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); -vti_tunnel_failed: +vti_tunnel_ipv6_failed: #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: @@ -1301,6 +1306,7 @@ static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); From d5a7a5057387d79b91a6e2fd78a76ccd53f91e6c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:36 +0800 Subject: [PATCH 09/19] ipcomp: assign if_id to child tunnel from parent tunnel The child tunnel if_id will be used for xfrm interface's lookup when processing the IP(6)IP(6) packets in the next patches. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ipcomp.c | 1 + net/ipv6/ipcomp6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59bfa3825810..b42683212c65 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -72,6 +72,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.flags = x->props.flags; t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); + t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 99668bfebd85..daef890460b7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -91,6 +91,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); + t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; From d7b360c2869f9ce2418510d14baf0f9696fcf1e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:37 +0800 Subject: [PATCH 10/19] xfrm: interface: support IP6IP6 and IP6IP tunnels processing with .cb_handler Similar to ip6_vti, IP6IP6 and IP6IP tunnels processing can easily be done with .cb_handler for xfrm interface. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET6_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c407ecbc5d46..b9ef496d3d7c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -798,6 +798,26 @@ static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .priority = 10, }; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int xfrmi6_rcv_tunnel(struct sk_buff *skb) +{ + const xfrm_address_t *saddr; + __be32 spi; + + saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; + spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); + + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); +} + +static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { + .handler = xfrmi6_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = -1, +}; +#endif + static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { .handler = xfrm4_rcv, .input_handler = xfrm_input, @@ -866,9 +886,23 @@ static int __init xfrmi6_init(void) err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipv6_failed; + err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET); + if (err < 0) + goto xfrm_tunnel_ip6ip_failed; +#endif return 0; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +xfrm_tunnel_ip6ip_failed: + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); +xfrm_tunnel_ipv6_failed: + xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); +#endif xfrm_proto_comp_failed: xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -879,6 +913,10 @@ static int __init xfrmi6_init(void) static void xfrmi6_fini(void) { +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); +#endif xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); From da9bbf0598c9e66b8a46ceabaa6172596795acf2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:38 +0800 Subject: [PATCH 11/19] xfrm: interface: support IPIP and IPIP6 tunnels processing with .cb_handler Similar to ip_vti, IPIP and IPIP6 tunnels processing can easily be done with .cb_handler for xfrm interface. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index b9ef496d3d7c..a79eb49a4e0d 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -842,6 +842,20 @@ static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { .priority = 10, }; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int xfrmi4_rcv_tunnel(struct sk_buff *skb) +{ + return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); +} + +static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { + .handler = xfrmi4_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = -1, +}; +#endif + static int __init xfrmi4_init(void) { int err; @@ -855,9 +869,23 @@ static int __init xfrmi4_init(void) err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); + if (err < 0) + goto xfrm_tunnel_ipip_failed; + err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipip6_failed; +#endif return 0; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +xfrm_tunnel_ipip6_failed: + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); +xfrm_tunnel_ipip_failed: + xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); +#endif xfrm_proto_comp_failed: xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -868,6 +896,10 @@ static int __init xfrmi4_init(void) static void xfrmi4_fini(void) { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); +#endif xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); From 2749c69734298905aedb0629f2bc66346f0031f9 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 13:16:51 +0300 Subject: [PATCH 12/19] xfrm interface: avoid xi lookup in xfrmi_decode_session() The xfrmi context exists in the netdevice priv context. Avoid looking for it in a separate list. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index a79eb49a4e0d..36a765eac034 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -47,6 +47,7 @@ static int xfrmi_dev_init(struct net_device *dev); static void xfrmi_dev_setup(struct net_device *dev); static struct rtnl_link_ops xfrmi_link_ops __read_mostly; static unsigned int xfrmi_net_id __read_mostly; +static const struct net_device_ops xfrmi_netdev_ops; struct xfrmi_net { /* lists for storing interfaces in use */ @@ -73,8 +74,7 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, unsigned short family) { - struct xfrmi_net *xfrmn; - struct xfrm_if *xi; + struct net_device *dev; int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) @@ -88,18 +88,21 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, ifindex = inet_sdif(skb); break; } - if (!ifindex) - ifindex = skb->dev->ifindex; - xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); + if (ifindex) { + struct net *net = xs_net(xfrm_input_state(skb)); - for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { - if (ifindex == xi->dev->ifindex && - (xi->dev->flags & IFF_UP)) - return xi; + dev = dev_get_by_index_rcu(net, ifindex); + } else { + dev = skb->dev; } - return NULL; + if (!dev || !(dev->flags & IFF_UP)) + return NULL; + if (dev->netdev_ops != &xfrmi_netdev_ops) + return NULL; + + return netdev_priv(dev); } static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) From e98e44562ba2ee994f4fd1e32be7e327edd263ca Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 13:16:52 +0300 Subject: [PATCH 13/19] xfrm interface: store xfrmi contexts in a hash by if_id xfrmi_lookup() is called on every packet. Using a single list for looking up if_id becomes a bottleneck when having many xfrm interfaces. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 36a765eac034..96496fdfe3ce 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -49,20 +49,28 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly; static unsigned int xfrmi_net_id __read_mostly; static const struct net_device_ops xfrmi_netdev_ops; +#define XFRMI_HASH_BITS 8 +#define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS) + struct xfrmi_net { /* lists for storing interfaces in use */ - struct xfrm_if __rcu *xfrmi[1]; + struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; }; #define for_each_xfrmi_rcu(start, xi) \ for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next)) +static u32 xfrmi_hash(u32 if_id) +{ + return hash_32(if_id, XFRMI_HASH_BITS); +} + static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if *xi; - for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { + for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) { if (x->if_id == xi->p.if_id && (xi->dev->flags & IFF_UP)) return xi; @@ -107,7 +115,7 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) { - struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0]; + struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; rcu_assign_pointer(xi->next , rtnl_dereference(*xip)); rcu_assign_pointer(*xip, xi); @@ -118,7 +126,7 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) struct xfrm_if __rcu **xip; struct xfrm_if *iter; - for (xip = &xfrmn->xfrmi[0]; + for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; (iter = rtnl_dereference(*xip)) != NULL; xip = &iter->next) { if (xi == iter) { @@ -162,7 +170,7 @@ static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p) struct xfrm_if *xi; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); - for (xip = &xfrmn->xfrmi[0]; + for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) if (xi->p.if_id == p->if_id) @@ -761,11 +769,14 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; struct xfrm_if *xi; + int i; - for (xip = &xfrmn->xfrmi[0]; - (xi = rtnl_dereference(*xip)) != NULL; - xip = &xi->next) - unregister_netdevice_queue(xi->dev, &list); + for (i = 0; i < XFRMI_HASH_SIZE; i++) { + for (xip = &xfrmn->xfrmi[i]; + (xi = rtnl_dereference(*xip)) != NULL; + xip = &xi->next) + unregister_netdevice_queue(xi->dev, &list); + } } unregister_netdevice_many(&list); rtnl_unlock(); From 55a48c7ec75ad8a1f4e39d271e2b5aee21150f65 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:36 +0800 Subject: [PATCH 14/19] ip_vti: not register vti_ipip_handler twice An xfrm_tunnel object is linked into the list when registering, so vti_ipip_handler can not be registered twice, otherwise its next pointer will be overwritten on the second time. So this patch is to define a new xfrm_tunnel object to register for AF_INET6. Fixes: e6ce64570f24 ("ip_vti: support IPIP6 tunnel processing") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c0b97b8f6fbd..3e5d54517145 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -484,6 +484,13 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .err_handler = vti4_err, .priority = 0, }; + +static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { + .handler = vti_rcv_tunnel, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 0, +}; #endif static int __net_init vti_init_net(struct net *net) @@ -660,7 +667,7 @@ static int __init vti_init(void) if (err < 0) goto xfrm_tunnel_ipip_failed; #if IS_ENABLED(CONFIG_IPV6) - err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET6); + err = xfrm4_tunnel_register(&vti_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif @@ -676,7 +683,7 @@ static int __init vti_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) - xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); xfrm_tunnel_ipip6_failed: #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); @@ -699,7 +706,7 @@ static void __exit vti_fini(void) rtnl_link_unregister(&vti_link_ops); #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) - xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); #endif From a8757147905ea5adee78c7a813fc080a4124f248 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:37 +0800 Subject: [PATCH 15/19] ip6_vti: not register vti_ipv6_handler twice An xfrm6_tunnel object is linked into the list when registering, so vti_ipv6_handler can not be registered twice, otherwise its next pointer will be overwritten on the second time. So this patch is to define a new xfrm6_tunnel object to register for AF_INET. Fixes: 2ab110cbb0c0 ("ip6_vti: support IP6IP tunnel processing") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dfa93bc857d2..18ec4ab45be7 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1236,6 +1236,13 @@ static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = { .err_handler = vti6_err, .priority = 0, }; + +static struct xfrm6_tunnel vti_ip6ip_handler __read_mostly = { + .handler = vti6_rcv_tunnel, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 0, +}; #endif /** @@ -1268,7 +1275,7 @@ static int __init vti6_tunnel_init(void) err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) goto vti_tunnel_ipv6_failed; - err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET); + err = xfrm6_tunnel_register(&vti_ip6ip_handler, AF_INET); if (err < 0) goto vti_tunnel_ip6ip_failed; #endif @@ -1282,7 +1289,7 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); + err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); vti_tunnel_ipv6_failed: @@ -1306,7 +1313,7 @@ static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); From 8b404f46dd6ab845d0fa794679329d4089281ccb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:38 +0800 Subject: [PATCH 16/19] xfrm: interface: not xfrmi_ipv6/ipip_handler twice As we did in the last 2 patches for vti(6), this patch is to define a new xfrm_tunnel object 'xfrmi_ipip6_handler' to register for AF_INET6, and a new xfrm6_tunnel object 'xfrmi_ip6ip_handler' to register for AF_INET. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 96496fdfe3ce..63a52b4b6ea9 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -830,6 +830,13 @@ static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { .err_handler = xfrmi6_err, .priority = -1, }; + +static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { + .handler = xfrmi6_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = -1, +}; #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { @@ -868,6 +875,13 @@ static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { .err_handler = xfrmi4_err, .priority = -1, }; + +static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = { + .handler = xfrmi4_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = -1, +}; #endif static int __init xfrmi4_init(void) @@ -887,7 +901,7 @@ static int __init xfrmi4_init(void) err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; - err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET6); + err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif @@ -911,7 +925,7 @@ static int __init xfrmi4_init(void) static void xfrmi4_fini(void) { #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) - xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6); xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); @@ -936,7 +950,7 @@ static int __init xfrmi6_init(void) err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipv6_failed; - err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET); + err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ip6ip_failed; #endif @@ -960,7 +974,7 @@ static int __init xfrmi6_init(void) static void xfrmi6_fini(void) { #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); From 0a0d93b943a20e4210491911e82c3b4a34391ce4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Jul 2020 15:02:30 +0800 Subject: [PATCH 17/19] xfrm: interface: use IS_REACHABLE to avoid some compile errors kernel test robot reported some compile errors: ia64-linux-ld: net/xfrm/xfrm_interface.o: in function `xfrmi4_fini': net/xfrm/xfrm_interface.c:900: undefined reference to `xfrm4_tunnel_deregister' ia64-linux-ld: net/xfrm/xfrm_interface.c:901: undefined reference to `xfrm4_tunnel_deregister' ia64-linux-ld: net/xfrm/xfrm_interface.o: in function `xfrmi4_init': net/xfrm/xfrm_interface.c:873: undefined reference to `xfrm4_tunnel_register' ia64-linux-ld: net/xfrm/xfrm_interface.c:876: undefined reference to `xfrm4_tunnel_register' ia64-linux-ld: net/xfrm/xfrm_interface.c:885: undefined reference to `xfrm4_tunnel_deregister' This happened when set CONFIG_XFRM_INTERFACE=y and CONFIG_INET_TUNNEL=m. We don't really want xfrm_interface to depend inet_tunnel completely, but only to disable the tunnel code when inet_tunnel is not seen. So instead of adding "select INET_TUNNEL" for XFRM_INTERFACE, this patch is only to change to IS_REACHABLE to avoid these compile error. Reported-by: kernel test robot Fixes: da9bbf0598c9 ("xfrm: interface: support IPIP and IPIP6 tunnels processing with .cb_handler") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 63a52b4b6ea9..4c904d332007 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -812,7 +812,7 @@ static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .priority = 10, }; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int xfrmi6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; @@ -863,7 +863,7 @@ static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { .priority = 10, }; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) static int xfrmi4_rcv_tunnel(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); @@ -897,7 +897,7 @@ static int __init xfrmi4_init(void) err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; @@ -908,7 +908,7 @@ static int __init xfrmi4_init(void) return 0; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm_tunnel_ipip6_failed: xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); xfrm_tunnel_ipip_failed: @@ -924,7 +924,7 @@ static int __init xfrmi4_init(void) static void xfrmi4_fini(void) { -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6); xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); #endif @@ -946,7 +946,7 @@ static int __init xfrmi6_init(void) err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipv6_failed; @@ -957,7 +957,7 @@ static int __init xfrmi6_init(void) return 0; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm_tunnel_ip6ip_failed: xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); xfrm_tunnel_ipv6_failed: @@ -973,7 +973,7 @@ static int __init xfrmi6_init(void) static void xfrmi6_fini(void) { -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); #endif From 96a208295040c00c85d022720c10ec1a751b9bdb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Jul 2020 15:03:14 +0800 Subject: [PATCH 18/19] ip6_vti: use IS_REACHABLE to avoid some compile errors Naresh reported some compile errors: arm build failed due this error on linux-next 20200713 and 20200713 net/ipv6/ip6_vti.o: In function `vti6_rcv_tunnel': ip6_vti.c:(.text+0x1d20): undefined reference to `xfrm6_tunnel_spi_lookup' This happened when set CONFIG_IPV6_VTI=y and CONFIG_INET6_TUNNEL=m. We don't really want ip6_vti to depend inet6_tunnel completely, but only to disable the tunnel code when inet6_tunnel is not seen. So instead of adding "select INET6_TUNNEL" for IPV6_VTI, this patch is only to change to IS_REACHABLE to avoid these compile error. Reported-by: Naresh Kamboju Fixes: 08622869ed3f ("ip6_vti: support IP6IP6 tunnel processing with .cb_handler") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 18ec4ab45be7..53f12b40528e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1218,7 +1218,7 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int vti6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; @@ -1270,7 +1270,7 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) msg = "ipv6 tunnel"; err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) @@ -1288,7 +1288,7 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); @@ -1312,7 +1312,7 @@ static int __init vti6_tunnel_init(void) static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif From b328ecc468f8f92433c9ad82675c0ce9f99b10cf Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 17 Jul 2020 10:35:32 +0200 Subject: [PATCH 19/19] xfrm: Make the policy hold queue work with VTI. We forgot to support the xfrm policy hold queue when VTI was implemented. This patch adds everything we need so that we can use the policy hold queue together with VTI interfaces. Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 6 +++++- net/ipv6/ip6_vti.c | 6 +++++- net/xfrm/xfrm_policy.c | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3e5d54517145..8b962eac9ed8 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -218,12 +218,15 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, } dst_hold(dst); - dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0); + dst = xfrm_lookup_route(tunnel->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } + if (dst->flags & DST_XFRM_QUEUE) + goto queued; + if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; dst_release(dst); @@ -255,6 +258,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } +queued: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 53f12b40528e..f5a4c4a6492b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -491,13 +491,16 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } dst_hold(dst); - dst = xfrm_lookup(t->net, dst, fl, NULL, 0); + dst = xfrm_lookup_route(t->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto tx_err_link_failure; } + if (dst->flags & DST_XFRM_QUEUE) + goto queued; + x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; @@ -533,6 +536,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } +queued: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 564aa6492e7c..be150475b28b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2758,6 +2758,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; + __u32 skb_mark; spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); @@ -2767,7 +2768,12 @@ static void xfrm_policy_queue_process(struct timer_list *t) } dst = skb_dst(skb); sk = skb->sk; + + /* Fixup the mark to support VTI. */ + skb_mark = skb->mark; + skb->mark = pol->mark.v; xfrm_decode_session(skb, &fl, dst->ops->family); + skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); @@ -2799,7 +2805,12 @@ static void xfrm_policy_queue_process(struct timer_list *t) while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); + /* Fixup the mark to support VTI. */ + skb_mark = skb->mark; + skb->mark = pol->mark.v; xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + skb->mark = skb_mark; + dst_hold(xfrm_dst_path(skb_dst(skb))); dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) {