net: Avoid receiving packets with an l3mdev on unbound UDP sockets

Packets arriving in a VRF currently are delivered to UDP sockets that
aren't bound to any interface. TCP defaults to not delivering packets
arriving in a VRF to unbound sockets. IP route lookup and socket
transmit both assume that unbound means using the default table and
UDP applications that haven't been changed to be aware of VRFs may not
function correctly in this case since they may not be able to handle
overlapping IP address ranges, or be able to send packets back to the
original sender if required.

So add a sysctl, udp_l3mdev_accept, to control this behaviour with it
being analogous to the existing tcp_l3mdev_accept, namely to allow a
process to have a VRF-global listen socket. Have this default to off
as this is the behaviour that users will expect, given that there is
no explicit mechanism to set unmodified VRF-unaware applications into a
default VRF.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Robert Shearman 2017-01-26 18:02:24 +00:00 committed by David S. Miller
parent a3a4de056e
commit 63a6fff353
6 changed files with 66 additions and 17 deletions

View File

@ -721,6 +721,13 @@ tcp_challenge_ack_limit - INTEGER
UDP variables: UDP variables:
udp_l3mdev_accept - BOOLEAN
Enabling this option allows a "global" bound socket to work
across L3 master domains (e.g., VRFs) with packets capable of
being received regardless of the L3 domain in which they
originated. Only valid when the kernel was compiled with
CONFIG_NET_L3_MASTER_DEV.
Default: 0 (disabled)
udp_mem - vector of 3 INTEGERs: min, pressure, max udp_mem - vector of 3 INTEGERs: min, pressure, max
Number of pages allowed for queueing by all UDP sockets. Number of pages allowed for queueing by all UDP sockets.

View File

@ -98,10 +98,11 @@ VRF device:
or to specify the output device using cmsg and IP_PKTINFO. or to specify the output device using cmsg and IP_PKTINFO.
TCP services running in the default VRF context (ie., not bound to any VRF TCP & UDP services running in the default VRF context (ie., not bound
device) can work across all VRF domains by enabling the tcp_l3mdev_accept to any VRF device) can work across all VRF domains by enabling the
sysctl option: tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
sysctl -w net.ipv4.tcp_l3mdev_accept=1 sysctl -w net.ipv4.tcp_l3mdev_accept=1
sysctl -w net.ipv4.udp_l3mdev_accept=1
netfilter rules on the VRF device can be used to limit access to services netfilter rules on the VRF device can be used to limit access to services
running in the default VRF context as well. running in the default VRF context as well.

View File

@ -124,6 +124,10 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row; struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog; int sysctl_max_syn_backlog;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
#endif
int sysctl_igmp_max_memberships; int sysctl_igmp_max_memberships;
int sysctl_igmp_max_msf; int sysctl_igmp_max_msf;
int sysctl_igmp_llm_reports; int sysctl_igmp_llm_reports;

View File

@ -1012,6 +1012,17 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = ipv4_privileged_ports, .proc_handler = ipv4_privileged_ports,
}, },
#ifdef CONFIG_NET_L3_MASTER_DEV
{
.procname = "udp_l3mdev_accept",
.data = &init_net.ipv4.sysctl_udp_l3mdev_accept,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif
{ } { }
}; };

View File

@ -134,6 +134,17 @@ EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536 #define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
/*
 * Tell the UDP socket lookup whether it has to insist on an exact
 * match between a socket's bound device and the incoming interface.
 *
 * Returns true only when the kernel is built with
 * CONFIG_NET_L3_MASTER_DEV, the udp_l3mdev_accept sysctl of @net is
 * zero, an @skb was supplied, and ipv4_l3mdev_skb() accepts the flags
 * stored in its IPCB.  Returns false in every other case.
 *
 * IPCB reference means this can not be used from early demux.
 */
static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	/* udp_l3mdev_accept is set: no exact device match is demanded. */
	if (net->ipv4.sysctl_udp_l3mdev_accept)
		return false;

	/* Without an skb there are no IPCB flags to consult. */
	if (!skb)
		return false;

	if (ipv4_l3mdev_skb(IPCB(skb)->flags))
		return true;
#endif
	return false;
}
static int udp_lib_lport_inuse(struct net *net, __u16 num, static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot, const struct udp_hslot *hslot,
unsigned long *bitmap, unsigned long *bitmap,
@ -369,7 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
static int compute_score(struct sock *sk, struct net *net, static int compute_score(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport, __be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum, int dif) __be32 daddr, unsigned short hnum, int dif,
bool exact_dif)
{ {
int score; int score;
struct inet_sock *inet; struct inet_sock *inet;
@ -400,7 +412,7 @@ static int compute_score(struct sock *sk, struct net *net,
score += 4; score += 4;
} }
if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if || exact_dif) {
if (sk->sk_bound_dev_if != dif) if (sk->sk_bound_dev_if != dif)
return -1; return -1;
score += 4; score += 4;
@ -425,7 +437,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
/* called with rcu_read_lock() */ /* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net, static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport, __be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif, __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
struct udp_hslot *hslot2, struct udp_hslot *hslot2,
struct sk_buff *skb) struct sk_buff *skb)
{ {
@ -437,7 +449,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
badness = 0; badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport, score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif); daddr, hnum, dif, exact_dif);
if (score > badness) { if (score > badness) {
reuseport = sk->sk_reuseport; reuseport = sk->sk_reuseport;
if (reuseport) { if (reuseport) {
@ -472,6 +484,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
unsigned short hnum = ntohs(dport); unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp_lib_exact_dif_match(net, skb);
int score, badness, matches = 0, reuseport = 0; int score, badness, matches = 0, reuseport = 0;
u32 hash = 0; u32 hash = 0;
@ -484,7 +497,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport, result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, daddr, hnum, dif,
hslot2, skb); exact_dif, hslot2, skb);
if (!result) { if (!result) {
unsigned int old_slot2 = slot2; unsigned int old_slot2 = slot2;
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
@ -499,7 +512,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport, result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, daddr, hnum, dif,
hslot2, skb); exact_dif, hslot2, skb);
} }
return result; return result;
} }
@ -508,7 +521,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
badness = 0; badness = 0;
sk_for_each_rcu(sk, &hslot->head) { sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport, score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif); daddr, hnum, dif, exact_dif);
if (score > badness) { if (score > badness) {
reuseport = sk->sk_reuseport; reuseport = sk->sk_reuseport;
if (reuseport) { if (reuseport) {

View File

@ -55,6 +55,16 @@
#include <trace/events/skb.h> #include <trace/events/skb.h>
#include "udp_impl.h" #include "udp_impl.h"
/*
 * IPv6 counterpart of the exact-device-match decision used by the UDP
 * socket lookup.
 *
 * Returns true only when the kernel is built with
 * CONFIG_NET_L3_MASTER_DEV, the udp_l3mdev_accept sysctl of @net is
 * zero (the value is read from the ipv4 part of the netns), an @skb
 * was supplied, and ipv6_l3mdev_skb() accepts the flags stored in its
 * IP6CB.  Returns false in every other case.
 */
static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
	/* udp_l3mdev_accept is set: no exact device match is demanded. */
	if (net->ipv4.sysctl_udp_l3mdev_accept)
		return false;

	/* Without an skb there are no IP6CB flags to consult. */
	if (!skb)
		return false;

	if (ipv6_l3mdev_skb(IP6CB(skb)->flags))
		return true;
#endif
	return false;
}
static u32 udp6_ehashfn(const struct net *net, static u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const struct in6_addr *laddr,
const u16 lport, const u16 lport,
@ -118,7 +128,7 @@ static void udp_v6_rehash(struct sock *sk)
static int compute_score(struct sock *sk, struct net *net, static int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum, const struct in6_addr *daddr, unsigned short hnum,
int dif) int dif, bool exact_dif)
{ {
int score; int score;
struct inet_sock *inet; struct inet_sock *inet;
@ -149,7 +159,7 @@ static int compute_score(struct sock *sk, struct net *net,
score++; score++;
} }
if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if || exact_dif) {
if (sk->sk_bound_dev_if != dif) if (sk->sk_bound_dev_if != dif)
return -1; return -1;
score++; score++;
@ -165,7 +175,7 @@ static int compute_score(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net, static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif, const struct in6_addr *daddr, unsigned int hnum, int dif,
struct udp_hslot *hslot2, bool exact_dif, struct udp_hslot *hslot2,
struct sk_buff *skb) struct sk_buff *skb)
{ {
struct sock *sk, *result; struct sock *sk, *result;
@ -176,7 +186,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
badness = -1; badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport, score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif); daddr, hnum, dif, exact_dif);
if (score > badness) { if (score > badness) {
reuseport = sk->sk_reuseport; reuseport = sk->sk_reuseport;
if (reuseport) { if (reuseport) {
@ -212,6 +222,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport); unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
int score, badness, matches = 0, reuseport = 0; int score, badness, matches = 0, reuseport = 0;
u32 hash = 0; u32 hash = 0;
@ -223,7 +234,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto begin; goto begin;
result = udp6_lib_lookup2(net, saddr, sport, result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, daddr, hnum, dif, exact_dif,
hslot2, skb); hslot2, skb);
if (!result) { if (!result) {
unsigned int old_slot2 = slot2; unsigned int old_slot2 = slot2;
@ -239,7 +250,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport, result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, daddr, hnum, dif,
hslot2, skb); exact_dif, hslot2,
skb);
} }
return result; return result;
} }
@ -247,7 +259,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = NULL; result = NULL;
badness = -1; badness = -1;
sk_for_each_rcu(sk, &hslot->head) { sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
exact_dif);
if (score > badness) { if (score > badness) {
reuseport = sk->sk_reuseport; reuseport = sk->sk_reuseport;
if (reuseport) { if (reuseport) {