linux/include/net/l3mdev.h

200 lines
4.3 KiB
C
Raw Normal View History

/*
* include/net/l3mdev.h - L3 master device API
* Copyright (c) 2015 Cumulus Networks
* Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#ifndef _NET_L3MDEV_H_
#define _NET_L3MDEV_H_
/**
 * struct l3mdev_ops - l3mdev operations
 *
 * Callback table reached through a net_device's l3mdev_ops pointer
 * (l3mdev_get_rtable() in this header dereferences it that way).
 *
 * @l3mdev_fib_table: Get FIB table id to use for lookups
 *
 * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device.
 *                     l3mdev_get_rtable() checks this pointer for NULL
 *                     before calling it, so it may be left unset.
 *
 * @l3mdev_get_saddr: Get source address for a flow. Takes non-const
 *                    device and flow pointers.
 *
 * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
 */
struct l3mdev_ops {
u32 (*l3mdev_fib_table)(const struct net_device *dev);
/* IPv4 ops */
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
const struct flowi4 *fl4);
int (*l3mdev_get_saddr)(struct net_device *dev,
struct flowi4 *fl4);
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
const struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
net: l3mdev: address selection should only consider devices in L3 domain David Lamparter noted a use case where the source address selection fails to pick an address from a VRF interface - unnumbered interfaces. Relevant commands from his script: ip addr add 9.9.9.9/32 dev lo ip link set lo up ip link add name vrf0 type vrf table 101 ip rule add oif vrf0 table 101 ip rule add iif vrf0 table 101 ip link set vrf0 up ip addr add 10.0.0.3/32 dev vrf0 ip link add name dummy2 type dummy ip link set dummy2 master vrf0 up --> note dummy2 has no address - unnumbered device ip route add 10.2.2.2/32 dev dummy2 table 101 ip neigh add 10.2.2.2 dev dummy2 lladdr 02:00:00:00:00:02 tcpdump -ni dummy2 & And using ping instead of his socat example: $ ping -I vrf0 -c1 10.2.2.2 ping: Warning: source address might be selected on device other than vrf0. PING 10.2.2.2 (10.2.2.2) from 9.9.9.9 vrf0: 56(84) bytes of data. >From tcpdump: 12:57:29.449128 IP 9.9.9.9 > 10.2.2.2: ICMP echo request, id 2491, seq 1, length 64 Note the source address is from lo and is not a VRF local address. With this patch: $ ping -I vrf0 -c1 10.2.2.2 PING 10.2.2.2 (10.2.2.2) from 10.0.0.3 vrf0: 56(84) bytes of data. >From tcpdump: 12:59:25.096426 IP 10.0.0.3 > 10.2.2.2: ICMP echo request, id 2113, seq 1, length 64 Now the source address comes from vrf0. The ipv4 function for selecting source address takes a const argument. Removing the const requires touching a lot of places, so instead l3mdev_master_ifindex_rcu is changed to take a const argument and then do the typecast to non-const as required by netdev_master_upper_dev_get_rcu. This is similar to what l3mdev_fib_table_rcu does. IPv6 for unnumbered interfaces appears to be selecting the addresses properly. Cc: David Lamparter <david@opensourcerouting.org> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-25 03:47:02 +08:00
/* Master ifindex lookup for @dev; defined out of line. The inline
 * wrappers in this header call it between rcu_read_lock() and
 * rcu_read_unlock(), and treat a return value of 0 as "no master index".
 */
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
/**
 * l3mdev_master_ifindex - locked wrapper around l3mdev_master_ifindex_rcu()
 * @dev: device handed to l3mdev_master_ifindex_rcu()
 *
 * Brackets the lookup with rcu_read_lock()/rcu_read_unlock() so the caller
 * does not have to hold the RCU read lock itself.
 *
 * Return: the value l3mdev_master_ifindex_rcu() reports for @dev.
 */
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
	int master_idx;

	rcu_read_lock();
	master_idx = l3mdev_master_ifindex_rcu(dev);
	rcu_read_unlock();

	return master_idx;
}
/**
 * l3mdev_master_ifindex_by_index - master ifindex for a device given by index
 * @net: namespace in which @ifindex is resolved
 * @ifindex: index of the device to look up; 0 short-circuits to 0
 *
 * Resolves @ifindex with dev_get_by_index_rcu() and, if a device is found,
 * queries l3mdev_master_ifindex_rcu() for it, all inside one RCU read-side
 * section.
 *
 * Return: the master ifindex, or 0 when @ifindex is 0 or no device matches.
 */
static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	int master_idx = 0;

	/* a non-zero index is the expected case */
	if (!likely(ifindex))
		return master_idx;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		master_idx = l3mdev_master_ifindex_rcu(dev);
	rcu_read_unlock();

	return master_idx;
}
/**
 * l3mdev_fib_oif_rcu - index of the interface to use for FIB lookups
 * @dev: device the lookup is for; may be NULL
 *
 * For devices enslaved to an L3 master device FIB lookups are based on
 * the master index; otherwise the device's own index is used.
 *
 * A NULL @dev yields 0. The stub built when CONFIG_NET_L3_MASTER_DEV is
 * disabled already behaves that way, so the guard keeps both
 * configurations in agreement instead of dereferencing NULL here.
 *
 * l3mdev_fib_oif() calls this between rcu_read_lock() and
 * rcu_read_unlock().
 *
 * Return: the master ifindex when it is non-zero, else @dev->ifindex,
 * or 0 for a NULL @dev.
 */
static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
	int master_idx;

	if (!dev)
		return 0;

	master_idx = l3mdev_master_ifindex_rcu(dev);

	return master_idx ? master_idx : dev->ifindex;
}
/**
 * l3mdev_fib_oif - locked wrapper around l3mdev_fib_oif_rcu()
 * @dev: device handed to l3mdev_fib_oif_rcu()
 *
 * Takes the RCU read lock for the duration of the lookup.
 *
 * Return: the value l3mdev_fib_oif_rcu() reports for @dev.
 */
static inline int l3mdev_fib_oif(struct net_device *dev)
{
	int fib_oif;

	rcu_read_lock();
	fib_oif = l3mdev_fib_oif_rcu(dev);
	rcu_read_unlock();

	return fib_oif;
}
/* FIB table id lookups, defined out of line. l3mdev_fib_table() in this
 * header calls the _rcu variant between rcu_read_lock() and
 * rcu_read_unlock(); the _by_index variant takes a namespace and an
 * interface index instead of a device pointer.
 */
u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
/**
 * l3mdev_fib_table - locked wrapper around l3mdev_fib_table_rcu()
 * @dev: device handed to l3mdev_fib_table_rcu()
 *
 * Takes the RCU read lock for the duration of the lookup.
 *
 * Return: the table id l3mdev_fib_table_rcu() reports for @dev.
 */
static inline u32 l3mdev_fib_table(const struct net_device *dev)
{
	u32 table;

	rcu_read_lock();
	table = l3mdev_fib_table_rcu(dev);
	rcu_read_unlock();

	return table;
}
/**
 * l3mdev_get_rtable - ask an L3 master device for its IPv4 rtable
 * @dev: candidate L3 master device
 * @fl4: IPv4 flow passed through to the device's callback
 *
 * Return: the result of @dev's l3mdev_get_rtable op, or NULL when
 * netif_is_l3_master() rejects @dev or the op is not provided.
 */
static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
					       const struct flowi4 *fl4)
{
	/* l3mdev_ops is only dereferenced once @dev is known to be a master */
	if (!netif_is_l3_master(dev) || !dev->l3mdev_ops->l3mdev_get_rtable)
		return NULL;

	return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
}
/**
 * netif_index_is_l3_master - test whether an interface index names an L3 master
 * @net: namespace in which @ifindex is resolved
 * @ifindex: index of the device to test; 0 is rejected without a lookup
 *
 * The device lookup and the netif_is_l3_master() check both run inside a
 * single RCU read-side section.
 *
 * Return: true if a device with @ifindex exists and netif_is_l3_master()
 * accepts it, false otherwise.
 */
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
	struct net_device *dev;
	bool is_master;

	if (!ifindex)
		return false;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	is_master = dev && netif_is_l3_master(dev);
	rcu_read_unlock();

	return is_master;
}
/* Out-of-line helpers keyed by namespace rather than by device pointer.
 * Presumably they dispatch to the l3mdev_get_saddr / l3mdev_get_rt6_dst
 * ops of struct l3mdev_ops -- confirm against the implementation.
 * With CONFIG_NET_L3_MASTER_DEV disabled, the inline stubs in this header
 * return 0 and NULL respectively.
 */
int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6);
#else
net: l3mdev: address selection should only consider devices in L3 domain David Lamparter noted a use case where the source address selection fails to pick an address from a VRF interface - unnumbered interfaces. Relevant commands from his script: ip addr add 9.9.9.9/32 dev lo ip link set lo up ip link add name vrf0 type vrf table 101 ip rule add oif vrf0 table 101 ip rule add iif vrf0 table 101 ip link set vrf0 up ip addr add 10.0.0.3/32 dev vrf0 ip link add name dummy2 type dummy ip link set dummy2 master vrf0 up --> note dummy2 has no address - unnumbered device ip route add 10.2.2.2/32 dev dummy2 table 101 ip neigh add 10.2.2.2 dev dummy2 lladdr 02:00:00:00:00:02 tcpdump -ni dummy2 & And using ping instead of his socat example: $ ping -I vrf0 -c1 10.2.2.2 ping: Warning: source address might be selected on device other than vrf0. PING 10.2.2.2 (10.2.2.2) from 9.9.9.9 vrf0: 56(84) bytes of data. >From tcpdump: 12:57:29.449128 IP 9.9.9.9 > 10.2.2.2: ICMP echo request, id 2491, seq 1, length 64 Note the source address is from lo and is not a VRF local address. With this patch: $ ping -I vrf0 -c1 10.2.2.2 PING 10.2.2.2 (10.2.2.2) from 10.0.0.3 vrf0: 56(84) bytes of data. >From tcpdump: 12:59:25.096426 IP 10.0.0.3 > 10.2.2.2: ICMP echo request, id 2113, seq 1, length 64 Now the source address comes from vrf0. The ipv4 function for selecting source address takes a const argument. Removing the const requires touching a lot of places, so instead l3mdev_master_ifindex_rcu is changed to take a const argument and then do the typecast to non-const as required by netdev_master_upper_dev_get_rcu. This is similar to what l3mdev_fib_table_rcu does. IPv6 for unnumbered interfaces appears to be selecting the addresses properly. Cc: David Lamparter <david@opensourcerouting.org> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-02-25 03:47:02 +08:00
/**
 * l3mdev_master_ifindex_rcu - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @dev: ignored
 *
 * Return: always 0.
 */
static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
{
	return 0;
}
/**
 * l3mdev_master_ifindex - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @dev: ignored
 *
 * Return: always 0.
 */
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
	return 0;
}
/**
 * l3mdev_master_ifindex_by_index - stub for builds without
 *                                  CONFIG_NET_L3_MASTER_DEV
 * @net: ignored
 * @ifindex: ignored
 *
 * Return: always 0.
 */
static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	return 0;
}
/**
 * l3mdev_fib_oif_rcu - FIB lookup index without CONFIG_NET_L3_MASTER_DEV
 * @dev: device the lookup is for; may be NULL
 *
 * Return: @dev's own ifindex, or 0 when @dev is NULL.
 */
static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
	if (!dev)
		return 0;

	return dev->ifindex;
}
/**
 * l3mdev_fib_oif - FIB lookup index without CONFIG_NET_L3_MASTER_DEV
 * @dev: device the lookup is for; may be NULL
 *
 * Return: @dev's own ifindex, or 0 when @dev is NULL.
 */
static inline int l3mdev_fib_oif(struct net_device *dev)
{
	if (!dev)
		return 0;

	return dev->ifindex;
}
/**
 * l3mdev_fib_table_rcu - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @dev: ignored
 *
 * Return: always 0.
 */
static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
{
	return 0;
}
/**
 * l3mdev_fib_table - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @dev: ignored
 *
 * Return: always 0.
 */
static inline u32 l3mdev_fib_table(const struct net_device *dev)
{
	return 0;
}
/**
 * l3mdev_fib_table_by_index - stub for builds without
 *                             CONFIG_NET_L3_MASTER_DEV
 * @net: ignored
 * @ifindex: ignored
 *
 * Return: always 0.
 */
static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
{
	return 0;
}
/**
 * l3mdev_get_rtable - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @dev: ignored
 * @fl4: ignored
 *
 * Return: always NULL.
 */
static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
					       const struct flowi4 *fl4)
{
	return NULL;
}
/**
 * netif_index_is_l3_master - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @net: ignored
 * @ifindex: ignored
 *
 * Return: always false.
 */
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
	return false;
}
/**
 * l3mdev_get_saddr - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @net: ignored
 * @ifindex: ignored
 * @fl4: ignored; the flow is left untouched
 *
 * Return: always 0.
 */
static inline int l3mdev_get_saddr(struct net *net, int ifindex,
				   struct flowi4 *fl4)
{
	return 0;
}
/**
 * l3mdev_get_rt6_dst - stub for builds without CONFIG_NET_L3_MASTER_DEV
 * @net: ignored
 * @fl6: ignored
 *
 * Return: always NULL.
 */
static inline
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6)
{
	return NULL;
}
#endif
#endif /* _NET_L3MDEV_H_ */