Merge branch 'net-metrics-consolidate'

David Ahern says:

====================
net: Consolidate metrics handling for ipv4 and ipv6

As part of the IPv6 fib info refactoring, the intent was to make metrics
handling for ipv6 identical to ipv4. One oversight in ip6_dst_destroy
led to confusion and a couple of incomplete attempts at finding and
fixing the resulting memory leak which was ultimately resolved by
ce7ea4af08 ("ipv6: fix memory leak on dst->_metrics").

Refactor metrics hanlding make the code really identical for v4 and v6,
and add a few test cases.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-10-04 21:54:45 -07:00
commit 2970f2a8e9
7 changed files with 217 additions and 94 deletions

View File

@ -420,8 +420,35 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
}
int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
u32 *metrics);
struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
int fc_mx_len);
static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics)
{
if (fib_metrics != &dst_default_metrics &&
refcount_dec_and_test(&fib_metrics->refcnt))
kfree(fib_metrics);
}
/* ipv4 and ipv6 both use refcounted metrics if it is not the default */
static inline
void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics)
{
dst_init_metrics(dst, fib_metrics->metrics, true);
if (fib_metrics != &dst_default_metrics) {
dst->_metrics |= DST_METRICS_REFCOUNTED;
refcount_inc(&fib_metrics->refcnt);
}
}
static inline
void ip_dst_metrics_put(struct dst_entry *dst)
{
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
kfree(p);
}
u32 ip_idents_reserve(u32 hash, int segs);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);

View File

@ -208,7 +208,6 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
@ -219,9 +218,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
m = fi->fib_metrics;
if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
kfree(m);
ip_fib_metrics_put(fi->fib_metrics);
kfree(fi);
}
@ -1020,13 +1018,6 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
return true;
}
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
fi->fib_metrics->metrics);
}
struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
@ -1084,16 +1075,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
if (unlikely(!fi->fib_metrics)) {
kfree(fi);
return ERR_PTR(err);
}
refcount_set(&fi->fib_metrics->refcnt, 1);
} else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
cfg->fc_mx_len);
if (unlikely(IS_ERR(fi->fib_metrics))) {
err = PTR_ERR(fi->fib_metrics);
kfree(fi);
return ERR_PTR(err);
}
fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
@ -1112,10 +1101,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto failure;
} endfor_nexthops(fi)
err = fib_convert_metrics(fi, cfg);
if (err)
goto failure;
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);

View File

@ -5,8 +5,8 @@
#include <net/net_namespace.h>
#include <net/tcp.h>
int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
u32 *metrics)
static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
int fc_mx_len, u32 *metrics)
{
bool ecn_ca = false;
struct nlattr *nla;
@ -52,4 +52,28 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
return 0;
}
EXPORT_SYMBOL_GPL(ip_metrics_convert);
struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
int fc_mx_len)
{
struct dst_metrics *fib_metrics;
int err;
if (!fc_mx)
return (struct dst_metrics *)&dst_default_metrics;
fib_metrics = kzalloc(sizeof(*fib_metrics), GFP_KERNEL);
if (unlikely(!fib_metrics))
return ERR_PTR(-ENOMEM);
err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics);
if (!err) {
refcount_set(&fib_metrics->refcnt, 1);
} else {
kfree(fib_metrics);
fib_metrics = ERR_PTR(err);
}
return fib_metrics;
}
EXPORT_SYMBOL_GPL(ip_fib_metrics_init);

View File

@ -1476,12 +1476,9 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *)dst;
if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
kfree(p);
ip_dst_metrics_put(dst);
rt_del_uncached_list(rt);
}
@ -1528,11 +1525,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
if (fi->fib_metrics != &dst_default_metrics) {
rt->dst._metrics |= DST_METRICS_REFCOUNTED;
refcount_inc(&fi->fib_metrics->refcnt);
}
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif

View File

@ -29,6 +29,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
@ -160,8 +161,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
atomic_inc(&f6i->fib6_ref);
return f6i;
@ -171,7 +170,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
struct rt6_exception_bucket *bucket;
struct dst_metrics *m;
WARN_ON(f6i->fib6_node);
@ -203,9 +201,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
if (f6i->fib6_nh.nh_dev)
dev_put(f6i->fib6_nh.nh_dev);
m = f6i->fib6_metrics;
if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
kfree(m);
ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}

View File

@ -364,14 +364,11 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rt6_info *rt = (struct rt6_info *)dst;
struct fib6_info *from;
struct inet6_dev *idev;
if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
kfree(p);
ip_dst_metrics_put(dst);
rt6_uncached_list_del(rt);
idev = rt->rt6i_idev;
@ -978,11 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
rcu_assign_pointer(rt->from, from);
dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
if (from->fib6_metrics != &dst_default_metrics) {
rt->dst._metrics |= DST_METRICS_REFCOUNTED;
refcount_inc(&from->fib6_metrics->refcnt);
}
ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
/* Caller must already hold reference to @ort */
@ -2705,24 +2698,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
return entries > rt_max_size;
}
static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
struct fib6_config *cfg)
{
struct dst_metrics *p;
if (!cfg->fc_mx)
return 0;
p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
if (unlikely(!p))
return -ENOMEM;
refcount_set(&p->refcnt, 1);
rt->fib6_metrics = p;
return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
}
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
const struct in6_addr *gw_addr,
@ -2998,13 +2973,15 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
goto out;
}
if (cfg->fc_flags & RTF_ADDRCONF)
rt->dst_nocount = true;
err = ip6_convert_metrics(net, rt, cfg);
if (err < 0)
goto out;
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
@ -3727,6 +3704,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
if (!f6i)
return ERR_PTR(-ENOMEM);
f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
f6i->dst_nocount = true;
f6i->dst_host = true;
f6i->fib6_protocol = RTPROT_KERNEL;

View File

@ -9,11 +9,11 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
IP="ip -netns testns"
IP="ip -netns ns1"
log_test()
{
@ -47,8 +47,10 @@ log_test()
setup()
{
set -e
ip netns add testns
ip netns add ns1
$IP link set dev lo up
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@ -61,7 +63,8 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
ip netns del testns
ip netns del ns1
ip netns del ns2 &> /dev/null
}
get_linklocal()
@ -639,11 +642,14 @@ add_initial_route6()
check_route6()
{
local pfx="2001:db8:104::/64"
local pfx
local expected="$1"
local out
local rc=0
set -- $expected
pfx=$1
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
[ "${out}" = "${expected}" ] && return 0
@ -690,28 +696,33 @@ route_setup()
[ "${VERBOSE}" = "1" ] && set -x
set -e
$IP li add red up type vrf table 101
ip netns add ns2
ip -netns ns2 link set dev lo up
ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP li add veth1 type veth peer name veth2
$IP li add veth3 type veth peer name veth4
$IP li set veth1 up
$IP li set veth3 up
$IP li set veth2 vrf red up
$IP li set veth4 vrf red up
$IP li add dummy1 type dummy
$IP li set dummy1 vrf red up
$IP li set veth2 netns ns2 up
$IP li set veth4 netns ns2 up
ip -netns ns2 li add dummy1 type dummy
ip -netns ns2 li set dummy1 up
$IP -6 addr add 2001:db8:101::1/64 dev veth1
$IP -6 addr add 2001:db8:101::2/64 dev veth2
$IP -6 addr add 2001:db8:103::1/64 dev veth3
$IP -6 addr add 2001:db8:103::2/64 dev veth4
$IP -6 addr add 2001:db8:104::1/64 dev dummy1
$IP addr add 172.16.101.1/24 dev veth1
$IP addr add 172.16.101.2/24 dev veth2
$IP addr add 172.16.103.1/24 dev veth3
$IP addr add 172.16.103.2/24 dev veth4
$IP addr add 172.16.104.1/24 dev dummy1
ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2
ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4
ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1
ip -netns ns2 addr add 172.16.101.2/24 dev veth2
ip -netns ns2 addr add 172.16.103.2/24 dev veth4
ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
set +ex
}
@ -944,7 +955,7 @@ ipv6_addr_metric_test()
log_test $rc 0 "Modify metric of address"
# verify prefix route removed on down
run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
@ -967,6 +978,74 @@ ipv6_addr_metric_test()
cleanup
}
ipv6_route_metrics_test()
{
local rc
echo
echo "IPv6 routes with metrics"
route_setup
#
# single path with metrics
#
run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400"
rc=$?
if [ $rc -eq 0 ]; then
check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400"
rc=$?
fi
log_test $rc 0 "Single path route with mtu metric"
#
# multipath via separate routes with metrics
#
run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400"
run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2"
rc=$?
if [ $rc -eq 0 ]; then
check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
rc=$?
fi
log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first"
# second route is coalesced to first to make a multipath route.
# MTU of the second path is hidden from display!
run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2"
run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400"
rc=$?
if [ $rc -eq 0 ]; then
check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
rc=$?
fi
log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd"
run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2"
if [ $? -eq 0 ]; then
check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400"
log_test $? 0 " MTU of second leg"
fi
#
# multipath with metrics
#
run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
rc=$?
if [ $rc -eq 0 ]; then
check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
rc=$?
fi
log_test $rc 0 "Multipath route with mtu metric"
$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
log_test $? 0 "Using route with mtu metric"
route_cleanup
}
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@ -1005,11 +1084,15 @@ add_initial_route()
check_route()
{
local pfx="172.16.104.0/24"
local pfx
local expected="$1"
local out
local rc=0
set -- $expected
pfx=$1
[ "${pfx}" = "unreachable" ] && pfx=$2
out=$($IP ro ls match ${pfx})
[ "${out}" = "${expected}" ] && return 0
@ -1319,6 +1402,40 @@ ipv4_addr_metric_test()
cleanup
}
ipv4_route_metrics_test()
{
local rc
echo
echo "IPv4 route add / append tests"
route_setup
run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400"
rc=$?
if [ $rc -eq 0 ]; then
check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400"
rc=$?
fi
log_test $rc 0 "Single path route with mtu metric"
run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
rc=$?
if [ $rc -eq 0 ]; then
check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
rc=$?
fi
log_test $rc 0 "Multipath route with mtu metric"
$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
log_test $? 0 "Using route with mtu metric"
route_cleanup
}
################################################################################
# usage
@ -1385,6 +1502,8 @@ do
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
ipv4_addr_metric) ipv4_addr_metric_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac