ipv6: Only create RTF_CACHE routes after encountering pmtu exception

This patch creates RTF_CACHE routes only after encountering a pmtu
exception.

After ip6_rt_update_pmtu() has inserted the RTF_CACHE route into the fib6
tree, rt->rt6i_node->fn_sernum is bumped, which causes the next
ip6_dst_check() to fail and triggers a relookup.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Martin KaFai Lau 2015-05-22 20:56:00 -07:00 committed by David S. Miller
parent 8b9df26577
commit 45e4fd2668
3 changed files with 52 additions and 49 deletions

View File

@ -202,7 +202,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
{ {
if (rt->rt6i_flags & RTF_GATEWAY) if (rt->rt6i_flags & RTF_GATEWAY)
return &rt->rt6i_gateway; return &rt->rt6i_gateway;
else if (rt->rt6i_flags & RTF_CACHE) else if (unlikely(rt->rt6i_flags & RTF_CACHE))
return &rt->rt6i_dst.addr; return &rt->rt6i_dst.addr;
else else
return daddr; return daddr;

View File

@ -738,6 +738,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_clean_expires(iter); rt6_clean_expires(iter);
else else
rt6_set_expires(iter, rt->dst.expires); rt6_set_expires(iter, rt->dst.expires);
iter->rt6i_pmtu = rt->rt6i_pmtu;
return -EEXIST; return -EEXIST;
} }
/* If we have the same destination and the same metric, /* If we have the same destination and the same metric,

View File

@ -873,16 +873,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
struct flowi6 *fl6, int flags) struct flowi6 *fl6, int flags)
{ {
struct fib6_node *fn, *saved_fn; struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *nrt; struct rt6_info *rt;
int strict = 0; int strict = 0;
int attempts = 3;
int err;
strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IFACE;
if (net->ipv6.devconf_all->forwarding == 0) if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE; strict |= RT6_LOOKUP_F_REACHABLE;
redo_fib6_lookup_lock:
read_lock_bh(&table->tb6_lock); read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
@ -901,46 +898,12 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
strict &= ~RT6_LOOKUP_F_REACHABLE; strict &= ~RT6_LOOKUP_F_REACHABLE;
fn = saved_fn; fn = saved_fn;
goto redo_rt6_select; goto redo_rt6_select;
} else {
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
goto out2;
} }
} }
dst_hold(&rt->dst); dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock); read_unlock_bh(&table->tb6_lock);
if (rt->rt6i_flags & RTF_CACHE)
goto out2;
if (!rt6_is_gw_or_nonexthop(rt) ||
!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
nrt = ip6_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr);
else
goto out2;
ip6_rt_put(rt);
rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
if (nrt) {
err = ip6_ins_rt(nrt);
if (!err)
goto out2;
}
if (--attempts <= 0)
goto out2;
/*
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
ip6_rt_put(rt);
goto redo_fib6_lookup_lock;
out2:
rt6_dst_from_metrics_check(rt); rt6_dst_from_metrics_check(rt);
rt->dst.lastuse = jiffies; rt->dst.lastuse = jiffies;
rt->dst.__use++; rt->dst.__use++;
@ -1113,22 +1076,61 @@ static void ip6_link_failure(struct sk_buff *skb)
} }
} }
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
struct sk_buff *skb, u32 mtu) {
struct net *net = dev_net(rt->dst.dev);
rt->rt6i_flags |= RTF_MODIFIED;
rt->rt6i_pmtu = mtu;
rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
const struct ipv6hdr *iph, u32 mtu)
{ {
struct rt6_info *rt6 = (struct rt6_info *)dst; struct rt6_info *rt6 = (struct rt6_info *)dst;
if (rt6->rt6i_flags & RTF_LOCAL)
return;
dst_confirm(dst); dst_confirm(dst);
if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { mtu = max_t(u32, mtu, IPV6_MIN_MTU);
struct net *net = dev_net(dst->dev); if (mtu >= dst_mtu(dst))
return;
rt6->rt6i_flags |= RTF_MODIFIED; if (rt6->rt6i_flags & RTF_CACHE) {
if (mtu < IPV6_MIN_MTU) rt6_do_update_pmtu(rt6, mtu);
mtu = IPV6_MIN_MTU; } else {
const struct in6_addr *daddr, *saddr;
struct rt6_info *nrt6;
rt6->rt6i_pmtu = mtu; if (iph) {
rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); daddr = &iph->daddr;
saddr = &iph->saddr;
} else if (sk) {
daddr = &sk->sk_v6_daddr;
saddr = &inet6_sk(sk)->saddr;
} else {
return;
} }
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
/* ip6_ins_rt(nrt6) will bump the
* rt6->rt6i_node->fn_sernum
* which will fail the next rt6_check() and
* invalidate the sk->sk_dst_cache.
*/
ip6_ins_rt(nrt6);
}
}
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
} }
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@ -1147,7 +1149,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6); dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error) if (!dst->error)
ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
dst_release(dst); dst_release(dst);
} }
EXPORT_SYMBOL_GPL(ip6_update_pmtu); EXPORT_SYMBOL_GPL(ip6_update_pmtu);