mirror of https://gitee.com/openkylin/linux.git
Merge branch 'cached-route-listings'
Stefano Brivio says: ==================== Fix listing (IPv4, IPv6) and flushing (IPv6) of cached route exceptions For IPv6 cached routes, the commands 'ip -6 route list cache' and 'ip -6 route flush cache' don't work at all after route exceptions have been moved to a separate hash table in commit 2b760fcf5c
("ipv6: hook up exception table to store dst cache"). For IPv4 cached routes, the command 'ip route list cache' has also stopped working in kernel 3.5 after commit 4895c771c7
("ipv4: Add FIB nexthop exceptions.") introduced storage for route exceptions as a separate entity. Fix this by allowing userspace to clearly request cached routes with the RTM_F_CLONED flag used as a filter (in conjunction with strict checking) and by retrieving and dumping cached routes if requested. If strict checking is not requested (iproute2 < 5.0.0), we don't have a way to consistently filter results on other selectors (e.g. on tables), so skip filtering entirely and dump both regular routes and exceptions. For IPv4, cache flushing uses a completely different mechanism, so it wasn't affected. Listing of exception routes (modified routes pre-3.5) was tested against these versions of kernel and iproute2: iproute2 kernel 4.14.0 4.15.0 4.19.0 5.0.0 5.1.0 3.5-rc4 + + + + + 4.4 4.9 4.14 4.15 4.19 5.0 5.1 fixed + + + + + For IPv6, a separate iproute2 patch is required. Versions of iproute2 and kernel tested: iproute2 kernel 4.14.0 4.15.0 4.19.0 5.0.0 5.1.0 5.1.0, patched 3.18 list + + + + + + flush + + + + + + 4.4 list + + + + + + flush + + + + + + 4.9 list + + + + + + flush + + + + + + 4.14 list + + + + + + flush + + + + + + 4.15 list flush 4.19 list flush 5.0 list flush 5.1 list flush with list + + + + + + fix flush + + + + v7: Make sure r->rtm_tos is initialised in 3/11, move loop over nexthop objects in 4/11, add comments about usage of "skip" counters in commit messages of 4/11 and 8/11 v6: Target for net-next, rebase and adapt to nexthop objects for IPv6 paths. Merge selftests into this series (as they were addressed for net-next). A number of minor changes detailed in logs of single patches. v5: Skip filtering altogether if no strict checking is requested: selecting routes or exceptions only would be inconsistent with the fact we can't filter on tables. Drop 1/8 (non-strict dump filter function no longer needed), replace 2/8 (don't use NLM_F_MATCH, decide to skip routes or exceptions in filter function), drop 6/8 (2/8 is enough for IPv6 too). 
Introduce dump_routes and dump_exceptions flags in filter, adapt other patches to that. v4: Fix the listing issue also for IPv4, making the behaviour consistent with IPv6. Honour NLM_F_MATCH as per RFC 3549 and allow usage of RTM_F_CLONED filter. Split patches into smaller logical changes. v3: Drop check on RTM_F_CLONED and rework logic of return values of rt6_dump_route() v2: Add count of routes handled in partial dumps, and skip them, in patch 1/2. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
dcdfa50eef
|
@ -316,6 +316,7 @@ struct fib6_walker {
|
|||
enum fib6_walk_state state;
|
||||
unsigned int skip;
|
||||
unsigned int count;
|
||||
unsigned int skip_in_node;
|
||||
int (*func)(struct fib6_walker *);
|
||||
void *args;
|
||||
};
|
||||
|
|
|
@ -197,7 +197,7 @@ struct rt6_rtnl_dump_arg {
|
|||
struct fib_dump_filter filter;
|
||||
};
|
||||
|
||||
int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
|
||||
int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip);
|
||||
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
|
||||
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
|
||||
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
|
||||
|
|
|
@ -245,6 +245,8 @@ struct fib_dump_filter {
|
|||
/* filter_set is an optimization that an entry is set */
|
||||
bool filter_set;
|
||||
bool dump_all_families;
|
||||
bool dump_routes;
|
||||
bool dump_exceptions;
|
||||
unsigned char protocol;
|
||||
unsigned char rt_type;
|
||||
unsigned int flags;
|
||||
|
|
|
@ -230,6 +230,10 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
|
|||
void rt_add_uncached_list(struct rtable *rt);
|
||||
void rt_del_uncached_list(struct rtable *rt);
|
||||
|
||||
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
u32 table_id, struct fib_info *fi,
|
||||
int *fa_index, int fa_start);
|
||||
|
||||
static inline void ip_rt_put(struct rtable *rt)
|
||||
{
|
||||
/* dst_release() accepts a NULL parameter.
|
||||
|
|
|
@ -912,10 +912,15 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
|
|||
NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
|
||||
NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (rtm->rtm_flags & RTM_F_CLONED)
|
||||
filter->dump_routes = false;
|
||||
else
|
||||
filter->dump_exceptions = false;
|
||||
|
||||
filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
|
||||
filter->flags = rtm->rtm_flags;
|
||||
|
@ -962,9 +967,10 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
|
|||
|
||||
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
{
|
||||
struct fib_dump_filter filter = { .dump_routes = true,
|
||||
.dump_exceptions = true };
|
||||
const struct nlmsghdr *nlh = cb->nlh;
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct fib_dump_filter filter = {};
|
||||
unsigned int h, s_h;
|
||||
unsigned int e = 0, s_e;
|
||||
struct fib_table *tb;
|
||||
|
@ -981,8 +987,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
|||
filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
|
||||
}
|
||||
|
||||
/* fib entries are never clones and ipv4 does not use prefix flag */
|
||||
if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED))
|
||||
/* ipv4 does not use prefix flag */
|
||||
if (filter.flags & RTM_F_PREFIX)
|
||||
return skb->len;
|
||||
|
||||
if (filter.table_id) {
|
||||
|
|
|
@ -2090,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
|
|||
{
|
||||
unsigned int flags = NLM_F_MULTI;
|
||||
__be32 xkey = htonl(l->key);
|
||||
int i, s_i, i_fa, s_fa, err;
|
||||
struct fib_alias *fa;
|
||||
int i, s_i;
|
||||
|
||||
if (filter->filter_set)
|
||||
if (filter->filter_set ||
|
||||
!filter->dump_exceptions || !filter->dump_routes)
|
||||
flags |= NLM_F_DUMP_FILTERED;
|
||||
|
||||
s_i = cb->args[4];
|
||||
s_fa = cb->args[5];
|
||||
i = 0;
|
||||
|
||||
/* rcu_read_lock is hold by caller */
|
||||
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
|
||||
int err;
|
||||
struct fib_info *fi = fa->fa_info;
|
||||
|
||||
if (i < s_i)
|
||||
goto next;
|
||||
|
||||
i_fa = 0;
|
||||
|
||||
if (tb->tb_id != fa->tb_id)
|
||||
goto next;
|
||||
|
||||
|
@ -2114,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
|
|||
goto next;
|
||||
|
||||
if ((filter->protocol &&
|
||||
fa->fa_info->fib_protocol != filter->protocol))
|
||||
fi->fib_protocol != filter->protocol))
|
||||
goto next;
|
||||
|
||||
if (filter->dev &&
|
||||
!fib_info_nh_uses_dev(fa->fa_info, filter->dev))
|
||||
!fib_info_nh_uses_dev(fi, filter->dev))
|
||||
goto next;
|
||||
}
|
||||
|
||||
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
|
||||
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
|
||||
tb->tb_id, fa->fa_type,
|
||||
xkey, KEYLENGTH - fa->fa_slen,
|
||||
fa->fa_tos, fa->fa_info, flags);
|
||||
if (err < 0) {
|
||||
cb->args[4] = i;
|
||||
return err;
|
||||
if (filter->dump_routes && !s_fa) {
|
||||
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
|
||||
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
|
||||
tb->tb_id, fa->fa_type,
|
||||
xkey, KEYLENGTH - fa->fa_slen,
|
||||
fa->fa_tos, fi, flags);
|
||||
if (err < 0)
|
||||
goto stop;
|
||||
i_fa++;
|
||||
}
|
||||
|
||||
if (filter->dump_exceptions) {
|
||||
err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
|
||||
&i_fa, s_fa);
|
||||
if (err < 0)
|
||||
goto stop;
|
||||
}
|
||||
|
||||
next:
|
||||
i++;
|
||||
}
|
||||
|
||||
cb->args[4] = i;
|
||||
return skb->len;
|
||||
|
||||
stop:
|
||||
cb->args[4] = i;
|
||||
cb->args[5] = i_fa;
|
||||
return err;
|
||||
}
|
||||
|
||||
/* rcu_read_lock needs to be hold by caller from readside */
|
||||
|
|
133
net/ipv4/route.c
133
net/ipv4/route.c
|
@ -2699,7 +2699,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
|
|||
r->rtm_family = AF_INET;
|
||||
r->rtm_dst_len = 32;
|
||||
r->rtm_src_len = 0;
|
||||
r->rtm_tos = fl4->flowi4_tos;
|
||||
r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
|
||||
r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
|
||||
if (nla_put_u32(skb, RTA_TABLE, table_id))
|
||||
goto nla_put_failure;
|
||||
|
@ -2727,7 +2727,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
|
|||
nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
|
||||
goto nla_put_failure;
|
||||
#endif
|
||||
if (!rt_is_input_route(rt) &&
|
||||
if (fl4 && !rt_is_input_route(rt) &&
|
||||
fl4->saddr != src) {
|
||||
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
|
||||
goto nla_put_failure;
|
||||
|
@ -2767,36 +2767,40 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
|
|||
if (rtnetlink_put_metrics(skb, metrics) < 0)
|
||||
goto nla_put_failure;
|
||||
|
||||
if (fl4->flowi4_mark &&
|
||||
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
|
||||
goto nla_put_failure;
|
||||
if (fl4) {
|
||||
if (fl4->flowi4_mark &&
|
||||
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
|
||||
nla_put_u32(skb, RTA_UID,
|
||||
from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
|
||||
goto nla_put_failure;
|
||||
if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
|
||||
nla_put_u32(skb, RTA_UID,
|
||||
from_kuid_munged(current_user_ns(),
|
||||
fl4->flowi4_uid)))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (rt_is_input_route(rt)) {
|
||||
#ifdef CONFIG_IP_MROUTE
|
||||
if (ipv4_is_multicast(dst) &&
|
||||
!ipv4_is_local_multicast(dst) &&
|
||||
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
|
||||
int err = ipmr_get_route(net, skb,
|
||||
fl4->saddr, fl4->daddr,
|
||||
r, portid);
|
||||
|
||||
if (err <= 0) {
|
||||
if (err == 0)
|
||||
return 0;
|
||||
goto nla_put_failure;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
|
||||
goto nla_put_failure;
|
||||
}
|
||||
}
|
||||
|
||||
error = rt->dst.error;
|
||||
|
||||
if (rt_is_input_route(rt)) {
|
||||
#ifdef CONFIG_IP_MROUTE
|
||||
if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
|
||||
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
|
||||
int err = ipmr_get_route(net, skb,
|
||||
fl4->saddr, fl4->daddr,
|
||||
r, portid);
|
||||
|
||||
if (err <= 0) {
|
||||
if (err == 0)
|
||||
return 0;
|
||||
goto nla_put_failure;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
|
||||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
|
||||
goto nla_put_failure;
|
||||
|
||||
|
@ -2808,6 +2812,79 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
|
|||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
/*
 * Walk all FNHE_HASH_SIZE chains of one nexthop exception table and emit a
 * route message, via rt_fill_info(), for each exception that should be shown.
 *
 * @fa_index is a running position counter shared with the caller: it is
 * advanced once for every chain entry that is passed over or successfully
 * dumped. Entries at positions below @fa_start are passed over without being
 * examined. An entry is also passed over when its fnhe_genid differs from
 * @genid, when it has a non-zero fnhe_expires that jiffies has gone past, or
 * when it has neither an input nor an output cached route.
 *
 * Returns 0 once every chain has been walked. If rt_fill_info() returns a
 * non-zero value, that value is returned immediately and @fa_index is left
 * pointing at the entry that could not be dumped.
 */
static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
			    struct netlink_callback *cb, u32 table_id,
			    struct fnhe_hash_bucket *bucket, int genid,
			    int *fa_index, int fa_start)
{
	struct fnhe_hash_bucket *slot;

	for (slot = bucket; slot < bucket + FNHE_HASH_SIZE; slot++) {
		struct fib_nh_exception *fnhe;

		/* The step expression counts the entry before moving on, so
		 * "continue" accounts for it while "return" does not.
		 */
		for (fnhe = rcu_dereference(slot->chain); fnhe;
		     (*fa_index)++, fnhe = rcu_dereference(fnhe->fnhe_next)) {
			struct rtable *rt;
			int ret;

			/* Position not reached yet */
			if (*fa_index < fa_start)
				continue;

			/* Exception belongs to another generation */
			if (fnhe->fnhe_genid != genid)
				continue;

			/* Exception has an expiry time and it has passed */
			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires))
				continue;

			/* Prefer the input route, fall back to the output one */
			rt = rcu_dereference(fnhe->fnhe_rth_input);
			if (!rt) {
				rt = rcu_dereference(fnhe->fnhe_rth_output);
				if (!rt)
					continue;
			}

			ret = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
					   table_id, NULL, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq);
			if (ret)
				return ret;
		}
	}

	return 0;
}
|
||||
|
||||
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
u32 table_id, struct fib_info *fi,
|
||||
int *fa_index, int fa_start)
|
||||
{
|
||||
struct net *net = sock_net(cb->skb->sk);
|
||||
int nhsel, genid = fnhe_genid(net);
|
||||
|
||||
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
|
||||
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
|
||||
struct fnhe_hash_bucket *bucket;
|
||||
int err;
|
||||
|
||||
if (nhc->nhc_flags & RTNH_F_DEAD)
|
||||
continue;
|
||||
|
||||
bucket = rcu_dereference(nhc->nhc_exceptions);
|
||||
if (!bucket)
|
||||
continue;
|
||||
|
||||
err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid,
|
||||
fa_index, fa_start);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
|
||||
u8 ip_proto, __be16 sport,
|
||||
__be16 dport)
|
||||
|
|
|
@ -464,12 +464,19 @@ static int fib6_dump_node(struct fib6_walker *w)
|
|||
struct fib6_info *rt;
|
||||
|
||||
for_each_fib6_walker_rt(w) {
|
||||
res = rt6_dump_route(rt, w->args);
|
||||
if (res < 0) {
|
||||
res = rt6_dump_route(rt, w->args, w->skip_in_node);
|
||||
if (res >= 0) {
|
||||
/* Frame is full, suspend walking */
|
||||
w->leaf = rt;
|
||||
|
||||
/* We'll restart from this node, so if some routes were
|
||||
* already dumped, skip them next time.
|
||||
*/
|
||||
w->skip_in_node += res;
|
||||
|
||||
return 1;
|
||||
}
|
||||
w->skip_in_node = 0;
|
||||
|
||||
/* Multipath routes are dumped in one route with the
|
||||
* RTA_MULTIPATH attribute. Jump 'rt' to point to the
|
||||
|
@ -521,6 +528,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
|
|||
if (cb->args[4] == 0) {
|
||||
w->count = 0;
|
||||
w->skip = 0;
|
||||
w->skip_in_node = 0;
|
||||
|
||||
spin_lock_bh(&table->tb6_lock);
|
||||
res = fib6_walk(net, w);
|
||||
|
@ -536,6 +544,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
|
|||
w->state = FWS_INIT;
|
||||
w->node = w->root;
|
||||
w->skip = w->count;
|
||||
w->skip_in_node = 0;
|
||||
} else
|
||||
w->skip = 0;
|
||||
|
||||
|
@ -553,9 +562,10 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
|
|||
|
||||
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
{
|
||||
struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
|
||||
.filter.dump_routes = true };
|
||||
const struct nlmsghdr *nlh = cb->nlh;
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct rt6_rtnl_dump_arg arg = {};
|
||||
unsigned int h, s_h;
|
||||
unsigned int e = 0, s_e;
|
||||
struct fib6_walker *w;
|
||||
|
@ -572,13 +582,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
|
|||
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
|
||||
struct rtmsg *rtm = nlmsg_data(nlh);
|
||||
|
||||
arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED);
|
||||
if (rtm->rtm_flags & RTM_F_PREFIX)
|
||||
arg.filter.flags = RTM_F_PREFIX;
|
||||
}
|
||||
|
||||
/* fib entries are never clones */
|
||||
if (arg.filter.flags & RTM_F_CLONED)
|
||||
goto out;
|
||||
|
||||
w = (void *)cb->args[2];
|
||||
if (!w) {
|
||||
/* New dump:
|
||||
|
@ -1589,7 +1596,8 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
|
|||
if (plen == fn->fn_bit)
|
||||
return fn;
|
||||
|
||||
prev = fn;
|
||||
if (fn->fn_flags & RTN_RTINFO)
|
||||
prev = fn;
|
||||
|
||||
next:
|
||||
/*
|
||||
|
@ -2096,6 +2104,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
|
|||
c.w.func = fib6_clean_node;
|
||||
c.w.count = 0;
|
||||
c.w.skip = 0;
|
||||
c.w.skip_in_node = 0;
|
||||
c.func = func;
|
||||
c.sernum = sernum;
|
||||
c.arg = arg;
|
||||
|
|
123
net/ipv6/route.c
123
net/ipv6/route.c
|
@ -3840,7 +3840,8 @@ static int ip6_route_del(struct fib6_config *cfg,
|
|||
for_each_fib6_node_rt_rcu(fn) {
|
||||
struct fib6_nh *nh;
|
||||
|
||||
if (rt->nh && rt->nh->id != cfg->fc_nh_id)
|
||||
if (rt->nh && cfg->fc_nh_id &&
|
||||
rt->nh->id != cfg->fc_nh_id)
|
||||
continue;
|
||||
|
||||
if (cfg->fc_flags & RTF_CACHE) {
|
||||
|
@ -5521,33 +5522,129 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
|
|||
return false;
|
||||
}
|
||||
|
||||
int rt6_dump_route(struct fib6_info *rt, void *p_arg)
|
||||
struct fib6_nh_exception_dump_walker {
|
||||
struct rt6_rtnl_dump_arg *dump;
|
||||
struct fib6_info *rt;
|
||||
unsigned int flags;
|
||||
unsigned int skip;
|
||||
unsigned int count;
|
||||
};
|
||||
|
||||
static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
|
||||
{
|
||||
struct fib6_nh_exception_dump_walker *w = arg;
|
||||
struct rt6_rtnl_dump_arg *dump = w->dump;
|
||||
struct rt6_exception_bucket *bucket;
|
||||
struct rt6_exception *rt6_ex;
|
||||
int i, err;
|
||||
|
||||
bucket = fib6_nh_get_excptn_bucket(nh, NULL);
|
||||
if (!bucket)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
|
||||
hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
|
||||
if (w->skip) {
|
||||
w->skip--;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Expiration of entries doesn't bump sernum, insertion
|
||||
* does. Removal is triggered by insertion, so we can
|
||||
* rely on the fact that if entries change between two
|
||||
* partial dumps, this node is scanned again completely,
|
||||
* see rt6_insert_exception() and fib6_dump_table().
|
||||
*
|
||||
* Count expired entries we go through as handled
|
||||
* entries that we'll skip next time, in case of partial
|
||||
* node dump. Otherwise, if entries expire meanwhile,
|
||||
* we'll skip the wrong amount.
|
||||
*/
|
||||
if (rt6_check_expired(rt6_ex->rt6i)) {
|
||||
w->count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
err = rt6_fill_node(dump->net, dump->skb, w->rt,
|
||||
&rt6_ex->rt6i->dst, NULL, NULL, 0,
|
||||
RTM_NEWROUTE,
|
||||
NETLINK_CB(dump->cb->skb).portid,
|
||||
dump->cb->nlh->nlmsg_seq, w->flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
w->count++;
|
||||
}
|
||||
bucket++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return -1 if done with node, number of handled routes on partial dump */
|
||||
int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
|
||||
{
|
||||
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
|
||||
struct fib_dump_filter *filter = &arg->filter;
|
||||
unsigned int flags = NLM_F_MULTI;
|
||||
struct net *net = arg->net;
|
||||
int count = 0;
|
||||
|
||||
if (rt == net->ipv6.fib6_null_entry)
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
if ((filter->flags & RTM_F_PREFIX) &&
|
||||
!(rt->fib6_flags & RTF_PREFIX_RT)) {
|
||||
/* success since this is not a prefix route */
|
||||
return 1;
|
||||
return -1;
|
||||
}
|
||||
if (filter->filter_set) {
|
||||
if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
|
||||
(filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
|
||||
(filter->protocol && rt->fib6_protocol != filter->protocol)) {
|
||||
return 1;
|
||||
}
|
||||
if (filter->filter_set &&
|
||||
((filter->rt_type && rt->fib6_type != filter->rt_type) ||
|
||||
(filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
|
||||
(filter->protocol && rt->fib6_protocol != filter->protocol))) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (filter->filter_set ||
|
||||
!filter->dump_routes || !filter->dump_exceptions) {
|
||||
flags |= NLM_F_DUMP_FILTERED;
|
||||
}
|
||||
|
||||
return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
|
||||
RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
|
||||
arg->cb->nlh->nlmsg_seq, flags);
|
||||
if (filter->dump_routes) {
|
||||
if (skip) {
|
||||
skip--;
|
||||
} else {
|
||||
if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
|
||||
0, RTM_NEWROUTE,
|
||||
NETLINK_CB(arg->cb->skb).portid,
|
||||
arg->cb->nlh->nlmsg_seq, flags)) {
|
||||
return 0;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (filter->dump_exceptions) {
|
||||
struct fib6_nh_exception_dump_walker w = { .dump = arg,
|
||||
.rt = rt,
|
||||
.flags = flags,
|
||||
.skip = skip,
|
||||
.count = 0 };
|
||||
int err;
|
||||
|
||||
if (rt->nh) {
|
||||
err = nexthop_for_each_fib6_nh(rt->nh,
|
||||
rt6_nh_dump_exceptions,
|
||||
&w);
|
||||
} else {
|
||||
err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
|
||||
}
|
||||
|
||||
if (err)
|
||||
return count += w.count;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
|
||||
|
|
|
@ -112,6 +112,10 @@
|
|||
# - cleanup_ipv6_exception
|
||||
# Same as above, but use IPv6 transport from A to B
|
||||
#
|
||||
# - list_flush_ipv4_exception
|
||||
# Using the same topology as in pmtu_ipv4, create exceptions, and check
|
||||
# they are shown when listing exception caches, gone after flushing them
|
||||
#
|
||||
# - list_flush_ipv6_exception
|
||||
# Using the same topology as in pmtu_ipv6, create exceptions, and check
|
||||
# they are shown when listing exception caches, gone after flushing them
|
||||
|
@ -156,6 +160,7 @@ tests="
|
|||
pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
|
||||
cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
|
||||
cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1
|
||||
list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
|
||||
list_flush_ipv6_exception ipv6: list and flush cached exceptions 1"
|
||||
|
||||
NS_A="ns-A"
|
||||
|
@ -1207,6 +1212,61 @@ run_test_nh() {
|
|||
USE_NH=no
|
||||
}
|
||||
|
||||
# Create 101 cached IPv4 route exceptions on A (100 towards addresses on B
# reached via R1, one towards B via R2), check that 'ip route list cache'
# shows all of them, then check that 'ip route flush cache' removes them.
#
# Returns 0 on success, 1 if listing or flushing fails, 2 if setup fails.
test_list_flush_ipv4_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	dst_prefix1="${prefix4}.${b_r1}."
	dst2="${prefix4}.${b_r2}.1"

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}"  veth_B-R1 1500

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	fail=0

	# Add 100 addresses for veth endpoint on B reached by default A route
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# Create 100 cached route exceptions for path via R1, one via R2. Note
	# that with IPv4 we need to actually cause a route lookup that matches
	# the exception caused by ICMP, in order to actually have a cached
	# route, so we need to ping each destination twice
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"

	# Each exception is printed as two lines
	if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
		err "  can't list cached exceptions"
		fail=1
	fi

	run_cmd ${ns_a} ip route flush cache

	# Look up destinations that exceptions were actually created for: the
	# first address pinged via R1 and the one pinged via R2. No other
	# prefix variable is set by this test, so anything else would query a
	# destination that never had an exception and always look "flushed".
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip route list cache)" ]; then
		err "  can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}
|
||||
|
||||
test_list_flush_ipv6_exception() {
|
||||
setup namespaces routing || return 2
|
||||
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
|
||||
|
@ -1214,7 +1274,7 @@ test_list_flush_ipv6_exception() {
|
|||
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
|
||||
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
|
||||
|
||||
dst1="${prefix6}:${b_r1}::1"
|
||||
dst_prefix1="${prefix6}:${b_r1}::"
|
||||
dst2="${prefix6}:${b_r2}::1"
|
||||
|
||||
# Set up initial MTU values
|
||||
|
@ -1230,20 +1290,26 @@ test_list_flush_ipv6_exception() {
|
|||
|
||||
fail=0
|
||||
|
||||
# Create route exceptions
|
||||
run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
|
||||
run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
|
||||
# Add 100 addresses for veth endpoint on B reached by default A route
|
||||
for i in $(seq 100 199); do
|
||||
run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
|
||||
done
|
||||
|
||||
if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 2 ]; then
|
||||
# Create 100 cached route exceptions for path via R1, one via R2
|
||||
for i in $(seq 100 199); do
|
||||
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
|
||||
done
|
||||
run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
|
||||
if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
|
||||
err " can't list cached exceptions"
|
||||
fail=1
|
||||
fi
|
||||
|
||||
run_cmd ${ns_a} ip -6 route flush cache
|
||||
sleep 1
|
||||
pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
|
||||
pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
|
||||
pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
|
||||
if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ]; then
|
||||
if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
|
||||
[ -n "$(${ns_a} ip -6 route list cache)" ]; then
|
||||
err " can't flush cached exceptions"
|
||||
fail=1
|
||||
fi
|
||||
|
|
Loading…
Reference in New Issue