Merge branch 'nexthop-add-support-for-nexthop-objects-offload'

Ido Schimmel says:

====================
nexthop: Add support for nexthop objects offload

This patch set adds support for nexthop objects offload with a dummy
implementation over netdevsim. mlxsw support will be added later.

The general idea is very similar to route offload in that notifications
are sent whenever nexthop objects are changed. A listener can veto the
change and the error will be communicated to user space with extack.

To keep listeners as simple as possible, they not only receive
notifications for the nexthop object that is changed, but also for all
the other objects affected by this change. For example, when a single
nexthop is replaced, a replace notification is sent for the single
nexthop, but also for all the nexthop groups this nexthop is a member of.
This relieves listeners from the need to track such dependencies.

To simplify things further for listeners, the notification info does not
contain the raw nexthop data structures (e.g., 'struct nexthop'), but
less complex data structures into which the raw data structures are
parsed.

Tested with a new selftest over netdevsim and with fib_nexthops.sh:

Tests passed: 164
Tests failed:   0

Patch set overview:

Patches #1-#4 introduce the aforementioned data structures and convert
existing listeners (i.e., the VXLAN driver) to use them.

Patches #5-#6 add a new RTNH_F_TRAP flag and the ability to set it and
RTNH_F_OFFLOAD on nexthops. This flag is used by netdevsim for testing
purposes and will also be used by mlxsw. These flags are consistent with
the existing RTM_F_OFFLOAD and RTM_F_TRAP flags.

Patches #7-#14 gradually add the new nexthop notifications.

Patches #15-#18 add a dummy implementation for nexthop offload over
netdevsim and a selftest to exercise both good and bad flows.

Changes since RFC [1]:

Patch #1: s/is_encap/has_encap/
Patch #3: Add a blank line in __nh_notifier_single_info_init()
Patch #5: Reword commit message
Patch #6: s/nexthop_hw_flags_set/nexthop_set_hw_flags/
Patch #7: Reword commit message
Patch #11: Allocate extack on the stack

Follow-up patch sets:

selftests: forwarding: Add nexthop objects tests
mlxsw: Preparations for nexthop objects support - part 1/2
mlxsw: Preparations for nexthop objects support - part 2/2
mlxsw: Add support for nexthop objects
mlxsw: Add support for blackhole nexthops
mlxsw: Update adjacency index more efficiently

[1] https://lore.kernel.org/netdev/20200908091037.2709823-1-idosch@idosch.org/
====================

Link: https://lore.kernel.org/r/20201104133040.1125369-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2020-11-06 11:28:54 -08:00
commit 3b4202a470
12 changed files with 995 additions and 47 deletions

View File

@ -46,7 +46,7 @@ Resources
=========
The ``netdevsim`` driver exposes resources to control the number of FIB
entries and FIB rule entries that the driver will allow.
entries, FIB rule entries and nexthops that the driver will allow.
.. code:: shell
@ -54,6 +54,7 @@ entries and FIB rule entries that the driver will allow.
$ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16
$ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64
$ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16
$ devlink resource set netdevsim/netdevsim0 path /nexthops size 16
$ devlink dev reload netdevsim/netdevsim0
Driver-specific Traps

View File

@ -324,6 +324,12 @@ static int nsim_dev_resources_register(struct devlink *devlink)
return err;
}
/* Resources for nexthops */
err = devlink_resource_register(devlink, "nexthops", (u64)-1,
NSIM_RESOURCE_NEXTHOPS,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&params);
out:
return err;
}

View File

@ -25,6 +25,7 @@
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/net_namespace.h>
#include <net/nexthop.h>
#include "netdevsim.h"
@ -42,9 +43,12 @@ struct nsim_fib_data {
struct notifier_block fib_nb;
struct nsim_per_fib_data ipv4;
struct nsim_per_fib_data ipv6;
struct nsim_fib_entry nexthops;
struct rhashtable fib_rt_ht;
struct list_head fib_rt_list;
spinlock_t fib_lock; /* Protects hashtable, list and accounting */
struct notifier_block nexthop_nb;
struct rhashtable nexthop_ht;
struct devlink *devlink;
};
@ -86,6 +90,19 @@ static const struct rhashtable_params nsim_fib_rt_ht_params = {
.automatic_shrinking = true,
};
/* Driver-side record of a single nexthop object (single nexthop or group). */
struct nsim_nexthop {
/* Hash table linkage; referenced as head_offset by nsim_nexthop_ht_params */
struct rhash_head ht_node;
/* Number of nexthop resource entries this object occupies */
u64 occ;
/* Nexthop ID; used as the hash table key */
u32 id;
};
/* Hash table parameters for struct nsim_nexthop: entries are keyed by the
 * 32-bit 'id' member and linked through the 'ht_node' member.
 */
static const struct rhashtable_params nsim_nexthop_ht_params = {
.key_offset = offsetof(struct nsim_nexthop, id),
.head_offset = offsetof(struct nsim_nexthop, ht_node),
.key_len = sizeof(u32),
.automatic_shrinking = true,
};
u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
enum nsim_resource_id res_id, bool max)
{
@ -104,6 +121,9 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
case NSIM_RESOURCE_IPV6_FIB_RULES:
entry = &fib_data->ipv6.rules;
break;
case NSIM_RESOURCE_NEXTHOPS:
entry = &fib_data->nexthops;
break;
default:
return 0;
}
@ -129,6 +149,9 @@ static void nsim_fib_set_max(struct nsim_fib_data *fib_data,
case NSIM_RESOURCE_IPV6_FIB_RULES:
entry = &fib_data->ipv6.rules;
break;
case NSIM_RESOURCE_NEXTHOPS:
entry = &fib_data->nexthops;
break;
default:
WARN_ON(1);
return;
@ -389,11 +412,6 @@ static int nsim_fib4_event(struct nsim_fib_data *data,
fen_info = container_of(info, struct fib_entry_notifier_info, info);
if (fen_info->fi->nh) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
return 0;
}
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib4_rt_insert(data, fen_info);
@ -704,11 +722,6 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
fen6_info = container_of(info, struct fib6_entry_notifier_info, info);
if (fen6_info->rt->nh) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
return 0;
}
if (fen6_info->rt->fib6_src.plen) {
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported");
return 0;
@ -838,6 +851,196 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
data->ipv6.rules.num = 0ULL;
}
/* Allocate a driver nexthop object for the nexthop described by @info.
 *
 * The object records the nexthop ID and its occupancy: one entry for a
 * single nexthop, or the sum of the member weights for a nexthop group.
 *
 * Returns the new object, or NULL if the allocation failed.
 */
static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data,
						struct nh_notifier_info *info)
{
	struct nsim_nexthop *nh;
	u64 entries;
	int idx;

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return NULL;

	/* Work out how many nexthop entries the new object will occupy. */
	if (info->is_grp) {
		entries = 0;
		for (idx = 0; idx < info->nh_grp->num_nh; idx++)
			entries += info->nh_grp->nh_entries[idx].weight;
	} else {
		entries = 1;
	}

	nh->id = info->id;
	nh->occ = entries;

	return nh;
}
/* Free the memory backing a driver nexthop object. Accounting and hash
 * table removal are not done here.
 */
static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop)
{
kfree(nexthop);
}
/* Charge (@add == true) or release (@add == false) @occ nexthop entries
 * against the nexthops resource.
 *
 * Returns 0 on success, -ENOSPC (with an extack message) when charging
 * would exceed the configured maximum, or -EINVAL (with a warning) when
 * asked to release more entries than are currently accounted.
 */
static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ,
				bool add, struct netlink_ext_ack *extack)
{
	if (!add) {
		if (WARN_ON(occ > data->nexthops.num))
			return -EINVAL;
		data->nexthops.num -= occ;
		return 0;
	}

	if (data->nexthops.num + occ > data->nexthops.max) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops");
		return -ENOSPC;
	}

	data->nexthops.num += occ;
	return 0;
}
/* Start tracking a new nexthop object.
 *
 * Charges the object's occupancy, inserts it into the nexthop hash table
 * and, on success, sets the nexthop's hardware flags to offload=false,
 * trap=true. If the insertion fails, the charged occupancy is released.
 *
 * Returns 0 on success or a negative errno; the caller keeps ownership of
 * @nexthop on failure.
 */
static int nsim_nexthop_add(struct nsim_fib_data *data,
			    struct nsim_nexthop *nexthop,
			    struct netlink_ext_ack *extack)
{
	int err;

	err = nsim_nexthop_account(data, nexthop->occ, true, extack);
	if (err)
		return err;

	err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node,
				     nsim_nexthop_ht_params);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop");
		/* Undo the accounting done above. */
		nsim_nexthop_account(data, nexthop->occ, false, extack);
		return err;
	}

	nexthop_set_hw_flags(devlink_net(data->devlink), nexthop->id, false,
			     true);

	return 0;
}
/* Replace a tracked nexthop object (@nexthop_old) with @nexthop.
 *
 * The new object's occupancy is charged before the old one is released,
 * so both are accounted for while the hash table entry is swapped. On
 * success the nexthop's hardware flags are set to offload=false,
 * trap=true, the old occupancy is released and the old object is freed.
 *
 * Returns 0 on success or a negative errno; on failure the old object
 * stays in place and the caller keeps ownership of @nexthop.
 */
static int nsim_nexthop_replace(struct nsim_fib_data *data,
				struct nsim_nexthop *nexthop,
				struct nsim_nexthop *nexthop_old,
				struct netlink_ext_ack *extack)
{
	int err;

	err = nsim_nexthop_account(data, nexthop->occ, true, extack);
	if (err)
		return err;

	err = rhashtable_replace_fast(&data->nexthop_ht,
				      &nexthop_old->ht_node, &nexthop->ht_node,
				      nsim_nexthop_ht_params);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop");
		/* Undo the accounting of the new object. */
		nsim_nexthop_account(data, nexthop->occ, false, extack);
		return err;
	}

	nexthop_set_hw_flags(devlink_net(data->devlink), nexthop->id, false,
			     true);
	nsim_nexthop_account(data, nexthop_old->occ, false, extack);
	nsim_nexthop_destroy(nexthop_old);

	return 0;
}
/* Handle a nexthop replace notification.
 *
 * Builds a driver object for the notified nexthop, then either adds it (no
 * object with this ID is tracked yet) or replaces the tracked one.
 *
 * Returns 0 on success, -ENOMEM if the object could not be allocated, or
 * the error reported by the add/replace step. The new object is freed on
 * failure.
 */
static int nsim_nexthop_insert(struct nsim_fib_data *data,
			       struct nh_notifier_info *info)
{
	struct nsim_nexthop *nh_new, *nh_cur;
	int err;

	nh_new = nsim_nexthop_create(data, info);
	if (!nh_new)
		return -ENOMEM;

	nh_cur = rhashtable_lookup_fast(&data->nexthop_ht, &info->id,
					nsim_nexthop_ht_params);
	err = nh_cur ?
	      nsim_nexthop_replace(data, nh_new, nh_cur, info->extack) :
	      nsim_nexthop_add(data, nh_new, info->extack);
	if (err)
		nsim_nexthop_destroy(nh_new);

	return err;
}
/* Handle a nexthop delete notification.
 *
 * If an object with the notified ID is tracked, it is removed from the hash
 * table, its occupancy is released and it is freed. Unknown IDs are
 * ignored.
 */
static void nsim_nexthop_remove(struct nsim_fib_data *data,
				struct nh_notifier_info *info)
{
	struct nsim_nexthop *nh;

	nh = rhashtable_lookup_fast(&data->nexthop_ht, &info->id,
				    nsim_nexthop_ht_params);
	if (nh) {
		rhashtable_remove_fast(&data->nexthop_ht, &nh->ht_node,
				       nsim_nexthop_ht_params);
		nsim_nexthop_account(data, nh->occ, false, info->extack);
		nsim_nexthop_destroy(nh);
	}
}
/* Nexthop notifier callback.
 *
 * Dispatches replace events to nsim_nexthop_insert() and delete events to
 * nsim_nexthop_remove(); any other event is ignored. Must be called with
 * RTNL held. The result is encoded with notifier_from_errno().
 */
static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
				 void *ptr)
{
	struct nh_notifier_info *info = ptr;
	struct nsim_fib_data *data;
	int err = 0;

	ASSERT_RTNL();

	data = container_of(nb, struct nsim_fib_data, nexthop_nb);

	if (event == NEXTHOP_EVENT_REPLACE)
		err = nsim_nexthop_insert(data, info);
	else if (event == NEXTHOP_EVENT_DEL)
		nsim_nexthop_remove(data, info);

	return notifier_from_errno(err);
}
/* Per-element callback for tearing down the nexthop hash table.
 *
 * @ptr is the struct nsim_nexthop being released and @arg is the owning
 * struct nsim_fib_data. Clears the nexthop's offload and trap flags,
 * releases its occupancy and frees it.
 */
static void nsim_nexthop_free(void *ptr, void *arg)
{
	struct nsim_fib_data *fib_data = arg;
	struct nsim_nexthop *nh = ptr;

	nexthop_set_hw_flags(devlink_net(fib_data->devlink), nh->id, false,
			     false);
	nsim_nexthop_account(fib_data, nh->occ, false, NULL);
	nsim_nexthop_destroy(nh);
}
static u64 nsim_fib_ipv4_resource_occ_get(void *priv)
{
struct nsim_fib_data *data = priv;
@ -866,12 +1069,20 @@ static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv)
return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false);
}
/* Occupancy getter for the nexthops resource; @priv is the
 * struct nsim_fib_data the getter was registered with.
 */
static u64 nsim_fib_nexthops_res_occ_get(void *priv)
{
	struct nsim_fib_data *fib_data = priv;
	u64 occ;

	occ = nsim_fib_get_val(fib_data, NSIM_RESOURCE_NEXTHOPS, false);

	return occ;
}
static void nsim_fib_set_max_all(struct nsim_fib_data *data,
struct devlink *devlink)
{
enum nsim_resource_id res_ids[] = {
NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_NEXTHOPS,
};
int i;
@ -897,20 +1108,32 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
return ERR_PTR(-ENOMEM);
data->devlink = devlink;
err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
if (err)
goto err_data_free;
spin_lock_init(&data->fib_lock);
INIT_LIST_HEAD(&data->fib_rt_list);
err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params);
if (err)
goto err_data_free;
goto err_rhashtable_nexthop_destroy;
nsim_fib_set_max_all(data, devlink);
data->nexthop_nb.notifier_call = nsim_nexthop_event_nb;
err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb,
extack);
if (err) {
pr_err("Failed to register nexthop notifier\n");
goto err_rhashtable_fib_destroy;
}
data->fib_nb.notifier_call = nsim_fib_event_nb;
err = register_fib_notifier(devlink_net(devlink), &data->fib_nb,
nsim_fib_dump_inconsistent, extack);
if (err) {
pr_err("Failed to register fib notifier\n");
goto err_rhashtable_destroy;
goto err_nexthop_nb_unregister;
}
devlink_resource_occ_get_register(devlink,
@ -929,11 +1152,20 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
NSIM_RESOURCE_IPV6_FIB_RULES,
nsim_fib_ipv6_rules_res_occ_get,
data);
devlink_resource_occ_get_register(devlink,
NSIM_RESOURCE_NEXTHOPS,
nsim_fib_nexthops_res_occ_get,
data);
return data;
err_rhashtable_destroy:
err_nexthop_nb_unregister:
unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
err_rhashtable_fib_destroy:
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
err_rhashtable_nexthop_destroy:
rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
data);
err_data_free:
kfree(data);
return ERR_PTR(err);
@ -941,6 +1173,8 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
{
devlink_resource_occ_get_unregister(devlink,
NSIM_RESOURCE_NEXTHOPS);
devlink_resource_occ_get_unregister(devlink,
NSIM_RESOURCE_IPV6_FIB_RULES);
devlink_resource_occ_get_unregister(devlink,
@ -950,8 +1184,11 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
devlink_resource_occ_get_unregister(devlink,
NSIM_RESOURCE_IPV4_FIB);
unregister_fib_notifier(devlink_net(devlink), &data->fib_nb);
unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
data);
WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
kfree(data);
}

View File

@ -158,6 +158,7 @@ enum nsim_resource_id {
NSIM_RESOURCE_IPV6,
NSIM_RESOURCE_IPV6_FIB,
NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_NEXTHOPS,
};
struct nsim_dev_health {

View File

@ -4684,9 +4684,14 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh)
static int vxlan_nexthop_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct nexthop *nh = ptr;
struct nh_notifier_info *info = ptr;
struct nexthop *nh;
if (!nh || event != NEXTHOP_EVENT_DEL)
if (event != NEXTHOP_EVENT_DEL)
return NOTIFY_DONE;
nh = nexthop_find_by_id(info->net, info->id);
if (!nh)
return NOTIFY_DONE;
vxlan_fdb_nh_flush(nh);
@ -4706,7 +4711,8 @@ static __net_init int vxlan_init_net(struct net *net)
for (h = 0; h < PORT_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vn->sock_list[h]);
return register_nexthop_notifier(net, &vn->nexthop_notifier_block);
return register_nexthop_notifier(net, &vn->nexthop_notifier_block,
NULL);
}
static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)

View File

@ -105,11 +105,49 @@ struct nexthop {
};
enum nexthop_event_type {
NEXTHOP_EVENT_DEL
NEXTHOP_EVENT_DEL,
NEXTHOP_EVENT_REPLACE,
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
/* Parsed description of a single (non-group) nexthop, as passed to nexthop
 * notifier listeners.
 */
struct nh_notifier_single_info {
/* Nexthop device */
struct net_device *dev;
/* Address family of the gateway; selects which union member is valid */
u8 gw_family;
union {
/* Gateway address when gw_family is AF_INET */
__be32 ipv4;
/* Gateway address when gw_family is AF_INET6 */
struct in6_addr ipv6;
};
/* is_reject: reject nexthop; is_fdb: FDB nexthop;
 * has_encap: nexthop has a lightweight tunnel state attached
 */
u8 is_reject:1,
is_fdb:1,
has_encap:1;
};
/* Parsed description of one member of a nexthop group. */
struct nh_notifier_grp_entry_info {
/* Weight of this member within the group */
u8 weight;
/* ID of the member nexthop */
u32 id;
/* Parsed attributes of the member nexthop */
struct nh_notifier_single_info nh;
};
/* Parsed description of a nexthop group, as passed to nexthop notifier
 * listeners.
 */
struct nh_notifier_grp_info {
/* Number of elements in nh_entries[] */
u16 num_nh;
/* Whether the group is an FDB nexthop group */
bool is_fdb;
/* One entry per group member (flexible array) */
struct nh_notifier_grp_entry_info nh_entries[];
};
/* Argument passed to nexthop notifier callbacks. */
struct nh_notifier_info {
/* Network namespace the nexthop belongs to */
struct net *net;
/* Extended ack a listener can use to report why it vetoed the change */
struct netlink_ext_ack *extack;
/* ID of the nexthop object the notification is about */
u32 id;
/* Selects the valid union member: nh_grp when true, nh otherwise */
bool is_grp;
union {
struct nh_notifier_single_info *nh;
struct nh_notifier_grp_info *nh_grp;
};
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);

View File

@ -396,11 +396,13 @@ struct rtnexthop {
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
#define RTNH_F_OFFLOAD 8 /* offloaded route */
#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD)
#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
RTNH_F_OFFLOAD | RTNH_F_TRAP)
/* Macros to handle nexthops */

View File

@ -1644,6 +1644,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
if (nhc->nhc_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
if (nhc->nhc_flags & RTNH_F_TRAP)
*flags |= RTNH_F_TRAP;
if (!skip_oif && nhc->nhc_dev &&
nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))

View File

@ -2100,15 +2100,6 @@ static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
KEYLENGTH - fa->fa_slen, tb->tb_id,
info, NLM_F_REPLACE);
/* call_fib_entry_notifiers will be removed when
* in-kernel notifier is implemented and supported
* for nexthop objects
*/
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
n->key,
KEYLENGTH - fa->fa_slen, fa,
NULL);
}
}
}

View File

@ -36,14 +36,145 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
[NHA_FDB] = { .type = NLA_FLAG },
};
static bool nexthop_notifiers_is_empty(struct net *net)
{
return !net->nexthop.notifier_chain.head;
}
static void
__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
const struct nexthop *nh)
{
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
nh_info->dev = nhi->fib_nhc.nhc_dev;
nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
if (nh_info->gw_family == AF_INET)
nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
}
/* Allocate info->nh and populate it from the single nexthop @nh.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed. The allocation
 * is released by nh_notifier_single_info_fini().
 */
static int nh_notifier_single_info_init(struct nh_notifier_info *info,
					const struct nexthop *nh)
{
	info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
	if (info->nh) {
		__nh_notifier_single_info_init(info->nh, nh);
		return 0;
	}

	return -ENOMEM;
}
/* Free the single-nexthop info allocated by nh_notifier_single_info_init(). */
static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
{
kfree(info->nh);
}
/* Allocate info->nh_grp and populate it from the nexthop group @nh.
 *
 * The allocation is sized for the group's current number of members; each
 * entry receives the member's ID, weight and parsed single-nexthop info.
 * The group is dereferenced under RTNL.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed. The allocation
 * is released by nh_notifier_grp_info_fini().
 */
static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
				     const struct nexthop *nh)
{
	struct nh_group *group = rtnl_dereference(nh->nh_grp);
	u16 count = group->num_nh;
	int idx = 0;

	info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, count),
			       GFP_KERNEL);
	if (!info->nh_grp)
		return -ENOMEM;

	info->nh_grp->is_fdb = group->fdb_nh;
	info->nh_grp->num_nh = count;

	while (idx < count) {
		struct nh_grp_entry *member = &group->nh_entries[idx];

		info->nh_grp->nh_entries[idx].weight = member->weight;
		info->nh_grp->nh_entries[idx].id = member->nh->id;
		__nh_notifier_single_info_init(&info->nh_grp->nh_entries[idx].nh,
					       member->nh);
		idx++;
	}

	return 0;
}
/* Free the group info allocated by nh_notifier_grp_info_init(). */
static void nh_notifier_grp_info_fini(struct nh_notifier_info *info)
{
kfree(info->nh_grp);
}
/* Populate @info from @nh: record the ID and whether it is a group, then
 * build the matching parsed representation (group or single nexthop).
 *
 * Returns 0 on success or a negative errno from the underlying initializer.
 */
static int nh_notifier_info_init(struct nh_notifier_info *info,
				 const struct nexthop *nh)
{
	info->id = nh->id;
	info->is_grp = nh->is_group;

	if (!info->is_grp)
		return nh_notifier_single_info_init(info, nh);

	return nh_notifier_grp_info_init(info, nh);
}
/* Release whatever nh_notifier_info_init() allocated for @info, choosing
 * the group or single-nexthop teardown based on info->is_grp.
 */
static void nh_notifier_info_fini(struct nh_notifier_info *info)
{
	if (!info->is_grp) {
		nh_notifier_single_info_fini(info);
		return;
	}

	nh_notifier_grp_info_fini(info);
}
static int call_nexthop_notifiers(struct net *net,
enum nexthop_event_type event_type,
struct nexthop *nh)
struct nexthop *nh,
struct netlink_ext_ack *extack)
{
struct nh_notifier_info info = {
.net = net,
.extack = extack,
};
int err;
ASSERT_RTNL();
if (nexthop_notifiers_is_empty(net))
return 0;
err = nh_notifier_info_init(&info, nh);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
return err;
}
err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
event_type, nh);
event_type, &info);
nh_notifier_info_fini(&info);
return notifier_to_errno(err);
}
static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
enum nexthop_event_type event_type,
struct nexthop *nh,
struct netlink_ext_ack *extack)
{
struct nh_notifier_info info = {
.net = net,
.extack = extack,
};
int err;
err = nh_notifier_info_init(&info, nh);
if (err)
return err;
err = nb->notifier_call(nb, event_type, &info);
nh_notifier_info_fini(&info);
return notifier_to_errno(err);
}
@ -782,9 +913,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
{
struct nh_grp_entry *nhges, *new_nhges;
struct nexthop *nhp = nhge->nh_parent;
struct netlink_ext_ack extack;
struct nexthop *nh = nhge->nh;
struct nh_group *nhg, *newg;
int i, j;
int i, j, err;
WARN_ON(!nh);
@ -832,6 +964,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
list_del(&nhge->nh_list);
nexthop_put(nhge->nh);
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
if (err)
pr_err("%s\n", extack._msg);
if (nlinfo)
nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
}
@ -907,7 +1043,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh,
static void remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
/* remove from the tree */
rb_erase(&nh->rb_node, &net->nexthop.rb_root);
@ -940,13 +1076,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
struct netlink_ext_ack *extack)
{
struct nh_group *oldg, *newg;
int i;
int i, err;
if (!new->is_group) {
NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
return -EINVAL;
}
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
if (err)
return err;
oldg = rtnl_dereference(old->nh_grp);
newg = rtnl_dereference(new->nh_grp);
@ -985,31 +1125,54 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
struct nexthop *new,
struct netlink_ext_ack *extack)
{
u8 old_protocol, old_nh_flags;
struct nh_info *oldi, *newi;
struct nh_grp_entry *nhge;
int err;
if (new->is_group) {
NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
return -EINVAL;
}
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
if (err)
return err;
/* Hardware flags were set on 'old' as 'new' is not in the red-black
* tree. Therefore, inherit the flags from 'old' to 'new'.
*/
new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
oldi = rtnl_dereference(old->nh_info);
newi = rtnl_dereference(new->nh_info);
newi->nh_parent = old;
oldi->nh_parent = new;
old_protocol = old->protocol;
old_nh_flags = old->nh_flags;
old->protocol = new->protocol;
old->nh_flags = new->nh_flags;
rcu_assign_pointer(old->nh_info, newi);
rcu_assign_pointer(new->nh_info, oldi);
/* Send a replace notification for all the groups using the nexthop. */
list_for_each_entry(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
extack);
if (err)
goto err_notify;
}
/* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
* update IPv4 indication in all the groups using the nexthop.
*/
if (oldi->family == AF_INET && newi->family == AF_INET6) {
struct nh_grp_entry *nhge;
list_for_each_entry(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
struct nh_group *nhg;
@ -1020,6 +1183,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
}
return 0;
err_notify:
rcu_assign_pointer(new->nh_info, newi);
rcu_assign_pointer(old->nh_info, oldi);
old->nh_flags = old_nh_flags;
old->protocol = old_protocol;
oldi->nh_parent = old;
newi->nh_parent = new;
list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
}
call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
return err;
}
static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
@ -1168,7 +1346,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
rb_link_node_rcu(&new_nh->rb_node, parent, pp);
rb_insert_color(&new_nh->rb_node, root);
rc = 0;
rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
if (rc)
rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
out:
if (!rc) {
nh_base_seq_inc(net);
@ -1957,10 +2139,40 @@ static struct notifier_block nh_netdev_notifier = {
.notifier_call = nh_netdev_event,
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
static int nexthops_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return blocking_notifier_chain_register(&net->nexthop.notifier_chain,
nb);
struct rb_root *root = &net->nexthop.rb_root;
struct rb_node *node;
int err = 0;
for (node = rb_first(root); node; node = rb_next(node)) {
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
extack);
if (err)
break;
}
return err;
}
/* Register @nb on the namespace's nexthop notifier chain.
 *
 * Under RTNL, the existing nexthops are first dumped to @nb via
 * nexthops_dump(); only if that succeeds is @nb added to the chain.
 *
 * Returns 0 on success or a negative errno from the dump or the chain
 * registration.
 */
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
			      struct netlink_ext_ack *extack)
{
	int err;

	rtnl_lock();

	err = nexthops_dump(net, nb, extack);
	if (!err)
		err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
						       nb);

	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL(register_nexthop_notifier);
@ -1971,6 +2183,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
/* Set the hardware flags of the nexthop with ID @id in @net.
 *
 * Both RTNH_F_OFFLOAD and RTNH_F_TRAP are cleared first and then set again
 * according to @offload and @trap. Nothing happens if no nexthop with this
 * ID exists. The lookup and update run inside an RCU read-side section.
 */
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
{
	struct nexthop *nh;

	rcu_read_lock();

	nh = nexthop_find_by_id(net, id);
	if (nh) {
		nh->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
		if (offload)
			nh->nh_flags |= RTNH_F_OFFLOAD;
		if (trap)
			nh->nh_flags |= RTNH_F_TRAP;
	}

	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_set_hw_flags);
static void __net_exit nexthop_net_exit(struct net *net)
{
rtnl_lock();

View File

@ -6039,11 +6039,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct sk_buff *skb;
int err = -ENOBUFS;
/* call_fib6_entry_notifiers will be removed when in-kernel notifier
* is implemented and supported for nexthop objects
*/
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;

View File

@ -0,0 +1,436 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test is for checking the nexthop offload API. It makes use of netdevsim
# which registers a listener to the nexthop notification chain.
# Directory holding the shared forwarding selftest helpers. "$0" is quoted so
# the script still works when it lives under a path containing whitespace.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# Names of the test functions defined in this script.
# NOTE(review): presumably consumed by tests_run from the sourced lib.sh.
ALL_TESTS="
nexthop_single_add_test
nexthop_single_add_err_test
nexthop_group_add_test
nexthop_group_add_err_test
nexthop_group_replace_test
nexthop_group_replace_err_test
nexthop_single_replace_test
nexthop_single_replace_err_test
nexthop_single_in_group_replace_test
nexthop_single_in_group_replace_err_test
nexthop_single_in_group_delete_test
nexthop_single_in_group_delete_err_test
nexthop_replay_test
nexthop_replay_err_test
"

# netdevsim device used by the tests and the sysfs/devlink handles derived
# from its address.
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/

# NOTE(review): presumably tells lib.sh that no pre-existing interfaces are
# required - confirm against lib.sh.
NUM_NETIFS=0

source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
# Compare the output of "$IP nexthop show <nharg>" with an expected string.
#
# $1 - selector passed to "nexthop show"; deliberately expanded unquoted so
#      that e.g. "id 1" becomes two arguments
# $2 - expected output; trailing spaces of the actual output are stripped
#      before the comparison
#
# Returns 0 on an exact match, 1 otherwise.
nexthop_check()
{
	local nharg="$1"; shift
	local expected="$1"; shift
	# Keep the captured output local so it cannot clobber a caller's "out".
	local out

	out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
	if [[ "$out" != "$expected" ]]; then
		return 1
	fi

	return 0
}
# Check the occupancy reported for the "nexthops" devlink resource.
#
# $1 - expected occupancy
#
# Returns 0 when the reported occupancy equals the expected value and 1
# otherwise. A query that yields anything other than a single non-negative
# integer (empty output, "null", several values) also returns 1, so a broken
# devlink/jq pipeline can never be mistaken for a passing check.
nexthop_resource_check()
{
	local expected_occ=$1; shift
	local occ

	occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
		| jq '.[][][] | select(.name=="nexthops") | .["occ"]')

	# Without this guard the numeric test below would error out on a
	# non-numeric value and the failure would be silently ignored.
	[[ "$occ" =~ ^[0-9]+$ ]] || return 1

	[ "$expected_occ" -eq "$occ" ] || return 1

	return 0
}
# Set the size of the "nexthops" devlink resource and reload the device.
#
# $1 - new resource size
#
# The return status is that of the reload command.
# NOTE(review): presumably the new size only takes effect on reload - confirm.
nexthop_resource_set()
{
	local new_size=$1; shift

	$DEVLINK resource set $DEVLINK_DEV path nexthops size $new_size
	$DEVLINK dev reload $DEVLINK_DEV
}
# Add a single nexthop, verify how it is shown and accounted for, then delete
# it and verify the occupancy drops back to zero.
nexthop_single_add_test()
{
	local want_nh="id 1 via 192.0.2.2 dev dummy1 scope link trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	nexthop_check "id 1" "$want_nh"
	check_err $? "Unexpected nexthop entry"

	nexthop_resource_check 1
	check_err $? "Wrong nexthop occupancy"

	$IP nexthop del id 1
	nexthop_resource_check 0
	check_err $? "Wrong nexthop occupancy after delete"

	log_test "Single nexthop add and delete"
}
# With the resource limited to one entry, a second nexthop add must fail and
# the occupancy must stay at one.
nexthop_single_add_err_test()
{
	RET=0

	nexthop_resource_set 1

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1 > /dev/null 2>&1
	check_fail $? "Nexthop addition succeeded when should fail"

	nexthop_resource_check 1
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop add failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Add and delete a nexthop group, first with default weights and then with
# explicit weights, checking the shown entry and the occupancy each time.
nexthop_group_add_test()
{
	local want_plain="id 10 group 1/2 trap"
	local want_weighted="id 10 group 1,20/2,39 trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1

	# Unweighted group.
	$IP nexthop add id 10 group 1/2
	nexthop_check "id 10" "$want_plain"
	check_err $? "Unexpected nexthop group entry"

	nexthop_resource_check 4
	check_err $? "Wrong nexthop occupancy"

	$IP nexthop del id 10
	nexthop_resource_check 2
	check_err $? "Wrong nexthop occupancy after delete"

	# Weighted group.
	# NOTE(review): the expected 61 presumably is the two single nexthops
	# plus the member weights (20 + 39) - confirm against the netdevsim
	# accounting.
	$IP nexthop add id 10 group 1,20/2,39
	nexthop_check "id 10" "$want_weighted"
	check_err $? "Unexpected weighted nexthop group entry"

	nexthop_resource_check 61
	check_err $? "Wrong weighted nexthop occupancy"

	$IP nexthop del id 10
	nexthop_resource_check 2
	check_err $? "Wrong nexthop occupancy after delete"

	log_test "Nexthop group add and delete"

	$IP nexthop flush > /dev/null 2>&1
}
# With the resource limited to two entries, adding a group on top of two
# single nexthops must fail and the occupancy must stay at two.
nexthop_group_add_err_test()
{
	RET=0

	nexthop_resource_set 2

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2 > /dev/null 2>&1
	check_fail $? "Nexthop group addition succeeded when should fail"

	nexthop_resource_check 2
	check_err $? "Wrong nexthop occupancy"

	log_test "Nexthop group add failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Replace a two-member group with a three-member one and verify the shown
# entry and the resulting occupancy.
nexthop_group_replace_test()
{
	local want_grp="id 10 group 1/2/3 trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
	$IP nexthop add id 10 group 1/2

	$IP nexthop replace id 10 group 1/2/3
	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry"

	nexthop_resource_check 6
	check_err $? "Wrong nexthop occupancy"

	log_test "Nexthop group replace"

	$IP nexthop flush > /dev/null 2>&1
}
# With the resource limited to five entries, growing the group from two to
# three members must fail; the old group and the occupancy must be unchanged.
nexthop_group_replace_err_test()
{
	local want_grp="id 10 group 1/2 trap"

	RET=0

	nexthop_resource_set 5

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
	$IP nexthop add id 10 group 1/2

	$IP nexthop replace id 10 group 1/2/3 > /dev/null 2>&1
	check_fail $? "Nexthop group replacement succeeded when should fail"

	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry after failure"

	nexthop_resource_check 5
	check_err $? "Wrong nexthop occupancy after failure"

	log_test "Nexthop group replace failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Replace the gateway of a single nexthop and verify the new entry is shown
# while the occupancy stays at one.
nexthop_single_replace_test()
{
	local want_nh="id 1 via 192.0.2.3 dev dummy1 scope link trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1

	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1
	nexthop_check "id 1" "$want_nh"
	check_err $? "Unexpected nexthop entry"

	nexthop_resource_check 1
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop replace"

	$IP nexthop flush > /dev/null 2>&1
}
# With room for only one entry, replacing a single nexthop must fail and the
# original entry must survive.
nexthop_single_replace_err_test()
{
	local want_nh="id 1 via 192.0.2.2 dev dummy1 scope link trap"

	RET=0

	# The replace is expected to fail because the new nexthop is
	# programmed before the replaced one is deleted.
	nexthop_resource_set 1

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1

	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1 > /dev/null 2>&1
	check_fail $? "Nexthop replace succeeded when should fail"

	nexthop_check "id 1" "$want_nh"
	check_err $? "Unexpected nexthop entry after failure"

	nexthop_resource_check 1
	check_err $? "Wrong nexthop occupancy after failure"

	log_test "Single nexthop replace failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Replace a single nexthop that is a member of a group; the replace must
# succeed and the group and occupancy must be unchanged.
nexthop_single_in_group_replace_test()
{
	local want_grp="id 10 group 1/2 trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2

	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
	check_err $? "Failed to replace nexthop when should not"

	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry"

	nexthop_resource_check 4
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop replace while in group"

	$IP nexthop flush > /dev/null 2>&1
}
# With the resource limited to five entries, replacing a nexthop that is a
# group member must fail; the nexthop, the group and the occupancy must all
# be as they were before the attempt.
nexthop_single_in_group_replace_err_test()
{
	local want_nh="id 1 via 192.0.2.2 dev dummy1 scope link trap"
	local want_grp="id 10 group 1/2 trap"

	RET=0

	nexthop_resource_set 5

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2

	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 > /dev/null 2>&1
	check_fail $? "Nexthop replacement succeeded when should fail"

	nexthop_check "id 1" "$want_nh"
	check_err $? "Unexpected nexthop entry after failure"

	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry after failure"

	nexthop_resource_check 4
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop replace while in group failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Delete a single nexthop that is a member of a group and verify the group
# shrinks to the remaining member and the occupancy follows.
nexthop_single_in_group_delete_test()
{
	local want_grp="id 10 group 2 trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2

	$IP nexthop del id 1
	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry"

	nexthop_resource_check 2
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop delete while in group"

	$IP nexthop flush > /dev/null 2>&1
}
# Delete a group member while the resource is too small to hold the
# replacement group, and verify the resulting occupancy.
nexthop_single_in_group_delete_err_test()
{
	RET=0

	# Deleting nexthop 1 first brings the occupancy down to 5. A replace
	# notification is then sent for group 10 with its two remaining
	# nexthops. The new group is allocated before the old one is deleted,
	# so the replacement would need an occupancy of 7 and therefore fails.
	nexthop_resource_set 6

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
	$IP nexthop add id 10 group 1/2/3

	$IP nexthop del id 1
	nexthop_resource_check 5
	check_err $? "Wrong nexthop occupancy"

	log_test "Single nexthop delete while in group failure"

	# Leave a clean table and a large resource behind for later tests.
	$IP nexthop flush > /dev/null 2>&1
	nexthop_resource_set 9999
}
# Reload the device while nexthops exist and verify that every nexthop, the
# group and the occupancy look the same afterwards.
nexthop_replay_test()
{
	local want_nh1="id 1 via 192.0.2.2 dev dummy1 scope link trap"
	local want_nh2="id 2 via 192.0.2.3 dev dummy1 scope link trap"
	local want_grp="id 10 group 1/2 trap"

	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2

	$DEVLINK dev reload $DEVLINK_DEV
	check_err $? "Failed to reload when should not"

	nexthop_check "id 1" "$want_nh1"
	check_err $? "Unexpected nexthop entry after reload"

	nexthop_check "id 2" "$want_nh2"
	check_err $? "Unexpected nexthop entry after reload"

	nexthop_check "id 10" "$want_grp"
	check_err $? "Unexpected nexthop group entry after reload"

	nexthop_resource_check 4
	check_err $? "Wrong nexthop occupancy"

	log_test "Nexthop replay"

	$IP nexthop flush > /dev/null 2>&1
}
# A reload must fail when the resource is too small for the existing
# nexthops, and must succeed again once the size is restored.
nexthop_replay_err_test()
{
	RET=0

	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
	$IP nexthop add id 10 group 1/2

	# Shrink the nexthop resource so that the reload fails.
	$DEVLINK resource set $DEVLINK_DEV path nexthops size 3
	$DEVLINK dev reload $DEVLINK_DEV > /dev/null 2>&1
	check_fail $? "Reload succeeded when should fail"

	# Restore a large resource; this reload is expected to succeed.
	$DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
	$DEVLINK dev reload $DEVLINK_DEV
	check_err $? "Failed to reload when should not"

	log_test "Nexthop replay failure"

	$IP nexthop flush > /dev/null 2>&1
}
# Create the netdevsim device, move it into a dedicated network namespace and
# set up the dummy interface the tests route through.
#
# Sets the globals IP and DEVLINK to commands that operate inside the
# "testns1" namespace. Exits the script with status 1 if the netdevsim
# network device does not show up in sysfs within roughly ten seconds.
setup_prepare()
{
	local tries=0

	modprobe netdevsim &> /dev/null

	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device

	# Wait for the device's net directory, but never spin forever: if the
	# device could not be created the test must fail instead of hanging.
	while [ ! -d $SYSFS_NET_DIR ]; do
		if [ $tries -ge 100 ]; then
			echo "netdevsim device $DEV did not appear in sysfs" >&2
			exit 1
		fi
		tries=$((tries + 1))
		sleep 0.1
	done

	# Any failure in the remaining setup steps aborts the script.
	set -e

	ip netns add testns1
	devlink dev reload $DEVLINK_DEV netns testns1

	IP="ip -netns testns1"
	DEVLINK="devlink -N testns1"

	$IP link add name dummy1 up type dummy
	$IP address add 192.0.2.1/24 dev dummy1

	set +e
}
# Undo setup_prepare: remove the test namespace, delete the netdevsim device
# and try to unload the module (its output is discarded).
cleanup()
{
	# pre_cleanup is not defined in this script; presumably it comes from
	# the sourced lib.sh.
	pre_cleanup

	ip netns del testns1
	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device

	modprobe -r netdevsim > /dev/null 2>&1
}
# Run cleanup on every exit path of the script.
trap cleanup EXIT
# Create the namespace, the netdevsim device and the dummy interface.
setup_prepare
# tests_run and EXIT_STATUS are not defined in this script; presumably both
# come from the sourced lib.sh, with tests_run executing the functions listed
# in ALL_TESTS - confirm against lib.sh.
tests_run
exit $EXIT_STATUS