devlink: Introduce rate object

Allow registering a rate object for devlink ports with the dedicated
devlink_rate_leaf_{create|destroy}() API. Implement a new netlink
DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info.
Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for
notifications when creating/deleting a leaf rate object.

Rate API is intended to be used for rate limiting of individual
devlink ports (leafs) and their aggregates (nodes).

Example:

$ devlink port show
pci/0000:03:00.0/0
pci/0000:03:00.0/1

$ devlink port function rate show
pci/0000:03:00.0/0: type leaf
pci/0000:03:00.0/1: type leaf

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Dmytro Linkin 2021-06-02 15:17:19 +03:00 committed by David S. Miller
parent 160dc373ee
commit 4677efc486
3 changed files with 253 additions and 1 deletions

View File

@ -34,6 +34,7 @@ struct devlink_ops;
struct devlink {
struct list_head list;
struct list_head port_list;
struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
struct list_head resource_list;
@ -133,6 +134,15 @@ struct devlink_port_attrs {
};
};
/**
 * struct devlink_rate - devlink rate object
 * @list: entry in the owning devlink instance's rate_list
 * @type: kind of rate object; devlink_rate_leaf_create() sets it to
 *        DEVLINK_RATE_TYPE_LEAF
 * @devlink: devlink instance the rate object belongs to
 * @priv: driver private data handed to devlink_rate_leaf_create()
 * @devlink_port: port the rate object was created on (set for leaf objects)
 */
struct devlink_rate {
struct list_head list;
enum devlink_rate_type type;
struct devlink *devlink;
void *priv;
struct devlink_port *devlink_port;
};
struct devlink_port {
struct list_head list;
struct list_head param_list;
@ -152,6 +162,8 @@ struct devlink_port {
struct delayed_work type_warn_dw;
struct list_head reporter_list;
struct mutex reporters_lock; /* Protects reporter_list */
struct devlink_rate *devlink_rate;
};
struct devlink_port_new_attrs {
@ -1512,6 +1524,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
int devlink_rate_leaf_create(struct devlink_port *port, void *priv);
void devlink_rate_leaf_destroy(struct devlink_port *devlink_port);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,

View File

@ -126,6 +126,11 @@ enum devlink_command {
DEVLINK_CMD_HEALTH_REPORTER_TEST,
DEVLINK_CMD_RATE_GET, /* can dump */
DEVLINK_CMD_RATE_SET,
DEVLINK_CMD_RATE_NEW,
DEVLINK_CMD_RATE_DEL,
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@ -206,6 +211,10 @@ enum devlink_port_flavour {
*/
};
/* Kind of a devlink rate object; carried in DEVLINK_ATTR_RATE_TYPE.
 * DEVLINK_RATE_TYPE_LEAF is a rate object attached to a single devlink port.
 */
enum devlink_rate_type {
DEVLINK_RATE_TYPE_LEAF,
};
enum devlink_param_cmode {
DEVLINK_PARAM_CMODE_RUNTIME,
DEVLINK_PARAM_CMODE_DRIVERINIT,
@ -534,6 +543,8 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */
DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */
DEVLINK_ATTR_RATE_TYPE, /* u16 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,

View File

@ -190,6 +190,25 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
return devlink_port_get_from_attrs(devlink, info->attrs);
}
/* Report whether @devlink_rate is of type DEVLINK_RATE_TYPE_LEAF. */
static inline bool
devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
{
	if (devlink_rate->type != DEVLINK_RATE_TYPE_LEAF)
		return false;

	return true;
}
/* Resolve the leaf rate object addressed by a netlink request.
 *
 * The port is looked up from the request attributes; a failed lookup is
 * passed through as an ERR_PTR. A port that has no rate object attached
 * yields ERR_PTR(-ENODEV).
 */
static struct devlink_rate *
devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
{
	struct devlink_port *port;
	struct devlink_rate *rate;

	port = devlink_port_get_from_attrs(devlink, info->attrs);
	if (IS_ERR(port))
		return ERR_CAST(port);

	rate = port->devlink_rate;
	if (!rate)
		return ERR_PTR(-ENODEV);

	return rate;
}
struct devlink_sb {
struct list_head list;
unsigned int index;
@ -408,12 +427,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@ -442,6 +462,15 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
struct devlink_rate *devlink_rate;
devlink_rate = devlink_rate_leaf_get_from_info(devlink, info);
if (IS_ERR(devlink_rate)) {
err = PTR_ERR(devlink_rate);
goto unlock;
}
info->user_ptr[1] = devlink_rate;
}
return 0;
@ -749,6 +778,39 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
return 0;
}
/* Serialize one rate object into @msg as a generic netlink message.
 *
 * The message carries the devlink handle, DEVLINK_ATTR_RATE_TYPE and, for
 * leaf objects, DEVLINK_ATTR_PORT_INDEX of the port the leaf sits on.
 * @extack is accepted but not used here.
 *
 * Returns 0 on success or -EMSGSIZE when the header or any attribute does
 * not fit; in the latter case the partially built message is cancelled.
 */
static int devlink_nl_rate_fill(struct sk_buff *msg,
				struct devlink *devlink,
				struct devlink_rate *devlink_rate,
				enum devlink_command cmd, u32 portid,
				u32 seq, int flags,
				struct netlink_ext_ack *extack)
{
	void *hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family,
				flags, cmd);

	if (!hdr)
		return -EMSGSIZE;

	if (devlink_nl_put_handle(msg, devlink) ||
	    nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
		goto cancel;

	/* Only leaf objects are bound to a port. */
	if (devlink_rate_is_leaf(devlink_rate) &&
	    nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
			devlink_rate->devlink_port->index))
		goto cancel;

	genlmsg_end(msg, hdr);
	return 0;

cancel:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}
static bool
devlink_port_fn_state_valid(enum devlink_port_fn_state state)
{
@ -920,6 +982,99 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/* Multicast a rate object notification to DEVLINK_MCGRP_CONFIG listeners
 * in the devlink instance's network namespace.
 *
 * @cmd is expected to be DEVLINK_CMD_RATE_NEW or DEVLINK_CMD_RATE_DEL;
 * anything else triggers a warning but the message is still built.
 * Allocation or fill failures are silent: no notification is sent.
 */
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
				enum devlink_command cmd)
{
	struct sk_buff *skb;

	WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
		cmd != DEVLINK_CMD_RATE_DEL);

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return;

	if (devlink_nl_rate_fill(skb, devlink_rate->devlink, devlink_rate,
				 cmd, 0, 0, 0, NULL))
		goto drop;

	genlmsg_multicast_netns(&devlink_nl_family,
				devlink_net(devlink_rate->devlink),
				skb, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
	return;

drop:
	nlmsg_free(skb);
}
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
struct devlink_rate *devlink_rate;
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
u32 id = NETLINK_CB(cb->skb).portid;
if (idx < start) {
idx++;
continue;
}
err = devlink_nl_rate_fill(msg, devlink,
devlink_rate,
cmd, id,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, NULL);
if (err) {
mutex_unlock(&devlink->lock);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
if (err != -EMSGSIZE)
return err;
cb->args[0] = idx;
return msg->len;
}
/* Handler for a single-object DEVLINK_CMD_RATE_GET request.
 *
 * The rate object is taken from info->user_ptr[1], where the pre-doit hook
 * stores it for operations flagged with DEVLINK_NL_FLAG_NEED_RATE. The
 * reply is built as a DEVLINK_CMD_RATE_NEW message and sent back to the
 * requester.
 *
 * Returns -ENOMEM if the reply cannot be allocated, the fill error if the
 * object cannot be serialized, otherwise the result of genlmsg_reply().
 */
static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
					struct genl_info *info)
{
	struct devlink_rate *rate = info->user_ptr[1];
	struct sk_buff *reply;
	int ret;

	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	ret = devlink_nl_rate_fill(reply, rate->devlink, rate,
				   DEVLINK_CMD_RATE_NEW,
				   info->snd_portid, info->snd_seq, 0,
				   info->extack);
	if (ret)
		goto err_free;

	return genlmsg_reply(reply, info);

err_free:
	nlmsg_free(reply);
	return ret;
}
static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
@ -7802,6 +7957,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
[DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@ -7827,6 +7983,13 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_RATE_GET,
.doit = devlink_nl_cmd_rate_get_doit,
.dumpit = devlink_nl_cmd_rate_get_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_PORT_SPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@ -8202,6 +8365,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
__devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@ -8304,6 +8468,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
WARN_ON(!list_empty(&devlink->rate_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@ -8620,6 +8785,68 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
/**
* devlink_rate_leaf_create - create devlink rate leaf
*
* @devlink_port: devlink port object to create rate object on
* @priv: driver private data
*
* Create devlink rate object of type leaf on provided @devlink_port.
* Throws call trace if @devlink_port already has a devlink rate object.
*
* Context: Takes and release devlink->lock <mutex>.
*
* Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
*/
int
devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
{
struct devlink *devlink = devlink_port->devlink;
struct devlink_rate *devlink_rate;
devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
if (!devlink_rate)
return -ENOMEM;
mutex_lock(&devlink->lock);
WARN_ON(devlink_port->devlink_rate);
devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
devlink_rate->devlink = devlink;
devlink_rate->devlink_port = devlink_port;
devlink_rate->priv = priv;
list_add_tail(&devlink_rate->list, &devlink->rate_list);
devlink_port->devlink_rate = devlink_rate;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
/**
 * devlink_rate_leaf_destroy - destroy devlink rate leaf
 *
 * @devlink_port: devlink port linked to the rate object
 *
 * Send a DEVLINK_CMD_RATE_DEL notification, unlink the rate object from the
 * devlink instance's rate list, detach it from @devlink_port and free it.
 * Does nothing if @devlink_port has no rate object.
 *
 * Context: Takes and releases devlink->lock <mutex>.
 */
void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
{
	struct devlink *devlink = devlink_port->devlink;
	struct devlink_rate *devlink_rate;

	mutex_lock(&devlink->lock);
	/* Sample the pointer under the lock so the check and the teardown
	 * act on the same object; it is only written with the lock held.
	 */
	devlink_rate = devlink_port->devlink_rate;
	if (devlink_rate) {
		devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
		list_del(&devlink_rate->list);
		devlink_port->devlink_rate = NULL;
	}
	mutex_unlock(&devlink->lock);

	/* kfree(NULL) is a no-op, so the no-object case needs no branch. */
	kfree(devlink_rate);
}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{