mlx5-update-2018-02-23 (IB representors)

From: Mark Bloch <markb@mellanox.com>
 =========
 Add IB representor when in switchdev mode
 
 The following series adds support for an IB (RAW Ethernet only) device
 representor which is created when the user switches to switchdev mode.
 
 Today when switching to switchdev mode the only representors which are
 created are net devices. Each netdev is a representor of a virtual
 function and any data sent via the representor is received on the virtual
 function, and any data sent via the virtual function is received by the
 representor.
 
 For the mlx5 driver the main use of this functionality is to be able to
 use Open vSwitch on the hypervisor in order to manage/control traffic
 from/to the virtual functions. Open vSwitch can also work with  DPDK
 devices and not just net devices, this series exposes an IB device, which
 Mellanox PMD driver uses, which then can be used by Open vSwitch DPDK.
 
 An IB device representor exposes only RAW Ethernet QP capabilities and
 the ability to create flow rules to direct traffic to its RX queues. The
 state of the IB device (ACTIVE/DOWN etc..) is based on the state of the
 corresponding net device representor. No other RDMA/RoCE functionality is
 currently supported and no GID table is exposed.
 =========
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJakH7zAAoJEEg/ir3gV/o+c/MIAMGGgNajr49+JP3t9wnrs011
 +cTfAfM88HBzTlfb/COEBz+jurH2oB7ZF4RZC29S+6pR3loKKBuvbiPndE0XKjSg
 Ue4sOkawybmDvfo9ZiMsusOiMfTp5wsLmqJP1HRUvGMAlSBeriMTZfbiKzx5c3Ok
 X8cMnRIvUOtCoQaJTfKarDUn4OF8aFam4tQW8k/RAo77kTPyihb1NlGiblrcCA2E
 PWYAOWW3D8gvE0cr19JVgEqpKIaJ/VRyjwQ7m8XSvfBJtw1ZTO6YMXiXbWMOsRzD
 fx33H+n/qwJT0cnxDmSpZrR7mEk+Wr2HL92O85KDupOSgLOIlywmtIIkEAnCeaw=
 =Fq6m
 -----END PGP SIGNATURE-----

Merge tag 'mlx5-updates-2018-02-23' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

Saeed Mahameed says:

mlx5-update-2018-02-23 (IB representors)

From: Mark Bloch <markb@mellanox.com>
=========
Add IB representor when in switchdev mode

The following series adds support for an IB (RAW Ethernet only) device
representor which is created when the user switches to switchdev mode.

Today when switching to switchdev mode the only representors which are
created are net devices. Each netdev is a representor of a virtual
function and any data sent via the representor is received on the virtual
function, and any data sent via the virtual function is received by the
representor.

For the mlx5 driver the main use of this functionality is to be able to
use Open vSwitch on the hypervisor in order to manage/control traffic
from/to the virtual functions. Open vSwitch can also work with  DPDK
devices and not just net devices, this series exposes an IB device, which
Mellanox PMD driver uses, which then can be used by Open vSwitch DPDK.

An IB device representor exposes only RAW Ethernet QP capabilities and
the ability to create flow rules to direct traffic to its RX queues. The
state of the IB device (ACTIVE/DOWN etc..) is based on the state of the
corresponding net device representor. No other RDMA/RoCE functionality is
currently supported and no GID table is exposed.
=========

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-02-28 09:54:54 -05:00
commit fb66cb0775
15 changed files with 796 additions and 147 deletions

View File

@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o

View File

@ -0,0 +1,189 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
#include "ib_rep.h"
static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
mlx5_ib_stage_rep_flow_db_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_rep_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
mlx5_ib_stage_umr_res_init,
mlx5_ib_stage_umr_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
mlx5_ib_stage_class_attr_init,
NULL),
};
static int
mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
return 0;
}
static void
mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
{
rep->rep_if[REP_IB].priv = NULL;
}
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
struct mlx5_ib_dev *ibdev;
ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
if (!ibdev)
return -ENOMEM;
ibdev->rep = rep;
ibdev->mdev = dev;
ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
MLX5_CAP_GEN(dev, num_vhca_ports));
if (!__mlx5_ib_add(ibdev, &rep_profile))
return -EINVAL;
rep->rep_if[REP_IB].priv = ibdev;
return 0;
}
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
struct mlx5_ib_dev *dev;
if (!rep->rep_if[REP_IB].priv)
return;
dev = mlx5_ib_rep_to_dev(rep);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
rep->rep_if[REP_IB].priv = NULL;
}
static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
{
return mlx5_ib_rep_to_dev(rep);
}
static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
int vport;
for (vport = 1; vport < total_vfs; vport++) {
struct mlx5_eswitch_rep_if rep_if = {};
rep_if.load = mlx5_ib_vport_rep_load;
rep_if.unload = mlx5_ib_vport_rep_unload;
rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
}
}
static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
int vport;
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
}
void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
struct mlx5_eswitch_rep_if rep_if = {};
rep_if.load = mlx5_ib_nic_rep_load;
rep_if.unload = mlx5_ib_nic_rep_unload;
rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
rep_if.priv = dev;
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
mlx5_ib_rep_register_vf_vports(dev);
}
void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
}
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
return mlx5_eswitch_mode(esw);
}
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
int vport_index)
{
return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
int vport_index)
{
return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
}
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
{
return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
}
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
{
return mlx5_eswitch_vport_rep(esw, vport);
}
int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
struct mlx5_flow_handle *flow_rule;
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
if (!dev->rep)
return 0;
flow_rule =
mlx5_eswitch_add_send_to_vport_rule(esw,
dev->rep->vport,
sq->base.mqp.qpn);
if (IS_ERR(flow_rule))
return PTR_ERR(flow_rule);
sq->flow_rule = flow_rule;
return 0;
}

View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
#ifndef __MLX5_IB_REP_H__
#define __MLX5_IB_REP_H__
#include <linux/mlx5/eswitch.h>
#include "mlx5_ib.h"
#ifdef CONFIG_MLX5_ESWITCH
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
int vport_index);
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
int vport_index);
void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
int vport_index);
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
return SRIOV_NONE;
}
static inline
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
int vport_index)
{
return NULL;
}
static inline
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
{
return NULL;
}
static inline
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
int vport_index)
{
return NULL;
}
static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
return 0;
}
static inline
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
int vport_index)
{
return NULL;
}
#endif
static inline
struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
}
#endif /* __MLX5_IB_REP_H__ */

View File

@ -57,6 +57,7 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
ret = mlx5_ib_query_port(ibdev, port_num, &attr);
ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
if (ibdev->rep) {
struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
struct net_device *rep_ndev;
if (ndev->dev.parent == &mdev->pdev->dev)
roce->netdev = (event == NETDEV_UNREGISTER) ?
rep_ndev = mlx5_ib_get_rep_netdev(esw,
ibdev->rep->vport);
if (rep_ndev == ndev)
roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
} else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
}
write_unlock(&roce->netdev_lock);
break;
@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
int ret;
/* Only link layer == ethernet is valid for representors */
ret = mlx5_query_port_roce(ibdev, port, props);
if (ret || !props)
return ret;
/* We don't support GIDS */
props->gid_tbl_len = 0;
return ret;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
mutex_lock(&dev->flow_db.lock);
mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
mutex_unlock(&dev->flow_db.lock);
mutex_unlock(&dev->flow_db->lock);
kfree(handler);
@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
prio = &dev->flow_db.prios[priority];
prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
prio = &dev->flow_db.sniffer[ft_type];
prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
if (dev->rep) {
void *misc;
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_port,
dev->rep->vport);
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (!dst)
return ERR_PTR(-ENOMEM);
mutex_lock(&dev->flow_db.lock);
mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto destroy_ft;
}
mutex_unlock(&dev->flow_db.lock);
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
@ -3058,7 +3096,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
mutex_unlock(&dev->flow_db.lock);
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
int err;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
return 0;
}
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
dev->flow_db.lag_demux_ft = ft;
dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
if (dev->flow_db.lag_demux_ft) {
mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
dev->flow_db.lag_demux_ft = NULL;
if (dev->flow_db->lag_demux_ft) {
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
err = mlx5_add_netdev_notifier(dev, port_num);
if (err)
return err;
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
goto err_unregister_netdevice_notifier;
return err;
}
err = mlx5_eth_lag_init(dev);
@ -3868,8 +3921,6 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
err_unregister_netdevice_notifier:
mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->port);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
@ -4585,7 +4635,38 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
return -ENOMEM;
}
static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
{
dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
if (!dev->flow_db)
return -ENOMEM;
mutex_init(&dev->flow_db->lock);
return 0;
}
int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_dev *nic_dev;
nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
if (!nic_dev)
return -EINVAL;
dev->flow_db = nic_dev->flow_db;
return 0;
}
static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
{
kfree(dev->flow_db);
}
int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
{
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.query_port = mlx5_ib_query_port;
return 0;
}
int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
{
dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
dev->ib_dev.query_port = mlx5_ib_rep_query_port;
return 0;
}
static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
u8 port_num)
{
int i;
for (i = 0; i < dev->num_ports; i++) {
dev->roce[i].dev = dev;
dev->roce[i].native_port_num = i + 1;
dev->roce[i].last_port_state = IB_PORT_DOWN;
}
dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
return mlx5_add_netdev_notifier(dev, port_num);
}
static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
{
u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
mlx5_remove_netdev_notifier(dev, port_num);
}
int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
int err = 0;
u8 port_num;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET)
err = mlx5_ib_stage_common_roce_init(dev, port_num);
return err;
}
void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_stage_common_roce_cleanup(dev);
}
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
int port_type_cap;
u8 port_num;
int err;
int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
for (i = 0; i < dev->num_ports; i++) {
dev->roce[i].dev = dev;
dev->roce[i].native_port_num = i + 1;
dev->roce[i].last_port_state = IB_PORT_DOWN;
}
dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
err = mlx5_ib_stage_common_roce_init(dev, port_num);
if (err)
return err;
err = mlx5_enable_eth(dev, port_num);
if (err)
goto cleanup;
}
return 0;
cleanup:
mlx5_ib_stage_common_roce_cleanup(dev);
return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
mlx5_remove_netdev_notifier(dev, port_num);
mlx5_ib_stage_common_roce_cleanup(dev);
}
}
static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
return 0;
}
static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
return err;
}
static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
return 0;
}
static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
{
mlx5_ib_register_vport_reps(dev);
return 0;
}
static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_unregister_vport_reps(dev);
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
{
/* Number of stages to cleanup */
while (stage) {
@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
const struct mlx5_ib_profile *profile)
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile)
{
struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
if (!dev)
return NULL;
dev->mdev = mdev;
dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports));
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
mlx5_ib_stage_flow_db_init,
mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
NULL),
};
static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
mlx5_ib_stage_flow_db_init,
mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_rep_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_UAR,
mlx5_ib_stage_uar_init,
mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
mlx5_ib_stage_umr_res_init,
mlx5_ib_stage_umr_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
mlx5_ib_stage_class_attr_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
mlx5_ib_stage_rep_reg_init,
mlx5_ib_stage_rep_reg_cleanup),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
struct mlx5_ib_dev *dev;
int port_type_cap;
printk_once(KERN_INFO "%s", mlx5_version);
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
return mlx5_ib_add_slave_port(mdev, port_num);
}
return __mlx5_ib_add(mdev, &pf_profile);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
if (!dev)
return NULL;
dev->mdev = mdev;
dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports));
if (MLX5_VPORT_MANAGER(mdev) &&
mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
return __mlx5_ib_add(dev, &nic_rep_profile);
}
return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)

View File

@ -343,6 +343,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
@ -743,6 +746,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UMR_RESOURCES,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@ -797,7 +801,7 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
struct mlx5_ib_flow_db flow_db;
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
@ -807,6 +811,7 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
/* Needed for rep profile */
int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage);
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile);
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,

View File

@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
if (!mlx5_debugfs_root)
if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
struct mlx5_cache_ent *ent;
int i;
if (!mlx5_debugfs_root)
if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
!dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else

View File

@ -36,6 +36,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
if (sq->flow_rule)
mlx5_del_flow_rules(sq->flow_rule);
}
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (err)
goto err_umem;
err = create_flow_rule_vport_sq(dev, sq);
if (err)
goto err_flow;
return 0;
err_flow:
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
@ -1157,6 +1172,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
if (dev->rep)
MLX5_SET(tirc, tirc, self_lb_block,
MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
if (dev->rep)
MLX5_SET(tirc, tirc, self_lb_block,
MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)

View File

@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
{
mutex_lock(&mlx5_intf_mutex);
mlx5_remove_dev_by_protocol(mdev, protocol);
mlx5_add_dev_by_protocol(mdev, protocol);
mutex_unlock(&mlx5_intf_mutex);
}
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;

View File

@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
kfree(ppriv); /* mlx5e_rep_priv */
}
static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
{
struct mlx5e_rep_priv *rpriv;
rpriv = mlx5e_rep_to_rep_priv(rep);
return rpriv->netdev;
}
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/

View File

@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
if (mode == SRIOV_LEGACY)
if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
else
} else {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
err = esw_offloads_init(esw, nvfs + 1);
}
if (err)
goto abort;
@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
if (mode == SRIOV_OFFLOADS)
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
int old_mode;
int nvports;
int i;
@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
old_mode = esw->mode;
esw->mode = SRIOV_NONE;
if (old_mode == SRIOV_OFFLOADS)
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@ -2175,3 +2188,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
kvfree(out);
return err;
}
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
{
return esw->mode;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);

View File

@ -37,19 +37,9 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
enum {
SRIOV_NONE,
SRIOV_LEGACY,
SRIOV_OFFLOADS
};
enum {
REP_ETH,
NUM_REP_TYPES,
};
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
struct mlx5_eswitch_rep;
struct mlx5_eswitch_rep_if {
int (*load)(struct mlx5_core_dev *dev,
struct mlx5_eswitch_rep *rep);
void (*unload)(struct mlx5_eswitch_rep *rep);
void *priv;
bool valid;
};
struct mlx5_eswitch_rep {
struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
u16 vport;
u8 hw_id[ETH_ALEN];
u16 vlan;
u32 vlan_refcount;
};
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
int vport_index,
struct mlx5_eswitch_rep_if *rep_if,
u8 rep_type);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index,
u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,

View File

@ -338,6 +338,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
kvfree(spec);
return flow_rule;
}
EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
int err = 0;
u8 *dmac_c;
u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
goto out;
}
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
outer_headers.dmac_47_16);
dmac_c[0] = 0x01;
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
esw->fdb_table.offloads.miss_rule = flow_rule;
esw->fdb_table.offloads.miss_rule_uni = flow_rule;
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
outer_headers.dmac_47_16);
dmac_v[0] = 0x01;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
goto out;
}
esw->fdb_table.offloads.miss_rule_multi = flow_rule;
out:
kvfree(spec);
return err;
@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
}
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto fast_fdb_err;
table_size = nvports + MAX_PF_SQ + 1;
table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
ix = nvports + MAX_PF_SQ;
ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create miss group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_OUTER_HEADERS);
match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
match_criteria);
dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
outer_headers.dmac_47_16);
dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
/* disable PF RoCE so missed packets don't go through RoCE steering */
mlx5_dev_list_lock();
mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_dev_list_unlock();
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
goto create_fdb_err;
return err;
err = esw_create_offloads_table(esw);
if (err)
@ -821,12 +854,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
create_fdb_err:
/* enable back PF RoCE */
mlx5_dev_list_lock();
mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_dev_list_unlock();
return err;
}
@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
}
/* enable back PF RoCE */
mlx5_dev_list_lock();
mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_dev_list_unlock();
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
rep->rep_if[rep_type].valid = false;
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
int vport,
u8 rep_type)
{
struct mlx5_esw_offload *offloads = &esw->offloads;
struct mlx5_eswitch_rep *rep;
if (vport == FDB_UPLINK_VPORT)
vport = UPLINK_REP_INDEX;
rep = &offloads->vport_reps[vport];
if (rep->rep_if[rep_type].valid &&
rep->rep_if[rep_type].get_proto_dev)
return rep->rep_if[rep_type].get_proto_dev(rep);
return NULL;
}
EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
{
return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
}
EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
int vport)
{
return &esw->offloads.vport_reps[vport];
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);

View File

@ -43,12 +43,6 @@
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
#define MLX5_VPORT_MANAGER(mdev) \
(MLX5_CAP_GEN(mdev, vport_group_manager) && \
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
mlx5_core_is_pf(mdev))
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */

View File

@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
#define MLX5_VPORT_MANAGER(mdev) \
(MLX5_CAP_GEN(mdev, vport_group_manager) && \
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
mlx5_core_is_pf(mdev))
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {

View File

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
#ifndef _MLX5_ESWITCH_
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
enum {
SRIOV_NONE,
SRIOV_LEGACY,
SRIOV_OFFLOADS
};
enum {
REP_ETH,
REP_IB,
NUM_REP_TYPES,
};
struct mlx5_eswitch_rep;
struct mlx5_eswitch_rep_if {
int (*load)(struct mlx5_core_dev *dev,
struct mlx5_eswitch_rep *rep);
void (*unload)(struct mlx5_eswitch_rep *rep);
void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
void *priv;
bool valid;
};
struct mlx5_eswitch_rep {
struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
u16 vport;
u8 hw_id[ETH_ALEN];
u16 vlan;
u32 vlan_refcount;
};
void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
int vport_index,
struct mlx5_eswitch_rep_if *rep_if,
u8 rep_type);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index,
u8 rep_type);
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
int vport,
u8 rep_type);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
int vport);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
int vport, u32 sqn);
#endif