IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV

MCG paravirtualization support includes:
- Creating multicast groups by VFs, and keeping accounting of them
- Leaving multicast groups by VFs
- Updating SM only with real changes in the overall picture of MCGs status
- Creation of MGID=0 groups (let SM choose MGID)

Note that the MCG module maintains its own internal MCG object
reference counts.  The reason for this is that the IB core is used to
track only the multicast groups joins generated by the PF it runs
over.  The PF IB core layer is unaware of slaves, so it cannot be used
to keep track of MCG joins they generate.

Signed-off-by: Oren Duer <oren@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Oren Duer 2012-08-03 08:40:46 +00:00 committed by Roland Dreier
parent 0a9a01884d
commit b9c5d6a643
5 changed files with 1285 additions and 12 deletions

View File

@ -1,3 +1,3 @@
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o

View File

@ -75,6 +75,14 @@ struct mlx4_rcv_tunnel_mad {
struct ib_mad mad;
} __packed;
static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
{
return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
cpu_to_be64(0xff00000000000000LL);
}
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
@ -209,8 +217,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
pinfo->neighbormtu_mastersmsl & 0xf);
if (pinfo->clientrereg_resv_subnetto & 0x80)
mlx4_ib_dispatch_event(dev, port_num,
IB_EVENT_CLIENT_REREGISTER);
handle_client_rereg_event(dev, port_num);
if (prev_lid != lid)
mlx4_ib_dispatch_event(dev, port_num,
@ -308,7 +315,17 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad)
{
return 0;
int ret = 0;
/* dispatch to different sa handlers */
switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
case IB_SA_ATTR_MC_MEMBER_REC:
ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
break;
default:
break;
}
return ret;
}
int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
@ -768,6 +785,16 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
}
static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
{
/* re-configure the mcg's */
if (mlx4_is_master(dev->dev)) {
if (!dev->sriov.is_going_down)
mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
}
mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
}
void handle_port_mgmt_change_event(struct work_struct *work)
{
struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
@ -797,8 +824,7 @@ void handle_port_mgmt_change_event(struct work_struct *work)
mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
mlx4_ib_dispatch_event(dev, port,
IB_EVENT_CLIENT_REREGISTER);
handle_client_rereg_event(dev, port);
break;
case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
@ -868,7 +894,17 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
int slave, struct ib_sa_mad *sa_mad)
{
return 0;
int ret = 0;
/* dispatch to different sa handlers */
switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
case IB_SA_ATTR_MC_MEMBER_REC:
ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
break;
default:
break;
}
return ret;
}
static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
@ -1590,6 +1626,7 @@ static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
int ret = 0;
if (!do_init) {
clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
/* for master, destroy real sqp resources */
if (slave == mlx4_master_func_num(dev->dev))
destroy_pv_resources(dev, slave, port,
@ -1643,10 +1680,16 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
if (ret) {
ret = -ENOMEM;
goto err_wq;
goto err_mcg;
}
}
ret = mlx4_ib_mcg_port_init(ctx);
if (ret) {
pr_err("Failed initializing mcg para-virt (%d)\n", ret);
goto err_mcg;
}
snprintf(name, sizeof name, "mlx4_ibt%d", port);
ctx->wq = create_singlethread_workqueue(name);
if (!ctx->wq) {
@ -1670,6 +1713,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->wq = NULL;
err_wq:
mlx4_ib_mcg_port_cleanup(ctx, 1);
err_mcg:
for (i = 0; i < dev->dev->caps.sqp_demux; i++)
free_pv_object(dev, i, port);
kfree(ctx->tun);
@ -1705,6 +1750,7 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
int i;
if (ctx) {
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
mlx4_ib_mcg_port_cleanup(ctx, 1);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
if (!ctx->tun[i])
continue;

View File

@ -1628,18 +1628,28 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
err = mlx4_ib_mcg_init();
if (err)
goto clean_wq;
err = mlx4_register_interface(&mlx4_ib_interface);
if (err) {
destroy_workqueue(wq);
return err;
}
if (err)
goto clean_mcg;
return 0;
clean_mcg:
mlx4_ib_mcg_destroy();
clean_wq:
destroy_workqueue(wq);
return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
}

File diff suppressed because it is too large Load Diff

View File

@ -37,9 +37,11 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_mad.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@ -329,7 +331,14 @@ struct mlx4_ib_demux_ctx {
__be64 subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
struct mutex mcg_table_lock;
struct rb_root mcg_table;
struct list_head mcg_mgid0_list;
struct workqueue_struct *mcg_wq;
struct mlx4_ib_demux_pv_ctx **tun;
atomic_t tid;
int flushing; /* flushing the work queue */
};
struct mlx4_ib_sriov {
@ -553,6 +562,19 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
int mlx4_ib_mcg_init(void);
void mlx4_ib_mcg_destroy(void);
int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad);
int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *mad);
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
@ -561,4 +583,12 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
#endif /* MLX4_IB_H */