IB/mlx4: Separate tunnel and wire bufs parameters

Using CX-3 in virtualized mode, MAD packets are proxied through the PF
driver. The feed is N tunnel QPs, and what is received from the VFs is
multiplexed out on the wire QP. Since this is a many-to-one scenario, it
is better to have separate initialization parameters for the two usages.

The number of wire and tunnel bufs are yanked up to 2K and 512
respectively. With this set of parameters, a system consisting of eight
physical servers, each with eight VMs and 14 I/O servers (BM), can run
switch fail-over without seeing:

mlx4_ib_demux_mad: failed sending GSI to slave 3 via tunnel qp (-11)

or

mlx4_ib_multiplex_mad: failed sending GSI to wire on behalf of slave 2 (-11)

Fixes: 3cf69cc8db ("IB/mlx4: Add CM paravirtualization")
Link: https://lore.kernel.org/r/20200803061941.1139994-4-haakon.bugge@oracle.com
Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Håkon Bugge 2020-08-03 08:19:38 +02:00 committed by Jason Gunthorpe
parent e7d087fce6
commit 0ae207fb91
2 changed files with 26 additions and 21 deletions

View File

@ -1391,10 +1391,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
(MLX4_NUM_TUNNEL_BUFS - 1))
(MLX4_NUM_WIRE_BUFS - 1))
ret = -EAGAIN;
else
wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
spin_unlock(&sqp->tx_lock);
if (ret)
goto out;
@ -1590,19 +1590,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
tun_qp = &ctx->qp[qp_type];
tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
tun_qp->ring = kcalloc(nmbr_bufs,
sizeof(struct mlx4_ib_buf),
GFP_KERNEL);
if (!tun_qp->ring)
return -ENOMEM;
tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
tun_qp->tx_ring = kcalloc(nmbr_bufs,
sizeof (struct mlx4_ib_tun_tx_buf),
GFP_KERNEL);
if (!tun_qp->tx_ring) {
@ -1619,7 +1620,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
for (i = 0; i < nmbr_bufs; i++) {
tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
if (!tun_qp->ring[i].addr)
goto err;
@ -1633,7 +1634,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
for (i = 0; i < nmbr_bufs; i++) {
tun_qp->tx_ring[i].buf.addr =
kmalloc(tx_buf_size, GFP_KERNEL);
if (!tun_qp->tx_ring[i].buf.addr)
@ -1664,7 +1665,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
i = MLX4_NUM_TUNNEL_BUFS;
i = nmbr_bufs;
err:
while (i > 0) {
--i;
@ -1685,6 +1686,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return;
@ -1699,13 +1701,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
@ -1785,6 +1787,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
struct ib_qp_attr attr;
int qp_attr_mask_INIT;
const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
@ -1795,8 +1798,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
qp_init_attr.init_attr.send_cq = ctx->cq;
qp_init_attr.init_attr.recv_cq = ctx->cq;
qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_send_sge = 1;
qp_init_attr.init_attr.cap.max_recv_sge = 1;
if (create_tun) {
@ -1858,7 +1861,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
goto err_qp;
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
for (i = 0; i < nmbr_bufs; i++) {
ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
if (ret) {
pr_err(" mlx4_ib_post_pv_buf error"
@ -1894,8 +1897,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
switch (wc.opcode) {
case IB_WC_SEND:
kfree(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
(MLX4_NUM_WIRE_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@ -1904,13 +1907,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
case IB_WC_RECV:
mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
(MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
grh = &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
(MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)))
(MLX4_NUM_WIRE_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", wc.wr_id);
break;
@ -1923,8 +1926,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
kfree(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
(MLX4_NUM_WIRE_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@ -1964,6 +1967,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
{
int ret, cq_size;
struct ib_cq_init_attr cq_attr = {};
const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (ctx->state != DEMUX_PV_STATE_DOWN)
return -EEXIST;
@ -1988,7 +1992,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
goto err_out_qp0;
}
cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
cq_size = 2 * nmbr_bufs;
if (ctx->has_smi)
cq_size *= 2;

View File

@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags {
};
enum {
MLX4_NUM_TUNNEL_BUFS = 256,
MLX4_NUM_TUNNEL_BUFS = 512,
MLX4_NUM_WIRE_BUFS = 2048,
};
struct mlx4_ib_tunnel_header {