2010-04-22 06:30:06 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* This software is available to you under a choice of one of two
|
|
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
|
|
* General Public License (GPL) Version 2, available from the file
|
|
|
|
* COPYING in the main directory of this source tree, or the
|
|
|
|
* OpenIB.org BSD license below:
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or
|
|
|
|
* without modification, are permitted provided that the following
|
|
|
|
* conditions are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer.
|
|
|
|
*
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
2011-05-28 03:35:46 +08:00
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
#include "iw_cxgb4.h"
|
|
|
|
|
2012-05-18 17:59:28 +08:00
|
|
|
static int db_delay_usecs = 1;
|
|
|
|
module_param(db_delay_usecs, int, 0644);
|
|
|
|
MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
|
|
|
|
|
2011-03-12 06:30:11 +08:00
|
|
|
static int ocqp_support = 1;
|
2010-09-14 00:23:57 +08:00
|
|
|
module_param(ocqp_support, int, 0644);
|
2011-03-12 06:30:11 +08:00
|
|
|
MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
|
2010-09-14 00:23:57 +08:00
|
|
|
|
2013-03-14 13:08:59 +08:00
|
|
|
int db_fc_threshold = 1000;
|
2012-05-18 17:59:30 +08:00
|
|
|
module_param(db_fc_threshold, int, 0644);
|
2013-03-14 13:08:59 +08:00
|
|
|
MODULE_PARM_DESC(db_fc_threshold,
|
|
|
|
"QP count/threshold that triggers"
|
|
|
|
" automatic db flow control mode (default = 1000)");
|
|
|
|
|
|
|
|
int db_coalescing_threshold;
|
|
|
|
module_param(db_coalescing_threshold, int, 0644);
|
|
|
|
MODULE_PARM_DESC(db_coalescing_threshold,
|
|
|
|
"QP count/threshold that triggers"
|
|
|
|
" disabling db coalescing (default = 0)");
|
2012-05-18 17:59:30 +08:00
|
|
|
|
2013-03-14 13:09:01 +08:00
|
|
|
static int max_fr_immd = T4_MAX_FR_IMMD;
|
|
|
|
module_param(max_fr_immd, int, 0644);
|
|
|
|
MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate");
|
|
|
|
|
2014-07-15 00:04:52 +08:00
|
|
|
/*
 * Reserve @ird units from the device-wide IRD budget.
 *
 * dev->avail_ird is checked and decremented under dev->lock.  Returns 0 when
 * the reservation succeeded, or -ENOMEM (after logging a warning against the
 * PCI device) when fewer than @ird units remain; in that case the budget is
 * left untouched.
 */
static int alloc_ird(struct c4iw_dev *dev, u32 ird)
{
	int ret = -ENOMEM;

	spin_lock_irq(&dev->lock);
	if (dev->avail_ird >= ird) {
		dev->avail_ird -= ird;
		ret = 0;
	}
	spin_unlock_irq(&dev->lock);

	if (!ret)
		return 0;

	/* Warn outside the lock so the critical section stays short. */
	dev_warn(&dev->rdev.lldi.pdev->dev,
		 "device IRD resources exhausted\n");
	return ret;
}
|
|
|
|
|
|
|
|
static void free_ird(struct c4iw_dev *dev, int ird)
|
|
|
|
{
|
|
|
|
spin_lock_irq(&dev->lock);
|
|
|
|
dev->avail_ird += ird;
|
|
|
|
spin_unlock_irq(&dev->lock);
|
|
|
|
}
|
|
|
|
|
2010-09-11 00:15:36 +08:00
|
|
|
static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
|
|
|
|
{
|
|
|
|
unsigned long flag;
|
|
|
|
spin_lock_irqsave(&qhp->lock, flag);
|
|
|
|
qhp->attr.state = state;
|
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
|
|
|
}
|
|
|
|
|
2010-09-14 00:23:57 +08:00
|
|
|
static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
|
|
|
|
{
|
|
|
|
c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
|
|
|
|
{
|
|
|
|
dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
|
|
|
|
pci_unmap_addr(sq, mapping));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Free a send queue with the routine matching how it was allocated:
 * on-chip queues (per t4_sq_onchip()) go back to the on-chip pool, all
 * others are released as host coherent memory.
 */
static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
{
	if (!t4_sq_onchip(sq)) {
		dealloc_host_sq(rdev, sq);
		return;
	}

	dealloc_oc_sq(rdev, sq);
}
|
|
|
|
|
|
|
|
static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
|
|
|
|
{
|
2013-03-14 13:08:58 +08:00
|
|
|
if (!ocqp_support || !ocqp_supported(&rdev->lldi))
|
2010-09-14 00:23:57 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
|
|
|
|
if (!sq->dma_addr)
|
|
|
|
return -ENOMEM;
|
|
|
|
sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
|
|
|
|
rdev->lldi.vr->ocq.start;
|
|
|
|
sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
|
|
|
|
rdev->lldi.vr->ocq.start);
|
|
|
|
sq->flags |= T4_SQ_ONCHIP;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
|
|
|
|
{
|
|
|
|
sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
|
|
|
|
&(sq->dma_addr), GFP_KERNEL);
|
|
|
|
if (!sq->queue)
|
|
|
|
return -ENOMEM;
|
|
|
|
sq->phys_addr = virt_to_phys(sq->queue);
|
|
|
|
pci_unmap_addr_set(sq, mapping, sq->dma_addr);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-02 04:13:39 +08:00
|
|
|
/*
 * Allocate backing memory for a send queue.
 *
 * For user queues (@user non-zero) an on-chip queue is attempted first; if
 * that fails for any reason, or for kernel queues, host memory is used
 * instead.  Returns 0 on success or the error from alloc_host_sq().
 */
static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
{
	if (user && !alloc_oc_sq(rdev, sq))
		return 0;

	return alloc_host_sq(rdev, sq);
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
|
|
|
|
struct c4iw_dev_ucontext *uctx)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* uP clears EQ contexts when the connection exits rdma mode,
|
|
|
|
* so no need to post a RESET WR for these EQs.
|
|
|
|
*/
|
|
|
|
dma_free_coherent(&(rdev->lldi.pdev->dev),
|
|
|
|
wq->rq.memsize, wq->rq.queue,
|
2010-06-03 13:37:50 +08:00
|
|
|
dma_unmap_addr(&wq->rq, mapping));
|
2010-09-14 00:23:57 +08:00
|
|
|
dealloc_sq(rdev, &wq->sq);
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
|
|
|
|
kfree(wq->rq.sw_rq);
|
|
|
|
kfree(wq->sq.sw_sq);
|
|
|
|
c4iw_put_qpid(rdev, wq->rq.qid, uctx);
|
|
|
|
c4iw_put_qpid(rdev, wq->sq.qid, uctx);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-09 20:53:12 +08:00
|
|
|
/*
|
|
|
|
* Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL,
|
|
|
|
* then this is a user mapping so compute the page-aligned physical address
|
|
|
|
* for mapping.
|
|
|
|
*/
|
|
|
|
void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
|
|
|
|
enum cxgb4_bar2_qtype qtype,
|
|
|
|
unsigned int *pbar2_qid, u64 *pbar2_pa)
|
|
|
|
{
|
|
|
|
u64 bar2_qoffset;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype,
|
|
|
|
pbar2_pa ? 1 : 0,
|
|
|
|
&bar2_qoffset, pbar2_qid);
|
|
|
|
if (ret)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (pbar2_pa)
|
|
|
|
*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
|
2016-04-05 12:53:48 +08:00
|
|
|
|
|
|
|
if (is_t4(rdev->lldi.adapter_type))
|
|
|
|
return NULL;
|
|
|
|
|
2015-06-09 20:53:12 +08:00
|
|
|
return rdev->bar2_kva + bar2_qoffset;
|
|
|
|
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
|
|
|
|
struct t4_cq *rcq, struct t4_cq *scq,
|
2017-09-27 04:11:36 +08:00
|
|
|
struct c4iw_dev_ucontext *uctx,
|
|
|
|
struct c4iw_wr_wait *wr_waitp)
|
2010-04-22 06:30:06 +08:00
|
|
|
{
|
|
|
|
int user = (uctx != &rdev->uctx);
|
|
|
|
struct fw_ri_res_wr *res_wr;
|
|
|
|
struct fw_ri_res *res;
|
|
|
|
int wr_len;
|
|
|
|
struct sk_buff *skb;
|
2013-03-14 13:09:04 +08:00
|
|
|
int ret = 0;
|
2010-04-22 06:30:06 +08:00
|
|
|
int eqsize;
|
|
|
|
|
|
|
|
wq->sq.qid = c4iw_get_qpid(rdev, uctx);
|
|
|
|
if (!wq->sq.qid)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
wq->rq.qid = c4iw_get_qpid(rdev, uctx);
|
2012-08-20 01:59:40 +08:00
|
|
|
if (!wq->rq.qid) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_sq_qid;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (!user) {
|
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a * b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 05:03:40 +08:00
|
|
|
wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq),
|
|
|
|
GFP_KERNEL);
|
2012-08-20 01:59:40 +08:00
|
|
|
if (!wq->sq.sw_sq) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_rq_qid;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a * b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 05:03:40 +08:00
|
|
|
wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq),
|
|
|
|
GFP_KERNEL);
|
2012-08-20 01:59:40 +08:00
|
|
|
if (!wq->rq.sw_rq) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_sw_sq;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-07-21 23:25:15 +08:00
|
|
|
* RQT must be a power of 2 and at least 16 deep.
|
2010-04-22 06:30:06 +08:00
|
|
|
*/
|
2014-07-21 23:25:15 +08:00
|
|
|
wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
|
2010-04-22 06:30:06 +08:00
|
|
|
wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
|
2012-08-20 01:59:40 +08:00
|
|
|
if (!wq->rq.rqt_hwaddr) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_sw_rq;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2013-04-02 04:13:39 +08:00
|
|
|
ret = alloc_sq(rdev, &wq->sq, user);
|
|
|
|
if (ret)
|
|
|
|
goto free_hwaddr;
|
2010-04-22 06:30:06 +08:00
|
|
|
memset(wq->sq.queue, 0, wq->sq.memsize);
|
2010-06-03 13:37:50 +08:00
|
|
|
dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
|
|
|
|
wq->rq.memsize, &(wq->rq.dma_addr),
|
|
|
|
GFP_KERNEL);
|
2013-03-15 17:42:12 +08:00
|
|
|
if (!wq->rq.queue) {
|
|
|
|
ret = -ENOMEM;
|
2012-08-20 01:59:40 +08:00
|
|
|
goto free_sq;
|
2013-03-15 17:42:12 +08:00
|
|
|
}
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
|
|
|
|
wq->sq.queue,
|
2017-02-10 06:23:51 +08:00
|
|
|
(unsigned long long)virt_to_phys(wq->sq.queue),
|
|
|
|
wq->rq.queue,
|
|
|
|
(unsigned long long)virt_to_phys(wq->rq.queue));
|
2010-04-22 06:30:06 +08:00
|
|
|
memset(wq->rq.queue, 0, wq->rq.memsize);
|
2010-06-03 13:37:50 +08:00
|
|
|
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wq->db = rdev->lldi.db_reg;
|
2014-04-09 22:38:25 +08:00
|
|
|
|
2015-06-09 20:53:12 +08:00
|
|
|
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
|
|
|
|
&wq->sq.bar2_qid,
|
|
|
|
user ? &wq->sq.bar2_pa : NULL);
|
|
|
|
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
|
|
|
|
&wq->rq.bar2_qid,
|
|
|
|
user ? &wq->rq.bar2_pa : NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* User mode must have bar2 access.
|
|
|
|
*/
|
2016-04-05 12:53:48 +08:00
|
|
|
if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
|
2017-02-10 06:23:50 +08:00
|
|
|
pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
|
2015-06-09 20:53:12 +08:00
|
|
|
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
|
|
|
|
goto free_dma;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2015-06-09 20:53:12 +08:00
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
wq->rdev = rdev;
|
|
|
|
wq->rq.msn = 1;
|
|
|
|
|
|
|
|
/* build fw_ri_res_wr */
|
|
|
|
wr_len = sizeof *res_wr + 2 * sizeof *res;
|
|
|
|
|
2010-07-21 10:44:56 +08:00
|
|
|
skb = alloc_skb(wr_len, GFP_KERNEL);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (!skb) {
|
|
|
|
ret = -ENOMEM;
|
2012-08-20 01:59:40 +08:00
|
|
|
goto free_dma;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
|
|
|
|
|
net: introduce __skb_put_[zero, data, u8]
follow Johannes Berg, semantic patch file as below,
@@
identifier p, p2;
expression len;
expression skb;
type t, t2;
@@
(
-p = __skb_put(skb, len);
+p = __skb_put_zero(skb, len);
|
-p = (t)__skb_put(skb, len);
+p = __skb_put_zero(skb, len);
)
... when != p
(
p2 = (t2)p;
-memset(p2, 0, len);
|
-memset(p, 0, len);
)
@@
identifier p;
expression len;
expression skb;
type t;
@@
(
-t p = __skb_put(skb, len);
+t p = __skb_put_zero(skb, len);
)
... when != p
(
-memset(p, 0, len);
)
@@
type t, t2;
identifier p, p2;
expression skb;
@@
t *p;
...
(
-p = __skb_put(skb, sizeof(t));
+p = __skb_put_zero(skb, sizeof(t));
|
-p = (t *)__skb_put(skb, sizeof(t));
+p = __skb_put_zero(skb, sizeof(t));
)
... when != p
(
p2 = (t2)p;
-memset(p2, 0, sizeof(*p));
|
-memset(p, 0, sizeof(*p));
)
@@
expression skb, len;
@@
-memset(__skb_put(skb, len), 0, len);
+__skb_put_zero(skb, len);
@@
expression skb, len, data;
@@
-memcpy(__skb_put(skb, len), data, len);
+__skb_put_data(skb, data, len);
@@
expression SKB, C, S;
typedef u8;
identifier fn = {__skb_put};
fresh identifier fn2 = fn ## "_u8";
@@
- *(u8 *)fn(SKB, S) = C;
+ fn2(SKB, C);
Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-18 22:48:17 +08:00
|
|
|
res_wr = __skb_put_zero(skb, wr_len);
|
2010-04-22 06:30:06 +08:00
|
|
|
res_wr->op_nres = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_OP_V(FW_RI_RES_WR) |
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_NRES_V(2) |
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_COMPL_F);
|
2010-04-22 06:30:06 +08:00
|
|
|
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
|
2017-09-27 04:11:36 +08:00
|
|
|
res_wr->cookie = (uintptr_t)wr_waitp;
|
2010-04-22 06:30:06 +08:00
|
|
|
res = res_wr->res;
|
|
|
|
res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
|
|
|
|
res->u.sqrq.op = FW_RI_RES_OP_WRITE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* eqsize is the number of 64B entries plus the status page size.
|
|
|
|
*/
|
2014-07-15 00:04:51 +08:00
|
|
|
eqsize = wq->sq.size * T4_SQ_NUM_SLOTS +
|
|
|
|
rdev->hw_queue.t4_eq_status_entries;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */
|
|
|
|
FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */
|
|
|
|
FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
|
|
|
|
(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) |
|
|
|
|
FW_RI_RES_WR_IQID_V(scq->cqid));
|
2010-04-22 06:30:06 +08:00
|
|
|
res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_DCAEN_V(0) |
|
|
|
|
FW_RI_RES_WR_DCACPU_V(0) |
|
|
|
|
FW_RI_RES_WR_FBMIN_V(2) |
|
2016-12-16 00:09:35 +08:00
|
|
|
(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) :
|
|
|
|
FW_RI_RES_WR_FBMAX_V(3)) |
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
|
|
|
|
FW_RI_RES_WR_CIDXFTHRESH_V(0) |
|
|
|
|
FW_RI_RES_WR_EQSIZE_V(eqsize));
|
2010-04-22 06:30:06 +08:00
|
|
|
res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
|
|
|
|
res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
|
|
|
|
res++;
|
|
|
|
res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
|
|
|
|
res->u.sqrq.op = FW_RI_RES_OP_WRITE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* eqsize is the number of 64B entries plus the status page size.
|
|
|
|
*/
|
2014-07-15 00:04:51 +08:00
|
|
|
eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
|
|
|
|
rdev->hw_queue.t4_eq_status_entries;
|
2010-04-22 06:30:06 +08:00
|
|
|
res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */
|
|
|
|
FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */
|
|
|
|
FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
|
|
|
|
FW_RI_RES_WR_IQID_V(rcq->cqid));
|
2010-04-22 06:30:06 +08:00
|
|
|
res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_DCAEN_V(0) |
|
|
|
|
FW_RI_RES_WR_DCACPU_V(0) |
|
|
|
|
FW_RI_RES_WR_FBMIN_V(2) |
|
2016-12-16 00:09:35 +08:00
|
|
|
FW_RI_RES_WR_FBMAX_V(3) |
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
|
|
|
|
FW_RI_RES_WR_CIDXFTHRESH_V(0) |
|
|
|
|
FW_RI_RES_WR_EQSIZE_V(eqsize));
|
2010-04-22 06:30:06 +08:00
|
|
|
res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
|
|
|
|
res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
|
|
|
|
|
2017-09-27 04:11:36 +08:00
|
|
|
c4iw_init_wr_wait(wr_waitp);
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (ret)
|
2012-08-20 01:59:40 +08:00
|
|
|
goto free_dma;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
|
|
|
|
wq->sq.qid, wq->rq.qid, wq->db,
|
2017-02-10 06:23:51 +08:00
|
|
|
wq->sq.bar2_va, wq->rq.bar2_va);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
return 0;
|
2012-08-20 01:59:40 +08:00
|
|
|
free_dma:
|
2010-04-22 06:30:06 +08:00
|
|
|
dma_free_coherent(&(rdev->lldi.pdev->dev),
|
|
|
|
wq->rq.memsize, wq->rq.queue,
|
2010-06-03 13:37:50 +08:00
|
|
|
dma_unmap_addr(&wq->rq, mapping));
|
2012-08-20 01:59:40 +08:00
|
|
|
free_sq:
|
2010-09-14 00:23:57 +08:00
|
|
|
dealloc_sq(rdev, &wq->sq);
|
2012-08-20 01:59:40 +08:00
|
|
|
free_hwaddr:
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
|
2012-08-20 01:59:40 +08:00
|
|
|
free_sw_rq:
|
2010-04-22 06:30:06 +08:00
|
|
|
kfree(wq->rq.sw_rq);
|
2012-08-20 01:59:40 +08:00
|
|
|
free_sw_sq:
|
2010-04-22 06:30:06 +08:00
|
|
|
kfree(wq->sq.sw_sq);
|
2012-08-20 01:59:40 +08:00
|
|
|
free_rq_qid:
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_put_qpid(rdev, wq->rq.qid, uctx);
|
2012-08-20 01:59:40 +08:00
|
|
|
free_sq_qid:
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_put_qpid(rdev, wq->sq.qid, uctx);
|
2012-08-20 01:59:40 +08:00
|
|
|
return ret;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
2010-06-11 03:03:00 +08:00
|
|
|
/*
 * build_immd - copy a send WR's gather list into the SQ as immediate data.
 * @sq:    send queue that holds the WQE; used to wrap at the end of the ring
 * @immdp: immediate-data header inside the WQE; payload starts at immdp->data
 * @wr:    work request whose sg_list entries are copied
 * @max:   maximum total number of payload bytes accepted
 * @plenp: out parameter, set to the total payload length on success
 *
 * Each sg_list[i].addr is used directly as a CPU pointer and its bytes are
 * copied into the queue, wrapping from &sq->queue[sq->size] back to
 * sq->queue when the end of the ring is reached.  The payload is then
 * zero-padded so that header plus payload ends on a 16-byte boundary.
 *
 * Returns 0 on success or -EMSGSIZE if the gather list exceeds @max bytes.
 */
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
		      struct ib_send_wr *wr, int max, u32 *plenp)
{
	u8 *dstp, *srcp;
	u32 plen = 0;
	int i;
	int rem, len;

	dstp = (u8 *)immdp->data;
	for (i = 0; i < wr->num_sge; i++) {
		/*
		 * plen never exceeds max at this point, so max - plen cannot
		 * underflow.  Testing the SGE length against the remaining
		 * room (instead of testing plen + length) avoids a u32 wrap
		 * that would let a huge length slip past the limit and drive
		 * an oversized memcpy below.
		 */
		if (wr->sg_list[i].length > (u32)max - plen)
			return -EMSGSIZE;
		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
		plen += wr->sg_list[i].length;
		rem = wr->sg_list[i].length;
		while (rem) {
			/* Wrap to the start of the ring before copying. */
			if (dstp == (u8 *)&sq->queue[sq->size])
				dstp = (u8 *)sq->queue;
			/* Copy no further than the end of the ring. */
			if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
				len = rem;
			else
				len = (u8 *)&sq->queue[sq->size] - dstp;
			memcpy(dstp, srcp, len);
			dstp += len;
			srcp += len;
			rem -= len;
		}
	}

	/* Zero the bytes needed to round header + payload up to 16. */
	len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
	if (len)
		memset(dstp, 0, len);

	immdp->op = FW_RI_DATA_IMMD;
	immdp->r1 = 0;
	immdp->r2 = 0;
	immdp->immdlen = cpu_to_be32(plen);
	*plenp = plen;
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * build_isgl - write a scatter/gather list into a work queue.
 * @queue_start: first flit of the ring, used when wrapping
 * @queue_end:   one past the last flit of the ring
 * @isglp:       ISGL header; entries are written starting at isglp->sge
 * @sg_list:     SGEs to encode
 * @num_sge:     number of entries in @sg_list
 * @plenp:       optional out parameter for the summed SGE lengths
 *
 * Every SGE produces two 64-bit big-endian flits: (lkey << 32 | length)
 * followed by the address.  A zero flit is written after the last entry.
 *
 * Returns 0 on success or -EMSGSIZE if the summed length wraps a u32.
 */
static int build_isgl(__be64 *queue_start, __be64 *queue_end,
		      struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
		      int num_sge, u32 *plenp)
{
	__be64 *flit = (__be64 *)isglp->sge;
	u32 total = 0;
	int idx;

	for (idx = 0; idx < num_sge; idx++) {
		struct ib_sge *sge = &sg_list[idx];

		/* Reject a gather list whose total length overflows. */
		if ((total + sge->length) < total)
			return -EMSGSIZE;
		total += sge->length;

		*flit = cpu_to_be64(((u64)sge->lkey << 32) | sge->length);
		flit++;
		if (flit == queue_end)
			flit = queue_start;

		*flit = cpu_to_be64(sge->addr);
		flit++;
		if (flit == queue_end)
			flit = queue_start;
	}

	/* Terminating zero flit after the last SGE. */
	*flit = (__force __be64)0;

	isglp->op = FW_RI_DATA_ISGL;
	isglp->r1 = 0;
	isglp->r2 = 0;
	isglp->nsge = cpu_to_be16(num_sge);

	if (plenp)
		*plenp = total;
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * build_rdma_send - fill in a SEND work request.
 * @sq:    send queue that holds @wqe
 * @wqe:   work queue entry to populate
 * @wr:    IB_WR_SEND or IB_WR_SEND_WITH_INV work request
 * @len16: out parameter, WQE size in 16-byte units
 *
 * The payload is encoded as immediate data when IB_SEND_INLINE is set, as
 * an ISGL otherwise, and as an empty immediate header when there are no
 * SGEs.
 *
 * Returns 0 on success, -EINVAL for too many SGEs or an unsupported opcode,
 * or the error returned by build_immd()/build_isgl().
 */
static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
			   struct ib_send_wr *wr, u8 *len16)
{
	u32 plen = 0;
	int sendop;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;

	/* Pick the firmware send opcode and the STag to invalidate. */
	switch (wr->opcode) {
	case IB_WR_SEND:
		if (wr->send_flags & IB_SEND_SOLICITED)
			sendop = FW_RI_SEND_WITH_SE;
		else
			sendop = FW_RI_SEND;
		wqe->send.stag_inv = 0;
		break;
	case IB_WR_SEND_WITH_INV:
		if (wr->send_flags & IB_SEND_SOLICITED)
			sendop = FW_RI_SEND_WITH_SE_INV;
		else
			sendop = FW_RI_SEND_WITH_INV;
		wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
		break;
	default:
		return -EINVAL;
	}
	wqe->send.sendop_pkd = cpu_to_be32(FW_RI_SEND_WR_SENDOP_V(sendop));
	wqe->send.r3 = 0;
	wqe->send.r4 = 0;

	if (!wr->num_sge) {
		/* No payload: emit an empty immediate-data header. */
		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->send.u.immd_src[0].r1 = 0;
		wqe->send.u.immd_src[0].r2 = 0;
		wqe->send.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
	} else if (wr->send_flags & IB_SEND_INLINE) {
		ret = build_immd(sq, wqe->send.u.immd_src, wr,
				 T4_MAX_SEND_INLINE, &plen);
		if (ret)
			return ret;
		size = sizeof wqe->send + sizeof(struct fw_ri_immd) + plen;
	} else {
		ret = build_isgl((__be64 *)sq->queue,
				 (__be64 *)&sq->queue[sq->size],
				 wqe->send.u.isgl_src,
				 wr->sg_list, wr->num_sge, &plen);
		if (ret)
			return ret;
		size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
		       wr->num_sge * sizeof(struct fw_ri_sge);
	}

	*len16 = DIV_ROUND_UP(size, 16);
	wqe->send.plen = cpu_to_be32(plen);
	return 0;
}
|
|
|
|
|
2010-06-11 03:03:00 +08:00
|
|
|
/*
 * build_rdma_write - fill in an RDMA WRITE work request.
 * @sq:    send queue that holds @wqe
 * @wqe:   work queue entry to populate
 * @wr:    RDMA write work request (remote key/address read via rdma_wr())
 * @len16: out parameter, WQE size in 16-byte units
 *
 * The payload is encoded as immediate data when IB_SEND_INLINE is set, as
 * an ISGL otherwise, and as an empty immediate header when there are no
 * SGEs.
 *
 * Returns 0 on success, -EINVAL for too many SGEs, or the error returned by
 * build_immd()/build_isgl().
 */
static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
			    struct ib_send_wr *wr, u8 *len16)
{
	u32 plen = 0;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;

	/* Remote sink of the write. */
	wqe->write.r2 = 0;
	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);

	if (!wr->num_sge) {
		/* No payload: emit an empty immediate-data header. */
		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->write.u.immd_src[0].r1 = 0;
		wqe->write.u.immd_src[0].r2 = 0;
		wqe->write.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
	} else if (wr->send_flags & IB_SEND_INLINE) {
		ret = build_immd(sq, wqe->write.u.immd_src, wr,
				 T4_MAX_WRITE_INLINE, &plen);
		if (ret)
			return ret;
		size = sizeof wqe->write + sizeof(struct fw_ri_immd) + plen;
	} else {
		ret = build_isgl((__be64 *)sq->queue,
				 (__be64 *)&sq->queue[sq->size],
				 wqe->write.u.isgl_src,
				 wr->sg_list, wr->num_sge, &plen);
		if (ret)
			return ret;
		size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
		       wr->num_sge * sizeof(struct fw_ri_sge);
	}

	*len16 = DIV_ROUND_UP(size, 16);
	wqe->write.plen = cpu_to_be32(plen);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * build_rdma_read - fill in an RDMA READ request.
 * @wqe:   work queue entry to populate
 * @wr:    RDMA read work request; at most one SGE is accepted
 * @len16: out parameter, WQE size in 16-byte units
 *
 * With no SGE, or a zero-length SGE, the request is built with both STags
 * set to 2 and all addresses and the length set to zero.
 *
 * Returns 0 on success or -EINVAL if more than one SGE is supplied.
 */
static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
{
	if (wr->num_sge > 1)
		return -EINVAL;

	if (!wr->num_sge || !wr->sg_list[0].length) {
		/* Zero-length read. */
		wqe->read.stag_src = cpu_to_be32(2);
		wqe->read.to_src_hi = 0;
		wqe->read.to_src_lo = 0;
		wqe->read.stag_sink = cpu_to_be32(2);
		wqe->read.plen = 0;
		wqe->read.to_sink_hi = 0;
		wqe->read.to_sink_lo = 0;
	} else {
		/* Remote source, split into high and low 32-bit halves. */
		wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
		wqe->read.to_src_hi =
			cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32));
		wqe->read.to_src_lo =
			cpu_to_be32((u32)rdma_wr(wr)->remote_addr);

		/* Local sink, taken from the single SGE. */
		wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
		wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
		wqe->read.to_sink_hi =
			cpu_to_be32((u32)(wr->sg_list[0].addr >> 32));
		wqe->read.to_sink_lo =
			cpu_to_be32((u32)(wr->sg_list[0].addr));
	}

	wqe->read.r2 = 0;
	wqe->read.r5 = 0;
	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * build_rdma_recv - fill in a receive work request.
 * @qhp:   queue pair whose receive queue holds @wqe
 * @wqe:   receive work queue entry to populate
 * @wr:    receive work request supplying the SGEs
 * @len16: out parameter, WQE size in 16-byte units (written on success only)
 *
 * Returns 0 on success or the error returned by build_isgl().
 */
static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
			   struct ib_recv_wr *wr, u8 *len16)
{
	int ret = build_isgl((__be64 *)qhp->wq.rq.queue,
			     (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
			     &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);

	if (!ret)
		*len16 = DIV_ROUND_UP(sizeof wqe->recv +
				      wr->num_sge * sizeof(struct fw_ri_sge),
				      16);
	return ret;
}
|
|
|
|
|
2016-09-16 22:54:52 +08:00
|
|
|
static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
|
|
|
|
struct ib_reg_wr *wr, struct c4iw_mr *mhp,
|
|
|
|
u8 *len16)
|
|
|
|
{
|
|
|
|
__be64 *p = (__be64 *)fr->pbl;
|
|
|
|
|
|
|
|
fr->r2 = cpu_to_be32(0);
|
|
|
|
fr->stag = cpu_to_be32(mhp->ibmr.rkey);
|
|
|
|
|
|
|
|
fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
|
|
|
|
FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) |
|
|
|
|
FW_RI_TPTE_STAGSTATE_V(1) |
|
|
|
|
FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) |
|
|
|
|
FW_RI_TPTE_PDID_V(mhp->attr.pdid));
|
|
|
|
fr->tpte.locread_to_qpid = cpu_to_be32(
|
|
|
|
FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) |
|
|
|
|
FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) |
|
|
|
|
FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12));
|
|
|
|
fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V(
|
|
|
|
PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3));
|
|
|
|
fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0);
|
|
|
|
fr->tpte.len_hi = cpu_to_be32(0);
|
|
|
|
fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length);
|
|
|
|
fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
|
|
|
|
fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
|
|
|
|
|
|
|
|
p[0] = cpu_to_be64((u64)mhp->mpl[0]);
|
|
|
|
p[1] = cpu_to_be64((u64)mhp->mpl[1]);
|
|
|
|
|
|
|
|
*len16 = DIV_ROUND_UP(sizeof(*fr), 16);
|
|
|
|
}
|
|
|
|
|
2015-10-14 00:11:30 +08:00
|
|
|
static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
|
2016-09-16 22:54:52 +08:00
|
|
|
struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16,
|
|
|
|
bool dsgl_supported)
|
2015-10-14 00:11:30 +08:00
|
|
|
{
|
|
|
|
struct fw_ri_immd *imdp;
|
|
|
|
__be64 *p;
|
|
|
|
int i;
|
|
|
|
int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
|
|
|
|
int rem;
|
|
|
|
|
2016-02-12 18:40:35 +08:00
|
|
|
if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
|
2015-10-14 00:11:30 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
wqe->fr.qpbinde_to_dcacpu = 0;
|
|
|
|
wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
|
|
|
|
wqe->fr.addr_type = FW_RI_VA_BASED_TO;
|
|
|
|
wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
|
|
|
|
wqe->fr.len_hi = 0;
|
|
|
|
wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
|
|
|
|
wqe->fr.stag = cpu_to_be32(wr->key);
|
|
|
|
wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
|
|
|
|
wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
|
|
|
|
0xffffffff);
|
|
|
|
|
2016-02-12 18:40:35 +08:00
|
|
|
if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
|
2015-10-14 00:11:30 +08:00
|
|
|
struct fw_ri_dsgl *sglp;
|
|
|
|
|
|
|
|
for (i = 0; i < mhp->mpl_len; i++)
|
|
|
|
mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
|
|
|
|
|
|
|
|
sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
|
|
|
|
sglp->op = FW_RI_DATA_DSGL;
|
|
|
|
sglp->r1 = 0;
|
|
|
|
sglp->nsge = cpu_to_be16(1);
|
|
|
|
sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
|
|
|
|
sglp->len0 = cpu_to_be32(pbllen);
|
|
|
|
|
|
|
|
*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
|
|
|
|
} else {
|
|
|
|
imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
|
|
|
|
imdp->op = FW_RI_DATA_IMMD;
|
|
|
|
imdp->r1 = 0;
|
|
|
|
imdp->r2 = 0;
|
|
|
|
imdp->immdlen = cpu_to_be32(pbllen);
|
|
|
|
p = (__be64 *)(imdp + 1);
|
|
|
|
rem = pbllen;
|
|
|
|
for (i = 0; i < mhp->mpl_len; i++) {
|
|
|
|
*p = cpu_to_be64((u64)mhp->mpl[i]);
|
|
|
|
rem -= sizeof(*p);
|
|
|
|
if (++p == (__be64 *)&sq->queue[sq->size])
|
|
|
|
p = (__be64 *)sq->queue;
|
|
|
|
}
|
|
|
|
while (rem) {
|
|
|
|
*p = 0;
|
|
|
|
rem -= sizeof(*p);
|
|
|
|
if (++p == (__be64 *)&sq->queue[sq->size])
|
|
|
|
p = (__be64 *)sq->queue;
|
|
|
|
}
|
|
|
|
*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
|
|
|
|
+ pbllen, 16);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-11-04 03:09:38 +08:00
|
|
|
/*
 * build_inv_stag - fill in a local-invalidate work request.
 * @wqe:   work queue entry to populate
 * @wr:    work request carrying the rkey to invalidate
 * @len16: out parameter, WQE size in 16-byte units
 *
 * Always returns 0.
 */
static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
{
	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
	wqe->inv.r2 = 0;
	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
	return 0;
}
|
|
|
|
|
2016-12-22 23:40:36 +08:00
|
|
|
static void free_qp_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct c4iw_ucontext *ucontext;
|
|
|
|
struct c4iw_qp *qhp;
|
|
|
|
struct c4iw_dev *rhp;
|
|
|
|
|
|
|
|
qhp = container_of(work, struct c4iw_qp, free_work);
|
|
|
|
ucontext = qhp->ucontext;
|
|
|
|
rhp = qhp->rhp;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
|
2016-12-22 23:40:36 +08:00
|
|
|
destroy_qp(&rhp->rdev, &qhp->wq,
|
|
|
|
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
|
|
|
|
|
|
|
|
if (ucontext)
|
|
|
|
c4iw_put_ucontext(ucontext);
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
c4iw_put_wr_wait(qhp->wr_waitp);
|
2016-12-22 23:40:36 +08:00
|
|
|
kfree(qhp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void queue_qp_free(struct kref *kref)
|
2016-07-30 02:00:54 +08:00
|
|
|
{
|
|
|
|
struct c4iw_qp *qhp;
|
|
|
|
|
|
|
|
qhp = container_of(kref, struct c4iw_qp, kref);
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p\n", qhp);
|
2016-12-22 23:40:36 +08:00
|
|
|
queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
|
2016-07-30 02:00:54 +08:00
|
|
|
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
void c4iw_qp_add_ref(struct ib_qp *qp)
|
|
|
|
{
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ib_qp %p\n", qp);
|
2016-07-30 02:00:54 +08:00
|
|
|
kref_get(&to_c4iw_qp(qp)->kref);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void c4iw_qp_rem_ref(struct ib_qp *qp)
|
|
|
|
{
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ib_qp %p\n", qp);
|
2016-12-22 23:40:36 +08:00
|
|
|
kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming too
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
/*
 * add_to_fc_list - append an entry to a list unless it is already linked.
 * @head:  list to append to
 * @entry: list node to add; left untouched when list_empty() reports it is
 *         not empty
 */
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
{
	if (!list_empty(entry))
		return;

	list_add_tail(entry, head);
}
|
|
|
|
|
|
|
|
/*
 * ring_kernel_sq_db - ring or defer a send-queue doorbell.
 * @qhp: queue pair whose SQ doorbell is requested
 * @inc: producer index increment to apply
 *
 * Rings the doorbell immediately while the device db_state is NORMAL.
 * In any other state the QP is put on the device's db_fc_list and the
 * increment is accumulated in wq.sq.wq_pidx_inc.  The device lock is
 * taken before the QP lock.
 *
 * Always returns 0.
 */
static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
{
	struct c4iw_dev *rhp = qhp->rhp;
	unsigned long flags;

	spin_lock_irqsave(&rhp->lock, flags);
	spin_lock(&qhp->lock);

	if (rhp->db_state != NORMAL) {
		add_to_fc_list(&rhp->db_fc_list, &qhp->db_fc_entry);
		qhp->wq.sq.wq_pidx_inc += inc;
	} else {
		t4_ring_sq_db(&qhp->wq, inc, NULL);
	}

	spin_unlock(&qhp->lock);
	spin_unlock_irqrestore(&rhp->lock, flags);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * ring_kernel_rq_db - ring or defer a receive-queue doorbell.
 * @qhp: queue pair whose RQ doorbell is requested
 * @inc: producer index increment to apply
 *
 * Rings the doorbell immediately while the device db_state is NORMAL.
 * In any other state the QP is put on the device's db_fc_list and the
 * increment is accumulated in wq.rq.wq_pidx_inc.  The device lock is
 * taken before the QP lock.
 *
 * Always returns 0.
 */
static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
{
	struct c4iw_dev *rhp = qhp->rhp;
	unsigned long flags;

	spin_lock_irqsave(&rhp->lock, flags);
	spin_lock(&qhp->lock);

	if (rhp->db_state != NORMAL) {
		add_to_fc_list(&rhp->db_fc_list, &qhp->db_fc_entry);
		qhp->wq.rq.wq_pidx_inc += inc;
	} else {
		t4_ring_rq_db(&qhp->wq, inc, NULL);
	}

	spin_unlock(&qhp->lock);
	spin_unlock_irqrestore(&rhp->lock, flags);
	return 0;
}
|
|
|
|
|
2017-12-20 02:29:25 +08:00
|
|
|
static int ib_to_fw_opcode(int ib_opcode)
|
|
|
|
{
|
|
|
|
int opcode;
|
|
|
|
|
|
|
|
switch (ib_opcode) {
|
|
|
|
case IB_WR_SEND_WITH_INV:
|
|
|
|
opcode = FW_RI_SEND_WITH_INV;
|
|
|
|
break;
|
|
|
|
case IB_WR_SEND:
|
|
|
|
opcode = FW_RI_SEND;
|
|
|
|
break;
|
|
|
|
case IB_WR_RDMA_WRITE:
|
|
|
|
opcode = FW_RI_RDMA_WRITE;
|
|
|
|
break;
|
|
|
|
case IB_WR_RDMA_READ:
|
|
|
|
case IB_WR_RDMA_READ_WITH_INV:
|
|
|
|
opcode = FW_RI_READ_REQ;
|
|
|
|
break;
|
|
|
|
case IB_WR_REG_MR:
|
|
|
|
opcode = FW_RI_FAST_REGISTER;
|
|
|
|
break;
|
|
|
|
case IB_WR_LOCAL_INV:
|
|
|
|
opcode = FW_RI_LOCAL_INV;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
opcode = -EINVAL;
|
|
|
|
}
|
|
|
|
return opcode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
{
|
|
|
|
struct t4_cqe cqe = {};
|
|
|
|
struct c4iw_cq *schp;
|
|
|
|
unsigned long flag;
|
|
|
|
struct t4_cq *cq;
|
2017-12-20 02:29:25 +08:00
|
|
|
int opcode;
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
|
|
|
|
schp = to_c4iw_cq(qhp->ibqp.send_cq);
|
|
|
|
cq = &schp->cq;
|
|
|
|
|
2017-12-20 02:29:25 +08:00
|
|
|
opcode = ib_to_fw_opcode(wr->opcode);
|
|
|
|
if (opcode < 0)
|
|
|
|
return opcode;
|
|
|
|
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
cqe.u.drain_cookie = wr->wr_id;
|
|
|
|
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
|
2017-12-20 02:29:25 +08:00
|
|
|
CQE_OPCODE_V(opcode) |
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
CQE_TYPE_V(1) |
|
|
|
|
CQE_SWCQE_V(1) |
|
2017-12-20 02:29:25 +08:00
|
|
|
CQE_DRAIN_V(1) |
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
CQE_QPID_V(qhp->wq.sq.qid));
|
|
|
|
|
|
|
|
spin_lock_irqsave(&schp->lock, flag);
|
|
|
|
cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
|
|
|
|
cq->sw_queue[cq->sw_pidx] = cqe;
|
|
|
|
t4_swcq_produce(cq);
|
|
|
|
spin_unlock_irqrestore(&schp->lock, flag);
|
|
|
|
|
2017-11-09 23:14:43 +08:00
|
|
|
if (t4_clear_cq_armed(&schp->cq)) {
|
|
|
|
spin_lock_irqsave(&schp->comp_handler_lock, flag);
|
|
|
|
(*schp->ibcq.comp_handler)(&schp->ibcq,
|
|
|
|
schp->ibcq.cq_context);
|
|
|
|
spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
|
|
|
|
}
|
2017-12-20 02:29:25 +08:00
|
|
|
return 0;
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
}
|
|
|
|
|
2017-12-20 06:02:10 +08:00
|
|
|
static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
|
|
|
|
struct ib_send_wr **bad_wr)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
while (wr) {
|
|
|
|
ret = complete_sq_drain_wr(qhp, wr);
|
|
|
|
if (ret) {
|
|
|
|
*bad_wr = wr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
wr = wr->next;
|
|
|
|
}
|
|
|
|
return ret;
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
|
|
|
|
{
|
|
|
|
struct t4_cqe cqe = {};
|
|
|
|
struct c4iw_cq *rchp;
|
|
|
|
unsigned long flag;
|
|
|
|
struct t4_cq *cq;
|
|
|
|
|
|
|
|
rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
|
|
|
|
cq = &rchp->cq;
|
|
|
|
|
|
|
|
cqe.u.drain_cookie = wr->wr_id;
|
|
|
|
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
|
2017-12-20 02:29:25 +08:00
|
|
|
CQE_OPCODE_V(FW_RI_SEND) |
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
CQE_TYPE_V(0) |
|
|
|
|
CQE_SWCQE_V(1) |
|
2017-12-20 02:29:25 +08:00
|
|
|
CQE_DRAIN_V(1) |
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
CQE_QPID_V(qhp->wq.sq.qid));
|
|
|
|
|
|
|
|
spin_lock_irqsave(&rchp->lock, flag);
|
|
|
|
cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
|
|
|
|
cq->sw_queue[cq->sw_pidx] = cqe;
|
|
|
|
t4_swcq_produce(cq);
|
|
|
|
spin_unlock_irqrestore(&rchp->lock, flag);
|
|
|
|
|
2017-11-09 23:14:43 +08:00
|
|
|
if (t4_clear_cq_armed(&rchp->cq)) {
|
|
|
|
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
|
|
|
|
(*rchp->ibcq.comp_handler)(&rchp->ibcq,
|
|
|
|
rchp->ibcq.cq_context);
|
|
|
|
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
|
|
|
|
}
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
}
|
|
|
|
|
2017-12-20 06:02:10 +08:00
|
|
|
static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
|
|
|
|
{
|
|
|
|
while (wr) {
|
|
|
|
complete_rq_drain_wr(qhp, wr);
|
|
|
|
wr = wr->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
|
|
|
|
struct ib_send_wr **bad_wr)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
u8 len16 = 0;
|
|
|
|
enum fw_wr_opcodes fw_opcode = 0;
|
|
|
|
enum fw_ri_wr_flags fw_flags;
|
|
|
|
struct c4iw_qp *qhp;
|
2014-04-09 22:38:25 +08:00
|
|
|
union t4_wr *wqe = NULL;
|
2010-04-22 06:30:06 +08:00
|
|
|
u32 num_wrs;
|
|
|
|
struct t4_swsqe *swsqe;
|
|
|
|
unsigned long flag;
|
|
|
|
u16 idx = 0;
|
|
|
|
|
|
|
|
qhp = to_c4iw_qp(ibqp);
|
|
|
|
spin_lock_irqsave(&qhp->lock, flag);
|
2017-11-28 05:16:32 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the qp has been flushed, then just insert a special
|
|
|
|
* drain cqe.
|
|
|
|
*/
|
|
|
|
if (qhp->wq.flushed) {
|
2010-04-22 06:30:06 +08:00
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
2017-12-20 06:02:10 +08:00
|
|
|
err = complete_sq_drain_wrs(qhp, wr, bad_wr);
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
return err;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
num_wrs = t4_sq_avail(&qhp->wq);
|
|
|
|
if (num_wrs == 0) {
|
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
2016-10-19 05:04:39 +08:00
|
|
|
*bad_wr = wr;
|
2010-04-22 06:30:06 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
while (wr) {
|
|
|
|
if (num_wrs == 0) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
*bad_wr = wr;
|
|
|
|
break;
|
|
|
|
}
|
2010-06-11 03:03:00 +08:00
|
|
|
wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
|
|
|
|
qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
fw_flags = 0;
|
|
|
|
if (wr->send_flags & IB_SEND_SOLICITED)
|
|
|
|
fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
|
2014-03-19 20:14:43 +08:00
|
|
|
if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
|
2010-04-22 06:30:06 +08:00
|
|
|
fw_flags |= FW_RI_COMPLETION_FLAG;
|
|
|
|
swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
|
|
|
|
switch (wr->opcode) {
|
|
|
|
case IB_WR_SEND_WITH_INV:
|
|
|
|
case IB_WR_SEND:
|
|
|
|
if (wr->send_flags & IB_SEND_FENCE)
|
|
|
|
fw_flags |= FW_RI_READ_FENCE_FLAG;
|
|
|
|
fw_opcode = FW_RI_SEND_WR;
|
|
|
|
if (wr->opcode == IB_WR_SEND)
|
|
|
|
swsqe->opcode = FW_RI_SEND;
|
|
|
|
else
|
|
|
|
swsqe->opcode = FW_RI_SEND_WITH_INV;
|
2010-06-11 03:03:00 +08:00
|
|
|
err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
case IB_WR_RDMA_WRITE:
|
|
|
|
fw_opcode = FW_RI_RDMA_WRITE_WR;
|
|
|
|
swsqe->opcode = FW_RI_RDMA_WRITE;
|
2010-06-11 03:03:00 +08:00
|
|
|
err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
case IB_WR_RDMA_READ:
|
2010-05-21 05:58:16 +08:00
|
|
|
case IB_WR_RDMA_READ_WITH_INV:
|
2010-04-22 06:30:06 +08:00
|
|
|
fw_opcode = FW_RI_RDMA_READ_WR;
|
|
|
|
swsqe->opcode = FW_RI_READ_REQ;
|
2016-11-04 03:09:38 +08:00
|
|
|
if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
|
|
|
|
c4iw_invalidate_mr(qhp->rhp,
|
|
|
|
wr->sg_list[0].lkey);
|
2010-09-18 04:40:09 +08:00
|
|
|
fw_flags = FW_RI_RDMA_READ_INVALIDATE;
|
2016-11-04 03:09:38 +08:00
|
|
|
} else {
|
2010-05-21 05:58:16 +08:00
|
|
|
fw_flags = 0;
|
2016-11-04 03:09:38 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
err = build_rdma_read(wqe, wr, &len16);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
swsqe->read_len = wr->sg_list[0].length;
|
|
|
|
if (!qhp->wq.sq.oldest_read)
|
|
|
|
qhp->wq.sq.oldest_read = swsqe;
|
|
|
|
break;
|
2016-09-16 22:54:52 +08:00
|
|
|
case IB_WR_REG_MR: {
|
|
|
|
struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
|
|
|
|
|
2015-10-14 00:11:30 +08:00
|
|
|
swsqe->opcode = FW_RI_FAST_REGISTER;
|
2016-09-16 22:54:52 +08:00
|
|
|
if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
|
|
|
|
!mhp->attr.state && mhp->mpl_len <= 2) {
|
|
|
|
fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
|
|
|
|
build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
|
|
|
|
mhp, &len16);
|
|
|
|
} else {
|
|
|
|
fw_opcode = FW_RI_FR_NSMR_WR;
|
|
|
|
err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
|
|
|
|
mhp, &len16,
|
|
|
|
qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mhp->attr.state = 1;
|
2015-10-14 00:11:30 +08:00
|
|
|
break;
|
2016-09-16 22:54:52 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
case IB_WR_LOCAL_INV:
|
2010-05-21 05:58:10 +08:00
|
|
|
if (wr->send_flags & IB_SEND_FENCE)
|
|
|
|
fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
|
2010-04-22 06:30:06 +08:00
|
|
|
fw_opcode = FW_RI_INV_LSTAG_WR;
|
|
|
|
swsqe->opcode = FW_RI_LOCAL_INV;
|
2016-11-04 03:09:38 +08:00
|
|
|
err = build_inv_stag(wqe, wr, &len16);
|
|
|
|
c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
default:
|
2017-09-27 15:35:50 +08:00
|
|
|
pr_warn("%s post of type=%d TBD!\n", __func__,
|
|
|
|
wr->opcode);
|
2010-04-22 06:30:06 +08:00
|
|
|
err = -EINVAL;
|
|
|
|
}
|
|
|
|
if (err) {
|
|
|
|
*bad_wr = wr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
swsqe->idx = qhp->wq.sq.pidx;
|
|
|
|
swsqe->complete = 0;
|
2014-03-19 20:14:43 +08:00
|
|
|
swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
|
|
|
|
qhp->sq_sig_all;
|
2013-08-06 23:34:35 +08:00
|
|
|
swsqe->flushed = 0;
|
2010-04-22 06:30:06 +08:00
|
|
|
swsqe->wr_id = wr->wr_id;
|
2014-07-15 00:04:54 +08:00
|
|
|
if (c4iw_wr_log) {
|
|
|
|
swsqe->sge_ts = cxgb4_read_sge_timestamp(
|
|
|
|
qhp->rhp->rdev.lldi.ports[0]);
|
2017-11-27 19:44:53 +08:00
|
|
|
swsqe->host_time = ktime_get();
|
2014-07-15 00:04:54 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
|
2017-02-10 06:23:51 +08:00
|
|
|
(unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
|
|
|
|
swsqe->opcode, swsqe->read_len);
|
2010-04-22 06:30:06 +08:00
|
|
|
wr = wr->next;
|
|
|
|
num_wrs--;
|
2010-06-11 03:03:00 +08:00
|
|
|
t4_sq_produce(&qhp->wq, len16);
|
|
|
|
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
if (!qhp->rhp->rdev.status_page->db_off) {
|
2015-09-23 19:49:27 +08:00
|
|
|
t4_ring_sq_db(&qhp->wq, idx, wqe);
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
|
|
|
} else {
|
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
|
|
|
ring_kernel_sq_db(qhp, idx);
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
|
|
|
|
struct ib_recv_wr **bad_wr)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
struct c4iw_qp *qhp;
|
2014-04-09 22:38:25 +08:00
|
|
|
union t4_recv_wr *wqe = NULL;
|
2010-04-22 06:30:06 +08:00
|
|
|
u32 num_wrs;
|
|
|
|
u8 len16 = 0;
|
|
|
|
unsigned long flag;
|
|
|
|
u16 idx = 0;
|
|
|
|
|
|
|
|
qhp = to_c4iw_qp(ibqp);
|
|
|
|
spin_lock_irqsave(&qhp->lock, flag);
|
2017-11-28 05:16:32 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the qp has been flushed, then just insert a special
|
|
|
|
* drain cqe.
|
|
|
|
*/
|
|
|
|
if (qhp->wq.flushed) {
|
2010-04-22 06:30:06 +08:00
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
2017-12-20 06:02:10 +08:00
|
|
|
complete_rq_drain_wrs(qhp, wr);
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
return err;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
num_wrs = t4_rq_avail(&qhp->wq);
|
|
|
|
if (num_wrs == 0) {
|
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
2016-10-19 05:04:39 +08:00
|
|
|
*bad_wr = wr;
|
2010-04-22 06:30:06 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
while (wr) {
|
|
|
|
if (wr->num_sge > T4_MAX_RECV_SGE) {
|
|
|
|
err = -EINVAL;
|
|
|
|
*bad_wr = wr;
|
|
|
|
break;
|
|
|
|
}
|
2010-06-11 03:03:00 +08:00
|
|
|
wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
|
|
|
|
qhp->wq.rq.wq_pidx *
|
|
|
|
T4_EQ_ENTRY_SIZE);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (num_wrs)
|
|
|
|
err = build_rdma_recv(qhp, wqe, wr, &len16);
|
|
|
|
else
|
|
|
|
err = -ENOMEM;
|
|
|
|
if (err) {
|
|
|
|
*bad_wr = wr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
|
2014-07-15 00:04:54 +08:00
|
|
|
if (c4iw_wr_log) {
|
|
|
|
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
|
|
|
|
cxgb4_read_sge_timestamp(
|
|
|
|
qhp->rhp->rdev.lldi.ports[0]);
|
2017-11-27 19:44:53 +08:00
|
|
|
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
|
|
|
|
ktime_get();
|
2014-07-15 00:04:54 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wqe->recv.opcode = FW_RI_RECV_WR;
|
|
|
|
wqe->recv.r1 = 0;
|
|
|
|
wqe->recv.wrid = qhp->wq.rq.pidx;
|
|
|
|
wqe->recv.r2[0] = 0;
|
|
|
|
wqe->recv.r2[1] = 0;
|
|
|
|
wqe->recv.r2[2] = 0;
|
|
|
|
wqe->recv.len16 = len16;
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("cookie 0x%llx pidx %u\n",
|
2017-02-10 06:23:51 +08:00
|
|
|
(unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
|
2010-06-11 03:03:00 +08:00
|
|
|
t4_rq_produce(&qhp->wq, len16);
|
|
|
|
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
|
2010-04-22 06:30:06 +08:00
|
|
|
wr = wr->next;
|
|
|
|
num_wrs--;
|
|
|
|
}
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
if (!qhp->rhp->rdev.status_page->db_off) {
|
2015-09-23 19:49:27 +08:00
|
|
|
t4_ring_rq_db(&qhp->wq, idx, wqe);
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
|
|
|
} else {
|
|
|
|
spin_unlock_irqrestore(&qhp->lock, flag);
|
|
|
|
ring_kernel_rq_db(qhp, idx);
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Translate the status of an error CQE into the layer/type byte and the
 * error code byte of a TERMINATE message.
 *
 * @err_cqe:    CQE that reported the error, or NULL if there is none
 * @layer_type: out - layer OR'ed with the error type
 * @ecode:      out - error code within that layer/type
 *
 * A NULL CQE and any status not listed below both yield
 * LAYER_RDMAP|DDP_LOCAL_CATA with an error code of 0.
 */
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
				    u8 *ecode)
{
	u8 layer = LAYER_RDMAP|DDP_LOCAL_CATA;
	u8 code = 0;
	int opcode;
	int rqtype;
	int send_inv;
	int tagged;

	if (!err_cqe)
		goto done;

	opcode = CQE_OPCODE(err_cqe);
	rqtype = RQ_TYPE(err_cqe);

	/* Was the failing operation a send-with-invalidate variant? */
	send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
		   (opcode == FW_RI_SEND_WITH_SE_INV);

	/* RDMA writes, and read responses on an RQ-type CQE, are "tagged". */
	tagged = (opcode == FW_RI_RDMA_WRITE) ||
		 (rqtype && (opcode == FW_RI_READ_RESP));

	switch (CQE_STATUS(err_cqe)) {
	case T4_ERR_STAG:
		if (send_inv) {
			layer = LAYER_RDMAP|RDMAP_REMOTE_OP;
			code = RDMAP_CANT_INV_STAG;
		} else {
			layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			code = RDMAP_INV_STAG;
		}
		break;
	case T4_ERR_PDID:
		layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		if (send_inv)
			code = RDMAP_CANT_INV_STAG;
		else
			code = RDMAP_STAG_NOT_ASSOC;
		break;
	case T4_ERR_QPID:
		layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		code = RDMAP_STAG_NOT_ASSOC;
		break;
	case T4_ERR_ACCESS:
		layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		code = RDMAP_ACC_VIOL;
		break;
	case T4_ERR_WRAP:
		layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
		code = RDMAP_TO_WRAP;
		break;
	case T4_ERR_BOUND:
		if (tagged) {
			layer = LAYER_DDP|DDP_TAGGED_ERR;
			code = DDPT_BASE_BOUNDS;
		} else {
			layer = LAYER_RDMAP|RDMAP_REMOTE_PROT;
			code = RDMAP_BASE_BOUNDS;
		}
		break;
	case T4_ERR_INVALIDATE_SHARED_MR:
	case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		layer = LAYER_RDMAP|RDMAP_REMOTE_OP;
		code = RDMAP_CANT_INV_STAG;
		break;
	case T4_ERR_ECC:
	case T4_ERR_ECC_PSTAG:
	case T4_ERR_INTERNAL_ERR:
		layer = LAYER_RDMAP|RDMAP_LOCAL_CATA;
		code = 0;
		break;
	case T4_ERR_OUT_OF_RQE:
		layer = LAYER_DDP|DDP_UNTAGGED_ERR;
		code = DDPU_INV_MSN_NOBUF;
		break;
	case T4_ERR_PBL_ADDR_BOUND:
		layer = LAYER_DDP|DDP_TAGGED_ERR;
		code = DDPT_BASE_BOUNDS;
		break;
	case T4_ERR_CRC:
		layer = LAYER_MPA|DDP_LLP;
		code = MPA_CRC_ERR;
		break;
	case T4_ERR_MARKER:
		layer = LAYER_MPA|DDP_LLP;
		code = MPA_MARKER_ERR;
		break;
	case T4_ERR_PDU_LEN_ERR:
		layer = LAYER_DDP|DDP_UNTAGGED_ERR;
		code = DDPU_MSG_TOOBIG;
		break;
	case T4_ERR_DDP_VERSION:
		if (tagged) {
			layer = LAYER_DDP|DDP_TAGGED_ERR;
			code = DDPT_INV_VERS;
		} else {
			layer = LAYER_DDP|DDP_UNTAGGED_ERR;
			code = DDPU_INV_VERS;
		}
		break;
	case T4_ERR_RDMA_VERSION:
		layer = LAYER_RDMAP|RDMAP_REMOTE_OP;
		code = RDMAP_INV_VERS;
		break;
	case T4_ERR_OPCODE:
		layer = LAYER_RDMAP|RDMAP_REMOTE_OP;
		code = RDMAP_INV_OPCODE;
		break;
	case T4_ERR_DDP_QUEUE_NUM:
		layer = LAYER_DDP|DDP_UNTAGGED_ERR;
		code = DDPU_INV_QN;
		break;
	case T4_ERR_MSN:
	case T4_ERR_MSN_GAP:
	case T4_ERR_MSN_RANGE:
	case T4_ERR_IRD_OVERFLOW:
		layer = LAYER_DDP|DDP_UNTAGGED_ERR;
		code = DDPU_INV_MSN_RANGE;
		break;
	case T4_ERR_TBIT:
		layer = LAYER_DDP|DDP_LOCAL_CATA;
		code = 0;
		break;
	case T4_ERR_MO:
		layer = LAYER_DDP|DDP_UNTAGGED_ERR;
		code = DDPU_INV_MO;
		break;
	default:
		/* Keep the fallback values set at declaration. */
		break;
	}

done:
	*layer_type = layer;
	*ecode = code;
}
|
|
|
|
|
2010-05-06 05:45:40 +08:00
|
|
|
static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
|
|
|
|
gfp_t gfp)
|
2010-04-22 06:30:06 +08:00
|
|
|
{
|
|
|
|
struct fw_ri_wr *wqe;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
struct terminate_message *term;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid,
|
2017-02-10 06:23:51 +08:00
|
|
|
qhp->ep->hwtid);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2016-06-10 03:35:15 +08:00
|
|
|
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
|
|
|
|
if (WARN_ON(!skb))
|
2010-05-06 05:45:40 +08:00
|
|
|
return;
|
2016-06-10 03:35:15 +08:00
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
|
|
|
|
|
2018-04-28 15:31:06 +08:00
|
|
|
wqe = __skb_put_zero(skb, sizeof(*wqe));
|
2014-11-07 12:05:25 +08:00
|
|
|
wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->flowid_len16 = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_FLOWID_V(qhp->ep->hwtid) |
|
|
|
|
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
|
|
|
|
wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
|
|
|
|
term = (struct terminate_message *)wqe->u.terminate.termmsg;
|
2011-09-25 22:47:44 +08:00
|
|
|
if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
|
|
|
|
term->layer_etype = qhp->attr.layer_etype;
|
|
|
|
term->ecode = qhp->attr.ecode;
|
|
|
|
} else
|
|
|
|
build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
|
2010-05-06 05:45:40 +08:00
|
|
|
c4iw_ofld_send(&qhp->rhp->rdev, skb);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assumes qhp lock is held.
|
|
|
|
*/
|
|
|
|
static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
|
2010-09-11 00:15:36 +08:00
|
|
|
struct c4iw_cq *schp)
|
2010-04-22 06:30:06 +08:00
|
|
|
{
|
|
|
|
int count;
|
2014-08-01 03:35:43 +08:00
|
|
|
int rq_flushed, sq_flushed;
|
2010-09-11 00:15:36 +08:00
|
|
|
unsigned long flag;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-11-09 23:21:26 +08:00
|
|
|
/* locking hierarchy: cqs lock first, then qp lock. */
|
2010-09-11 00:15:36 +08:00
|
|
|
spin_lock_irqsave(&rchp->lock, flag);
|
2017-11-09 23:21:26 +08:00
|
|
|
if (schp != rchp)
|
|
|
|
spin_lock(&schp->lock);
|
2010-04-22 06:30:06 +08:00
|
|
|
spin_lock(&qhp->lock);
|
2013-08-06 23:34:35 +08:00
|
|
|
|
|
|
|
if (qhp->wq.flushed) {
|
|
|
|
spin_unlock(&qhp->lock);
|
2017-11-09 23:21:26 +08:00
|
|
|
if (schp != rchp)
|
|
|
|
spin_unlock(&schp->lock);
|
2013-08-06 23:34:35 +08:00
|
|
|
spin_unlock_irqrestore(&rchp->lock, flag);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
qhp->wq.flushed = 1;
|
2017-11-09 23:21:26 +08:00
|
|
|
t4_set_wq_in_error(&qhp->wq);
|
2013-08-06 23:34:35 +08:00
|
|
|
|
2018-04-27 19:11:16 +08:00
|
|
|
c4iw_flush_hw_cq(rchp, qhp);
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
|
2014-08-01 03:35:43 +08:00
|
|
|
rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2013-08-06 23:34:35 +08:00
|
|
|
if (schp != rchp)
|
2018-04-27 19:11:16 +08:00
|
|
|
c4iw_flush_hw_cq(schp, qhp);
|
2014-08-01 03:35:43 +08:00
|
|
|
sq_flushed = c4iw_flush_sq(qhp);
|
2017-11-09 23:21:26 +08:00
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
spin_unlock(&qhp->lock);
|
2017-11-09 23:21:26 +08:00
|
|
|
if (schp != rchp)
|
|
|
|
spin_unlock(&schp->lock);
|
|
|
|
spin_unlock_irqrestore(&rchp->lock, flag);
|
2014-08-01 03:35:43 +08:00
|
|
|
|
|
|
|
if (schp == rchp) {
|
2017-12-01 01:41:56 +08:00
|
|
|
if ((rq_flushed || sq_flushed) &&
|
|
|
|
t4_clear_cq_armed(&rchp->cq)) {
|
2014-08-01 03:35:43 +08:00
|
|
|
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
|
|
|
|
(*rchp->ibcq.comp_handler)(&rchp->ibcq,
|
|
|
|
rchp->ibcq.cq_context);
|
|
|
|
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
|
|
|
|
}
|
|
|
|
} else {
|
2017-12-01 01:41:56 +08:00
|
|
|
if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
|
2014-08-01 03:35:43 +08:00
|
|
|
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
|
|
|
|
(*rchp->ibcq.comp_handler)(&rchp->ibcq,
|
|
|
|
rchp->ibcq.cq_context);
|
|
|
|
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
|
|
|
|
}
|
2017-12-01 01:41:56 +08:00
|
|
|
if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
|
2014-08-01 03:35:43 +08:00
|
|
|
spin_lock_irqsave(&schp->comp_handler_lock, flag);
|
|
|
|
(*schp->ibcq.comp_handler)(&schp->ibcq,
|
|
|
|
schp->ibcq.cq_context);
|
|
|
|
spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
|
|
|
|
}
|
2011-10-24 23:50:21 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
2010-09-11 00:15:36 +08:00
|
|
|
static void flush_qp(struct c4iw_qp *qhp)
|
2010-04-22 06:30:06 +08:00
|
|
|
{
|
|
|
|
struct c4iw_cq *rchp, *schp;
|
2011-10-24 23:50:21 +08:00
|
|
|
unsigned long flag;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2013-08-06 23:34:35 +08:00
|
|
|
rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
|
|
|
|
schp = to_c4iw_cq(qhp->ibqp.send_cq);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (qhp->ibqp.uobject) {
|
2017-11-09 23:21:26 +08:00
|
|
|
t4_set_wq_in_error(&qhp->wq);
|
2010-04-22 06:30:06 +08:00
|
|
|
t4_set_cq_in_error(&rchp->cq);
|
2011-10-24 23:50:21 +08:00
|
|
|
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
|
2011-10-13 16:21:30 +08:00
|
|
|
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
|
2011-10-24 23:50:21 +08:00
|
|
|
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
|
2011-10-13 16:21:30 +08:00
|
|
|
if (schp != rchp) {
|
2010-04-22 06:30:06 +08:00
|
|
|
t4_set_cq_in_error(&schp->cq);
|
2011-10-24 23:50:21 +08:00
|
|
|
spin_lock_irqsave(&schp->comp_handler_lock, flag);
|
2011-10-13 16:21:30 +08:00
|
|
|
(*schp->ibcq.comp_handler)(&schp->ibcq,
|
|
|
|
schp->ibcq.cq_context);
|
2011-10-24 23:50:21 +08:00
|
|
|
spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
|
2011-10-13 16:21:30 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
return;
|
|
|
|
}
|
2010-09-11 00:15:36 +08:00
|
|
|
__flush_qp(qhp, rchp, schp);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
|
2010-07-24 03:12:27 +08:00
|
|
|
static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
|
|
|
|
struct c4iw_ep *ep)
|
2010-04-22 06:30:06 +08:00
|
|
|
{
|
|
|
|
struct fw_ri_wr *wqe;
|
|
|
|
int ret;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2016-06-10 03:35:15 +08:00
|
|
|
skb = skb_dequeue(&ep->com.ep_skb_list);
|
|
|
|
if (WARN_ON(!skb))
|
2010-04-22 06:30:06 +08:00
|
|
|
return -ENOMEM;
|
2016-06-10 03:35:15 +08:00
|
|
|
|
2010-07-24 03:12:27 +08:00
|
|
|
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2018-04-28 15:31:06 +08:00
|
|
|
wqe = __skb_put_zero(skb, sizeof(*wqe));
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->op_compl = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_OP_V(FW_RI_INIT_WR) |
|
|
|
|
FW_WR_COMPL_F);
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->flowid_len16 = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_FLOWID_V(ep->hwtid) |
|
|
|
|
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
|
2017-09-27 04:12:16 +08:00
|
|
|
wqe->cookie = (uintptr_t)ep->com.wr_waitp;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wqe->u.fini.type = FW_RI_TYPE_FINI;
|
|
|
|
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
|
|
|
|
qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ret %d\n", ret);
|
2010-04-22 06:30:06 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fill in the peer-to-peer work request embedded in an INIT message.
 *
 * @p2p_type: FW_RI_INIT_P2PTYPE_* value selecting which WR to build
 * @init:     INIT payload whose union 'u' receives the WR
 *
 * init->u is always cleared first; for a p2p_type other than the two
 * handled below it is left zeroed.
 */
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
	pr_debug("p2p_type = %d\n", p2p_type);
	memset(&init->u, 0, sizeof(init->u));
	switch (p2p_type) {
	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
		init->u.write.stag_sink = cpu_to_be32(1);
		init->u.write.to_sink = cpu_to_be64(1);
		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
		/* Length covers the WR plus one immediate-data header. */
		init->u.write.len16 = DIV_ROUND_UP(sizeof(init->u.write) +
						   sizeof(struct fw_ri_immd),
						   16);
		break;
	case FW_RI_INIT_P2PTYPE_READ_REQ:
		/*
		 * Set the opcode through the 'read' member, matching every
		 * other field of this case, rather than through 'write'.
		 */
		init->u.read.opcode = FW_RI_RDMA_READ_WR;
		init->u.read.stag_src = cpu_to_be32(1);
		init->u.read.to_src_lo = cpu_to_be32(1);
		init->u.read.stag_sink = cpu_to_be32(1);
		init->u.read.to_sink_lo = cpu_to_be32(1);
		init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
		break;
	default:
		/* No WR for this type; init->u stays zeroed. */
		break;
	}
}
|
|
|
|
|
|
|
|
static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
|
|
|
|
{
|
|
|
|
struct fw_ri_wr *wqe;
|
|
|
|
int ret;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
|
2017-02-10 06:23:51 +08:00
|
|
|
qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2010-07-21 10:44:56 +08:00
|
|
|
skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
|
2014-07-15 00:04:52 +08:00
|
|
|
if (!skb) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
ret = alloc_ird(rhp, qhp->attr.max_ird);
|
|
|
|
if (ret) {
|
|
|
|
qhp->attr.max_ird = 0;
|
|
|
|
kfree_skb(skb);
|
|
|
|
goto out;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
|
|
|
|
|
2018-04-28 15:31:06 +08:00
|
|
|
wqe = __skb_put_zero(skb, sizeof(*wqe));
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->op_compl = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_OP_V(FW_RI_INIT_WR) |
|
|
|
|
FW_WR_COMPL_F);
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->flowid_len16 = cpu_to_be32(
|
2014-11-07 12:05:25 +08:00
|
|
|
FW_WR_FLOWID_V(qhp->ep->hwtid) |
|
|
|
|
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-09-27 04:12:16 +08:00
|
|
|
wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
wqe->u.init.type = FW_RI_TYPE_INIT;
|
|
|
|
wqe->u.init.mpareqbit_p2ptype =
|
2015-01-16 11:54:48 +08:00
|
|
|
FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) |
|
|
|
|
FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type);
|
2010-04-22 06:30:06 +08:00
|
|
|
wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
|
|
|
|
if (qhp->attr.mpa_attr.recv_marker_enabled)
|
|
|
|
wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
|
|
|
|
if (qhp->attr.mpa_attr.xmit_marker_enabled)
|
|
|
|
wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
|
|
|
|
if (qhp->attr.mpa_attr.crc_enabled)
|
|
|
|
wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
|
|
|
|
|
|
|
|
wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
|
|
|
|
FW_RI_QP_RDMA_WRITE_ENABLE |
|
|
|
|
FW_RI_QP_BIND_ENABLE;
|
|
|
|
if (!qhp->ibqp.uobject)
|
|
|
|
wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
|
|
|
|
FW_RI_QP_STAG0_ENABLE;
|
|
|
|
wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
|
|
|
|
wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
|
|
|
|
wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
|
|
|
|
wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
|
|
|
|
wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
|
|
|
|
wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
|
|
|
|
wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
|
|
|
|
wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
|
|
|
|
wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
|
|
|
|
wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
|
|
|
|
wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
|
|
|
|
wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
|
|
|
|
wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
|
|
|
|
rhp->rdev.lldi.vr->rq.start);
|
|
|
|
if (qhp->attr.mpa_attr.initiator)
|
|
|
|
build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
|
|
|
|
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
|
|
|
|
qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
|
2014-07-15 00:04:52 +08:00
|
|
|
if (!ret)
|
|
|
|
goto out;
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
|
2014-07-15 00:04:52 +08:00
|
|
|
free_ird(rhp, qhp->attr.max_ird);
|
2010-04-22 06:30:06 +08:00
|
|
|
out:
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ret %d\n", ret);
|
2010-04-22 06:30:06 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
|
|
|
|
enum c4iw_qp_attr_mask mask,
|
|
|
|
struct c4iw_qp_attributes *attrs,
|
|
|
|
int internal)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
struct c4iw_qp_attributes newattr = qhp->attr;
|
|
|
|
int disconnect = 0;
|
|
|
|
int terminate = 0;
|
|
|
|
int abort = 0;
|
|
|
|
int free = 0;
|
|
|
|
struct c4iw_ep *ep = NULL;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
|
2017-02-10 06:23:51 +08:00
|
|
|
qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
|
|
|
|
(mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2010-09-11 00:15:36 +08:00
|
|
|
mutex_lock(&qhp->mutex);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
/* Process attr changes if in IDLE */
|
|
|
|
if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
|
|
|
|
if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
|
|
|
|
ret = -EIO;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
|
|
|
|
newattr.enable_rdma_read = attrs->enable_rdma_read;
|
|
|
|
if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
|
|
|
|
newattr.enable_rdma_write = attrs->enable_rdma_write;
|
|
|
|
if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
|
|
|
|
newattr.enable_bind = attrs->enable_bind;
|
|
|
|
if (mask & C4IW_QP_ATTR_MAX_ORD) {
|
2010-05-06 05:45:40 +08:00
|
|
|
if (attrs->max_ord > c4iw_max_read_depth) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
newattr.max_ord = attrs->max_ord;
|
|
|
|
}
|
|
|
|
if (mask & C4IW_QP_ATTR_MAX_IRD) {
|
2014-07-15 00:04:52 +08:00
|
|
|
if (attrs->max_ird > cur_max_read_depth(rhp)) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
newattr.max_ird = attrs->max_ird;
|
|
|
|
}
|
|
|
|
qhp->attr = newattr;
|
|
|
|
}
|
|
|
|
|
2012-05-18 17:59:28 +08:00
|
|
|
if (mask & C4IW_QP_ATTR_SQ_DB) {
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
|
2012-05-18 17:59:28 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (mask & C4IW_QP_ATTR_RQ_DB) {
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
|
2012-05-18 17:59:28 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
|
|
|
|
goto out;
|
|
|
|
if (qhp->attr.state == attrs->next_state)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
switch (qhp->attr.state) {
|
|
|
|
case C4IW_QP_STATE_IDLE:
|
|
|
|
switch (attrs->next_state) {
|
|
|
|
case C4IW_QP_STATE_RTS:
|
|
|
|
if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
qhp->attr.mpa_attr = attrs->mpa_attr;
|
|
|
|
qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
|
|
|
|
qhp->ep = qhp->attr.llp_stream_handle;
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_RTS);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ref the endpoint here and deref when we
|
|
|
|
* disassociate the endpoint from the QP. This
|
|
|
|
* happens in CLOSING->IDLE transition or *->ERROR
|
|
|
|
* transition.
|
|
|
|
*/
|
|
|
|
c4iw_get_ep(&qhp->ep->com);
|
|
|
|
ret = rdma_init(rhp, qhp);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_ERROR:
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_ERROR);
|
|
|
|
flush_qp(qhp);
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_RTS:
|
|
|
|
switch (attrs->next_state) {
|
|
|
|
case C4IW_QP_STATE_CLOSING:
|
RDMA/cxgb4: SQ flush fix
There is a race when moving a QP from RTS->CLOSING where a SQ work
request could be posted after the FW receives the RDMA_RI/FINI WR.
The SQ work request will never get processed, and should be completed
with FLUSHED status. Function c4iw_flush_sq(), however was dropping
the oldest SQ work request when in CLOSING or IDLE states, instead of
completing the pending work request. If that oldest pending work
request was actually complete and has a CQE in the CQ, then when that
CQE is proceessed in poll_cq, we'll BUG_ON() due to the inconsistent
SQ/CQ state.
This is a very small timing hole and has only been hit once so far.
The fix is two-fold:
1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED
status regardless of the QP state.
2) In c4iw_modify_rc_qp(), always set the "in error" bit on the queue
before moving the state out of RTS. This ensures that the state
transition will not happen while another thread is in
post_rc_send(), because set_state() and post_rc_send() both aquire
the qp spinlock. Also, once we transition the state out of RTS,
subsequent calls to post_rc_send() will fail because the "in error"
bit is set. I don't think this fully closes the race where the FW
can get a FINI followed a SQ work request being posted (because
they are posted to differente EQs), but the #1 fix will handle the
issue by flushing the SQ work request.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-04-09 22:38:26 +08:00
|
|
|
t4_set_wq_in_error(&qhp->wq);
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_CLOSING);
|
2010-07-24 03:12:27 +08:00
|
|
|
ep = qhp->ep;
|
2010-04-22 06:30:06 +08:00
|
|
|
if (!internal) {
|
|
|
|
abort = 0;
|
|
|
|
disconnect = 1;
|
2010-09-11 00:15:36 +08:00
|
|
|
c4iw_get_ep(&qhp->ep->com);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2010-07-24 03:12:27 +08:00
|
|
|
ret = rdma_fini(rhp, qhp, ep);
|
2011-06-15 04:59:27 +08:00
|
|
|
if (ret)
|
2010-04-22 06:30:06 +08:00
|
|
|
goto err;
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_TERMINATE:
|
RDMA/cxgb4: SQ flush fix
There is a race when moving a QP from RTS->CLOSING where a SQ work
request could be posted after the FW receives the RDMA_RI/FINI WR.
The SQ work request will never get processed, and should be completed
with FLUSHED status. Function c4iw_flush_sq(), however was dropping
the oldest SQ work request when in CLOSING or IDLE states, instead of
completing the pending work request. If that oldest pending work
request was actually complete and has a CQE in the CQ, then when that
CQE is proceessed in poll_cq, we'll BUG_ON() due to the inconsistent
SQ/CQ state.
This is a very small timing hole and has only been hit once so far.
The fix is two-fold:
1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED
status regardless of the QP state.
2) In c4iw_modify_rc_qp(), always set the "in error" bit on the queue
before moving the state out of RTS. This ensures that the state
transition will not happen while another thread is in
post_rc_send(), because set_state() and post_rc_send() both aquire
the qp spinlock. Also, once we transition the state out of RTS,
subsequent calls to post_rc_send() will fail because the "in error"
bit is set. I don't think this fully closes the race where the FW
can get a FINI followed a SQ work request being posted (because
they are posted to differente EQs), but the #1 fix will handle the
issue by flushing the SQ work request.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-04-09 22:38:26 +08:00
|
|
|
t4_set_wq_in_error(&qhp->wq);
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_TERMINATE);
|
2011-09-25 22:47:44 +08:00
|
|
|
qhp->attr.layer_etype = attrs->layer_etype;
|
|
|
|
qhp->attr.ecode = attrs->ecode;
|
2010-05-06 05:45:40 +08:00
|
|
|
ep = qhp->ep;
|
2014-04-25 03:31:53 +08:00
|
|
|
if (!internal) {
|
|
|
|
c4iw_get_ep(&qhp->ep->com);
|
2010-09-11 00:15:09 +08:00
|
|
|
terminate = 1;
|
2014-04-25 03:31:53 +08:00
|
|
|
disconnect = 1;
|
|
|
|
} else {
|
|
|
|
terminate = qhp->attr.send_term;
|
2013-08-06 23:34:40 +08:00
|
|
|
ret = rdma_fini(rhp, qhp, ep);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_ERROR:
|
2013-08-06 23:34:35 +08:00
|
|
|
t4_set_wq_in_error(&qhp->wq);
|
RDMA/cxgb4: SQ flush fix
There is a race when moving a QP from RTS->CLOSING where a SQ work
request could be posted after the FW receives the RDMA_RI/FINI WR.
The SQ work request will never get processed, and should be completed
with FLUSHED status. Function c4iw_flush_sq(), however was dropping
the oldest SQ work request when in CLOSING or IDLE states, instead of
completing the pending work request. If that oldest pending work
request was actually complete and has a CQE in the CQ, then when that
CQE is proceessed in poll_cq, we'll BUG_ON() due to the inconsistent
SQ/CQ state.
This is a very small timing hole and has only been hit once so far.
The fix is two-fold:
1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED
status regardless of the QP state.
2) In c4iw_modify_rc_qp(), always set the "in error" bit on the queue
before moving the state out of RTS. This ensures that the state
transition will not happen while another thread is in
post_rc_send(), because set_state() and post_rc_send() both aquire
the qp spinlock. Also, once we transition the state out of RTS,
subsequent calls to post_rc_send() will fail because the "in error"
bit is set. I don't think this fully closes the race where the FW
can get a FINI followed a SQ work request being posted (because
they are posted to differente EQs), but the #1 fix will handle the
issue by flushing the SQ work request.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-04-09 22:38:26 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_ERROR);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (!internal) {
|
|
|
|
abort = 1;
|
|
|
|
disconnect = 1;
|
|
|
|
ep = qhp->ep;
|
2010-09-11 00:15:36 +08:00
|
|
|
c4iw_get_ep(&qhp->ep->com);
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
goto err;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_CLOSING:
|
iw_cxgb4: refactor sq/rq drain logic
With the addition of the IB/Core drain API, iw_cxgb4 supported drain
by watching the CQs when the QP was out of RTS and signalling "drain
complete" when the last CQE is polled. This, however, doesn't fully
support the drain semantics. Namely, the drain logic is supposed to signal
"drain complete" only when the application has _processed_ the last CQE,
not just removed them from the CQ. Thus a small timing hole exists that
can cause touch after free type bugs in applications using the drain API
(nvmf, iSER, for example). So iw_cxgb4 needs a better solution.
The iWARP Verbs spec mandates that "_at some point_ after the QP is
moved to ERROR", the iWARP driver MUST synchronously fail post_send and
post_recv calls. iw_cxgb4 was currently not allowing any posts once the
QP is in ERROR. This was in part due to the fact that the HW queues for
the QP in ERROR state are disabled at this point, so there wasn't much
else to do but fail the post operation synchronously. This restriction
is what drove the first drain implementation in iw_cxgb4 that has the
above mentioned flaw.
This patch changes iw_cxgb4 to allow post_send and post_recv WRs after
the QP is moved to ERROR state for kernel mode users, thus still adhering
to the Verbs spec for user mode users, but allowing flush WRs for kernel
users. Since the HW queues are disabled, we just synthesize a CQE for
this post, queue it to the SW CQ, and then call the CQ event handler.
This enables proper drain operations for the various storage applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2016-12-22 23:04:59 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow kernel users to move to ERROR for qp draining.
|
|
|
|
*/
|
|
|
|
if (!internal && (qhp->ibqp.uobject || attrs->next_state !=
|
|
|
|
C4IW_QP_STATE_ERROR)) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
switch (attrs->next_state) {
|
|
|
|
case C4IW_QP_STATE_IDLE:
|
2010-09-11 00:15:36 +08:00
|
|
|
flush_qp(qhp);
|
|
|
|
set_state(qhp, C4IW_QP_STATE_IDLE);
|
2010-04-22 06:30:06 +08:00
|
|
|
qhp->attr.llp_stream_handle = NULL;
|
|
|
|
c4iw_put_ep(&qhp->ep->com);
|
|
|
|
qhp->ep = NULL;
|
|
|
|
wake_up(&qhp->wait);
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_ERROR:
|
|
|
|
goto err;
|
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_ERROR:
|
|
|
|
if (attrs->next_state != C4IW_QP_STATE_IDLE) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_IDLE);
|
2010-04-22 06:30:06 +08:00
|
|
|
break;
|
|
|
|
case C4IW_QP_STATE_TERMINATE:
|
|
|
|
if (!internal) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
goto err;
|
|
|
|
break;
|
|
|
|
default:
|
2017-02-10 06:23:50 +08:00
|
|
|
pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
goto out;
|
|
|
|
err:
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep,
|
2017-02-10 06:23:51 +08:00
|
|
|
qhp->wq.sq.qid);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
/* disassociate the LLP connection */
|
|
|
|
qhp->attr.llp_stream_handle = NULL;
|
2010-09-11 00:14:48 +08:00
|
|
|
if (!ep)
|
|
|
|
ep = qhp->ep;
|
2010-04-22 06:30:06 +08:00
|
|
|
qhp->ep = NULL;
|
2010-09-11 00:15:36 +08:00
|
|
|
set_state(qhp, C4IW_QP_STATE_ERROR);
|
2010-04-22 06:30:06 +08:00
|
|
|
free = 1;
|
2013-01-07 21:11:51 +08:00
|
|
|
abort = 1;
|
2010-09-11 00:15:36 +08:00
|
|
|
flush_qp(qhp);
|
2014-11-21 23:36:36 +08:00
|
|
|
wake_up(&qhp->wait);
|
2010-04-22 06:30:06 +08:00
|
|
|
out:
|
2010-09-11 00:15:36 +08:00
|
|
|
mutex_unlock(&qhp->mutex);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (terminate)
|
2010-05-06 05:45:40 +08:00
|
|
|
post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If disconnect is 1, then we need to initiate a disconnect
|
|
|
|
* on the EP. This can be a normal close (RTS->CLOSING) or
|
|
|
|
* an abnormal close (RTS/CLOSING->ERROR).
|
|
|
|
*/
|
|
|
|
if (disconnect) {
|
2010-05-06 05:45:40 +08:00
|
|
|
c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
|
|
|
|
GFP_KERNEL);
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_put_ep(&ep->com);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If free is 1, then we've disassociated the EP from the QP
|
|
|
|
* and we need to dereference the EP.
|
|
|
|
*/
|
|
|
|
if (free)
|
|
|
|
c4iw_put_ep(&ep->com);
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("exit state %d\n", qhp->attr.state);
|
2010-04-22 06:30:06 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int c4iw_destroy_qp(struct ib_qp *ib_qp)
|
|
|
|
{
|
|
|
|
struct c4iw_dev *rhp;
|
|
|
|
struct c4iw_qp *qhp;
|
|
|
|
struct c4iw_qp_attributes attrs;
|
|
|
|
|
|
|
|
qhp = to_c4iw_qp(ib_qp);
|
|
|
|
rhp = qhp->rhp;
|
|
|
|
|
|
|
|
attrs.next_state = C4IW_QP_STATE_ERROR;
|
2011-09-25 22:47:44 +08:00
|
|
|
if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
|
|
|
|
c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
|
|
|
|
else
|
|
|
|
c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
|
2010-04-22 06:30:06 +08:00
|
|
|
wait_event(qhp->wait, !qhp->ep);
|
|
|
|
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_lock_irq(&rhp->lock);
|
|
|
|
if (!list_empty(&qhp->db_fc_entry))
|
|
|
|
list_del_init(&qhp->db_fc_entry);
|
|
|
|
spin_unlock_irq(&rhp->lock);
|
2014-07-15 00:04:52 +08:00
|
|
|
free_ird(rhp, qhp->attr.max_ird);
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
|
2016-07-30 02:00:54 +08:00
|
|
|
c4iw_qp_rem_ref(ib_qp);
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
|
2010-04-22 06:30:06 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
|
|
|
|
struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
struct c4iw_dev *rhp;
|
|
|
|
struct c4iw_qp *qhp;
|
|
|
|
struct c4iw_pd *php;
|
|
|
|
struct c4iw_cq *schp;
|
|
|
|
struct c4iw_cq *rchp;
|
|
|
|
struct c4iw_create_qp_resp uresp;
|
2013-10-19 17:14:12 +08:00
|
|
|
unsigned int sqsize, rqsize;
|
2010-04-22 06:30:06 +08:00
|
|
|
struct c4iw_ucontext *ucontext;
|
|
|
|
int ret;
|
2016-02-05 14:13:28 +08:00
|
|
|
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
|
|
|
|
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ib_pd %p\n", pd);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (attrs->qp_type != IB_QPT_RC)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
php = to_c4iw_pd(pd);
|
|
|
|
rhp = php->rhp;
|
|
|
|
schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
|
|
|
|
rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
|
|
|
|
if (!schp || !rchp)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2014-07-21 23:25:15 +08:00
|
|
|
if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
|
2010-04-22 06:30:06 +08:00
|
|
|
return ERR_PTR(-E2BIG);
|
2014-07-21 23:25:15 +08:00
|
|
|
rqsize = attrs->cap.max_recv_wr + 1;
|
|
|
|
if (rqsize < 8)
|
|
|
|
rqsize = 8;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2014-07-21 23:25:15 +08:00
|
|
|
if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
|
2010-04-22 06:30:06 +08:00
|
|
|
return ERR_PTR(-E2BIG);
|
2014-07-21 23:25:15 +08:00
|
|
|
sqsize = attrs->cap.max_send_wr + 1;
|
|
|
|
if (sqsize < 8)
|
|
|
|
sqsize = 8;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
|
|
|
|
|
|
|
|
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
|
|
|
|
if (!qhp)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
2017-09-27 04:11:36 +08:00
|
|
|
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
|
2017-09-27 04:11:36 +08:00
|
|
|
if (!qhp->wr_waitp) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err_free_qhp;
|
|
|
|
}
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
qhp->wq.sq.size = sqsize;
|
2014-07-21 23:25:15 +08:00
|
|
|
qhp->wq.sq.memsize =
|
|
|
|
(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
|
|
|
|
sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
|
2013-08-06 23:34:35 +08:00
|
|
|
qhp->wq.sq.flush_cidx = -1;
|
2010-04-22 06:30:06 +08:00
|
|
|
qhp->wq.rq.size = rqsize;
|
2014-07-21 23:25:15 +08:00
|
|
|
qhp->wq.rq.memsize =
|
|
|
|
(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
|
|
|
|
sizeof(*qhp->wq.rq.queue);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (ucontext) {
|
|
|
|
qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
|
|
|
|
qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
|
2017-09-27 04:11:36 +08:00
|
|
|
ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
|
|
|
|
qhp->wr_waitp);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (ret)
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_wr_wait;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
attrs->cap.max_recv_wr = rqsize - 1;
|
|
|
|
attrs->cap.max_send_wr = sqsize - 1;
|
|
|
|
attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
|
|
|
|
|
|
|
|
qhp->rhp = rhp;
|
|
|
|
qhp->attr.pd = php->pdid;
|
|
|
|
qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
|
|
|
|
qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
|
|
|
|
qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
|
|
|
|
qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
|
|
|
|
qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
|
|
|
|
qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
|
|
|
|
qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
|
|
|
|
qhp->attr.state = C4IW_QP_STATE_IDLE;
|
|
|
|
qhp->attr.next_state = C4IW_QP_STATE_IDLE;
|
|
|
|
qhp->attr.enable_rdma_read = 1;
|
|
|
|
qhp->attr.enable_rdma_write = 1;
|
|
|
|
qhp->attr.enable_bind = 1;
|
2014-07-15 00:04:52 +08:00
|
|
|
qhp->attr.max_ord = 0;
|
|
|
|
qhp->attr.max_ird = 0;
|
2014-03-19 20:14:43 +08:00
|
|
|
qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
|
2010-04-22 06:30:06 +08:00
|
|
|
spin_lock_init(&qhp->lock);
|
2010-09-11 00:15:36 +08:00
|
|
|
mutex_init(&qhp->mutex);
|
2010-04-22 06:30:06 +08:00
|
|
|
init_waitqueue_head(&qhp->wait);
|
2016-07-30 02:00:54 +08:00
|
|
|
kref_init(&qhp->kref);
|
2016-12-22 23:40:36 +08:00
|
|
|
INIT_WORK(&qhp->free_work, free_qp_work);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (ret)
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_destroy_qp;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
2017-10-30 03:34:35 +08:00
|
|
|
if (udata && ucontext) {
|
2016-02-05 14:13:28 +08:00
|
|
|
sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
|
|
|
|
if (!sq_key_mm) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -ENOMEM;
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_remove_handle;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2016-02-05 14:13:28 +08:00
|
|
|
rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
|
|
|
|
if (!rq_key_mm) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -ENOMEM;
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_sq_key;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2016-02-05 14:13:28 +08:00
|
|
|
sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
|
|
|
|
if (!sq_db_key_mm) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -ENOMEM;
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_rq_key;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2016-02-05 14:13:28 +08:00
|
|
|
rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
|
|
|
|
if (!rq_db_key_mm) {
|
2010-04-22 06:30:06 +08:00
|
|
|
ret = -ENOMEM;
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_sq_db_key;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
2010-09-14 00:23:57 +08:00
|
|
|
if (t4_sq_onchip(&qhp->wq.sq)) {
|
2016-02-05 14:13:28 +08:00
|
|
|
ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!ma_sync_key_mm) {
|
2010-09-14 00:23:57 +08:00
|
|
|
ret = -ENOMEM;
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_rq_db_key;
|
2010-09-14 00:23:57 +08:00
|
|
|
}
|
|
|
|
uresp.flags = C4IW_QPF_ONCHIP;
|
|
|
|
} else
|
|
|
|
uresp.flags = 0;
|
2010-04-22 06:30:06 +08:00
|
|
|
uresp.qid_mask = rhp->rdev.qpmask;
|
|
|
|
uresp.sqid = qhp->wq.sq.qid;
|
|
|
|
uresp.sq_size = qhp->wq.sq.size;
|
|
|
|
uresp.sq_memsize = qhp->wq.sq.memsize;
|
|
|
|
uresp.rqid = qhp->wq.rq.qid;
|
|
|
|
uresp.rq_size = qhp->wq.rq.size;
|
|
|
|
uresp.rq_memsize = qhp->wq.rq.memsize;
|
|
|
|
spin_lock(&ucontext->mmap_lock);
|
2016-02-05 14:13:28 +08:00
|
|
|
if (ma_sync_key_mm) {
|
2010-09-14 00:23:57 +08:00
|
|
|
uresp.ma_sync_key = ucontext->key;
|
|
|
|
ucontext->key += PAGE_SIZE;
|
2013-07-26 00:48:32 +08:00
|
|
|
} else {
|
|
|
|
uresp.ma_sync_key = 0;
|
2010-09-14 00:23:57 +08:00
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
uresp.sq_key = ucontext->key;
|
|
|
|
ucontext->key += PAGE_SIZE;
|
|
|
|
uresp.rq_key = ucontext->key;
|
|
|
|
ucontext->key += PAGE_SIZE;
|
|
|
|
uresp.sq_db_gts_key = ucontext->key;
|
|
|
|
ucontext->key += PAGE_SIZE;
|
|
|
|
uresp.rq_db_gts_key = ucontext->key;
|
|
|
|
ucontext->key += PAGE_SIZE;
|
|
|
|
spin_unlock(&ucontext->mmap_lock);
|
|
|
|
ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
|
|
|
|
if (ret)
|
2017-09-27 04:11:36 +08:00
|
|
|
goto err_free_ma_sync_key;
|
2016-02-05 14:13:28 +08:00
|
|
|
sq_key_mm->key = uresp.sq_key;
|
|
|
|
sq_key_mm->addr = qhp->wq.sq.phys_addr;
|
|
|
|
sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
|
|
|
|
insert_mmap(ucontext, sq_key_mm);
|
|
|
|
rq_key_mm->key = uresp.rq_key;
|
|
|
|
rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
|
|
|
|
rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
|
|
|
|
insert_mmap(ucontext, rq_key_mm);
|
|
|
|
sq_db_key_mm->key = uresp.sq_db_gts_key;
|
|
|
|
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
|
|
|
|
sq_db_key_mm->len = PAGE_SIZE;
|
|
|
|
insert_mmap(ucontext, sq_db_key_mm);
|
|
|
|
rq_db_key_mm->key = uresp.rq_db_gts_key;
|
|
|
|
rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
|
|
|
|
rq_db_key_mm->len = PAGE_SIZE;
|
|
|
|
insert_mmap(ucontext, rq_db_key_mm);
|
|
|
|
if (ma_sync_key_mm) {
|
|
|
|
ma_sync_key_mm->key = uresp.ma_sync_key;
|
|
|
|
ma_sync_key_mm->addr =
|
|
|
|
(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
|
|
|
|
PCIE_MA_SYNC_A) & PAGE_MASK;
|
|
|
|
ma_sync_key_mm->len = PAGE_SIZE;
|
|
|
|
insert_mmap(ucontext, ma_sync_key_mm);
|
2010-09-14 00:23:57 +08:00
|
|
|
}
|
2016-12-22 23:40:36 +08:00
|
|
|
|
|
|
|
c4iw_get_ucontext(ucontext);
|
|
|
|
qhp->ucontext = ucontext;
|
2010-04-22 06:30:06 +08:00
|
|
|
}
|
|
|
|
qhp->ibqp.qp_num = qhp->wq.sq.qid;
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
INIT_LIST_HEAD(&qhp->db_fc_entry);
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
|
2017-02-10 06:23:51 +08:00
|
|
|
qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
|
|
|
|
attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
|
|
|
|
qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
|
2010-04-22 06:30:06 +08:00
|
|
|
return &qhp->ibqp;
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_ma_sync_key:
|
2016-02-05 14:13:28 +08:00
|
|
|
kfree(ma_sync_key_mm);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_rq_db_key:
|
2016-02-05 14:13:28 +08:00
|
|
|
kfree(rq_db_key_mm);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_sq_db_key:
|
2016-02-05 14:13:28 +08:00
|
|
|
kfree(sq_db_key_mm);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_rq_key:
|
2016-02-05 14:13:28 +08:00
|
|
|
kfree(rq_key_mm);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_sq_key:
|
2016-02-05 14:13:28 +08:00
|
|
|
kfree(sq_key_mm);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_remove_handle:
|
2010-04-22 06:30:06 +08:00
|
|
|
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_destroy_qp:
|
2010-04-22 06:30:06 +08:00
|
|
|
destroy_qp(&rhp->rdev, &qhp->wq,
|
|
|
|
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_wr_wait:
|
iw_cxgb4: add referencing to wait objects
For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply. This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.
If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.
This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.
So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes. If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.
So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply. And remove
the reference (and potentially free the wait object) when the reply
is processed.
The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware. So for
those cases we add c4iw_wake_up_noref().
The mr/mw, cq, and qp object create/destroy paths do need this reference
logic. For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.
So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
2017-09-27 04:13:17 +08:00
|
|
|
c4iw_put_wr_wait(qhp->wr_waitp);
|
2017-09-27 04:11:36 +08:00
|
|
|
err_free_qhp:
|
2010-04-22 06:30:06 +08:00
|
|
|
kfree(qhp);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
|
|
|
int attr_mask, struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
struct c4iw_dev *rhp;
|
|
|
|
struct c4iw_qp *qhp;
|
|
|
|
enum c4iw_qp_attr_mask mask = 0;
|
|
|
|
struct c4iw_qp_attributes attrs;
|
|
|
|
|
2017-09-27 15:35:49 +08:00
|
|
|
pr_debug("ib_qp %p\n", ibqp);
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
/* iwarp does not support the RTR state */
|
|
|
|
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
|
|
|
|
attr_mask &= ~IB_QP_STATE;
|
|
|
|
|
|
|
|
/* Make sure we still have something left to do */
|
|
|
|
if (!attr_mask)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
memset(&attrs, 0, sizeof attrs);
|
|
|
|
qhp = to_c4iw_qp(ibqp);
|
|
|
|
rhp = qhp->rhp;
|
|
|
|
|
|
|
|
attrs.next_state = c4iw_convert_state(attr->qp_state);
|
|
|
|
attrs.enable_rdma_read = (attr->qp_access_flags &
|
|
|
|
IB_ACCESS_REMOTE_READ) ? 1 : 0;
|
|
|
|
attrs.enable_rdma_write = (attr->qp_access_flags &
|
|
|
|
IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
|
|
|
|
attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
|
|
|
|
|
|
|
|
|
|
|
|
mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
|
|
|
|
mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
|
|
|
|
(C4IW_QP_ATTR_ENABLE_RDMA_READ |
|
|
|
|
C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
|
|
|
|
C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
|
|
|
|
|
2012-05-18 17:59:28 +08:00
|
|
|
/*
|
|
|
|
* Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
|
|
|
|
* ringing the queue db when we're in DB_FULL mode.
|
2014-04-25 03:32:04 +08:00
|
|
|
* Only allow this on T4 devices.
|
2012-05-18 17:59:28 +08:00
|
|
|
*/
|
|
|
|
attrs.sq_db_inc = attr->sq_psn;
|
|
|
|
attrs.rq_db_inc = attr->rq_psn;
|
|
|
|
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
|
|
|
|
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
|
2015-09-23 19:49:27 +08:00
|
|
|
if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
|
2014-04-25 03:32:04 +08:00
|
|
|
(mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
|
|
|
|
return -EINVAL;
|
2012-05-18 17:59:28 +08:00
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * c4iw_get_qp - look up a QP by number on the given device.
 * @dev: ib device to search.
 * @qpn: QP number to look up.
 *
 * Returns whatever get_qhp() yields for @qpn, cast to struct ib_qp *.
 */
struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
	struct c4iw_dev *rhp = to_c4iw_dev(dev);

	pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn);

	return (struct ib_qp *)get_qhp(rhp, qpn);
}
|
2012-05-18 17:59:33 +08:00
|
|
|
|
|
|
|
int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
|
|
|
int attr_mask, struct ib_qp_init_attr *init_attr)
|
|
|
|
{
|
|
|
|
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
|
|
|
|
|
|
|
|
memset(attr, 0, sizeof *attr);
|
|
|
|
memset(init_attr, 0, sizeof *init_attr);
|
|
|
|
attr->qp_state = to_ib_qp_state(qhp->attr.state);
|
2014-07-21 23:25:14 +08:00
|
|
|
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
|
|
|
|
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
|
|
|
|
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
|
|
|
|
init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
|
|
|
|
init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
|
|
|
|
init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
|
2012-05-18 17:59:33 +08:00
|
|
|
return 0;
|
|
|
|
}
|