/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/vmalloc.h>

#include <rdma/ib_verbs.h>

#include "iw_cxgb4.h"

#define DRV_VERSION "0.1"

MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

static int allow_db_fc_on_t5;
module_param(allow_db_fc_on_t5, int, 0644);
MODULE_PARM_DESC(allow_db_fc_on_t5,
		 "Allow DB Flow Control on T5 (default = 0)");

static int allow_db_coalescing_on_t5;
module_param(allow_db_coalescing_on_t5, int, 0644);
MODULE_PARM_DESC(allow_db_coalescing_on_t5,
		 "Allow DB Coalescing on T5 (default = 0)");

struct uld_ctx {
	struct list_head entry;
	struct cxgb4_lld_info lldi;
	struct c4iw_dev *dev;
};

static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
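
/*
 * Doorbell flow-control tuning: when the LLD reports DB EMPTY, stalled QPs
 * are resumed in chunks of DB_FC_RESUME_SIZE, and the resume logic waits
 * for the DB FIFO to drain between chunks before continuing.
 */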
#define DB_FC_RESUME_SIZE 64
#define DB_FC_RESUME_DELAY 1
#define DB_FC_DRAIN_THRESH 0

static struct dentry *c4iw_debugfs_root;
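
/*
 * debugfs support: each file below snapshots the relevant driver state into
 * a private buffer at open time; reads are then served out of that buffer
 * by debugfs_read().
 */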
struct c4iw_debugfs_data {
	struct c4iw_dev *devp;
	char *buf;
	int bufsize;
	int pos;
};

static int count_idrs(int id, void *p, void *data)
{
	int *countp = data;

	*countp = *countp + 1;
	return 0;
}

static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct c4iw_debugfs_data *d = file->private_data;

	return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
}
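
/*
 * Format one QP entry into the "qps" debugfs buffer.  Returning 1 stops
 * the idr walk once the buffer is full.
 */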
static int dump_qp(int id, void *p, void *data)
{
	struct c4iw_qp *qp = p;
	struct c4iw_debugfs_data *qpd = data;
	int space;
	int cc;

	if (id != qp->wq.sq.qid)
		return 0;

	space = qpd->bufsize - qpd->pos - 1;
	if (space == 0)
		return 1;

	if (qp->ep) {
		if (qp->ep->com.local_addr.ss_family == AF_INET) {
			struct sockaddr_in *lsin = (struct sockaddr_in *)
				&qp->ep->com.local_addr;
			struct sockaddr_in *rsin = (struct sockaddr_in *)
				&qp->ep->com.remote_addr;

			cc = snprintf(qpd->buf + qpd->pos, space,
				      "rc qp sq id %u rq id %u state %u "
				      "onchip %u ep tid %u state %u "
				      "%pI4:%u->%pI4:%u\n",
				      qp->wq.sq.qid, qp->wq.rq.qid,
				      (int)qp->attr.state,
				      qp->wq.sq.flags & T4_SQ_ONCHIP,
				      qp->ep->hwtid, (int)qp->ep->com.state,
				      &lsin->sin_addr, ntohs(lsin->sin_port),
				      &rsin->sin_addr, ntohs(rsin->sin_port));
		} else {
			struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
				&qp->ep->com.local_addr;
			struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
				&qp->ep->com.remote_addr;

			cc = snprintf(qpd->buf + qpd->pos, space,
				      "rc qp sq id %u rq id %u state %u "
				      "onchip %u ep tid %u state %u "
				      "%pI6:%u->%pI6:%u\n",
				      qp->wq.sq.qid, qp->wq.rq.qid,
				      (int)qp->attr.state,
				      qp->wq.sq.flags & T4_SQ_ONCHIP,
				      qp->ep->hwtid, (int)qp->ep->com.state,
				      &lsin6->sin6_addr,
				      ntohs(lsin6->sin6_port),
				      &rsin6->sin6_addr,
				      ntohs(rsin6->sin6_port));
		}
	} else
		cc = snprintf(qpd->buf + qpd->pos, space,
			      "qp sq id %u rq id %u state %u onchip %u\n",
			      qp->wq.sq.qid, qp->wq.rq.qid,
			      (int)qp->attr.state,
			      qp->wq.sq.flags & T4_SQ_ONCHIP);
	if (cc < space)
		qpd->pos += cc;
	return 0;
}

static int qp_release(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *qpd = file->private_data;
	if (!qpd) {
		printk(KERN_INFO "%s null qpd?\n", __func__);
		return 0;
	}
	vfree(qpd->buf);
	kfree(qpd);
	return 0;
}
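
/*
 * Open handler for the "qps" debugfs file: count the active QPs under the
 * device lock, size the snapshot buffer at 128 bytes per QP, then dump them.
 */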
static int qp_open(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *qpd;
	int ret = 0;
	int count = 1;

	qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
	if (!qpd) {
		ret = -ENOMEM;
		goto out;
	}
	qpd->devp = inode->i_private;
	qpd->pos = 0;

	spin_lock_irq(&qpd->devp->lock);
	idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
	spin_unlock_irq(&qpd->devp->lock);

	qpd->bufsize = count * 128;
	qpd->buf = vmalloc(qpd->bufsize);
	if (!qpd->buf) {
		ret = -ENOMEM;
		goto err1;
	}

	spin_lock_irq(&qpd->devp->lock);
	idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
	spin_unlock_irq(&qpd->devp->lock);

	qpd->buf[qpd->pos++] = 0;
	file->private_data = qpd;
	goto out;
err1:
	kfree(qpd);
out:
	return ret;
}

static const struct file_operations qp_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = qp_open,
	.release = qp_release,
	.read    = debugfs_read,
	.llseek  = default_llseek,
};
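
/*
 * The "stags" debugfs file lists the active STAGs, one hex value
 * (idr id << 8) per line.
 */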
static int dump_stag(int id, void *p, void *data)
{
	struct c4iw_debugfs_data *stagd = data;
	int space;
	int cc;

	space = stagd->bufsize - stagd->pos - 1;
	if (space == 0)
		return 1;

	cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
	if (cc < space)
		stagd->pos += cc;
	return 0;
}

static int stag_release(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *stagd = file->private_data;
	if (!stagd) {
		printk(KERN_INFO "%s null stagd?\n", __func__);
		return 0;
	}
	kfree(stagd->buf);
	kfree(stagd);
	return 0;
}

static int stag_open(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *stagd;
	int ret = 0;
	int count = 1;

	stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
	if (!stagd) {
		ret = -ENOMEM;
		goto out;
	}
	stagd->devp = inode->i_private;
	stagd->pos = 0;

	spin_lock_irq(&stagd->devp->lock);
	idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
	spin_unlock_irq(&stagd->devp->lock);

	stagd->bufsize = count * sizeof("0x12345678\n");
	stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
	if (!stagd->buf) {
		ret = -ENOMEM;
		goto err1;
	}

	spin_lock_irq(&stagd->devp->lock);
	idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
	spin_unlock_irq(&stagd->devp->lock);

	stagd->buf[stagd->pos++] = 0;
	file->private_data = stagd;
	goto out;
err1:
	kfree(stagd);
out:
	return ret;
}

static const struct file_operations stag_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = stag_open,
	.release = stag_release,
	.read    = debugfs_read,
	.llseek  = default_llseek,
};
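
/*
 * Doorbell state reported in the "stats" debugfs file.  In NORMAL state
 * user QPs ring their doorbells directly; in STOPPED and FLOW_CONTROL
 * states doorbell writes are submitted through the kernel until the DB
 * FIFO drains.
 */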
static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};

static int stats_show(struct seq_file *seq, void *v)
{
	struct c4iw_dev *dev = seq->private;

	seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current",
		   "Max", "Fail");
	seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
			dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
	seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
	seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
	seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
			dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
	seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
			dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
	seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n",
			dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
			dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
	seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
	seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
	seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
|
2012-05-18 17:59:30 +08:00
|
|
|
db_state_str[dev->db_state],
|
		   dev->rdev.stats.db_state_transitions,
		   dev->rdev.stats.db_fc_interruptions);
	seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
	seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
		   dev->rdev.stats.act_ofld_conn_fails);
	seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
		   dev->rdev.stats.pas_ofld_conn_fails);
	return 0;
}

static int stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, stats_show, inode->i_private);
}
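
/*
 * Writing anything to the "stats" debugfs file clears the max/fail and
 * error counters accumulated so far.
 */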
static ssize_t stats_clear(struct file *file, const char __user *buf,
		size_t count, loff_t *pos)
{
	struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;

	mutex_lock(&dev->rdev.stats.lock);
	dev->rdev.stats.pd.max = 0;
	dev->rdev.stats.pd.fail = 0;
	dev->rdev.stats.qid.max = 0;
	dev->rdev.stats.qid.fail = 0;
	dev->rdev.stats.stag.max = 0;
	dev->rdev.stats.stag.fail = 0;
	dev->rdev.stats.pbl.max = 0;
	dev->rdev.stats.pbl.fail = 0;
	dev->rdev.stats.rqt.max = 0;
	dev->rdev.stats.rqt.fail = 0;
	dev->rdev.stats.ocqp.max = 0;
	dev->rdev.stats.ocqp.fail = 0;
	dev->rdev.stats.db_full = 0;
	dev->rdev.stats.db_empty = 0;
	dev->rdev.stats.db_drop = 0;
	dev->rdev.stats.db_state_transitions = 0;
	dev->rdev.stats.tcam_full = 0;
	dev->rdev.stats.act_ofld_conn_fails = 0;
	dev->rdev.stats.pas_ofld_conn_fails = 0;
	mutex_unlock(&dev->rdev.stats.lock);
	return count;
}

static const struct file_operations stats_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = stats_open,
	.release = single_release,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.write   = stats_clear,
};
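
/*
 * The "eps" debugfs file dumps every active and listening endpoint along
 * with its state, TIDs and IPv4/IPv6 addresses.
 */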
static int dump_ep(int id, void *p, void *data)
{
	struct c4iw_ep *ep = p;
	struct c4iw_debugfs_data *epd = data;
	int space;
	int cc;

	space = epd->bufsize - epd->pos - 1;
	if (space == 0)
		return 1;

	if (ep->com.local_addr.ss_family == AF_INET) {
		struct sockaddr_in *lsin = (struct sockaddr_in *)
			&ep->com.local_addr;
		struct sockaddr_in *rsin = (struct sockaddr_in *)
			&ep->com.remote_addr;

		cc = snprintf(epd->buf + epd->pos, space,
			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
			      "history 0x%lx hwtid %d atid %d "
			      "%pI4:%d <-> %pI4:%d\n",
			      ep, ep->com.cm_id, ep->com.qp,
			      (int)ep->com.state, ep->com.flags,
			      ep->com.history, ep->hwtid, ep->atid,
			      &lsin->sin_addr, ntohs(lsin->sin_port),
			      &rsin->sin_addr, ntohs(rsin->sin_port));
	} else {
		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
			&ep->com.local_addr;
		struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
			&ep->com.remote_addr;

		cc = snprintf(epd->buf + epd->pos, space,
			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
			      "history 0x%lx hwtid %d atid %d "
			      "%pI6:%d <-> %pI6:%d\n",
			      ep, ep->com.cm_id, ep->com.qp,
			      (int)ep->com.state, ep->com.flags,
			      ep->com.history, ep->hwtid, ep->atid,
			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
	}
	if (cc < space)
		epd->pos += cc;
	return 0;
}

static int dump_listen_ep(int id, void *p, void *data)
{
	struct c4iw_listen_ep *ep = p;
	struct c4iw_debugfs_data *epd = data;
	int space;
	int cc;

	space = epd->bufsize - epd->pos - 1;
	if (space == 0)
		return 1;

	if (ep->com.local_addr.ss_family == AF_INET) {
		struct sockaddr_in *lsin = (struct sockaddr_in *)
			&ep->com.local_addr;

		cc = snprintf(epd->buf + epd->pos, space,
			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
			      "backlog %d %pI4:%d\n",
			      ep, ep->com.cm_id, (int)ep->com.state,
			      ep->com.flags, ep->stid, ep->backlog,
			      &lsin->sin_addr, ntohs(lsin->sin_port));
	} else {
		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
			&ep->com.local_addr;

		cc = snprintf(epd->buf + epd->pos, space,
			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
			      "backlog %d %pI6:%d\n",
			      ep, ep->com.cm_id, (int)ep->com.state,
			      ep->com.flags, ep->stid, ep->backlog,
			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
	}
	if (cc < space)
		epd->pos += cc;
	return 0;
}

static int ep_release(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *epd = file->private_data;
	if (!epd) {
		pr_info("%s null qpd?\n", __func__);
		return 0;
	}
	vfree(epd->buf);
	kfree(epd);
	return 0;
}

static int ep_open(struct inode *inode, struct file *file)
{
	struct c4iw_debugfs_data *epd;
	int ret = 0;
	int count = 1;

	epd = kmalloc(sizeof(*epd), GFP_KERNEL);
	if (!epd) {
		ret = -ENOMEM;
		goto out;
	}
	epd->devp = inode->i_private;
	epd->pos = 0;

	spin_lock_irq(&epd->devp->lock);
	idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
	idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
	idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
	spin_unlock_irq(&epd->devp->lock);

	epd->bufsize = count * 160;
	epd->buf = vmalloc(epd->bufsize);
	if (!epd->buf) {
		ret = -ENOMEM;
		goto err1;
	}

	spin_lock_irq(&epd->devp->lock);
	idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
	idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
	idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
	spin_unlock_irq(&epd->devp->lock);

	file->private_data = epd;
	goto out;
err1:
	kfree(epd);
out:
	return ret;
}

static const struct file_operations ep_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = ep_open,
	.release = ep_release,
	.read    = debugfs_read,
};
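
/* Create the per-device "qps", "stags", "stats" and "eps" debugfs files. */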
static int setup_debugfs(struct c4iw_dev *devp)
{
	struct dentry *de;

	if (!devp->debugfs_root)
		return -1;

	de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
				 (void *)devp, &qp_debugfs_fops);
	if (de && de->d_inode)
		de->d_inode->i_size = 4096;

	de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
				 (void *)devp, &stag_debugfs_fops);
	if (de && de->d_inode)
		de->d_inode->i_size = 4096;

	de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
				 (void *)devp, &stats_debugfs_fops);
	if (de && de->d_inode)
		de->d_inode->i_size = 4096;

	de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root,
				 (void *)devp, &ep_debugfs_fops);
	if (de && de->d_inode)
		de->d_inode->i_size = 4096;

	return 0;
}
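
/*
 * Free the qids cached in a user context, returning them to the global
 * qid table where appropriate.
 */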
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
			       struct c4iw_dev_ucontext *uctx)
{
	struct list_head *pos, *nxt;
	struct c4iw_qid_list *entry;

	mutex_lock(&uctx->lock);
	list_for_each_safe(pos, nxt, &uctx->qpids) {
		entry = list_entry(pos, struct c4iw_qid_list, entry);
		list_del_init(&entry->entry);
		if (!(entry->qid & rdev->qpmask)) {
			c4iw_put_resource(&rdev->resource.qid_table,
					  entry->qid);
			mutex_lock(&rdev->stats.lock);
			rdev->stats.qid.cur -= rdev->qpmask + 1;
			mutex_unlock(&rdev->stats.lock);
		}
		kfree(entry);
	}

	list_for_each_safe(pos, nxt, &uctx->qpids) {
		entry = list_entry(pos, struct c4iw_qid_list, entry);
		list_del_init(&entry->entry);
		kfree(entry);
	}
	mutex_unlock(&uctx->lock);
}

void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
			    struct c4iw_dev_ucontext *uctx)
{
	INIT_LIST_HEAD(&uctx->qpids);
	INIT_LIST_HEAD(&uctx->cqids);
	mutex_init(&uctx->lock);
}

/* Caller takes care of locking if needed */
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
	int err;

	c4iw_init_dev_ucontext(rdev, &rdev->uctx);

	/*
	 * qpshift is the number of bits to shift the qpid left in order
	 * to get the correct address of the doorbell for that qp.
	 */
	rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
	rdev->qpmask = rdev->lldi.udb_density - 1;
	rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
	rdev->cqmask = rdev->lldi.ucq_density - 1;
	PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
	     "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
	     "qp qid start %u size %u cq qid start %u size %u\n",
	     __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
	     rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
	     rdev->lldi.vr->pbl.start,
	     rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
	     rdev->lldi.vr->rq.size,
	     rdev->lldi.vr->qp.start,
	     rdev->lldi.vr->qp.size,
	     rdev->lldi.vr->cq.start,
	     rdev->lldi.vr->cq.size);
	PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
	     "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
	     (u64)pci_resource_start(rdev->lldi.pdev, 2),
	     rdev->lldi.db_reg,
	     rdev->lldi.gts_reg,
	     rdev->qpshift, rdev->qpmask,
	     rdev->cqshift, rdev->cqmask);

	if (c4iw_num_stags(rdev) == 0) {
		err = -EINVAL;
		goto err1;
	}

	rdev->stats.pd.total = T4_MAX_NUM_PD;
	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
	rdev->stats.qid.total = rdev->lldi.vr->qp.size;

	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
	if (err) {
		printk(KERN_ERR MOD "error %d initializing resources\n", err);
		goto err1;
	}
	err = c4iw_pblpool_create(rdev);
	if (err) {
		printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
		goto err2;
	}
	err = c4iw_rqtpool_create(rdev);
	if (err) {
		printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
		goto err3;
	}
	err = c4iw_ocqp_pool_create(rdev);
	if (err) {
		printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
		goto err4;
	}
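	/*
	 * The status page is mapped by every user process; setting a single
	 * flag in it disables user doorbell writes for all user QPs at once.
	 */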
	rdev->status_page = (struct t4_dev_status_page *)
			    __get_free_page(GFP_KERNEL);
	if (!rdev->status_page) {
		pr_err(MOD "error allocating status page\n");
		goto err4;
	}
	return 0;
err4:
	c4iw_rqtpool_destroy(rdev);
err3:
	c4iw_pblpool_destroy(rdev);
err2:
	c4iw_destroy_resource(&rdev->resource);
err1:
	return err;
}

static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
	free_page((unsigned long)rdev->status_page);
	c4iw_pblpool_destroy(rdev);
	c4iw_rqtpool_destroy(rdev);
	c4iw_destroy_resource(&rdev->resource);
}

static void c4iw_dealloc(struct uld_ctx *ctx)
{
	c4iw_rdev_close(&ctx->dev->rdev);
	idr_destroy(&ctx->dev->cqidr);
	idr_destroy(&ctx->dev->qpidr);
	idr_destroy(&ctx->dev->mmidr);
	idr_destroy(&ctx->dev->hwtid_idr);
	idr_destroy(&ctx->dev->stid_idr);
	idr_destroy(&ctx->dev->atid_idr);
	if (ctx->dev->rdev.bar2_kva)
		iounmap(ctx->dev->rdev.bar2_kva);
	if (ctx->dev->rdev.oc_mw_kva)
		iounmap(ctx->dev->rdev.oc_mw_kva);
	ib_dealloc_device(&ctx->dev->ibdev);
	ctx->dev = NULL;
}

static void c4iw_remove(struct uld_ctx *ctx)
{
	PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
	c4iw_unregister_device(ctx->dev);
	c4iw_dealloc(ctx);
}

static int rdma_supported(const struct cxgb4_lld_info *infop)
{
	return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
	       infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
	       infop->vr->cq.size > 0;
}
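
/*
 * Allocate and initialize the ib device: map BAR2 (T5) or the on-chip
 * queue memory window (T4), open the rdev resources and set up the idr
 * tables and locks.
 */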
static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
{
	struct c4iw_dev *devp;
	int ret;

	if (!rdma_supported(infop)) {
		printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
		       pci_name(infop->pdev));
		return ERR_PTR(-ENOSYS);
	}
	if (!ocqp_supported(infop))
		pr_info("%s: On-Chip Queues not supported on this device.\n",
			pci_name(infop->pdev));

	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
	if (!devp) {
		printk(KERN_ERR MOD "Cannot allocate ib device\n");
		return ERR_PTR(-ENOMEM);
	}
	devp->rdev.lldi = *infop;

	/*
	 * For T5 devices, we map all of BAR2 with WC.
	 * For T4 devices with onchip qp mem, we map only that part
	 * of BAR2 with WC.
	 */
	devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
	if (is_t5(devp->rdev.lldi.adapter_type)) {
		devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
			pci_resource_len(devp->rdev.lldi.pdev, 2));
		if (!devp->rdev.bar2_kva) {
			pr_err(MOD "Unable to ioremap BAR2\n");
			return ERR_PTR(-EINVAL);
		}
	} else if (ocqp_supported(infop)) {
		devp->rdev.oc_mw_pa =
			pci_resource_start(devp->rdev.lldi.pdev, 2) +
			pci_resource_len(devp->rdev.lldi.pdev, 2) -
			roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
		devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
			devp->rdev.lldi.vr->ocq.size);
		if (!devp->rdev.oc_mw_kva) {
			pr_err(MOD "Unable to ioremap onchip mem\n");
			return ERR_PTR(-EINVAL);
		}
	}

	PDBG(KERN_INFO MOD "ocq memory: "
	     "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
	     devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
	     devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);

	ret = c4iw_rdev_open(&devp->rdev);
	if (ret) {
		printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
		ib_dealloc_device(&devp->ibdev);
		return ERR_PTR(ret);
	}

	idr_init(&devp->cqidr);
	idr_init(&devp->qpidr);
	idr_init(&devp->mmidr);
	idr_init(&devp->hwtid_idr);
	idr_init(&devp->stid_idr);
	idr_init(&devp->atid_idr);
	spin_lock_init(&devp->lock);
	mutex_init(&devp->rdev.stats.lock);
	mutex_init(&devp->db_mutex);
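	/* QPs whose doorbell writes are currently being flow controlled. */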
	INIT_LIST_HEAD(&devp->db_fc_list);

	if (c4iw_debugfs_root) {
		devp->debugfs_root = debugfs_create_dir(
					pci_name(devp->rdev.lldi.pdev),
					c4iw_debugfs_root);
		setup_debugfs(devp);
	}
	return devp;
}
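
/*
 * ULD "add" callback: a new cxgb4 adapter has been probed.  Record its lld
 * info in a uld_ctx; the RDMA device itself is created on CXGB4_STATE_UP.
 */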
static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
{
	struct uld_ctx *ctx;
	static int vers_printed;
	int i;

	if (!vers_printed++)
		pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
			DRV_VERSION);

	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
	if (!ctx) {
		ctx = ERR_PTR(-ENOMEM);
		goto out;
	}
	ctx->lldi = *infop;

	PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
	     __func__, pci_name(ctx->lldi.pdev),
	     ctx->lldi.nchan, ctx->lldi.nrxq,
	     ctx->lldi.ntxq, ctx->lldi.nports);

	mutex_lock(&dev_mutex);
	list_add_tail(&ctx->entry, &uld_ctx_list);
	mutex_unlock(&dev_mutex);

	for (i = 0; i < ctx->lldi.nrxq; i++)
		PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
	return ctx;
}
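
/*
 * Build an skb from a CPL_RX_PKT gather list, leaving room for the
 * cpl_pass_accept_req that the connection code will synthesize in place
 * of the cpl_rx_pkt header.
 */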
static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
						 const __be64 *rsp,
						 u32 pktshift)
{
	struct sk_buff *skb;

	/*
	 * Allocate space for cpl_pass_accept_req which will be synthesized by
	 * driver. Once the driver synthesizes the request the skb will go
	 * through the regular cpl_pass_accept_req processing.
	 * The math here assumes sizeof cpl_pass_accept_req >= sizeof
	 * cpl_rx_pkt.
	 */
	skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
			sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
		  sizeof(struct rss_header) - pktshift);

	/*
	 * This skb will contain:
	 *   rss_header from the rspq descriptor (1 flit)
	 *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
	 *   space for the difference between the size of an
	 *      rx_pkt and pass_accept_req cpl (1 flit)
	 *   the packet data from the gl
	 */
	skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
				sizeof(struct rss_header));
	skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
				       sizeof(struct cpl_pass_accept_req),
				       gl->va + pktshift,
				       gl->tot_len - pktshift);
	return skb;
}

static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
			      const __be64 *rsp)
{
	unsigned int opcode = *(u8 *)rsp;
	struct sk_buff *skb;

	if (opcode != CPL_RX_PKT)
		goto out;

	skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
	if (skb == NULL)
		goto out;

	if (c4iw_handlers[opcode] == NULL) {
		pr_info("%s no handler opcode 0x%x...\n", __func__,
			opcode);
		kfree_skb(skb);
		goto out;
	}
	c4iw_handlers[opcode](dev, skb);
	return 1;
out:
	return 0;
}
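
/*
 * ULD rx handler: demultiplex CPL messages arriving on the response queue,
 * with or without a gather list, to the matching c4iw_handlers[] entry.
 */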
static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
|
|
|
|
const struct pkt_gl *gl)
|
|
|
|
{
|
2011-05-10 13:06:23 +08:00
|
|
|
struct uld_ctx *ctx = handle;
|
|
|
|
struct c4iw_dev *dev = ctx->dev;
|
2010-04-22 06:30:06 +08:00
|
|
|
struct sk_buff *skb;
|
2012-12-10 17:30:55 +08:00
|
|
|
u8 opcode;
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
if (gl == NULL) {
|
|
|
|
/* omit RSS and rsp_ctrl at end of descriptor */
|
|
|
|
unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
|
|
|
|
|
|
|
|
skb = alloc_skb(256, GFP_ATOMIC);
|
|
|
|
if (!skb)
|
|
|
|
goto nomem;
|
|
|
|
__skb_put(skb, len);
|
|
|
|
skb_copy_to_linear_data(skb, &rsp[1], len);
|
|
|
|
} else if (gl == CXGB4_MSG_AN) {
|
|
|
|
const struct rsp_ctrl *rc = (void *)rsp;
|
|
|
|
|
|
|
|
u32 qid = be32_to_cpu(rc->pldbuflen_qid);
|
|
|
|
c4iw_ev_handler(dev, qid);
|
2012-12-10 17:30:55 +08:00
|
|
|
return 0;
|
|
|
|
} else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
|
|
|
|
if (recv_rx_pkt(dev, gl, rsp))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
pr_info("%s: unexpected FL contents at %p, " \
|
|
|
|
"RSS %#llx, FL %#llx, len %u\n",
|
|
|
|
pci_name(ctx->lldi.pdev), gl->va,
|
|
|
|
(unsigned long long)be64_to_cpu(*rsp),
|
2013-01-07 21:12:00 +08:00
|
|
|
(unsigned long long)be64_to_cpu(
|
|
|
|
*(__force __be64 *)gl->va),
|
2012-12-10 17:30:55 +08:00
|
|
|
gl->tot_len);
|
|
|
|
|
2010-04-22 06:30:06 +08:00
|
|
|
return 0;
|
|
|
|
} else {
|
2010-10-18 23:16:45 +08:00
|
|
|
skb = cxgb4_pktgl_to_skb(gl, 128, 128);
|
2010-04-22 06:30:06 +08:00
|
|
|
if (unlikely(!skb))
|
|
|
|
goto nomem;
|
|
|
|
}
|
|
|
|
|
2012-12-10 17:30:55 +08:00
|
|
|
opcode = *(u8 *)rsp;
|
2014-03-21 23:10:30 +08:00
|
|
|
if (c4iw_handlers[opcode]) {
|
2010-04-22 06:30:06 +08:00
|
|
|
c4iw_handlers[opcode](dev, skb);
|
2014-03-21 23:10:30 +08:00
|
|
|
} else {
|
2012-12-10 17:30:55 +08:00
|
|
|
pr_info("%s no handler opcode 0x%x...\n", __func__,
|
2010-04-22 06:30:06 +08:00
|
|
|
opcode);
|
2014-03-21 23:10:30 +08:00
|
|
|
kfree_skb(skb);
|
|
|
|
}
|
2010-04-22 06:30:06 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
nomem:
|
|
|
|
return -1;
|
|
|
|
}
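
/*
 * ULD state-change upcall.  CXGB4_STATE_UP allocates and registers the
 * RDMA device the first time the port comes up; DOWN, START_RECOVERY and
 * DETACH tear it down, with START_RECOVERY additionally flagging
 * T4_FATAL_ERROR and dispatching IB_EVENT_DEVICE_FATAL to consumers.
 */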
static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
	struct uld_ctx *ctx = handle;

	PDBG("%s new_state %u\n", __func__, new_state);
	switch (new_state) {
	case CXGB4_STATE_UP:
		printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
		if (!ctx->dev) {
			int ret;

			ctx->dev = c4iw_alloc(&ctx->lldi);
			if (IS_ERR(ctx->dev)) {
				printk(KERN_ERR MOD
				       "%s: initialization failed: %ld\n",
				       pci_name(ctx->lldi.pdev),
				       PTR_ERR(ctx->dev));
				ctx->dev = NULL;
				break;
			}
			ret = c4iw_register_device(ctx->dev);
			if (ret) {
				printk(KERN_ERR MOD
				       "%s: RDMA registration failed: %d\n",
				       pci_name(ctx->lldi.pdev), ret);
				c4iw_dealloc(ctx);
			}
		}
		break;
	case CXGB4_STATE_DOWN:
		printk(KERN_INFO MOD "%s: Down\n",
		       pci_name(ctx->lldi.pdev));
		if (ctx->dev)
			c4iw_remove(ctx);
		break;
	case CXGB4_STATE_START_RECOVERY:
		printk(KERN_INFO MOD "%s: Fatal Error\n",
		       pci_name(ctx->lldi.pdev));
		if (ctx->dev) {
			struct ib_event event;

			ctx->dev->rdev.flags |= T4_FATAL_ERROR;
			memset(&event, 0, sizeof event);
			event.event = IB_EVENT_DEVICE_FATAL;
			event.device = &ctx->dev->ibdev;
			ib_dispatch_event(&event);
			c4iw_remove(ctx);
		}
		break;
	case CXGB4_STATE_DETACH:
		printk(KERN_INFO MOD "%s: Detach\n",
		       pci_name(ctx->lldi.pdev));
		if (ctx->dev)
			c4iw_remove(ctx);
		break;
	}
	return 0;
}
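
/*
 * Doorbell flow control (see the doorbell-drop-avoidance design notes in
 * the driver history): on a DB FULL upcall the device moves to STOPPED and
 * user doorbell writes are shut off, either by setting db_off in the
 * status page that userspace maps, or, when the status page is disabled
 * (T4_STATUS_PAGE_DISABLED, i.e. older libcxgb4), by disabling the
 * doorbell on every QP in the idr.
 */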
static int disable_qp_db(int id, void *p, void *data)
{
	struct c4iw_qp *qp = p;

	t4_disable_wq_db(&qp->wq);
	return 0;
}

static void stop_queues(struct uld_ctx *ctx)
{
	unsigned long flags;

	spin_lock_irqsave(&ctx->dev->lock, flags);
	ctx->dev->rdev.stats.db_state_transitions++;
	ctx->dev->db_state = STOPPED;
	if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
		idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
	else
		ctx->dev->rdev.status_page->db_off = 1;
	spin_unlock_irqrestore(&ctx->dev->lock, flags);
}

static int enable_qp_db(int id, void *p, void *data)
{
	struct c4iw_qp *qp = p;

	t4_enable_wq_db(&qp->wq);
	return 0;
}
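
/*
 * Ring the SQ and RQ doorbells with the writes that accumulated in
 * wq_pidx_inc while this QP sat on the flow-control list, then clear the
 * counters so normal doorbell handling can resume.
 */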
static void resume_rc_qp(struct c4iw_qp *qp)
{
	spin_lock(&qp->lock);
	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
	qp->wq.sq.wq_pidx_inc = 0;
	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
		      is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
	qp->wq.rq.wq_pidx_inc = 0;
	spin_unlock(&qp->lock);
}

static void resume_a_chunk(struct uld_ctx *ctx)
{
	int i;
	struct c4iw_qp *qp;

	for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
		qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
				      db_fc_entry);
		list_del_init(&qp->db_fc_entry);
		resume_rc_qp(qp);
		if (list_empty(&ctx->dev->db_fc_list))
			break;
	}
}
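
/*
 * Called when the LLD reports DB EMPTY: move from STOPPED to FLOW_CONTROL
 * and drain the db_fc_list in DB_FC_RESUME_SIZE chunks, ringing each QP's
 * accumulated doorbells only while the hardware DB FIFO is below its drain
 * threshold.  Once the list is empty the state returns to NORMAL and user
 * doorbell writes are re-enabled (status page db_off cleared, or per-QP
 * doorbells re-enabled on the legacy path).
 */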
static void resume_queues(struct uld_ctx *ctx)
{
	spin_lock_irq(&ctx->dev->lock);
	if (ctx->dev->db_state != STOPPED)
		goto out;
	ctx->dev->db_state = FLOW_CONTROL;
	while (1) {
		if (list_empty(&ctx->dev->db_fc_list)) {
			WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
			ctx->dev->db_state = NORMAL;
			ctx->dev->rdev.stats.db_state_transitions++;
			if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
				idr_for_each(&ctx->dev->qpidr, enable_qp_db,
					     NULL);
			} else {
				ctx->dev->rdev.status_page->db_off = 0;
			}
			break;
		} else {
			if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
			    < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
			       DB_FC_DRAIN_THRESH)) {
				resume_a_chunk(ctx);
			}
			if (!list_empty(&ctx->dev->db_fc_list)) {
				spin_unlock_irq(&ctx->dev->lock);
				if (DB_FC_RESUME_DELAY) {
					set_current_state(TASK_UNINTERRUPTIBLE);
					schedule_timeout(DB_FC_RESUME_DELAY);
				}
				spin_lock_irq(&ctx->dev->lock);
				if (ctx->dev->db_state != FLOW_CONTROL)
					break;
			}
		}
	}
out:
	if (ctx->dev->db_state != NORMAL)
		ctx->dev->rdev.stats.db_fc_interruptions++;
	spin_unlock_irq(&ctx->dev->lock);
}
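
/*
 * Snapshot helpers for doorbell-drop recovery: count_qps() and
 * add_and_ref_qp() are idr callbacks that count and reference every active
 * QP into a qp_list, and deref_qps() drops those references afterwards.
 */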
struct qp_list {
	unsigned idx;
	struct c4iw_qp **qps;
};

static int add_and_ref_qp(int id, void *p, void *data)
{
	struct qp_list *qp_listp = data;
	struct c4iw_qp *qp = p;

	c4iw_qp_add_ref(&qp->ibqp);
	qp_listp->qps[qp_listp->idx++] = qp;
	return 0;
}

static int count_qps(int id, void *p, void *data)
{
	unsigned *countp = data;
	(*countp)++;
	return 0;
}

static void deref_qps(struct qp_list *qp_list)
{
	int idx;

	for (idx = 0; idx < qp_list->idx; idx++)
		c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
}
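
/*
 * After a doorbell drop, resynchronize each captured QP: push the host's
 * SQ and RQ producer indices back to hardware with cxgb4_sync_txq_pidx(),
 * clear any accumulated wq_pidx_inc, and wait for the DB FIFO to drain
 * before touching the next QP.
 */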
|
|
|
|
|
|
|
|
static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
|
|
|
|
{
|
|
|
|
int idx;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
for (idx = 0; idx < qp_list->idx; idx++) {
|
|
|
|
struct c4iw_qp *qp = qp_list->qps[idx];
|
|
|
|
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_lock_irq(&qp->rhp->lock);
|
|
|
|
spin_lock(&qp->lock);
|
2012-05-18 17:59:30 +08:00
|
|
|
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
|
|
|
|
qp->wq.sq.qid,
|
|
|
|
t4_sq_host_wq_pidx(&qp->wq),
|
|
|
|
t4_sq_wq_size(&qp->wq));
|
|
|
|
if (ret) {
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
pr_err(KERN_ERR MOD "%s: Fatal error - "
|
2012-05-18 17:59:30 +08:00
|
|
|
"DB overflow recovery failed - "
|
|
|
|
"error syncing SQ qid %u\n",
|
|
|
|
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_unlock(&qp->lock);
|
|
|
|
spin_unlock_irq(&qp->rhp->lock);
|
2012-05-18 17:59:30 +08:00
|
|
|
return;
|
|
|
|
}
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
qp->wq.sq.wq_pidx_inc = 0;
|
2012-05-18 17:59:30 +08:00
|
|
|
|
|
|
|
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
|
|
|
|
qp->wq.rq.qid,
|
|
|
|
t4_rq_host_wq_pidx(&qp->wq),
|
|
|
|
t4_rq_wq_size(&qp->wq));
|
|
|
|
|
|
|
|
if (ret) {
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
pr_err(KERN_ERR MOD "%s: Fatal error - "
|
2012-05-18 17:59:30 +08:00
|
|
|
"DB overflow recovery failed - "
|
|
|
|
"error syncing RQ qid %u\n",
|
|
|
|
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
|
cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes
The current logic suffers from a slow response time to disable user DB
usage, and also fails to avoid DB FIFO drops under heavy load. This commit
fixes these deficiencies and makes the avoidance logic more optimal.
This is done by more efficiently notifying the ULDs of potential DB
problems, and implements a smoother flow control algorithm in iw_cxgb4,
which is the ULD that puts the most load on the DB fifo.
Design:
cxgb4:
Direct ULD callback from the DB FULL/DROP interrupt handler. This allows
the ULD to stop doing user DB writes as quickly as possible.
While user DB usage is disabled, the LLD will accumulate DB write events
for its queues. Then once DB usage is reenabled, a single DB write is
done for each queue with its accumulated write count. This reduces the
load put on the DB fifo when reenabling.
iw_cxgb4:
Instead of marking each qp to indicate DB writes are disabled, we create
a device-global status page that each user process maps. This allows
iw_cxgb4 to only set this single bit to disable all DB writes for all
user QPs vs traversing the idr of all the active QPs. If the libcxgb4
doesn't support this, then we fall back to the old approach of marking
each QP. Thus we allow the new driver to work with an older libcxgb4.
When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes
via the status page and transition the DB state to STOPPED. As user
processes see that DB writes are disabled, they call into iw_cxgb4
to submit their DB write events. Since the DB state is in STOPPED,
the QP trying to write gets enqueued on a new DB "flow control" list.
As subsequent DB writes are submitted for this flow controlled QP, the
amount of writes are accumulated for each QP on the flow control list.
So all the user QPs that are actively ringing the DB get put on this
list and the number of writes they request are accumulated.
When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq
context, we change the DB state to FLOW_CONTROL, and begin resuming all
the QPs that are on the flow control list. This logic runs on until
the flow control list is empty or we exit FLOW_CONTROL mode (due to
a DB DROP upcall, for example). QPs are removed from this list, and
their accumulated DB write counts written to the DB FIFO. Sets of QPs,
called chunks in the code, are removed at one time. The chunk size is 64.
So 64 QPs are resumed at a time, and before the next chunk is resumed, the
logic waits (blocks) for the DB FIFO to drain. This prevents resuming to
quickly and overflowing the FIFO. Once the flow control list is empty,
the db state transitions back to NORMAL and user QPs are again allowed
to write directly to the user DB register.
The algorithm is designed such that if the DB write load is high enough,
then all the DB writes get submitted by the kernel using this flow
controlled approach to avoid DB drops. As the load lightens though, we
resume to normal DB writes directly by user applications.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-03-15 00:22:08 +08:00
|
|
|
spin_unlock(&qp->lock);
|
|
|
|
spin_unlock_irq(&qp->rhp->lock);
|
2012-05-18 17:59:30 +08:00
|
|
|
return;
|
|
|
|
}
|
		qp->wq.rq.wq_pidx_inc = 0;
		spin_unlock(&qp->lock);
		spin_unlock_irq(&qp->rhp->lock);

		/* Wait for the dbfifo to drain */
		while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(usecs_to_jiffies(10));
		}
	}
}

static void recover_queues(struct uld_ctx *ctx)
{
	int count = 0;
	struct qp_list qp_list;
	int ret;

	/* slow everybody down */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_timeout(usecs_to_jiffies(1000));

	/* flush the SGE contexts */
	ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
	if (ret) {
		printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
		       pci_name(ctx->lldi.pdev));
		return;
	}

	/* Count active queues so we can build a list of queues to recover */
	spin_lock_irq(&ctx->dev->lock);
	WARN_ON(ctx->dev->db_state != STOPPED);
	ctx->dev->db_state = RECOVERY;
	idr_for_each(&ctx->dev->qpidr, count_qps, &count);

	qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
	if (!qp_list.qps) {
		printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
		       pci_name(ctx->lldi.pdev));
		spin_unlock_irq(&ctx->dev->lock);
		return;
	}
	qp_list.idx = 0;

	/* add and ref each qp so it doesn't get freed */
	idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);

	spin_unlock_irq(&ctx->dev->lock);

	/* now traverse the list in a safe context to recover the db state */
	recover_lost_dbs(ctx, &qp_list);

	/* we're almost done! deref the qps and clean up */
	deref_qps(&qp_list);
	kfree(qp_list.qps);

	spin_lock_irq(&ctx->dev->lock);
	WARN_ON(ctx->dev->db_state != RECOVERY);
	ctx->dev->db_state = STOPPED;
	spin_unlock_irq(&ctx->dev->lock);
}

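recover_queues() above walks the QP idr twice: once with count_qps() to
size the array, and once with add_and_ref_qp() to fill it. Both callbacks
are defined earlier in this file; the sketch below only illustrates the
callback shape that idr_for_each() expects and is not the driver's exact
code (in particular, the real add_and_ref_qp() also takes a reference on
each QP, as the comment above notes).

/* Illustrative callbacks only; not the driver's count_qps()/add_and_ref_qp(). */
static int count_entries_sketch(int id, void *p, void *data)
{
	int *countp = data;

	(*countp)++;		/* one hit per object in the idr */
	return 0;		/* returning non-zero would stop the walk */
}

static int collect_entries_sketch(int id, void *p, void *data)
{
	struct qp_list *qp_listp = data;

	qp_listp->qps[qp_listp->idx++] = p;	/* stash the QP pointer */
	return 0;
}
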
static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
{
	struct uld_ctx *ctx = handle;

	switch (control) {
	case CXGB4_CONTROL_DB_FULL:
		stop_queues(ctx);
		ctx->dev->rdev.stats.db_full++;
		break;
	case CXGB4_CONTROL_DB_EMPTY:
		resume_queues(ctx);
		mutex_lock(&ctx->dev->rdev.stats.lock);
		ctx->dev->rdev.stats.db_empty++;
		mutex_unlock(&ctx->dev->rdev.stats.lock);
		break;
	case CXGB4_CONTROL_DB_DROP:
		recover_queues(ctx);
		mutex_lock(&ctx->dev->rdev.stats.lock);
		ctx->dev->rdev.stats.db_drop++;
		mutex_unlock(&ctx->dev->rdev.stats.lock);
		break;
	default:
		printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
		       pci_name(ctx->lldi.pdev), control);
		break;
	}
	return 0;
}

static struct cxgb4_uld_info c4iw_uld_info = {
	.name = DRV_NAME,
	.add = c4iw_uld_add,
	.rx_handler = c4iw_uld_rx_handler,
	.state_change = c4iw_uld_state_change,
	.control = c4iw_uld_control,
};

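The .control hook assigned above is the entry point the cxgb4 LLD uses for
the DB_FULL/DB_EMPTY/DB_DROP upcalls that c4iw_uld_control() handles. The
following is a rough illustration of the call-site shape only; the real
dispatch lives in the cxgb4 driver, not in this file.

/* Illustration only: how a lower-layer driver could hand a doorbell event
 * to the ULD through the .control hook registered above. */
static void notify_db_event(const struct cxgb4_uld_info *uld, void *handle,
			    enum cxgb4_control event)
{
	if (uld->control)
		uld->control(handle, event);
}
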
static int __init c4iw_init_module(void)
{
	int err;

	err = c4iw_cm_init();
	if (err)
		return err;

	c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
	if (!c4iw_debugfs_root)
		printk(KERN_WARNING MOD
		       "could not create debugfs entry, continuing\n");

	cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);

	return 0;
}

static void __exit c4iw_exit_module(void)
{
	struct uld_ctx *ctx, *tmp;

	mutex_lock(&dev_mutex);
	list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
		if (ctx->dev)
			c4iw_remove(ctx);
		kfree(ctx);
	}
	mutex_unlock(&dev_mutex);
	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
	c4iw_cm_term();
	debugfs_remove_recursive(c4iw_debugfs_root);
}

module_init(c4iw_init_module);
module_exit(c4iw_exit_module);