2007-05-09 09:00:38 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
|
2008-07-26 01:32:52 +08:00
|
|
|
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
|
2007-05-09 09:00:38 +08:00
|
|
|
*
|
|
|
|
* This software is available to you under a choice of one of two
|
|
|
|
* licenses. You may choose to be licensed under the terms of the GNU
|
|
|
|
* General Public License (GPL) Version 2, available from the file
|
|
|
|
* COPYING in the main directory of this source tree, or the
|
|
|
|
* OpenIB.org BSD license below:
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or
|
|
|
|
* without modification, are permitted provided that the following
|
|
|
|
* conditions are met:
|
|
|
|
*
|
|
|
|
* - Redistributions of source code must retain the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer.
|
|
|
|
*
|
|
|
|
* - Redistributions in binary form must reproduce the above
|
|
|
|
* copyright notice, this list of conditions and the following
|
|
|
|
* disclaimer in the documentation and/or other materials
|
|
|
|
* provided with the distribution.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
#include <linux/errno.h>
|
2010-10-25 12:08:52 +08:00
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/inetdevice.h>
|
|
|
|
#include <linux/rtnetlink.h>
|
2010-08-26 22:19:22 +08:00
|
|
|
#include <linux/if_vlan.h>
|
2013-12-13 00:03:13 +08:00
|
|
|
#include <net/ipv6.h>
|
|
|
|
#include <net/addrconf.h>
|
2016-02-27 00:32:24 +08:00
|
|
|
#include <net/devlink.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
#include <rdma/ib_smi.h>
|
|
|
|
#include <rdma/ib_user_verbs.h>
|
2010-10-25 12:08:52 +08:00
|
|
|
#include <rdma/ib_addr.h>
|
2015-07-30 23:33:29 +08:00
|
|
|
#include <rdma/ib_cache.h>
|
|
|
|
|
|
|
|
#include <net/bonding.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
#include <linux/mlx4/driver.h>
|
|
|
|
#include <linux/mlx4/cmd.h>
|
2014-05-15 20:29:28 +08:00
|
|
|
#include <linux/mlx4/qp.h>
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
#include "mlx4_ib.h"
|
|
|
|
#include "user.h"
|
|
|
|
|
2012-06-19 16:21:35 +08:00
|
|
|
#define DRV_NAME MLX4_IB_DRV_NAME
|
2014-02-19 23:47:31 +08:00
|
|
|
#define DRV_VERSION "2.2-1"
|
|
|
|
#define DRV_RELDATE "Feb 2014"
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
|
2013-11-07 21:25:16 +08:00
|
|
|
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
|
2014-08-13 22:07:30 +08:00
|
|
|
#define MLX4_IB_CARD_REV_A0 0xA0
|
2013-08-14 18:58:31 +08:00
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
MODULE_AUTHOR("Roland Dreier");
|
|
|
|
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
|
|
|
|
MODULE_LICENSE("Dual BSD/GPL");
|
|
|
|
MODULE_VERSION(DRV_VERSION);
|
|
|
|
|
2015-02-12 15:49:43 +08:00
|
|
|
int mlx4_ib_sm_guid_assign = 0;
|
2012-08-03 16:40:49 +08:00
|
|
|
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
|
2015-02-12 15:49:43 +08:00
|
|
|
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
|
2012-08-03 16:40:49 +08:00
|
|
|
|
2008-02-05 12:20:44 +08:00
|
|
|
static const char mlx4_ib_version[] =
|
2007-05-09 09:00:38 +08:00
|
|
|
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
|
|
|
|
DRV_VERSION " (" DRV_RELDATE ")\n";
|
|
|
|
|
2012-08-03 16:40:58 +08:00
|
|
|
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
static struct workqueue_struct *wq;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static void init_query_mad(struct ib_smp *mad)
|
|
|
|
{
|
|
|
|
mad->base_version = 1;
|
|
|
|
mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
|
|
|
|
mad->class_version = 1;
|
|
|
|
mad->method = IB_MGMT_METHOD_GET;
|
|
|
|
}
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
static int check_flow_steering_support(struct mlx4_dev *dev)
|
|
|
|
{
|
2013-11-07 21:25:15 +08:00
|
|
|
int eth_num_ports = 0;
|
2013-08-14 18:58:31 +08:00
|
|
|
int ib_num_ports = 0;
|
|
|
|
|
2013-11-07 21:25:15 +08:00
|
|
|
int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
|
|
|
|
|
|
|
|
if (dmfs) {
|
|
|
|
int i;
|
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
|
|
|
|
eth_num_ports++;
|
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
|
|
|
ib_num_ports++;
|
|
|
|
dmfs &= (!ib_num_ports ||
|
|
|
|
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
|
|
|
|
(!eth_num_ports ||
|
|
|
|
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
|
|
|
|
if (ib_num_ports && mlx4_is_mfunc(dev)) {
|
|
|
|
pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
|
|
|
|
dmfs = 0;
|
2013-08-14 18:58:31 +08:00
|
|
|
}
|
|
|
|
}
|
2013-11-07 21:25:15 +08:00
|
|
|
return dmfs;
|
2013-08-14 18:58:31 +08:00
|
|
|
}
|
|
|
|
|
2014-09-11 19:11:19 +08:00
|
|
|
static int num_ib_ports(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
int ib_ports = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
|
|
|
ib_ports++;
|
|
|
|
|
|
|
|
return ib_ports;
|
|
|
|
}
|
|
|
|
|
2015-07-30 23:33:29 +08:00
|
|
|
/*
 * Look up the Ethernet net_device behind @port_num of @device.
 *
 * When the mlx4 device is bonded and the port's netdev has a master upper
 * device, the bond's currently active slave is returned instead (if there
 * is one).  The whole lookup runs under rcu_read_lock().
 *
 * Returns the net_device with a reference taken via dev_hold(), or NULL
 * when no device is registered for the port.
 */
static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
{
	struct mlx4_ib_dev *ibdev = to_mdev(device);
	struct net_device *ndev;

	rcu_read_lock();

	ndev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);

	if (ndev && mlx4_is_bonded(ibdev->dev)) {
		struct net_device *master;
		struct net_device *active = NULL;

		master = netdev_master_upper_dev_get_rcu(ndev);
		if (master)
			active = bond_option_active_slave_get_rcu(netdev_priv(master));

		/* Prefer the active slave; otherwise keep the port's own netdev. */
		if (active)
			ndev = active;
	}

	/* Take the reference before leaving the RCU read-side section. */
	if (ndev)
		dev_hold(ndev);

	rcu_read_unlock();

	return ndev;
}
|
|
|
|
|
2016-01-14 23:50:35 +08:00
|
|
|
static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
|
|
|
|
struct mlx4_ib_dev *ibdev,
|
|
|
|
u8 port_num)
|
2015-07-30 23:33:29 +08:00
|
|
|
{
|
|
|
|
struct mlx4_cmd_mailbox *mailbox;
|
|
|
|
int err;
|
|
|
|
struct mlx4_dev *dev = ibdev->dev;
|
|
|
|
int i;
|
|
|
|
union ib_gid *gid_tbl;
|
|
|
|
|
|
|
|
mailbox = mlx4_alloc_cmd_mailbox(dev);
|
|
|
|
if (IS_ERR(mailbox))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
gid_tbl = mailbox->buf;
|
|
|
|
|
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
|
|
|
|
memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
|
|
|
|
|
|
|
|
err = mlx4_cmd(dev, mailbox->dma,
|
|
|
|
MLX4_SET_PORT_GID_TABLE << 8 | port_num,
|
|
|
|
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
|
|
|
|
MLX4_CMD_WRAPPED);
|
|
|
|
if (mlx4_is_bonded(dev))
|
|
|
|
err += mlx4_cmd(dev, mailbox->dma,
|
|
|
|
MLX4_SET_PORT_GID_TABLE << 8 | 2,
|
|
|
|
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
|
|
|
|
MLX4_CMD_WRAPPED);
|
|
|
|
|
|
|
|
mlx4_free_cmd_mailbox(dev, mailbox);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-01-14 23:50:35 +08:00
|
|
|
static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
|
|
|
|
struct mlx4_ib_dev *ibdev,
|
|
|
|
u8 port_num)
|
|
|
|
{
|
|
|
|
struct mlx4_cmd_mailbox *mailbox;
|
|
|
|
int err;
|
|
|
|
struct mlx4_dev *dev = ibdev->dev;
|
|
|
|
int i;
|
|
|
|
struct {
|
|
|
|
union ib_gid gid;
|
|
|
|
__be32 rsrvd1[2];
|
|
|
|
__be16 rsrvd2;
|
|
|
|
u8 type;
|
|
|
|
u8 version;
|
|
|
|
__be32 rsrvd3;
|
|
|
|
} *gid_tbl;
|
|
|
|
|
|
|
|
mailbox = mlx4_alloc_cmd_mailbox(dev);
|
|
|
|
if (IS_ERR(mailbox))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
gid_tbl = mailbox->buf;
|
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
|
|
|
|
memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
|
|
|
|
if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
|
|
|
|
gid_tbl[i].version = 2;
|
|
|
|
if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
|
|
|
|
gid_tbl[i].type = 1;
|
|
|
|
else
|
|
|
|
memset(&gid_tbl[i].gid, 0, 12);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err = mlx4_cmd(dev, mailbox->dma,
|
|
|
|
MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
|
|
|
|
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
|
|
|
|
MLX4_CMD_WRAPPED);
|
|
|
|
if (mlx4_is_bonded(dev))
|
|
|
|
err += mlx4_cmd(dev, mailbox->dma,
|
|
|
|
MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
|
|
|
|
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
|
|
|
|
MLX4_CMD_WRAPPED);
|
|
|
|
|
|
|
|
mlx4_free_cmd_mailbox(dev, mailbox);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_update_gids(struct gid_entry *gids,
|
|
|
|
struct mlx4_ib_dev *ibdev,
|
|
|
|
u8 port_num)
|
|
|
|
{
|
|
|
|
if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
|
|
|
|
return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
|
|
|
|
|
|
|
|
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
|
|
|
|
}
|
|
|
|
|
2015-07-30 23:33:29 +08:00
|
|
|
static int mlx4_ib_add_gid(struct ib_device *device,
|
|
|
|
u8 port_num,
|
|
|
|
unsigned int index,
|
|
|
|
const union ib_gid *gid,
|
|
|
|
const struct ib_gid_attr *attr,
|
|
|
|
void **context)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *ibdev = to_mdev(device);
|
|
|
|
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
|
|
|
|
struct mlx4_port_gid_table *port_gid_table;
|
|
|
|
int free = -1, found = -1;
|
|
|
|
int ret = 0;
|
|
|
|
int hw_update = 0;
|
|
|
|
int i;
|
|
|
|
struct gid_entry *gids = NULL;
|
|
|
|
|
|
|
|
if (!rdma_cap_roce_gid_table(device, port_num))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (port_num > MLX4_MAX_PORTS)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!context)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port_gid_table = &iboe->gids[port_num - 1];
|
|
|
|
spin_lock_bh(&iboe->lock);
|
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
|
2016-01-14 23:50:33 +08:00
|
|
|
if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
|
|
|
|
(port_gid_table->gids[i].gid_type == attr->gid_type)) {
|
2015-07-30 23:33:29 +08:00
|
|
|
found = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
|
|
|
|
free = i; /* HW has space */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (found < 0) {
|
|
|
|
if (free < 0) {
|
|
|
|
ret = -ENOSPC;
|
|
|
|
} else {
|
|
|
|
port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
|
|
|
|
if (!port_gid_table->gids[free].ctx) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
} else {
|
|
|
|
*context = port_gid_table->gids[free].ctx;
|
|
|
|
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
|
2016-01-14 23:50:33 +08:00
|
|
|
port_gid_table->gids[free].gid_type = attr->gid_type;
|
2015-07-30 23:33:29 +08:00
|
|
|
port_gid_table->gids[free].ctx->real_index = free;
|
|
|
|
port_gid_table->gids[free].ctx->refcount = 1;
|
|
|
|
hw_update = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
|
|
|
|
*context = ctx;
|
|
|
|
ctx->refcount++;
|
|
|
|
}
|
|
|
|
if (!ret && hw_update) {
|
|
|
|
gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
|
|
|
|
if (!gids) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
} else {
|
2016-01-14 23:50:33 +08:00
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
|
2015-07-30 23:33:29 +08:00
|
|
|
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
|
2016-01-14 23:50:33 +08:00
|
|
|
gids[i].gid_type = port_gid_table->gids[i].gid_type;
|
|
|
|
}
|
2015-07-30 23:33:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&iboe->lock);
|
|
|
|
|
|
|
|
if (!ret && hw_update) {
|
|
|
|
ret = mlx4_ib_update_gids(gids, ibdev, port_num);
|
|
|
|
kfree(gids);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_del_gid(struct ib_device *device,
|
|
|
|
u8 port_num,
|
|
|
|
unsigned int index,
|
|
|
|
void **context)
|
|
|
|
{
|
|
|
|
struct gid_cache_context *ctx = *context;
|
|
|
|
struct mlx4_ib_dev *ibdev = to_mdev(device);
|
|
|
|
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
|
|
|
|
struct mlx4_port_gid_table *port_gid_table;
|
|
|
|
int ret = 0;
|
|
|
|
int hw_update = 0;
|
|
|
|
struct gid_entry *gids = NULL;
|
|
|
|
|
|
|
|
if (!rdma_cap_roce_gid_table(device, port_num))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (port_num > MLX4_MAX_PORTS)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port_gid_table = &iboe->gids[port_num - 1];
|
|
|
|
spin_lock_bh(&iboe->lock);
|
|
|
|
if (ctx) {
|
|
|
|
ctx->refcount--;
|
|
|
|
if (!ctx->refcount) {
|
|
|
|
unsigned int real_index = ctx->real_index;
|
|
|
|
|
|
|
|
memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
|
|
|
|
kfree(port_gid_table->gids[real_index].ctx);
|
|
|
|
port_gid_table->gids[real_index].ctx = NULL;
|
|
|
|
hw_update = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!ret && hw_update) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
|
|
|
|
if (!gids) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
|
|
|
|
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&iboe->lock);
|
|
|
|
|
|
|
|
if (!ret && hw_update) {
|
|
|
|
ret = mlx4_ib_update_gids(gids, ibdev, port_num);
|
|
|
|
kfree(gids);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
|
|
|
|
u8 port_num, int index)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
|
|
|
|
struct gid_cache_context *ctx = NULL;
|
|
|
|
union ib_gid gid;
|
|
|
|
struct mlx4_port_gid_table *port_gid_table;
|
|
|
|
int real_index = -EINVAL;
|
|
|
|
int i;
|
|
|
|
int ret;
|
|
|
|
unsigned long flags;
|
2016-01-14 23:50:33 +08:00
|
|
|
struct ib_gid_attr attr;
|
2015-07-30 23:33:29 +08:00
|
|
|
|
|
|
|
if (port_num > MLX4_MAX_PORTS)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (mlx4_is_bonded(ibdev->dev))
|
|
|
|
port_num = 1;
|
|
|
|
|
|
|
|
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
|
|
|
|
return index;
|
|
|
|
|
2016-01-14 23:50:33 +08:00
|
|
|
ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
|
2015-07-30 23:33:29 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-01-14 23:50:33 +08:00
|
|
|
if (attr.ndev)
|
|
|
|
dev_put(attr.ndev);
|
|
|
|
|
2015-07-30 23:33:29 +08:00
|
|
|
if (!memcmp(&gid, &zgid, sizeof(gid)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&iboe->lock, flags);
|
|
|
|
port_gid_table = &iboe->gids[port_num - 1];
|
|
|
|
|
|
|
|
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
|
2016-01-14 23:50:33 +08:00
|
|
|
if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
|
|
|
|
attr.gid_type == port_gid_table->gids[i].gid_type) {
|
2015-07-30 23:33:29 +08:00
|
|
|
ctx = port_gid_table->gids[i].ctx;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ctx)
|
|
|
|
real_index = ctx->real_index;
|
|
|
|
spin_unlock_irqrestore(&iboe->lock, flags);
|
|
|
|
return real_index;
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static int mlx4_ib_query_device(struct ib_device *ibdev,
|
2015-06-11 21:35:25 +08:00
|
|
|
struct ib_device_attr *props,
|
|
|
|
struct ib_udata *uhw)
|
2007-05-09 09:00:38 +08:00
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *dev = to_mdev(ibdev);
|
|
|
|
struct ib_smp *in_mad = NULL;
|
|
|
|
struct ib_smp *out_mad = NULL;
|
|
|
|
int err = -ENOMEM;
|
2014-09-11 19:11:19 +08:00
|
|
|
int have_ib_ports;
|
2015-06-11 21:35:27 +08:00
|
|
|
struct mlx4_uverbs_ex_query_device cmd;
|
|
|
|
struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
|
|
|
|
struct mlx4_clock_params clock_params;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2015-06-11 21:35:27 +08:00
|
|
|
if (uhw->inlen) {
|
|
|
|
if (uhw->inlen < sizeof(cmd))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (cmd.comp_mask)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (cmd.reserved)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2015-06-11 21:35:25 +08:00
|
|
|
|
2015-06-11 21:35:27 +08:00
|
|
|
resp.response_length = offsetof(typeof(resp), response_length) +
|
|
|
|
sizeof(resp.response_length);
|
2007-05-09 09:00:38 +08:00
|
|
|
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
|
|
|
|
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
|
|
|
|
if (!in_mad || !out_mad)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
init_query_mad(in_mad);
|
|
|
|
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
|
|
|
|
1, NULL, NULL, in_mad, out_mad);
|
2007-05-09 09:00:38 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
memset(props, 0, sizeof *props);
|
|
|
|
|
2014-09-11 19:11:19 +08:00
|
|
|
have_ib_ports = num_ib_ports(dev->dev);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
props->fw_ver = dev->dev->caps.fw_ver;
|
|
|
|
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
|
|
|
|
IB_DEVICE_PORT_ACTIVE_EVENT |
|
|
|
|
IB_DEVICE_SYS_IMAGE_GUID |
|
2008-07-15 14:48:48 +08:00
|
|
|
IB_DEVICE_RC_RNR_NAK_GEN |
|
|
|
|
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
|
2007-05-09 09:00:38 +08:00
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
|
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
|
2014-09-11 19:11:19 +08:00
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
|
2007-05-09 09:00:38 +08:00
|
|
|
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
|
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
|
2008-04-17 12:01:10 +08:00
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
|
2014-08-13 22:07:30 +08:00
|
|
|
if (dev->dev->caps.max_gso_sz &&
|
|
|
|
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
|
|
|
|
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
|
2008-04-17 12:09:27 +08:00
|
|
|
props->device_cap_flags |= IB_DEVICE_UD_TSO;
|
2008-07-23 23:12:26 +08:00
|
|
|
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
|
|
|
|
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
|
|
|
|
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
|
|
|
|
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
|
|
|
|
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
|
2011-06-03 02:32:15 +08:00
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_XRC;
|
2013-02-07 00:19:16 +08:00
|
|
|
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
|
|
|
|
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
|
|
|
|
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
|
|
|
|
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
|
|
|
|
else
|
|
|
|
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
|
2013-11-07 21:25:15 +08:00
|
|
|
if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
|
2013-08-14 18:58:31 +08:00
|
|
|
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
|
2013-02-07 00:19:16 +08:00
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2015-09-23 04:18:11 +08:00
|
|
|
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
|
|
|
|
0xffffff;
|
2015-01-25 22:59:35 +08:00
|
|
|
props->vendor_part_id = dev->dev->persist->pdev->device;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
|
|
|
|
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
|
|
|
|
|
|
|
|
props->max_mr_size = ~0ull;
|
|
|
|
props->page_size_cap = dev->dev->caps.page_size_cap;
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
props->max_qp = dev->dev->quotas.qp;
|
2012-05-24 21:08:08 +08:00
|
|
|
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->max_sge = min(dev->dev->caps.max_sq_sg,
|
|
|
|
dev->dev->caps.max_rq_sg);
|
2015-10-28 19:28:15 +08:00
|
|
|
props->max_sge_rd = MLX4_MAX_SGE_RD;
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
props->max_cq = dev->dev->quotas.cq;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->max_cqe = dev->dev->caps.max_cqes;
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
props->max_mr = dev->dev->quotas.mpt;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
|
|
|
|
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
|
|
|
|
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
|
|
|
|
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
|
mlx4: Structures and init/teardown for VF resource quotas
This is step #1 for implementing SRIOV resource quotas for VFs.
Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.
Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC,
VLAN, and Counters.
The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).
For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".
The quota (i.e., max) for the VFs and the PF is:
The free-pool amount (50% of the real max) + the guaranteed minimum
For MACs:
Guarantee 2 MACs per VF/PF per port. As a result, since we have only
128 MACs per port, reduce the allowable number of VFs from 64 to 63.
Any remaining MACs are put into a free pool.
For VLANs:
For the PF, the per-port quota is 128 and guarantee is 64
(to allow the PF to register at least a VLAN per VF in VST mode).
For the VFs, the per-port quota is 64 and the guarantee is 0.
We assume that VGT VFs are trusted not to abuse the VLAN resource.
For Counters:
For all functions (PF and VFs), the quota is 128 and the guarantee is 0.
In this patch, we define the needed structures, which are added to the
resource-tracker struct. In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.
As part of the implementation, we introduce a new field in
mlx4_dev: quotas. This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).
The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-03 16:03:23 +08:00
|
|
|
props->max_srq = dev->dev->quotas.srq;
|
2007-06-22 04:39:10 +08:00
|
|
|
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->max_srq_sge = dev->dev->caps.max_srq_sge;
|
2010-10-07 22:24:16 +08:00
|
|
|
props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
|
2007-05-09 09:00:38 +08:00
|
|
|
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
|
|
|
|
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
|
|
|
|
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
|
2012-07-11 23:39:29 +08:00
|
|
|
props->masked_atomic_cap = props->atomic_cap;
|
2007-06-18 23:15:02 +08:00
|
|
|
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
|
2007-05-09 09:00:38 +08:00
|
|
|
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
|
|
|
|
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
|
|
|
|
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
|
|
|
|
props->max_mcast_grp;
|
2012-02-10 00:10:06 +08:00
|
|
|
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
|
2015-06-11 21:35:27 +08:00
|
|
|
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
|
|
|
|
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2015-07-01 19:31:02 +08:00
|
|
|
if (!mlx4_is_slave(dev->dev))
|
|
|
|
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
|
2015-06-11 21:35:27 +08:00
|
|
|
|
|
|
|
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
|
|
|
|
resp.response_length += sizeof(resp.hca_core_clock_offset);
|
2015-07-01 19:31:02 +08:00
|
|
|
if (!err && !mlx4_is_slave(dev->dev)) {
|
|
|
|
resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
|
|
|
|
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
|
|
|
|
}
|
2015-06-11 21:35:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (uhw->outlen) {
|
|
|
|
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
out:
|
|
|
|
kfree(in_mad);
|
|
|
|
kfree(out_mad);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
static enum rdma_link_layer
|
|
|
|
mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
|
2007-05-09 09:00:38 +08:00
|
|
|
{
|
2010-10-25 12:08:52 +08:00
|
|
|
struct mlx4_dev *dev = to_mdev(device)->dev;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2011-12-13 12:10:41 +08:00
|
|
|
return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
|
2010-10-25 12:08:52 +08:00
|
|
|
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
/*
 * Fill @props for a port whose link layer is InfiniBand.
 *
 * A PORT_INFO MAD is issued through mlx4_MAD_IFC() and the port attributes
 * are decoded from the reply payload.  If the decoded speed is QDR, a second
 * (MLX4_ATTR_EXTENDED_PORT_INFO) query is issued to detect FDR-10.
 *
 * @netw_view: when non-zero, the GID table length is read from the MAD reply
 *             rather than from the device caps; on a multi-function device it
 *             additionally sets MLX4_MAD_IFC_NET_VIEW on the queries.
 *
 * Returns 0 on success, -ENOMEM if either MAD buffer cannot be allocated, or
 * the error reported by mlx4_MAD_IFC().
 */
static int ib_link_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props, int netw_view)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	int ext_active_speed;
	u8 *reply;
	int err = -ENOMEM;

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	reply = out_mad->data;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	if (mlx4_is_mfunc(mdev->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(mdev, mad_ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	/* Decode the reply payload into the generic port attributes. */
	props->lid = be16_to_cpup((__be16 *)(reply + 16));
	props->lmc = reply[34] & 0x7;
	props->sm_lid = be16_to_cpup((__be16 *)(reply + 18));
	props->sm_sl = reply[36] & 0xf;
	props->state = reply[32] & 0xf;
	props->phys_state = reply[33] >> 4;
	props->port_cap_flags = be32_to_cpup((__be32 *)(reply + 20));
	if (netw_view)
		props->gid_tbl_len = reply[50];
	else
		props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
	props->max_msg_sz = mdev->dev->caps.max_msg_sz;
	props->pkey_tbl_len = mdev->dev->caps.pkey_table_len[port];
	props->bad_pkey_cntr = be16_to_cpup((__be16 *)(reply + 46));
	props->qkey_viol_cntr = be16_to_cpup((__be16 *)(reply + 48));
	props->active_width = reply[31] & 0xf;
	props->active_speed = reply[35] >> 4;
	props->max_mtu = reply[41] & 0xf;
	props->active_mtu = reply[36] >> 4;
	props->subnet_timeout = reply[51] & 0x1f;
	props->max_vl_num = reply[37] >> 4;
	props->init_type_reply = reply[41] >> 4;

	/* Check if extended speeds (EDR/FDR/...) are supported */
	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
		ext_active_speed = reply[62] >> 4;

		/* Any other value leaves the speed decoded above untouched. */
		if (ext_active_speed == 1)
			props->active_speed = IB_SPEED_FDR;
		else if (ext_active_speed == 2)
			props->active_speed = IB_SPEED_EDR;
	}

	/* If reported active speed is QDR, check if is FDR-10 */
	if (props->active_speed == IB_SPEED_QDR) {
		init_query_mad(in_mad);
		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
		in_mad->attr_mod = cpu_to_be32(port);

		/* This query overwrites the payload that @reply points at. */
		err = mlx4_MAD_IFC(mdev, mad_ifc_flags, port,
				   NULL, NULL, in_mad, out_mad);
		if (err)
			goto out;

		/* Checking LinkSpeedActive for FDR-10 */
		if (reply[15] & 0x1)
			props->active_speed = IB_SPEED_FDR10;
	}

	/* Avoid wrong speed value returned by FW if the IB link is down. */
	if (props->state == IB_PORT_DOWN)
		props->active_speed = IB_SPEED_SDR;

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
|
|
|
|
|
|
|
|
static u8 state_to_phys_state(enum ib_port_state state)
|
|
|
|
{
|
|
|
|
return state == IB_PORT_ACTIVE ? 5 : 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fill @props for a port whose link layer is Ethernet.
 *
 * The port is queried with the QUERY_PORT firmware command to derive the
 * active width; most remaining attributes are fixed values or come from the
 * device caps.  State, physical state and active MTU default to "down" /
 * IB_MTU_256 and are refined from the port's net_device (or, when the device
 * is bonded, from its master upper device) if one is registered.
 *
 * @netw_view is not used by this function.
 *
 * Returns 0 on success, the mailbox allocation error, or the error returned
 * by mlx4_cmd_box().
 */
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
			       struct ib_port_attr *props, int netw_view)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct mlx4_ib_iboe *iboe = &mdev->iboe;
	int is_bonded = mlx4_is_bonded(mdev->dev);
	struct mlx4_cmd_mailbox *mailbox;
	struct net_device *ndev;
	enum ib_mtu mtu;
	int err;

	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
			   MLX4_CMD_WRAPPED);
	if (err)
		goto out;

	if (((u8 *)mailbox->buf)[5] == 0x40)
		props->active_width = IB_WIDTH_4X;
	else
		props->active_width = IB_WIDTH_1X;
	props->active_speed = IB_SPEED_QDR;
	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
	props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
	props->max_msg_sz = mdev->dev->caps.max_msg_sz;
	props->pkey_tbl_len = 1;
	props->max_mtu = IB_MTU_4096;
	props->max_vl_num = 2;

	/* Defaults reported when no net_device is available for the port. */
	props->state = IB_PORT_DOWN;
	props->phys_state = state_to_phys_state(props->state);
	props->active_mtu = IB_MTU_256;

	spin_lock_bh(&iboe->lock);
	ndev = iboe->netdevs[port - 1];
	if (ndev && is_bonded) {
		rcu_read_lock(); /* required to get upper dev */
		ndev = netdev_master_upper_dev_get_rcu(ndev);
		rcu_read_unlock();
	}

	if (ndev) {
		mtu = iboe_get_mtu(ndev->mtu);
		props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;

		if (netif_running(ndev) && netif_carrier_ok(ndev))
			props->state = IB_PORT_ACTIVE;
		else
			props->state = IB_PORT_DOWN;
		props->phys_state = state_to_phys_state(props->state);
	}
	spin_unlock_bh(&iboe->lock);

out:
	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
	return err;
}
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
/*
 * Query the attributes of @port into @props.
 *
 * @props is zeroed first, then filled by the InfiniBand or Ethernet specific
 * helper according to the port's link layer.  @netw_view is forwarded to the
 * helper unchanged.
 *
 * Returns whatever the selected helper returns (0 on success).
 */
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			 struct ib_port_attr *props, int netw_view)
{
	memset(props, 0, sizeof(*props));

	if (mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
		return ib_link_query_port(ibdev, port, props, netw_view);

	return eth_link_query_port(ibdev, port, props, netw_view);
}
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
/*
 * Port query entry point that always asks for the host view, i.e. calls
 * __mlx4_ib_query_port() with netw_view == 0.
 */
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	const int host_view = 0;

	return __mlx4_ib_query_port(ibdev, port, props, host_view);
}
|
|
|
|
|
2012-08-03 16:40:49 +08:00
|
|
|
/*
 * Read GID @index of @port into @gid using MAD queries.
 *
 * The first 8 bytes of the GID are copied from a PORT_INFO reply and the
 * last 8 bytes from the GUID_INFO block that contains @index.  On a
 * multi-function device queried with the host view (@netw_view == 0), any
 * @index other than 0 gets an all-zero lower half and no GUID_INFO query
 * is issued.
 *
 * Returns 0 on success, -ENOMEM if a MAD buffer cannot be allocated, or the
 * error reported by mlx4_MAD_IFC().
 */
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			union ib_gid *gid, int netw_view)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	int err = -ENOMEM;

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	if (mlx4_is_mfunc(dev->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	memcpy(gid->raw, out_mad->data + 8, 8);

	if (mlx4_is_mfunc(dev->dev) && !netw_view && index) {
		/* For any index > 0, return the null guid (err is 0 here) */
		memset(gid->raw + 8, 0, 8);
		goto out;
	}

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
	in_mad->attr_mod = cpu_to_be32(index / 8);

	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
			   NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

	/* Each GUID_INFO block holds eight 8-byte entries. */
	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
/*
 * GID query entry point.
 *
 * IB ports are served by __mlx4_ib_query_gid() with the host view.  RoCE
 * ports that have a RoCE GID table are served from the cached GID table; an
 * -EAGAIN result from the cache is reported as success with @gid set to the
 * zero GID.  Every other port yields -ENODEV.
 */
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	int ret;

	if (rdma_protocol_ib(ibdev, port))
		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);

	if (!rdma_protocol_roce(ibdev, port) ||
	    !rdma_cap_roce_gid_table(ibdev, port))
		return -ENODEV;

	ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
	if (ret != -EAGAIN)
		return ret;

	memcpy(gid, &zgid, sizeof(*gid));
	return 0;
}
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
/*
 * Read P_Key @index of @port into @pkey using a PKEY_TABLE MAD query.
 *
 * The query addresses the 32-entry block containing @index and the entry is
 * converted from big-endian.  On a multi-function device a non-zero
 * @netw_view adds MLX4_MAD_IFC_NET_VIEW to the query flags.
 *
 * Returns 0 on success, -ENOMEM if a MAD buffer cannot be allocated, or the
 * error reported by mlx4_MAD_IFC().
 */
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			 u16 *pkey, int netw_view)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	__be16 *table;
	int err = -ENOMEM;

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
	in_mad->attr_mod = cpu_to_be32(index / 32);

	if (mlx4_is_mfunc(mdev->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(mdev, mad_ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	table = (__be16 *)out_mad->data;
	*pkey = be16_to_cpu(table[index % 32]);

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
/*
 * P_Key query entry point; calls __mlx4_ib_query_pkey() with
 * netw_view == 0.
 */
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	const int netw_view = 0;

	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, netw_view);
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
|
|
|
|
struct ib_device_modify *props)
|
|
|
|
{
|
2010-10-04 20:11:34 +08:00
|
|
|
struct mlx4_cmd_mailbox *mailbox;
|
2012-08-03 16:26:45 +08:00
|
|
|
unsigned long flags;
|
2010-10-04 20:11:34 +08:00
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2010-10-04 20:11:34 +08:00
|
|
|
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
|
|
|
|
return 0;
|
|
|
|
|
2012-08-03 16:40:54 +08:00
|
|
|
if (mlx4_is_slave(to_mdev(ibdev)->dev))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2012-08-03 16:26:45 +08:00
|
|
|
spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
|
2010-10-04 20:11:34 +08:00
|
|
|
memcpy(ibdev->node_desc, props->node_desc, 64);
|
2012-08-03 16:26:45 +08:00
|
|
|
spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
|
2010-10-04 20:11:34 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If possible, pass node desc to FW, so it can generate
|
|
|
|
* a 144 trap. If cmd fails, just ignore.
|
|
|
|
*/
|
|
|
|
mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
|
|
|
|
if (IS_ERR(mailbox))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
memcpy(mailbox->buf, props->node_desc, 64);
|
|
|
|
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
|
2012-08-03 16:40:54 +08:00
|
|
|
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
|
2010-10-04 20:11:34 +08:00
|
|
|
|
|
|
|
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-05-29 21:31:01 +08:00
|
|
|
static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
|
|
|
|
u32 cap_mask)
|
2007-05-09 09:00:38 +08:00
|
|
|
{
|
|
|
|
struct mlx4_cmd_mailbox *mailbox;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
|
|
|
|
if (IS_ERR(mailbox))
|
|
|
|
return PTR_ERR(mailbox);
|
|
|
|
|
2007-06-18 23:15:02 +08:00
|
|
|
if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
|
|
|
|
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
|
|
|
|
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
|
|
|
|
} else {
|
|
|
|
((u8 *) mailbox->buf)[3] = !!reset_qkey_viols;
|
|
|
|
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2015-04-02 21:31:19 +08:00
|
|
|
err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
|
|
|
|
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
|
|
|
|
MLX4_CMD_WRAPPED);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
mlx4_free_cmd_mailbox(dev->dev, mailbox);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Modify the capability mask of @port and optionally reset its Q_Key
 * violation counter (IB_PORT_RESET_QKEY_CNTR in @mask).
 *
 * The new mask is the current port_cap_flags with @props->set_port_cap_mask
 * added and @props->clr_port_cap_mask removed; the read-modify-write is
 * serialized by cap_mask_mutex.
 *
 * Returns 0 for Ethernet ports without touching the hardware, otherwise the
 * result of the port query or of mlx4_ib_SET_PORT().
 */
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 cap_mask;
	int err;

	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
	 * of whether port link layer is ETH or IB. For ETH ports, qkey
	 * violations and port capabilities are not meaningful.
	 */
	if (mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
		return 0;

	mutex_lock(&mdev->cap_mask_mutex);

	err = mlx4_ib_query_port(ibdev, port, &attr);
	if (!err) {
		cap_mask = attr.port_cap_flags | props->set_port_cap_mask;
		cap_mask &= ~props->clr_port_cap_mask;

		err = mlx4_ib_SET_PORT(mdev, port,
				       !!(mask & IB_PORT_RESET_QKEY_CNTR),
				       cap_mask);
	}

	mutex_unlock(&mdev->cap_mask_mutex);
	return err;
}
|
|
|
|
|
|
|
|
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
|
|
|
|
struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *dev = to_mdev(ibdev);
|
|
|
|
struct mlx4_ib_ucontext *context;
|
2012-10-21 22:59:24 +08:00
|
|
|
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
|
2007-05-09 09:00:38 +08:00
|
|
|
struct mlx4_ib_alloc_ucontext_resp resp;
|
|
|
|
int err;
|
|
|
|
|
2009-09-06 11:24:50 +08:00
|
|
|
if (!dev->ib_active)
|
|
|
|
return ERR_PTR(-EAGAIN);
|
|
|
|
|
2012-10-21 22:59:24 +08:00
|
|
|
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
|
|
|
|
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
|
|
|
|
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
|
|
|
|
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
|
|
|
|
} else {
|
|
|
|
resp.dev_caps = dev->dev->caps.userspace_caps;
|
|
|
|
resp.qp_tab_size = dev->dev->caps.num_qps;
|
|
|
|
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
|
|
|
|
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
|
|
|
|
resp.cqe_size = dev->dev->caps.cqe_size;
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2015-08-13 23:32:06 +08:00
|
|
|
context = kzalloc(sizeof(*context), GFP_KERNEL);
|
2007-05-09 09:00:38 +08:00
|
|
|
if (!context)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
|
|
|
|
if (err) {
|
|
|
|
kfree(context);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&context->db_page_list);
|
|
|
|
mutex_init(&context->db_page_mutex);
|
|
|
|
|
2012-10-21 22:59:24 +08:00
|
|
|
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
|
|
|
|
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
|
|
|
|
else
|
|
|
|
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
if (err) {
|
|
|
|
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
|
|
|
|
kfree(context);
|
|
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
}
|
|
|
|
|
|
|
|
return &context->ibucontext;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
|
|
|
|
|
|
|
|
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
|
|
|
|
kfree(context);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-08-13 23:32:06 +08:00
|
|
|
static void mlx4_ib_vma_open(struct vm_area_struct *area)
|
|
|
|
{
|
|
|
|
/* vma_open is called when a new VMA is created on top of our VMA.
|
|
|
|
* This is done through either mremap flow or split_vma (usually due
|
|
|
|
* to mlock, madvise, munmap, etc.). We do not support a clone of the
|
|
|
|
* vma, as this VMA is strongly hardware related. Therefore we set the
|
|
|
|
* vm_ops of the newly created/cloned VMA to NULL, to prevent it from
|
|
|
|
* calling us again and trying to do incorrect actions. We assume that
|
|
|
|
* the original vma size is exactly a single page that there will be no
|
|
|
|
* "splitting" operations on.
|
|
|
|
*/
|
|
|
|
area->vm_ops = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mlx4_ib_vma_close(struct vm_area_struct *area)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
|
|
|
|
|
|
|
|
/* It's guaranteed that all VMAs opened on a FD are closed before the
|
|
|
|
* file itself is closed, therefore no sync is needed with the regular
|
|
|
|
* closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
|
|
|
|
* with accessing the vma as part of mlx4_ib_disassociate_ucontext.
|
|
|
|
* The close operation is usually called under mm->mmap_sem except when
|
|
|
|
* process is exiting. The exiting case is handled explicitly as part
|
|
|
|
* of mlx4_ib_disassociate_ucontext.
|
|
|
|
*/
|
|
|
|
mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
|
|
|
|
area->vm_private_data;
|
|
|
|
|
|
|
|
/* set the vma context pointer to null in the mlx4_ib driver's private
|
|
|
|
* data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
|
|
|
|
*/
|
|
|
|
mlx4_ib_vma_priv_data->vma = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct vm_operations_struct mlx4_ib_vm_ops = {
|
|
|
|
.open = mlx4_ib_vma_open,
|
|
|
|
.close = mlx4_ib_vma_close
|
|
|
|
};
|
|
|
|
|
|
|
|
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int ret = 0;
|
|
|
|
struct vm_area_struct *vma;
|
|
|
|
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
|
|
|
|
struct task_struct *owning_process = NULL;
|
|
|
|
struct mm_struct *owning_mm = NULL;
|
|
|
|
|
|
|
|
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
|
|
|
|
if (!owning_process)
|
|
|
|
return;
|
|
|
|
|
|
|
|
owning_mm = get_task_mm(owning_process);
|
|
|
|
if (!owning_mm) {
|
|
|
|
pr_info("no mm, disassociate ucontext is pending task termination\n");
|
|
|
|
while (1) {
|
|
|
|
/* make sure that task is dead before returning, it may
|
|
|
|
* prevent a rare case of module down in parallel to a
|
|
|
|
* call to mlx4_ib_vma_close.
|
|
|
|
*/
|
|
|
|
put_task_struct(owning_process);
|
|
|
|
msleep(1);
|
|
|
|
owning_process = get_pid_task(ibcontext->tgid,
|
|
|
|
PIDTYPE_PID);
|
|
|
|
if (!owning_process ||
|
|
|
|
owning_process->state == TASK_DEAD) {
|
|
|
|
pr_info("disassociate ucontext done, task was terminated\n");
|
|
|
|
/* in case task was dead need to release the task struct */
|
|
|
|
if (owning_process)
|
|
|
|
put_task_struct(owning_process);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* need to protect from a race on closing the vma as part of
|
|
|
|
* mlx4_ib_vma_close().
|
|
|
|
*/
|
|
|
|
down_read(&owning_mm->mmap_sem);
|
|
|
|
for (i = 0; i < HW_BAR_COUNT; i++) {
|
|
|
|
vma = context->hw_bar_info[i].vma;
|
|
|
|
if (!vma)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = zap_vma_ptes(context->hw_bar_info[i].vma,
|
|
|
|
context->hw_bar_info[i].vma->vm_start,
|
|
|
|
PAGE_SIZE);
|
|
|
|
if (ret) {
|
|
|
|
pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
|
|
|
|
BUG_ON(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* context going to be destroyed, should not access ops any more */
|
|
|
|
context->hw_bar_info[i].vma->vm_ops = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
up_read(&owning_mm->mmap_sem);
|
|
|
|
mmput(owning_mm);
|
|
|
|
put_task_struct(owning_process);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
|
|
|
|
struct mlx4_ib_vma_private_data *vma_private_data)
|
|
|
|
{
|
|
|
|
vma_private_data->vma = vma;
|
|
|
|
vma->vm_private_data = vma_private_data;
|
|
|
|
vma->vm_ops = &mlx4_ib_vm_ops;
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *dev = to_mdev(context->device);
|
2015-08-13 23:32:06 +08:00
|
|
|
struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (vma->vm_pgoff == 0) {
|
2015-08-13 23:32:06 +08:00
|
|
|
/* We prevent double mmaping on same context */
|
|
|
|
if (mucontext->hw_bar_info[HW_BAR_DB].vma)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
|
|
|
|
|
|
|
if (io_remap_pfn_range(vma, vma->vm_start,
|
|
|
|
to_mucontext(context)->uar.pfn,
|
|
|
|
PAGE_SIZE, vma->vm_page_prot))
|
|
|
|
return -EAGAIN;
|
2015-08-13 23:32:06 +08:00
|
|
|
|
|
|
|
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
|
2015-08-13 23:32:06 +08:00
|
|
|
/* We prevent double mmaping on same context */
|
|
|
|
if (mucontext->hw_bar_info[HW_BAR_BF].vma)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2009-03-30 23:31:05 +08:00
|
|
|
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
if (io_remap_pfn_range(vma, vma->vm_start,
|
|
|
|
to_mucontext(context)->uar.pfn +
|
|
|
|
dev->dev->caps.num_uars,
|
|
|
|
PAGE_SIZE, vma->vm_page_prot))
|
|
|
|
return -EAGAIN;
|
2015-08-13 23:32:06 +08:00
|
|
|
|
|
|
|
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
|
|
|
|
|
2015-06-11 21:35:26 +08:00
|
|
|
} else if (vma->vm_pgoff == 3) {
|
|
|
|
struct mlx4_clock_params params;
|
2015-08-13 23:32:06 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* We prevent double mmaping on same context */
|
|
|
|
if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
ret = mlx4_get_internal_clock_params(dev->dev, ¶ms);
|
2015-06-11 21:35:26 +08:00
|
|
|
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
|
|
|
if (io_remap_pfn_range(vma, vma->vm_start,
|
|
|
|
(pci_resource_start(dev->dev->persist->pdev,
|
|
|
|
params.bar) +
|
|
|
|
params.offset)
|
|
|
|
>> PAGE_SHIFT,
|
|
|
|
PAGE_SIZE, vma->vm_page_prot))
|
|
|
|
return -EAGAIN;
|
2015-08-13 23:32:06 +08:00
|
|
|
|
|
|
|
mlx4_ib_set_vma_data(vma,
|
|
|
|
&mucontext->hw_bar_info[HW_BAR_CLOCK]);
|
2015-06-11 21:35:26 +08:00
|
|
|
} else {
|
2007-05-09 09:00:38 +08:00
|
|
|
return -EINVAL;
|
2015-06-11 21:35:26 +08:00
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
|
|
|
|
struct ib_ucontext *context,
|
|
|
|
struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_pd *pd;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
pd = kmalloc(sizeof *pd, GFP_KERNEL);
|
|
|
|
if (!pd)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
|
|
|
|
if (err) {
|
|
|
|
kfree(pd);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (context)
|
|
|
|
if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
|
|
|
|
mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
|
|
|
|
kfree(pd);
|
|
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
}
|
|
|
|
|
|
|
|
return &pd->ibpd;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
|
|
|
|
{
|
|
|
|
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
|
|
|
|
kfree(pd);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-06-03 00:01:33 +08:00
|
|
|
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
|
|
|
|
struct ib_ucontext *context,
|
|
|
|
struct ib_udata *udata)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_xrcd *xrcd;
|
2015-06-11 21:35:21 +08:00
|
|
|
struct ib_cq_init_attr cq_attr = {};
|
2011-06-03 00:01:33 +08:00
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
|
|
|
|
return ERR_PTR(-ENOSYS);
|
|
|
|
|
|
|
|
xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
|
|
|
|
if (!xrcd)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
|
|
|
|
if (err)
|
|
|
|
goto err1;
|
|
|
|
|
|
|
|
xrcd->pd = ib_alloc_pd(ibdev);
|
|
|
|
if (IS_ERR(xrcd->pd)) {
|
|
|
|
err = PTR_ERR(xrcd->pd);
|
|
|
|
goto err2;
|
|
|
|
}
|
|
|
|
|
2015-06-11 21:35:21 +08:00
|
|
|
cq_attr.cqe = 1;
|
|
|
|
xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
|
2011-06-03 00:01:33 +08:00
|
|
|
if (IS_ERR(xrcd->cq)) {
|
|
|
|
err = PTR_ERR(xrcd->cq);
|
|
|
|
goto err3;
|
|
|
|
}
|
|
|
|
|
|
|
|
return &xrcd->ibxrcd;
|
|
|
|
|
|
|
|
err3:
|
|
|
|
ib_dealloc_pd(xrcd->pd);
|
|
|
|
err2:
|
|
|
|
mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
|
|
|
|
err1:
|
|
|
|
kfree(xrcd);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
|
|
|
|
{
|
|
|
|
ib_destroy_cq(to_mxrcd(xrcd)->cq);
|
|
|
|
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
|
|
|
|
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
|
|
|
|
kfree(xrcd);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
|
|
|
|
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
|
|
|
|
struct mlx4_ib_gid_entry *ge;
|
|
|
|
|
|
|
|
ge = kzalloc(sizeof *ge, GFP_KERNEL);
|
|
|
|
if (!ge)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
ge->gid = *gid;
|
|
|
|
if (mlx4_ib_add_mc(mdev, mqp, gid)) {
|
|
|
|
ge->port = mqp->port;
|
|
|
|
ge->added = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_lock(&mqp->mutex);
|
|
|
|
list_add_tail(&ge->list, &mqp->gid_list);
|
|
|
|
mutex_unlock(&mqp->mutex);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-10-15 19:44:40 +08:00
|
|
|
static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
|
|
|
|
struct mlx4_ib_counters *ctr_table)
|
|
|
|
{
|
|
|
|
struct counter_index *counter, *tmp_count;
|
|
|
|
|
|
|
|
mutex_lock(&ctr_table->mutex);
|
|
|
|
list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
|
|
|
|
list) {
|
|
|
|
if (counter->allocated)
|
|
|
|
mlx4_counter_free(ibdev->dev, counter->index);
|
|
|
|
list_del(&counter->list);
|
|
|
|
kfree(counter);
|
|
|
|
}
|
|
|
|
mutex_unlock(&ctr_table->mutex);
|
|
|
|
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
|
|
|
|
union ib_gid *gid)
|
|
|
|
{
|
|
|
|
struct net_device *ndev;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (!mqp->port)
|
|
|
|
return 0;
|
|
|
|
|
2014-08-21 19:28:41 +08:00
|
|
|
spin_lock_bh(&mdev->iboe.lock);
|
2010-10-25 12:08:52 +08:00
|
|
|
ndev = mdev->iboe.netdevs[mqp->port - 1];
|
|
|
|
if (ndev)
|
|
|
|
dev_hold(ndev);
|
2014-08-21 19:28:41 +08:00
|
|
|
spin_unlock_bh(&mdev->iboe.lock);
|
2010-10-25 12:08:52 +08:00
|
|
|
|
|
|
|
if (ndev) {
|
|
|
|
ret = 1;
|
|
|
|
dev_put(ndev);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
struct mlx4_ib_steering {
|
|
|
|
struct list_head list;
|
2015-02-03 22:48:38 +08:00
|
|
|
struct mlx4_flow_reg_id reg_id;
|
{NET, IB}/mlx4: Add device managed flow steering firmware API
The driver is modified to support three operation modes.
If supported by firmware use the device managed flow steering
API, that which we call device managed steering mode. Else, if
the firmware supports the B0 steering mode use it, and finally,
if none of the above, use the A0 steering mode.
When the steering mode is device managed, the code is modified
such that L2 based rules set by the mlx4_en driver for Ethernet
unicast and multicast, and the IB stack multicast attach calls
done through the mlx4_ib driver are all routed to use the device
managed API.
When attaching rule using device managed flow steering API,
the firmware returns a 64 bit registration id, which is to be
provided during detach.
Currently the firmware is always programmed during HCA initialization
to use standard L2 hashing. Future work should be done to allow
configuring the flow-steering hash function with common, non
proprietary means.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-07-05 12:03:46 +08:00
|
|
|
union ib_gid gid;
|
|
|
|
};
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
static int parse_flow_attr(struct mlx4_dev *dev,
|
2013-11-07 21:25:16 +08:00
|
|
|
u32 qp_num,
|
2013-08-14 18:58:31 +08:00
|
|
|
union ib_flow_spec *ib_spec,
|
|
|
|
struct _rule_hw *mlx4_spec)
|
|
|
|
{
|
|
|
|
enum mlx4_net_trans_rule_id type;
|
|
|
|
|
|
|
|
switch (ib_spec->type) {
|
|
|
|
case IB_FLOW_SPEC_ETH:
|
|
|
|
type = MLX4_NET_TRANS_RULE_ID_ETH;
|
|
|
|
memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
|
|
|
|
ETH_ALEN);
|
|
|
|
memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
|
|
|
|
ETH_ALEN);
|
|
|
|
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
|
|
|
|
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
|
|
|
|
break;
|
2013-11-07 21:25:16 +08:00
|
|
|
case IB_FLOW_SPEC_IB:
|
|
|
|
type = MLX4_NET_TRANS_RULE_ID_IB;
|
|
|
|
mlx4_spec->ib.l3_qpn =
|
|
|
|
cpu_to_be32(qp_num);
|
|
|
|
mlx4_spec->ib.qpn_mask =
|
|
|
|
cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
|
|
|
|
break;
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
|
|
|
|
case IB_FLOW_SPEC_IPV4:
|
|
|
|
type = MLX4_NET_TRANS_RULE_ID_IPV4;
|
|
|
|
mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
|
|
|
|
mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
|
|
|
|
mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
|
|
|
|
mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_FLOW_SPEC_TCP:
|
|
|
|
case IB_FLOW_SPEC_UDP:
|
|
|
|
type = ib_spec->type == IB_FLOW_SPEC_TCP ?
|
|
|
|
MLX4_NET_TRANS_RULE_ID_TCP :
|
|
|
|
MLX4_NET_TRANS_RULE_ID_UDP;
|
|
|
|
mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
|
|
|
|
mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
|
|
|
|
mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
|
|
|
|
mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
|
|
|
|
mlx4_hw_rule_sz(dev, type) < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
|
|
|
|
mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
|
|
|
|
return mlx4_hw_rule_sz(dev, type);
|
|
|
|
}
|
|
|
|
|
2013-11-07 21:25:16 +08:00
|
|
|
/*
 * One entry of default_table: describes when an extra "default" rule is
 * prepended to a flow and which rule that is.
 *
 * @mandatory_fields:     spec types compared, slot by slot, with the flow's
 *                        specs by __mlx4_ib_default_rules_match(); a spec of
 *                        the same layer but a different type rejects the
 *                        match
 * @mandatory_not_fields: spec types whose presence anywhere in the flow
 *                        rejects the match
 * @rules_create_list:    spec types emitted by
 *                        __mlx4_ib_create_default_rules(); 0 means an empty
 *                        slot
 * @link_layer:           port link layer this entry applies to
 */
struct default_rules {
	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
	__u8  link_layer;
};
|
|
|
|
/*
 * Table of default rules.  The single entry applies to ports whose link
 * layer is InfiniBand: a flow whose specs line up with IPV4 and contain no
 * ETH spec gets an IB-type rule created in front of its own specs.
 */
static const struct default_rules default_table[] = {
	{
		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
		.rules_create_list = {IB_FLOW_SPEC_IB},
		.link_layer = IB_LINK_LAYER_INFINIBAND
	}
};
|
|
|
|
|
|
|
|
static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
|
|
|
|
struct ib_flow_attr *flow_attr)
|
|
|
|
{
|
|
|
|
int i, j, k;
|
|
|
|
void *ib_flow;
|
|
|
|
const struct default_rules *pdefault_rules = default_table;
|
|
|
|
u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
|
|
|
|
|
2014-08-13 07:20:07 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
|
2013-11-07 21:25:16 +08:00
|
|
|
__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
|
|
|
|
memset(&field_types, 0, sizeof(field_types));
|
|
|
|
|
|
|
|
if (link_layer != pdefault_rules->link_layer)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ib_flow = flow_attr + 1;
|
|
|
|
/* we assume the specs are sorted */
|
|
|
|
for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
|
|
|
|
j < flow_attr->num_of_specs; k++) {
|
|
|
|
union ib_flow_spec *current_flow =
|
|
|
|
(union ib_flow_spec *)ib_flow;
|
|
|
|
|
|
|
|
/* same layer but different type */
|
|
|
|
if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
|
|
|
|
(pdefault_rules->mandatory_fields[k] &
|
|
|
|
IB_FLOW_SPEC_LAYER_MASK)) &&
|
|
|
|
(current_flow->type !=
|
|
|
|
pdefault_rules->mandatory_fields[k]))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* same layer, try match next one */
|
|
|
|
if (current_flow->type ==
|
|
|
|
pdefault_rules->mandatory_fields[k]) {
|
|
|
|
j++;
|
|
|
|
ib_flow +=
|
|
|
|
((union ib_flow_spec *)ib_flow)->size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ib_flow = flow_attr + 1;
|
|
|
|
for (j = 0; j < flow_attr->num_of_specs;
|
|
|
|
j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
|
|
|
|
for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
|
|
|
|
/* same layer and same type */
|
|
|
|
if (((union ib_flow_spec *)ib_flow)->type ==
|
|
|
|
pdefault_rules->mandatory_not_fields[k])
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __mlx4_ib_create_default_rules(
|
|
|
|
struct mlx4_ib_dev *mdev,
|
|
|
|
struct ib_qp *qp,
|
|
|
|
const struct default_rules *pdefault_rules,
|
|
|
|
struct _rule_hw *mlx4_spec) {
|
|
|
|
int size = 0;
|
|
|
|
int i;
|
|
|
|
|
2014-08-13 07:20:07 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
|
2013-11-07 21:25:16 +08:00
|
|
|
int ret;
|
|
|
|
union ib_flow_spec ib_spec;
|
|
|
|
switch (pdefault_rules->rules_create_list[i]) {
|
|
|
|
case 0:
|
|
|
|
/* no rule */
|
|
|
|
continue;
|
|
|
|
case IB_FLOW_SPEC_IB:
|
|
|
|
ib_spec.type = IB_FLOW_SPEC_IB;
|
|
|
|
ib_spec.size = sizeof(struct ib_flow_spec_ib);
|
|
|
|
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* invalid rule */
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* We must put empty rule, qpn is being ignored */
|
|
|
|
ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
|
|
|
|
mlx4_spec);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_info("invalid parsing\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
mlx4_spec = (void *)mlx4_spec + ret;
|
|
|
|
size += ret;
|
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
|
|
|
|
int domain,
|
|
|
|
enum mlx4_net_trans_promisc_mode flow_type,
|
|
|
|
u64 *reg_id)
|
|
|
|
{
|
|
|
|
int ret, i;
|
|
|
|
int size = 0;
|
|
|
|
void *ib_flow;
|
|
|
|
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
|
|
|
|
struct mlx4_cmd_mailbox *mailbox;
|
|
|
|
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
|
2013-11-07 21:25:16 +08:00
|
|
|
int default_flow;
|
2013-08-14 18:58:31 +08:00
|
|
|
|
|
|
|
static const u16 __mlx4_domain[] = {
|
|
|
|
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
|
|
|
|
[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
|
|
|
|
[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
|
|
|
|
[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
|
|
|
|
};
|
|
|
|
|
|
|
|
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
|
|
|
|
pr_err("Invalid priority value %d\n", flow_attr->priority);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (domain >= IB_FLOW_DOMAIN_NUM) {
|
|
|
|
pr_err("Invalid domain value %d\n", domain);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
|
|
|
|
if (IS_ERR(mailbox))
|
|
|
|
return PTR_ERR(mailbox);
|
|
|
|
ctrl = mailbox->buf;
|
|
|
|
|
|
|
|
ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
|
|
|
|
flow_attr->priority);
|
|
|
|
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
|
|
|
|
ctrl->port = flow_attr->port;
|
|
|
|
ctrl->qpn = cpu_to_be32(qp->qp_num);
|
|
|
|
|
|
|
|
ib_flow = flow_attr + 1;
|
|
|
|
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
|
2013-11-07 21:25:16 +08:00
|
|
|
/* Add default flows */
|
|
|
|
default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
|
|
|
|
if (default_flow >= 0) {
|
|
|
|
ret = __mlx4_ib_create_default_rules(
|
|
|
|
mdev, qp, default_table + default_flow,
|
|
|
|
mailbox->buf + size);
|
|
|
|
if (ret < 0) {
|
|
|
|
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
size += ret;
|
|
|
|
}
|
2013-08-14 18:58:31 +08:00
|
|
|
for (i = 0; i < flow_attr->num_of_specs; i++) {
|
2013-11-07 21:25:16 +08:00
|
|
|
ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
|
|
|
|
mailbox->buf + size);
|
2013-08-14 18:58:31 +08:00
|
|
|
if (ret < 0) {
|
|
|
|
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
ib_flow += ((union ib_flow_spec *) ib_flow)->size;
|
|
|
|
size += ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
|
|
|
|
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
|
2015-05-31 14:30:15 +08:00
|
|
|
MLX4_CMD_WRAPPED);
|
2013-08-14 18:58:31 +08:00
|
|
|
if (ret == -ENOMEM)
|
|
|
|
pr_err("mcg table is full. Fail to register network rule.\n");
|
|
|
|
else if (ret == -ENXIO)
|
|
|
|
pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
|
|
|
|
else if (ret)
|
2016-04-26 03:26:50 +08:00
|
|
|
pr_err("Invalid argument. Fail to register network rule.\n");
|
2013-08-14 18:58:31 +08:00
|
|
|
|
|
|
|
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Detach the steering rule identified by @reg_id.
 *
 * Returns 0 on success; on failure the error from the detach command is
 * logged and returned.
 */
static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
{
	int rc = mlx4_cmd(dev, reg_id, 0, 0,
			  MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
			  MLX4_CMD_WRAPPED);

	if (!rc)
		return 0;

	pr_err("Fail to detach network rule. registration id = 0x%llx\n",
	       reg_id);
	return rc;
}
|
|
|
|
|
2014-08-27 21:47:49 +08:00
|
|
|
static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
|
|
|
|
u64 *reg_id)
|
|
|
|
{
|
|
|
|
void *ib_flow;
|
|
|
|
union ib_flow_spec *ib_spec;
|
|
|
|
struct mlx4_dev *dev = to_mdev(qp->device)->dev;
|
|
|
|
int err = 0;
|
|
|
|
|
2015-01-15 21:28:54 +08:00
|
|
|
if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
|
|
|
|
dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
|
2014-08-27 21:47:49 +08:00
|
|
|
return 0; /* do nothing */
|
|
|
|
|
|
|
|
ib_flow = flow_attr + 1;
|
|
|
|
ib_spec = (union ib_flow_spec *)ib_flow;
|
|
|
|
|
|
|
|
if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
|
|
|
|
return 0; /* do nothing */
|
|
|
|
|
|
|
|
err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
|
|
|
|
flow_attr->port, qp->qp_num,
|
|
|
|
MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
|
|
|
|
reg_id);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-02-19 00:31:06 +08:00
|
|
|
static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
|
|
|
|
struct ib_flow_attr *flow_attr,
|
|
|
|
enum mlx4_net_trans_promisc_mode *type)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
|
|
|
|
(dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
|
|
|
|
(flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flow_attr->num_of_specs == 0) {
|
|
|
|
type[0] = MLX4_FS_MC_SNIFFER;
|
|
|
|
type[1] = MLX4_FS_UC_SNIFFER;
|
|
|
|
} else {
|
|
|
|
union ib_flow_spec *ib_spec;
|
|
|
|
|
|
|
|
ib_spec = (union ib_flow_spec *)(flow_attr + 1);
|
|
|
|
if (ib_spec->type != IB_FLOW_SPEC_ETH)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* if all is zero than MC and UC */
|
|
|
|
if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
|
|
|
|
type[0] = MLX4_FS_MC_SNIFFER;
|
|
|
|
type[1] = MLX4_FS_UC_SNIFFER;
|
|
|
|
} else {
|
|
|
|
u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
|
|
|
|
ib_spec->eth.mask.dst_mac[1],
|
|
|
|
ib_spec->eth.mask.dst_mac[2],
|
|
|
|
ib_spec->eth.mask.dst_mac[3],
|
|
|
|
ib_spec->eth.mask.dst_mac[4],
|
|
|
|
ib_spec->eth.mask.dst_mac[5]};
|
|
|
|
|
|
|
|
/* Above xor was only on MC bit, non empty mask is valid
|
|
|
|
* only if this bit is set and rest are zero.
|
|
|
|
*/
|
|
|
|
if (!is_zero_ether_addr(&mac[0]))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
|
|
|
|
type[0] = MLX4_FS_MC_SNIFFER;
|
|
|
|
else
|
|
|
|
type[0] = MLX4_FS_UC_SNIFFER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
|
|
|
|
struct ib_flow_attr *flow_attr,
|
|
|
|
int domain)
|
|
|
|
{
|
2015-02-03 22:48:38 +08:00
|
|
|
int err = 0, i = 0, j = 0;
|
2013-08-14 18:58:31 +08:00
|
|
|
struct mlx4_ib_flow *mflow;
|
|
|
|
enum mlx4_net_trans_promisc_mode type[2];
|
2015-02-03 22:48:38 +08:00
|
|
|
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
|
|
|
|
int is_bonded = mlx4_is_bonded(dev);
|
2013-08-14 18:58:31 +08:00
|
|
|
|
2016-02-19 00:31:06 +08:00
|
|
|
if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
|
|
|
|
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
|
2016-02-19 00:31:05 +08:00
|
|
|
return ERR_PTR(-EOPNOTSUPP);
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
memset(type, 0, sizeof(type));
|
|
|
|
|
|
|
|
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
|
|
|
|
if (!mflow) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (flow_attr->type) {
|
|
|
|
case IB_FLOW_ATTR_NORMAL:
|
2016-02-19 00:31:06 +08:00
|
|
|
/* If dont trap flag (continue match) is set, under specific
|
|
|
|
* condition traffic be replicated to given qp,
|
|
|
|
* without stealing it
|
|
|
|
*/
|
|
|
|
if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
|
|
|
|
err = mlx4_ib_add_dont_trap_rule(dev,
|
|
|
|
flow_attr,
|
|
|
|
type);
|
|
|
|
if (err)
|
|
|
|
goto err_free;
|
|
|
|
} else {
|
|
|
|
type[0] = MLX4_FS_REGULAR;
|
|
|
|
}
|
2013-08-14 18:58:31 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_FLOW_ATTR_ALL_DEFAULT:
|
|
|
|
type[0] = MLX4_FS_ALL_DEFAULT;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_FLOW_ATTR_MC_DEFAULT:
|
|
|
|
type[0] = MLX4_FS_MC_DEFAULT;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IB_FLOW_ATTR_SNIFFER:
|
2016-02-19 00:31:06 +08:00
|
|
|
type[0] = MLX4_FS_MIRROR_RX_PORT;
|
|
|
|
type[1] = MLX4_FS_MIRROR_SX_PORT;
|
2013-08-14 18:58:31 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i < ARRAY_SIZE(type) && type[i]) {
|
|
|
|
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
|
2015-02-03 22:48:38 +08:00
|
|
|
&mflow->reg_id[i].id);
|
2013-08-14 18:58:31 +08:00
|
|
|
if (err)
|
2014-10-30 21:59:28 +08:00
|
|
|
goto err_create_flow;
|
2015-02-03 22:48:38 +08:00
|
|
|
if (is_bonded) {
|
2015-02-08 17:49:33 +08:00
|
|
|
/* Application always sees one port so the mirror rule
|
|
|
|
* must be on port #2
|
|
|
|
*/
|
2015-02-03 22:48:38 +08:00
|
|
|
flow_attr->port = 2;
|
|
|
|
err = __mlx4_ib_create_flow(qp, flow_attr,
|
|
|
|
domain, type[j],
|
|
|
|
&mflow->reg_id[j].mirror);
|
|
|
|
flow_attr->port = 1;
|
|
|
|
if (err)
|
|
|
|
goto err_create_flow;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
|
2015-05-30 14:11:27 +08:00
|
|
|
i++;
|
2013-08-14 18:58:31 +08:00
|
|
|
}
|
|
|
|
|
2014-08-27 21:47:49 +08:00
|
|
|
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
|
2015-02-03 22:48:38 +08:00
|
|
|
err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
|
|
|
|
&mflow->reg_id[i].id);
|
2014-08-27 21:47:49 +08:00
|
|
|
if (err)
|
2014-10-30 21:59:28 +08:00
|
|
|
goto err_create_flow;
|
2015-05-30 14:11:27 +08:00
|
|
|
|
2015-02-03 22:48:38 +08:00
|
|
|
if (is_bonded) {
|
|
|
|
flow_attr->port = 2;
|
|
|
|
err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
|
|
|
|
&mflow->reg_id[j].mirror);
|
|
|
|
flow_attr->port = 1;
|
|
|
|
if (err)
|
|
|
|
goto err_create_flow;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
/* function to create mirror rule */
|
2015-05-30 14:11:27 +08:00
|
|
|
i++;
|
2014-08-27 21:47:49 +08:00
|
|
|
}
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
return &mflow->ibflow;
|
|
|
|
|
2014-10-30 21:59:28 +08:00
|
|
|
err_create_flow:
|
|
|
|
while (i) {
|
2015-02-03 22:48:38 +08:00
|
|
|
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
|
|
|
|
mflow->reg_id[i].id);
|
2014-10-30 21:59:28 +08:00
|
|
|
i--;
|
|
|
|
}
|
2015-02-03 22:48:38 +08:00
|
|
|
|
|
|
|
while (j) {
|
|
|
|
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
|
|
|
|
mflow->reg_id[j].mirror);
|
|
|
|
j--;
|
|
|
|
}
|
2013-08-14 18:58:31 +08:00
|
|
|
err_free:
|
|
|
|
kfree(mflow);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
|
|
|
|
{
|
|
|
|
int err, ret = 0;
|
|
|
|
int i = 0;
|
|
|
|
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
|
|
|
|
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
|
|
|
|
|
2015-02-03 22:48:38 +08:00
|
|
|
while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
|
|
|
|
err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
|
2013-08-14 18:58:31 +08:00
|
|
|
if (err)
|
|
|
|
ret = err;
|
2015-02-03 22:48:38 +08:00
|
|
|
if (mflow->reg_id[i].mirror) {
|
|
|
|
err = __mlx4_ib_destroy_flow(mdev->dev,
|
|
|
|
mflow->reg_id[i].mirror);
|
|
|
|
if (err)
|
|
|
|
ret = err;
|
|
|
|
}
|
2013-08-14 18:58:31 +08:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
kfree(mflow);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
/*
 * Attach @ibqp to the multicast group @gid.
 *
 * The attach is issued on the QP's port and, on a bonded device, a second
 * time on the other port (the "mirror").  The GID is then recorded on the
 * QP via add_gid_entry().  In device-managed steering mode the returned
 * registration ids are additionally stored on the QP's steering_rules list
 * so that they can be supplied again on detach.
 *
 * @lid is not used by this routine.
 *
 * Returns 0 on success, -ENOMEM if bookkeeping memory cannot be allocated,
 * or the error of the failing attach / add_gid_entry() step, in which case
 * the attachments made so far are undone.
 */
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	int err;
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_dev	*dev = mdev->dev;
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	struct mlx4_ib_steering *ib_steering = NULL;
	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
	/*
	 * Zero-initialised because err_add reads both ids, and the attach
	 * call is not guaranteed (from what is visible here) to write them
	 * in every steering mode.
	 */
	struct mlx4_flow_reg_id	reg_id = {0, 0};

	if (mdev->dev->caps.steering_mode ==
	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
		if (!ib_steering)
			return -ENOMEM;
	}

	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
				    !!(mqp->flags &
				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
				    prot, &reg_id.id);
	if (err) {
		pr_err("multicast attach op failed, err %d\n", err);
		goto err_malloc;
	}

	reg_id.mirror = 0;
	if (mlx4_is_bonded(dev)) {
		/* Mirror the attachment on the port the QP is not using. */
		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
					    (mqp->port == 1) ? 2 : 1,
					    !!(mqp->flags &
					    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
					    prot, &reg_id.mirror);
		if (err)
			goto err_add;
	}

	err = add_gid_entry(ibqp, gid);
	if (err)
		goto err_add;

	if (ib_steering) {
		memcpy(ib_steering->gid.raw, gid->raw, 16);
		ib_steering->reg_id = reg_id;
		mutex_lock(&mqp->mutex);
		list_add(&ib_steering->list, &mqp->steering_rules);
		mutex_unlock(&mqp->mutex);
	}
	return 0;

err_add:
	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
			      prot, reg_id.id);
	if (reg_id.mirror)
		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
				      prot, reg_id.mirror);
err_malloc:
	/* ib_steering is NULL outside device-managed mode; kfree copes. */
	kfree(ib_steering);

	return err;
}
|
|
|
|
|
|
|
|
/*
 * Search @qp's gid_list for the entry whose 16-byte GID equals @raw.
 *
 * Returns the first matching entry, or NULL when the list holds no match.
 * The list is only read here; nothing is unlinked.
 */
static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
	struct mlx4_ib_gid_entry *cur;
	struct mlx4_ib_gid_entry *next;

	list_for_each_entry_safe(cur, next, &qp->gid_list, list) {
		if (memcmp(raw, cur->gid.raw, 16) == 0)
			return cur;
	}

	return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Detach @ibqp from the multicast group identified by @gid.
 *
 * In device-managed steering mode the steering rule recorded for @gid is
 * unlinked from the QP's steering_rules list (under mqp->mutex) and its
 * registration id is handed to the detach call; if no rule is recorded,
 * -EINVAL is returned and nothing is detached.  In the other steering
 * modes the registration id stays {0, 0}.
 *
 * When the device is bonded, a second detach is issued using the mirror
 * registration id.  After the detach(es) succeed, the matching entry on
 * the QP's gid_list (if any) is removed and freed.
 *
 * @lid is not used.
 *
 * Returns 0 on success, -EINVAL as described above, or the error returned
 * by mlx4_multicast_detach().
 */
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	struct mlx4_dev *dev = mdev->dev;
	struct mlx4_flow_reg_id reg_id = {0, 0};
	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
	struct mlx4_ib_gid_entry *ge;
	struct net_device *ndev;
	int err;

	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		struct mlx4_ib_steering *rule;
		struct mlx4_ib_steering *match = NULL;

		/* Unlink the rule for this GID while holding the QP mutex. */
		mutex_lock(&mqp->mutex);
		list_for_each_entry(rule, &mqp->steering_rules, list) {
			if (memcmp(rule->gid.raw, gid->raw, 16))
				continue;

			list_del(&rule->list);
			match = rule;
			break;
		}
		mutex_unlock(&mqp->mutex);

		if (!match) {
			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
			return -EINVAL;
		}

		reg_id = match->reg_id;
		kfree(match);
	}

	err = mlx4_multicast_detach(dev, &mqp->mqp, gid->raw,
				    prot, reg_id.id);
	if (err)
		return err;

	if (mlx4_is_bonded(dev)) {
		err = mlx4_multicast_detach(dev, &mqp->mqp, gid->raw,
					    prot, reg_id.mirror);
		if (err)
			return err;
	}

	mutex_lock(&mqp->mutex);
	ge = find_gid_entry(mqp, gid->raw);
	if (!ge) {
		pr_warn("could not find mgid entry\n");
	} else {
		/*
		 * The netdev reference is taken under iboe.lock and dropped
		 * right after; the netdev itself is not used in between.
		 */
		spin_lock_bh(&mdev->iboe.lock);
		ndev = NULL;
		if (ge->added)
			ndev = mdev->iboe.netdevs[ge->port - 1];
		if (ndev)
			dev_hold(ndev);
		spin_unlock_bh(&mdev->iboe.lock);
		if (ndev)
			dev_put(ndev);

		list_del(&ge->list);
		kfree(ge);
	}
	mutex_unlock(&mqp->mutex);

	return 0;
}
|
|
|
|
|
|
|
|
static int init_node_data(struct mlx4_ib_dev *dev)
|
|
|
|
{
|
|
|
|
struct ib_smp *in_mad = NULL;
|
|
|
|
struct ib_smp *out_mad = NULL;
|
2012-08-03 16:40:45 +08:00
|
|
|
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
|
2007-05-09 09:00:38 +08:00
|
|
|
int err = -ENOMEM;
|
|
|
|
|
|
|
|
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
|
|
|
|
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
|
|
|
|
if (!in_mad || !out_mad)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
init_query_mad(in_mad);
|
|
|
|
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
|
2012-08-03 16:40:45 +08:00
|
|
|
if (mlx4_is_master(dev->dev))
|
|
|
|
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
|
2007-05-09 09:00:38 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
|
|
|
|
|
|
|
|
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
|
|
|
|
|
2012-08-03 16:40:45 +08:00
|
|
|
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
|
2007-05-09 09:00:38 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
2012-08-03 16:40:54 +08:00
|
|
|
dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
|
2007-05-09 09:00:38 +08:00
|
|
|
memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
|
|
|
|
|
|
|
|
out:
|
|
|
|
kfree(in_mad);
|
|
|
|
kfree(out_mad);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
|
|
|
|
char *buf)
|
2007-09-18 15:14:18 +08:00
|
|
|
{
|
2008-02-22 07:13:36 +08:00
|
|
|
struct mlx4_ib_dev *dev =
|
|
|
|
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
2015-01-25 22:59:35 +08:00
|
|
|
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
|
2007-09-18 15:14:18 +08:00
|
|
|
}
|
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
|
|
|
|
char *buf)
|
2007-09-18 15:14:18 +08:00
|
|
|
{
|
2008-02-22 07:13:36 +08:00
|
|
|
struct mlx4_ib_dev *dev =
|
|
|
|
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
2007-09-18 15:14:18 +08:00
|
|
|
return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
|
|
|
|
(int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
|
|
|
|
(int) dev->dev->caps.fw_ver & 0xffff);
|
|
|
|
}
|
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
|
|
|
|
char *buf)
|
2007-09-18 15:14:18 +08:00
|
|
|
{
|
2008-02-22 07:13:36 +08:00
|
|
|
struct mlx4_ib_dev *dev =
|
|
|
|
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
2007-09-18 15:14:18 +08:00
|
|
|
return sprintf(buf, "%x\n", dev->dev->rev_id);
|
|
|
|
}
|
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static ssize_t show_board(struct device *device, struct device_attribute *attr,
|
|
|
|
char *buf)
|
2007-09-18 15:14:18 +08:00
|
|
|
{
|
2008-02-22 07:13:36 +08:00
|
|
|
struct mlx4_ib_dev *dev =
|
|
|
|
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
|
|
|
|
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
|
|
|
|
dev->dev->board_id);
|
2007-09-18 15:14:18 +08:00
|
|
|
}
|
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
|
|
|
|
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
|
|
|
|
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
|
|
|
|
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
|
2007-09-18 15:14:18 +08:00
|
|
|
|
2008-02-22 07:13:36 +08:00
|
|
|
static struct device_attribute *mlx4_class_attributes[] = {
|
|
|
|
&dev_attr_hw_rev,
|
|
|
|
&dev_attr_fw_ver,
|
|
|
|
&dev_attr_hca_type,
|
|
|
|
&dev_attr_board_id
|
2007-09-18 15:14:18 +08:00
|
|
|
};
|
|
|
|
|
2014-05-15 20:29:28 +08:00
|
|
|
#define MLX4_IB_INVALID_MAC ((u64)-1)
|
|
|
|
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
|
|
|
|
struct net_device *dev,
|
|
|
|
int port)
|
|
|
|
{
|
|
|
|
u64 new_smac = 0;
|
|
|
|
u64 release_mac = MLX4_IB_INVALID_MAC;
|
|
|
|
struct mlx4_ib_qp *qp;
|
|
|
|
|
|
|
|
read_lock(&dev_base_lock);
|
|
|
|
new_smac = mlx4_mac_to_u64(dev->dev_addr);
|
|
|
|
read_unlock(&dev_base_lock);
|
|
|
|
|
2014-09-11 19:11:17 +08:00
|
|
|
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
|
|
|
|
|
2014-09-11 19:11:18 +08:00
|
|
|
/* no need for update QP1 and mac registration in non-SRIOV */
|
|
|
|
if (!mlx4_is_mfunc(ibdev->dev))
|
|
|
|
return;
|
|
|
|
|
2014-05-15 20:29:28 +08:00
|
|
|
mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
|
|
|
|
qp = ibdev->qp1_proxy[port - 1];
|
|
|
|
if (qp) {
|
|
|
|
int new_smac_index;
|
2014-09-11 19:11:20 +08:00
|
|
|
u64 old_smac;
|
2014-05-15 20:29:28 +08:00
|
|
|
struct mlx4_update_qp_params update_params;
|
|
|
|
|
2014-09-11 19:11:20 +08:00
|
|
|
mutex_lock(&qp->mutex);
|
|
|
|
old_smac = qp->pri.smac;
|
2014-05-15 20:29:28 +08:00
|
|
|
if (new_smac == old_smac)
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
|
|
|
|
|
|
|
|
if (new_smac_index < 0)
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
update_params.smac_index = new_smac_index;
|
2014-09-10 21:41:56 +08:00
|
|
|
if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
|
2014-05-15 20:29:28 +08:00
|
|
|
&update_params)) {
|
|
|
|
release_mac = new_smac;
|
|
|
|
goto unlock;
|
|
|
|
}
|
2014-09-11 19:11:20 +08:00
|
|
|
/* if old port was zero, no mac was yet registered for this QP */
|
|
|
|
if (qp->pri.smac_port)
|
|
|
|
release_mac = old_smac;
|
2014-05-15 20:29:28 +08:00
|
|
|
qp->pri.smac = new_smac;
|
2014-09-11 19:11:20 +08:00
|
|
|
qp->pri.smac_port = port;
|
2014-05-15 20:29:28 +08:00
|
|
|
qp->pri.smac_index = new_smac_index;
|
|
|
|
}
|
|
|
|
|
|
|
|
unlock:
|
|
|
|
if (release_mac != MLX4_IB_INVALID_MAC)
|
|
|
|
mlx4_unregister_mac(ibdev->dev, port, release_mac);
|
2014-09-11 19:11:20 +08:00
|
|
|
if (qp)
|
|
|
|
mutex_unlock(&qp->mutex);
|
|
|
|
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
|
2014-05-15 20:29:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
|
|
|
|
struct net_device *dev,
|
|
|
|
unsigned long event)
|
|
|
|
|
2013-12-13 00:03:13 +08:00
|
|
|
{
|
2010-10-25 12:08:52 +08:00
|
|
|
struct mlx4_ib_iboe *iboe;
|
2014-05-15 20:29:28 +08:00
|
|
|
int update_qps_port = -1;
|
2010-10-25 12:08:52 +08:00
|
|
|
int port;
|
|
|
|
|
2015-07-30 23:33:30 +08:00
|
|
|
ASSERT_RTNL();
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
iboe = &ibdev->iboe;
|
|
|
|
|
2014-08-21 19:28:41 +08:00
|
|
|
spin_lock_bh(&iboe->lock);
|
2010-10-25 12:08:52 +08:00
|
|
|
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
|
2014-02-05 21:13:02 +08:00
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
iboe->netdevs[port - 1] =
|
2011-03-23 06:38:17 +08:00
|
|
|
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
|
2010-10-25 12:08:52 +08:00
|
|
|
|
2014-05-15 20:29:28 +08:00
|
|
|
if (dev == iboe->netdevs[port - 1] &&
|
|
|
|
(event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
|
|
|
|
event == NETDEV_UP || event == NETDEV_CHANGE))
|
|
|
|
update_qps_port = port;
|
|
|
|
|
2013-12-13 00:03:13 +08:00
|
|
|
}
|
2014-08-21 19:28:41 +08:00
|
|
|
spin_unlock_bh(&iboe->lock);
|
2014-05-15 20:29:28 +08:00
|
|
|
|
|
|
|
if (update_qps_port > 0)
|
|
|
|
mlx4_ib_update_qps(ibdev, dev, update_qps_port);
|
2013-12-13 00:03:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int mlx4_ib_netdev_event(struct notifier_block *this,
|
|
|
|
unsigned long event, void *ptr)
|
|
|
|
{
|
|
|
|
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
|
|
|
struct mlx4_ib_dev *ibdev;
|
|
|
|
|
|
|
|
if (!net_eq(dev_net(dev), &init_net))
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
|
|
|
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
|
2014-05-15 20:29:28 +08:00
|
|
|
mlx4_ib_scan_netdevs(ibdev, dev, event);
|
2010-10-25 12:08:52 +08:00
|
|
|
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
}
|
|
|
|
|
2012-08-03 16:40:43 +08:00
|
|
|
static void init_pkeys(struct mlx4_ib_dev *ibdev)
|
|
|
|
{
|
|
|
|
int port;
|
|
|
|
int slave;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (mlx4_is_master(ibdev->dev)) {
|
2015-01-25 22:59:35 +08:00
|
|
|
for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
|
|
|
|
++slave) {
|
2012-08-03 16:40:43 +08:00
|
|
|
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
|
|
|
|
for (i = 0;
|
|
|
|
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
|
|
|
|
++i) {
|
|
|
|
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
|
|
|
|
/* master has the identity virt2phys pkey mapping */
|
|
|
|
(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
|
|
|
|
ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
|
|
|
|
mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
|
|
|
|
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* initialize pkey cache */
|
|
|
|
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
|
|
|
|
for (i = 0;
|
|
|
|
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
|
|
|
|
++i)
|
|
|
|
ibdev->pkeys.phys_pkey_cache[port-1][i] =
|
|
|
|
(i) ? 0 : 0xFFFF;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-04-29 22:04:27 +08:00
|
|
|
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
|
|
|
|
{
|
2015-05-31 14:30:16 +08:00
|
|
|
int i, j, eq = 0, total_eqs = 0;
|
2012-04-29 22:04:27 +08:00
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
|
|
|
|
sizeof(ibdev->eq_table[0]), GFP_KERNEL);
|
2012-04-29 22:04:27 +08:00
|
|
|
if (!ibdev->eq_table)
|
|
|
|
return;
|
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
for (i = 1; i <= dev->caps.num_ports; i++) {
|
|
|
|
for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
|
|
|
|
j++, total_eqs++) {
|
|
|
|
if (i > 1 && mlx4_is_eq_shared(dev, total_eqs))
|
|
|
|
continue;
|
|
|
|
ibdev->eq_table[eq] = total_eqs;
|
|
|
|
if (!mlx4_assign_eq(dev, i,
|
|
|
|
&ibdev->eq_table[eq]))
|
|
|
|
eq++;
|
|
|
|
else
|
|
|
|
ibdev->eq_table[eq] = -1;
|
2012-04-29 22:04:27 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
for (i = eq; i < dev->caps.num_comp_vectors;
|
|
|
|
ibdev->eq_table[i++] = -1)
|
|
|
|
;
|
2012-04-29 22:04:27 +08:00
|
|
|
|
|
|
|
/* Advertise the new number of EQs to clients */
|
2015-05-31 14:30:16 +08:00
|
|
|
ibdev->ib_dev.num_comp_vectors = eq;
|
2012-04-29 22:04:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
|
|
|
|
{
|
|
|
|
int i;
|
2015-05-31 14:30:16 +08:00
|
|
|
int total_eqs = ibdev->ib_dev.num_comp_vectors;
|
2012-05-24 21:08:07 +08:00
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
/* no eqs were allocated */
|
2012-05-24 21:08:07 +08:00
|
|
|
if (!ibdev->eq_table)
|
|
|
|
return;
|
2012-04-29 22:04:27 +08:00
|
|
|
|
|
|
|
/* Reset the advertised EQ number */
|
2015-05-31 14:30:16 +08:00
|
|
|
ibdev->ib_dev.num_comp_vectors = 0;
|
2012-04-29 22:04:27 +08:00
|
|
|
|
2015-05-31 14:30:16 +08:00
|
|
|
for (i = 0; i < total_eqs; i++)
|
2012-04-29 22:04:27 +08:00
|
|
|
mlx4_release_eq(dev, ibdev->eq_table[i]);
|
|
|
|
|
|
|
|
kfree(ibdev->eq_table);
|
2015-05-31 14:30:16 +08:00
|
|
|
ibdev->eq_table = NULL;
|
2012-04-29 22:04:27 +08:00
|
|
|
}
|
|
|
|
|
2015-05-14 08:02:58 +08:00
|
|
|
/*
 * Fill @immutable for port @port_num of @ibdev.
 *
 * Table lengths are taken from a port query and max_mad_size is set to
 * IB_MGMT_MAD_SIZE.  core_cap_flags depends on the link layer:
 *  - InfiniBand: RDMA_CORE_PORT_IBA_IB;
 *  - otherwise: RoCE when the device has the IBOE capability, and
 *    RoCE plus RoCE-UDP-encap when it has the ROCE_V1_V2 capability
 *    (the latter overrides the former).  When neither capability is set,
 *    core_cap_flags is not written.
 *
 * Returns 0 on success or the error from mlx4_ib_query_port(), in which
 * case @immutable is not modified.
 */
static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_port_attr attr;
	int err;

	err = mlx4_ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
		return 0;
	}

	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
			RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return 0;
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static void *mlx4_ib_add(struct mlx4_dev *dev)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *ibdev;
|
2009-01-10 05:22:29 +08:00
|
|
|
int num_ports = 0;
|
2012-05-11 04:28:09 +08:00
|
|
|
int i, j;
|
2010-10-25 12:08:52 +08:00
|
|
|
int err;
|
|
|
|
struct mlx4_ib_iboe *iboe;
|
2014-02-02 23:06:47 +08:00
|
|
|
int ib_num_ports = 0;
|
2015-02-03 22:48:37 +08:00
|
|
|
int num_req_counters;
|
2015-06-15 22:59:03 +08:00
|
|
|
int allocated;
|
|
|
|
u32 counter_index;
|
2015-10-15 19:44:40 +08:00
|
|
|
struct counter_index *new_counter_index = NULL;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2012-04-29 22:04:26 +08:00
|
|
|
pr_info_once("%s", mlx4_ib_version);
|
2008-02-05 12:20:44 +08:00
|
|
|
|
2012-08-03 16:40:55 +08:00
|
|
|
num_ports = 0;
|
2010-10-25 12:08:52 +08:00
|
|
|
mlx4_foreach_ib_transport_port(i, dev)
|
2009-01-10 05:22:29 +08:00
|
|
|
num_ports++;
|
|
|
|
|
|
|
|
/* No point in registering a device with no ports... */
|
|
|
|
if (num_ports == 0)
|
|
|
|
return NULL;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
|
|
|
|
if (!ibdev) {
|
2015-01-25 22:59:35 +08:00
|
|
|
dev_err(&dev->persist->pdev->dev,
|
|
|
|
"Device struct alloc failed\n");
|
2007-05-09 09:00:38 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
iboe = &ibdev->iboe;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
|
|
|
|
goto err_dealloc;
|
|
|
|
|
|
|
|
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
|
|
|
|
goto err_pd;
|
|
|
|
|
2011-01-13 01:50:36 +08:00
|
|
|
ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
|
|
|
|
PAGE_SIZE);
|
2007-05-09 09:00:38 +08:00
|
|
|
if (!ibdev->uar_map)
|
|
|
|
goto err_uar;
|
2007-05-13 22:18:23 +08:00
|
|
|
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
ibdev->dev = dev;
|
2015-02-03 22:48:39 +08:00
|
|
|
ibdev->bond_next_port = 0;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
|
|
|
|
ibdev->ib_dev.owner = THIS_MODULE;
|
|
|
|
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
|
2008-07-23 23:12:26 +08:00
|
|
|
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
|
2009-01-10 05:22:29 +08:00
|
|
|
ibdev->num_ports = num_ports;
|
2015-02-03 22:48:37 +08:00
|
|
|
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
|
|
|
|
1 : ibdev->num_ports;
|
2008-12-22 23:15:03 +08:00
|
|
|
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
|
2015-01-25 22:59:35 +08:00
|
|
|
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
|
2015-07-30 23:33:30 +08:00
|
|
|
ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
|
|
|
|
ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
|
|
|
|
ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2012-10-21 22:59:24 +08:00
|
|
|
if (dev->caps.userspace_caps)
|
|
|
|
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
|
|
|
|
else
|
|
|
|
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.uverbs_cmd_mask =
|
|
|
|
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_REG_MR) |
|
2014-07-31 16:01:30 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_REREG_MR) |
|
2007-05-09 09:00:38 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
|
2008-04-17 12:09:33 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
|
2007-05-09 09:00:38 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
|
2007-06-21 17:27:47 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
|
2007-05-09 09:00:38 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
|
2007-06-21 18:03:11 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
|
2011-06-03 01:43:26 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
|
2011-08-12 04:57:43 +08:00
|
|
|
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
ibdev->ib_dev.query_device = mlx4_ib_query_device;
|
|
|
|
ibdev->ib_dev.query_port = mlx4_ib_query_port;
|
2010-10-25 12:08:52 +08:00
|
|
|
ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
|
|
|
|
ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
|
|
|
|
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
|
|
|
|
ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
|
|
|
|
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
|
|
|
|
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
|
|
|
|
ibdev->ib_dev.mmap = mlx4_ib_mmap;
|
|
|
|
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
|
|
|
|
ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
|
|
|
|
ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
|
|
|
|
ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
|
|
|
|
ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
|
|
|
|
ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
|
|
|
|
ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
|
2007-06-21 18:03:11 +08:00
|
|
|
ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
|
|
|
|
ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
|
|
|
|
ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
|
|
|
|
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
|
2007-06-21 17:27:47 +08:00
|
|
|
ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
|
|
|
|
ibdev->ib_dev.post_send = mlx4_ib_post_send;
|
|
|
|
ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
|
|
|
|
ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
|
2008-04-17 12:09:33 +08:00
|
|
|
ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
|
2008-04-17 12:09:33 +08:00
|
|
|
ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
|
|
|
|
ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
|
|
|
|
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
|
|
|
|
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
|
|
|
|
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
|
2014-07-31 16:01:30 +08:00
|
|
|
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
|
2015-07-30 15:32:42 +08:00
|
|
|
ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
|
2015-10-14 00:11:27 +08:00
|
|
|
ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
|
|
|
|
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
|
|
|
|
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
|
2015-05-14 08:02:58 +08:00
|
|
|
ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
|
2015-08-13 23:32:06 +08:00
|
|
|
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2012-08-03 16:40:54 +08:00
|
|
|
if (!mlx4_is_slave(ibdev->dev)) {
|
|
|
|
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
|
|
|
|
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
|
|
|
|
ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
|
|
|
|
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
|
|
|
|
}
|
2007-08-01 17:29:05 +08:00
|
|
|
|
2013-02-07 00:19:16 +08:00
|
|
|
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
|
|
|
|
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
|
|
|
|
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
|
|
|
|
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
|
|
|
|
|
|
|
|
ibdev->ib_dev.uverbs_cmd_mask |=
|
|
|
|
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
|
|
|
|
}
|
|
|
|
|
2011-06-03 00:01:33 +08:00
|
|
|
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
|
|
|
|
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
|
|
|
|
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
|
|
|
|
ibdev->ib_dev.uverbs_cmd_mask |=
|
|
|
|
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
|
|
|
|
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
|
|
|
|
}
|
|
|
|
|
2013-08-14 18:58:31 +08:00
|
|
|
if (check_flow_steering_support(dev)) {
|
2013-11-07 21:25:15 +08:00
|
|
|
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
|
2013-08-14 18:58:31 +08:00
|
|
|
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
|
|
|
|
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
|
|
|
|
|
IB/core: extended command: an improved infrastructure for uverbs commands
Commit 400dbc96583f ("IB/core: Infrastructure for extensible uverbs
commands") added an infrastructure for extensible uverbs commands
while later commit 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow
through uverbs") exported ib_create_flow()/ib_destroy_flow() functions
using this new infrastructure.
According to the commit 400dbc96583f, the purpose of this
infrastructure is to support passing around provider (eg. hardware)
specific buffers when userspace issue commands to the kernel, so that
it would be possible to extend uverbs (eg. core) buffers independently
from the provider buffers.
But the new kernel command function prototypes were not modified to
take advantage of this extension. This issue was exposed by Roland
Dreier in a previous review[1].
So the following patch is an attempt at a revised extensible command
infrastructure.
This improved extensible command infrastructure distinguishes
core (eg. legacy)'s command/response buffers from provider
(eg. hardware)'s command/response buffers: each extended command
implementing function is given a struct ib_udata to hold core
(eg. uverbs) input and output buffers, and another struct ib_udata to
hold the hw (eg. provider) input and output buffers.
Having those buffers identified separately makes it easier to increase
one buffer to support extension without having to add some code to
guess the exact size of each command/response parts: This should make
the extended functions more reliable.
Additionally, instead of relying on command identifier being greater
than IB_USER_VERBS_CMD_THRESHOLD, the proposed infrastructure rely on
unused bits in command field: on the 32 bits provided by command
field, only 6 bits are really needed to encode the identifier of
commands currently supported by the kernel. (Even using only 6 bits
leaves room for about 23 new commands).
So this patch makes use of some high order bits in command field to
store flags, leaving enough room for more command identifiers than one
will ever need (eg. 256).
The new flags are used to specify if the command should be processed
as an extended one or a legacy one. While designing the new command
format, care was taken to make usage of flags itself extensible.
Using high order bits of the commands field ensures that newer
libibverbs on older kernel will properly fail when trying to call
extended commands. On the other hand, older libibverbs on newer kernel
will never be able to issue calls to extended commands.
The extended command header includes the optional response pointer so
that output buffer length and output buffer pointer are located
together in the command, allowing proper parameters checking. This
should make implementing functions easier and safer.
Additionally the extended header ensures 64-bit alignment, while making
all sizes multiple of 8 bytes, extending the maximum buffer size:
legacy extended
Maximum command buffer: 256KBytes 1024KBytes (512KBytes + 512KBytes)
Maximum response buffer: 256KBytes 1024KBytes (512KBytes + 512KBytes)
For the purpose of doing proper buffer size accounting, the headers
size are no more taken in account in "in_words".
One of the odds of the current extensible infrastructure, reading
twice the "legacy" command header, is fixed by removing the "legacy"
command header from the extended command header: they are processed as
two different parts of the command: memory is read once and
information are not duplicated: it's making clear that's an extended
command scheme and not a different command scheme.
The proposed scheme will format input (command) and output (response)
buffers this way:
- command:
legacy header +
extended header +
command data (core + hw):
+----------------------------------------+
| flags | 00 00 | command |
| in_words | out_words |
+----------------------------------------+
| response |
| response |
| provider_in_words | provider_out_words |
| padding |
+----------------------------------------+
| |
. <uverbs input> .
. (in_words * 8) .
| |
+----------------------------------------+
| |
. <provider input> .
. (provider_in_words * 8) .
| |
+----------------------------------------+
- response, if present:
+----------------------------------------+
| |
. <uverbs output space> .
. (out_words * 8) .
| |
+----------------------------------------+
| |
. <provider output space> .
. (provider_out_words * 8) .
| |
+----------------------------------------+
The overall design is to ensure that the extensible infrastructure is
itself extensible while being more reliable with more input and bound
checking.
Note:
The unused field in the extended header would be perfect candidate to
hold the command "comp_mask" (eg. bit field used to handle
compatibility). This was suggested by Roland Dreier in a previous
review[2]. But "comp_mask" field is likely to be present in the uverb
input and/or provider input, likewise for the response, as noted by
Matan Barak[3], so it doesn't make sense to put "comp_mask" in the
header.
[1]:
http://marc.info/?i=CAL1RGDWxmM17W2o_era24A-TTDeKyoL6u3NRu_=t_dhV_ZA9MA@mail.gmail.com
[2]:
http://marc.info/?i=CAL1RGDXJtrc849M6_XNZT5xO1+ybKtLWGq6yg6LhoSsKpsmkYA@mail.gmail.com
[3]:
http://marc.info/?i=525C1149.6000701@mellanox.com
Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com
[ Convert "ret ? ret : 0" to the equivalent "ret". - Roland ]
Signed-off-by: Roland Dreier <roland@purestorage.com>
2013-11-07 06:21:49 +08:00
|
|
|
ibdev->ib_dev.uverbs_ex_cmd_mask |=
|
|
|
|
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
|
|
|
|
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
|
2013-08-14 18:58:31 +08:00
|
|
|
}
|
|
|
|
|
2015-06-11 21:35:27 +08:00
|
|
|
ibdev->ib_dev.uverbs_ex_cmd_mask |=
|
|
|
|
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
|
2015-10-15 19:44:42 +08:00
|
|
|
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
|
|
|
|
(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
|
2015-06-11 21:35:27 +08:00
|
|
|
|
2012-04-29 22:04:27 +08:00
|
|
|
mlx4_ib_alloc_eqs(dev, ibdev);
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
spin_lock_init(&iboe->lock);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
if (init_node_data(ibdev))
|
|
|
|
goto err_map;
|
|
|
|
|
2015-10-15 19:44:40 +08:00
|
|
|
for (i = 0; i < ibdev->num_ports; ++i) {
|
|
|
|
mutex_init(&ibdev->counters_table[i].mutex);
|
|
|
|
INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
|
|
|
|
}
|
|
|
|
|
2015-02-03 22:48:37 +08:00
|
|
|
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
|
|
|
|
for (i = 0; i < num_req_counters; ++i) {
|
2014-05-15 20:29:28 +08:00
|
|
|
mutex_init(&ibdev->qp1_proxy_lock[i]);
|
2015-06-15 22:59:03 +08:00
|
|
|
allocated = 0;
|
2011-06-15 22:49:57 +08:00
|
|
|
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
|
|
|
|
IB_LINK_LAYER_ETHERNET) {
|
2015-06-15 22:59:03 +08:00
|
|
|
err = mlx4_counter_alloc(ibdev->dev, &counter_index);
|
|
|
|
/* if failed to allocate a new counter, use default */
|
2011-06-15 22:49:57 +08:00
|
|
|
if (err)
|
2015-06-15 22:59:03 +08:00
|
|
|
counter_index =
|
|
|
|
mlx4_get_default_counter_index(dev,
|
|
|
|
i + 1);
|
|
|
|
else
|
|
|
|
allocated = 1;
|
|
|
|
} else { /* IB_LINK_LAYER_INFINIBAND use the default counter */
|
|
|
|
counter_index = mlx4_get_default_counter_index(dev,
|
|
|
|
i + 1);
|
2014-03-28 16:21:39 +08:00
|
|
|
}
|
2015-10-15 19:44:40 +08:00
|
|
|
new_counter_index = kmalloc(sizeof(*new_counter_index),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!new_counter_index) {
|
|
|
|
if (allocated)
|
|
|
|
mlx4_counter_free(ibdev->dev, counter_index);
|
|
|
|
goto err_counter;
|
|
|
|
}
|
|
|
|
new_counter_index->index = counter_index;
|
|
|
|
new_counter_index->allocated = allocated;
|
|
|
|
list_add_tail(&new_counter_index->list,
|
|
|
|
&ibdev->counters_table[i].counters_list);
|
|
|
|
ibdev->counters_table[i].default_counter = counter_index;
|
2015-06-15 22:59:03 +08:00
|
|
|
pr_info("counter index %d for port %d allocated %d\n",
|
|
|
|
counter_index, i + 1, allocated);
|
2011-06-15 22:49:57 +08:00
|
|
|
}
|
2015-02-03 22:48:37 +08:00
|
|
|
if (mlx4_is_bonded(dev))
|
2015-06-15 22:59:03 +08:00
|
|
|
for (i = 1; i < ibdev->num_ports ; ++i) {
|
2015-10-15 19:44:40 +08:00
|
|
|
new_counter_index =
|
|
|
|
kmalloc(sizeof(struct counter_index),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!new_counter_index)
|
|
|
|
goto err_counter;
|
|
|
|
new_counter_index->index = counter_index;
|
|
|
|
new_counter_index->allocated = 0;
|
|
|
|
list_add_tail(&new_counter_index->list,
|
|
|
|
&ibdev->counters_table[i].counters_list);
|
|
|
|
ibdev->counters_table[i].default_counter =
|
|
|
|
counter_index;
|
2015-06-15 22:59:03 +08:00
|
|
|
}
|
2011-06-15 22:49:57 +08:00
|
|
|
|
2014-02-02 23:06:47 +08:00
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
|
|
|
ib_num_ports++;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
spin_lock_init(&ibdev->sm_lock);
|
|
|
|
mutex_init(&ibdev->cap_mask_mutex);
|
2015-02-08 17:49:34 +08:00
|
|
|
INIT_LIST_HEAD(&ibdev->qp_list);
|
|
|
|
spin_lock_init(&ibdev->reset_flow_resource_lock);
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2014-02-02 23:06:47 +08:00
|
|
|
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
|
|
|
|
ib_num_ports) {
|
2013-11-07 21:25:17 +08:00
|
|
|
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
|
|
|
|
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
|
|
|
|
MLX4_IB_UC_STEER_QPN_ALIGN,
|
net/mlx4: Change QP allocation scheme
When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV fields
in the WQE. Thus, BF may only be used for QPNs with bits 6,7 unset.
The current Ethernet driver code reserves a Tx QP range with 256b alignment.
This is wrong because if there are more than 64 Tx QPs in use,
QPNs >= base + 65 will have bits 6/7 set.
This problem is not specific for the Ethernet driver, any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.
The new mechanism introduced here will support reservation for
"Eth QPs eligible for BF" for all drivers: bare-metal, multi-PF, and VFs
(when hypervisors support WC in VMs). The flow we use is:
1. In mlx4_en, allocate Tx QPs one by one instead of a range allocation,
and request "BF enabled QPs" if BF is supported for the function
2. In the ALLOC_RES FW command, change param1 to:
a. param1[23:0] - number of QPs
b. param1[31-24] - flags controlling QPs reservation
Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
bits 6 and 7 unset in order to be used in Ethernet.
Bits 24-30 of the flags are currently reserved.
When a function tries to allocate a QP, it states the required attributes
for this QP. Those attributes are considered "best-effort". If an attribute,
such as Ethernet BF enabled QP, is a must-have attribute, the function has
to check that attribute is supported before trying to do the allocation.
In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those attributes
and masks out unsupported attributes as well. In order to notify VFs which
attributes are supported, the VF uses QUERY_FUNC_CAP command. This command's
mailbox is filled by the PF, which notifies which QP allocation attributes
it supports.
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-12-11 16:57:54 +08:00
|
|
|
&ibdev->steer_qpn_base, 0);
|
2013-11-07 21:25:17 +08:00
|
|
|
if (err)
|
|
|
|
goto err_counter;
|
|
|
|
|
|
|
|
ibdev->ib_uc_qpns_bitmap =
|
|
|
|
kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
|
|
|
|
sizeof(long),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!ibdev->ib_uc_qpns_bitmap) {
|
2015-01-25 22:59:35 +08:00
|
|
|
dev_err(&dev->persist->pdev->dev,
|
|
|
|
"bit map alloc failed\n");
|
2013-11-07 21:25:17 +08:00
|
|
|
goto err_steer_qp_release;
|
|
|
|
}
|
|
|
|
|
|
|
|
bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
|
|
|
|
|
|
|
|
err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
|
|
|
|
dev, ibdev->steer_qpn_base,
|
|
|
|
ibdev->steer_qpn_base +
|
|
|
|
ibdev->steer_qpn_count - 1);
|
|
|
|
if (err)
|
|
|
|
goto err_steer_free_bitmap;
|
|
|
|
}
|
|
|
|
|
2014-09-11 19:11:17 +08:00
|
|
|
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
|
|
|
|
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
|
|
|
|
|
2010-05-07 08:03:25 +08:00
|
|
|
if (ib_register_device(&ibdev->ib_dev, NULL))
|
2013-11-07 21:25:17 +08:00
|
|
|
goto err_steer_free_bitmap;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
if (mlx4_ib_mad_init(ibdev))
|
|
|
|
goto err_reg;
|
|
|
|
|
2012-08-03 16:40:42 +08:00
|
|
|
if (mlx4_ib_init_sriov(ibdev))
|
|
|
|
goto err_mad;
|
|
|
|
|
2016-01-14 23:50:40 +08:00
|
|
|
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
|
|
|
|
dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
|
2013-12-13 00:03:13 +08:00
|
|
|
if (!iboe->nb.notifier_call) {
|
|
|
|
iboe->nb.notifier_call = mlx4_ib_netdev_event;
|
|
|
|
err = register_netdevice_notifier(&iboe->nb);
|
|
|
|
if (err) {
|
|
|
|
iboe->nb.notifier_call = NULL;
|
|
|
|
goto err_notif;
|
|
|
|
}
|
|
|
|
}
|
2016-01-14 23:50:40 +08:00
|
|
|
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
|
|
|
|
err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
|
|
|
|
if (err) {
|
|
|
|
goto err_notif;
|
|
|
|
}
|
|
|
|
}
|
2010-10-25 12:08:52 +08:00
|
|
|
}
|
|
|
|
|
2012-05-11 04:28:09 +08:00
|
|
|
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
|
2008-02-22 07:13:36 +08:00
|
|
|
if (device_create_file(&ibdev->ib_dev.dev,
|
2012-05-11 04:28:09 +08:00
|
|
|
mlx4_class_attributes[j]))
|
2010-10-25 12:08:52 +08:00
|
|
|
goto err_notif;
|
2007-09-18 15:14:18 +08:00
|
|
|
}
|
|
|
|
|
2009-09-06 11:24:50 +08:00
|
|
|
ibdev->ib_active = true;
|
2016-02-27 00:32:24 +08:00
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
|
|
|
devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
|
|
|
|
&ibdev->ib_dev);
|
2009-09-06 11:24:50 +08:00
|
|
|
|
2012-08-03 16:40:43 +08:00
|
|
|
if (mlx4_is_mfunc(ibdev->dev))
|
|
|
|
init_pkeys(ibdev);
|
|
|
|
|
2012-08-03 16:40:58 +08:00
|
|
|
/* create paravirt contexts for any VFs which are active */
|
|
|
|
if (mlx4_is_master(ibdev->dev)) {
|
|
|
|
for (j = 0; j < MLX4_MFUNC_MAX; j++) {
|
|
|
|
if (j == mlx4_master_func_num(ibdev->dev))
|
|
|
|
continue;
|
|
|
|
if (mlx4_is_slave_active(ibdev->dev, j))
|
|
|
|
do_slave_init(ibdev, j, 1);
|
|
|
|
}
|
|
|
|
}
|
2007-05-09 09:00:38 +08:00
|
|
|
return ibdev;
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
err_notif:
|
2013-12-13 00:03:13 +08:00
|
|
|
if (ibdev->iboe.nb.notifier_call) {
|
|
|
|
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
|
|
|
|
pr_warn("failure unregistering notifier\n");
|
|
|
|
ibdev->iboe.nb.notifier_call = NULL;
|
|
|
|
}
|
2010-10-25 12:08:52 +08:00
|
|
|
flush_workqueue(wq);
|
|
|
|
|
2012-08-03 16:40:42 +08:00
|
|
|
mlx4_ib_close_sriov(ibdev);
|
|
|
|
|
|
|
|
err_mad:
|
|
|
|
mlx4_ib_mad_cleanup(ibdev);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
err_reg:
|
|
|
|
ib_unregister_device(&ibdev->ib_dev);
|
|
|
|
|
2013-11-07 21:25:17 +08:00
|
|
|
err_steer_free_bitmap:
|
|
|
|
kfree(ibdev->ib_uc_qpns_bitmap);
|
|
|
|
|
|
|
|
err_steer_qp_release:
|
|
|
|
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
|
|
|
|
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
|
|
|
|
ibdev->steer_qpn_count);
|
2011-06-15 22:49:57 +08:00
|
|
|
err_counter:
|
2015-10-15 19:44:40 +08:00
|
|
|
for (i = 0; i < ibdev->num_ports; ++i)
|
|
|
|
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
err_map:
|
|
|
|
iounmap(ibdev->uar_map);
|
|
|
|
|
|
|
|
err_uar:
|
|
|
|
mlx4_uar_free(dev, &ibdev->priv_uar);
|
|
|
|
|
|
|
|
err_pd:
|
|
|
|
mlx4_pd_free(dev, ibdev->priv_pdn);
|
|
|
|
|
|
|
|
err_dealloc:
|
|
|
|
ib_dealloc_device(&ibdev->ib_dev);
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2013-11-07 21:25:17 +08:00
|
|
|
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
|
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
|
|
|
|
WARN_ON(!dev->ib_uc_qpns_bitmap);
|
|
|
|
|
|
|
|
offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
|
|
|
|
dev->steer_qpn_count,
|
|
|
|
get_count_order(count));
|
|
|
|
if (offset < 0)
|
|
|
|
return offset;
|
|
|
|
|
|
|
|
*qpn = dev->steer_qpn_base + offset;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Give a run of steering QP numbers back to the device's QPN bitmap.
 *
 * @dev:   IB device whose ib_uc_qpns_bitmap / steer_qpn_base are used
 * @qpn:   first QPN of the run; 0 means "nothing to free"
 * @count: number of QPNs in the run; the released region has order
 *         get_count_order(count)
 *
 * Does nothing when @qpn is 0 or when the device is not in
 * device-managed steering mode.
 */
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
{
	if (!qpn)
		return;

	if (dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
		return;

	/* A QPN below the reserved base cannot have come from this bitmap. */
	BUG_ON(qpn < dev->steer_qpn_base);

	bitmap_release_region(dev->ib_uc_qpns_bitmap,
			      qpn - dev->steer_qpn_base,
			      get_count_order(count));
}
|
|
|
|
|
|
|
|
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
|
|
|
|
int is_attach)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
size_t flow_size;
|
|
|
|
struct ib_flow_attr *flow = NULL;
|
|
|
|
struct ib_flow_spec_ib *ib_spec;
|
|
|
|
|
|
|
|
if (is_attach) {
|
|
|
|
flow_size = sizeof(struct ib_flow_attr) +
|
|
|
|
sizeof(struct ib_flow_spec_ib);
|
|
|
|
flow = kzalloc(flow_size, GFP_KERNEL);
|
|
|
|
if (!flow)
|
|
|
|
return -ENOMEM;
|
|
|
|
flow->port = mqp->port;
|
|
|
|
flow->num_of_specs = 1;
|
|
|
|
flow->size = flow_size;
|
|
|
|
ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
|
|
|
|
ib_spec->type = IB_FLOW_SPEC_IB;
|
|
|
|
ib_spec->size = sizeof(struct ib_flow_spec_ib);
|
|
|
|
/* Add an empty rule for IB L2 */
|
|
|
|
memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
|
|
|
|
|
|
|
|
err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
|
|
|
|
IB_FLOW_DOMAIN_NIC,
|
|
|
|
MLX4_FS_REGULAR,
|
|
|
|
&mqp->reg_id);
|
|
|
|
} else {
|
|
|
|
err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
|
|
|
|
}
|
|
|
|
kfree(flow);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_dev *ibdev = ibdev_ptr;
|
|
|
|
int p;
|
2016-02-27 00:32:24 +08:00
|
|
|
int i;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
2016-02-27 00:32:24 +08:00
|
|
|
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
|
|
|
|
devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
|
2014-08-21 19:28:42 +08:00
|
|
|
ibdev->ib_active = false;
|
|
|
|
flush_workqueue(wq);
|
|
|
|
|
2012-08-03 16:40:42 +08:00
|
|
|
mlx4_ib_close_sriov(ibdev);
|
2009-03-19 10:49:54 +08:00
|
|
|
mlx4_ib_mad_cleanup(ibdev);
|
|
|
|
ib_unregister_device(&ibdev->ib_dev);
|
2010-10-25 12:08:52 +08:00
|
|
|
if (ibdev->iboe.nb.notifier_call) {
|
|
|
|
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
|
2012-04-29 22:04:26 +08:00
|
|
|
pr_warn("failure unregistering notifier\n");
|
2010-10-25 12:08:52 +08:00
|
|
|
ibdev->iboe.nb.notifier_call = NULL;
|
|
|
|
}
|
2013-11-07 21:25:17 +08:00
|
|
|
|
|
|
|
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
|
|
|
|
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
|
|
|
|
ibdev->steer_qpn_count);
|
|
|
|
kfree(ibdev->ib_uc_qpns_bitmap);
|
|
|
|
}
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
iounmap(ibdev->uar_map);
|
2011-06-15 22:49:57 +08:00
|
|
|
for (p = 0; p < ibdev->num_ports; ++p)
|
2015-10-15 19:44:40 +08:00
|
|
|
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
|
|
|
|
|
2010-10-25 12:08:52 +08:00
|
|
|
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
|
2007-05-09 09:00:38 +08:00
|
|
|
mlx4_CLOSE_PORT(dev, p);
|
|
|
|
|
2012-04-29 22:04:27 +08:00
|
|
|
mlx4_ib_free_eqs(dev, ibdev);
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
mlx4_uar_free(dev, &ibdev->priv_uar);
|
|
|
|
mlx4_pd_free(dev, ibdev->priv_pdn);
|
|
|
|
ib_dealloc_device(&ibdev->ib_dev);
|
|
|
|
}
|
|
|
|
|
2012-08-03 16:40:42 +08:00
|
|
|
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_demux_work **dm = NULL;
|
|
|
|
struct mlx4_dev *dev = ibdev->dev;
|
|
|
|
int i;
|
|
|
|
unsigned long flags;
|
2014-03-20 00:11:52 +08:00
|
|
|
struct mlx4_active_ports actv_ports;
|
|
|
|
unsigned int ports;
|
|
|
|
unsigned int first_port;
|
2012-08-03 16:40:42 +08:00
|
|
|
|
|
|
|
if (!mlx4_is_master(dev))
|
|
|
|
return;
|
|
|
|
|
2014-03-20 00:11:52 +08:00
|
|
|
actv_ports = mlx4_get_active_ports(dev, slave);
|
|
|
|
ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
|
|
|
|
first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
|
|
|
|
|
|
|
|
dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
|
2012-08-03 16:40:42 +08:00
|
|
|
if (!dm) {
|
|
|
|
pr_err("failed to allocate memory for tunneling qp update\n");
|
2015-07-08 12:13:35 +08:00
|
|
|
return;
|
2012-08-03 16:40:42 +08:00
|
|
|
}
|
|
|
|
|
2014-03-20 00:11:52 +08:00
|
|
|
for (i = 0; i < ports; i++) {
|
2012-08-03 16:40:42 +08:00
|
|
|
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
|
|
|
|
if (!dm[i]) {
|
|
|
|
pr_err("failed to allocate memory for tunneling qp update work struct\n");
|
2015-07-08 12:13:35 +08:00
|
|
|
while (--i >= 0)
|
|
|
|
kfree(dm[i]);
|
2012-08-03 16:40:42 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
|
2014-03-20 00:11:52 +08:00
|
|
|
dm[i]->port = first_port + i + 1;
|
2012-08-03 16:40:42 +08:00
|
|
|
dm[i]->slave = slave;
|
|
|
|
dm[i]->do_init = do_init;
|
|
|
|
dm[i]->dev = ibdev;
|
2015-07-09 22:21:08 +08:00
|
|
|
}
|
|
|
|
/* initialize or tear down tunnel QPs for the slave */
|
|
|
|
spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
|
|
|
|
if (!ibdev->sriov.is_going_down) {
|
|
|
|
for (i = 0; i < ports; i++)
|
2012-08-03 16:40:42 +08:00
|
|
|
queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
|
|
|
|
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
|
2015-07-09 22:21:08 +08:00
|
|
|
} else {
|
|
|
|
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
|
|
|
|
for (i = 0; i < ports; i++)
|
|
|
|
kfree(dm[i]);
|
2012-08-03 16:40:42 +08:00
|
|
|
}
|
|
|
|
out:
|
2013-02-25 07:20:05 +08:00
|
|
|
kfree(dm);
|
2012-08-03 16:40:42 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-02-08 17:49:34 +08:00
|
|
|
static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
|
|
|
|
{
|
|
|
|
struct mlx4_ib_qp *mqp;
|
|
|
|
unsigned long flags_qp;
|
|
|
|
unsigned long flags_cq;
|
|
|
|
struct mlx4_ib_cq *send_mcq, *recv_mcq;
|
|
|
|
struct list_head cq_notify_list;
|
|
|
|
struct mlx4_cq *mcq;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
pr_warn("mlx4_ib_handle_catas_error was started\n");
|
|
|
|
INIT_LIST_HEAD(&cq_notify_list);
|
|
|
|
|
|
|
|
/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
|
|
|
|
spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
|
|
|
|
|
|
|
|
list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
|
|
|
|
spin_lock_irqsave(&mqp->sq.lock, flags_qp);
|
|
|
|
if (mqp->sq.tail != mqp->sq.head) {
|
|
|
|
send_mcq = to_mcq(mqp->ibqp.send_cq);
|
|
|
|
spin_lock_irqsave(&send_mcq->lock, flags_cq);
|
|
|
|
if (send_mcq->mcq.comp &&
|
|
|
|
mqp->ibqp.send_cq->comp_handler) {
|
|
|
|
if (!send_mcq->mcq.reset_notify_added) {
|
|
|
|
send_mcq->mcq.reset_notify_added = 1;
|
|
|
|
list_add_tail(&send_mcq->mcq.reset_notify,
|
|
|
|
&cq_notify_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
|
|
|
|
/* Now, handle the QP's receive queue */
|
|
|
|
spin_lock_irqsave(&mqp->rq.lock, flags_qp);
|
|
|
|
/* no handling is needed for SRQ */
|
|
|
|
if (!mqp->ibqp.srq) {
|
|
|
|
if (mqp->rq.tail != mqp->rq.head) {
|
|
|
|
recv_mcq = to_mcq(mqp->ibqp.recv_cq);
|
|
|
|
spin_lock_irqsave(&recv_mcq->lock, flags_cq);
|
|
|
|
if (recv_mcq->mcq.comp &&
|
|
|
|
mqp->ibqp.recv_cq->comp_handler) {
|
|
|
|
if (!recv_mcq->mcq.reset_notify_added) {
|
|
|
|
recv_mcq->mcq.reset_notify_added = 1;
|
|
|
|
list_add_tail(&recv_mcq->mcq.reset_notify,
|
|
|
|
&cq_notify_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&recv_mcq->lock,
|
|
|
|
flags_cq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
|
|
|
|
mcq->comp(mcq);
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
|
|
|
|
pr_warn("mlx4_ib_handle_catas_error ended\n");
|
|
|
|
}
|
|
|
|
|
2015-02-03 22:48:37 +08:00
|
|
|
static void handle_bonded_port_state_event(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct ib_event_work *ew =
|
|
|
|
container_of(work, struct ib_event_work, work);
|
|
|
|
struct mlx4_ib_dev *ibdev = ew->ib_dev;
|
|
|
|
enum ib_port_state bonded_port_state = IB_PORT_NOP;
|
|
|
|
int i;
|
|
|
|
struct ib_event ibev;
|
|
|
|
|
|
|
|
kfree(ew);
|
|
|
|
spin_lock_bh(&ibdev->iboe.lock);
|
|
|
|
for (i = 0; i < MLX4_MAX_PORTS; ++i) {
|
|
|
|
struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
|
2015-03-18 22:51:35 +08:00
|
|
|
enum ib_port_state curr_port_state;
|
2015-02-03 22:48:37 +08:00
|
|
|
|
2015-03-18 22:51:35 +08:00
|
|
|
if (!curr_netdev)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
curr_port_state =
|
2015-02-03 22:48:37 +08:00
|
|
|
(netif_running(curr_netdev) &&
|
|
|
|
netif_carrier_ok(curr_netdev)) ?
|
|
|
|
IB_PORT_ACTIVE : IB_PORT_DOWN;
|
|
|
|
|
|
|
|
bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
|
|
|
|
curr_port_state : IB_PORT_ACTIVE;
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&ibdev->iboe.lock);
|
|
|
|
|
|
|
|
ibev.device = &ibdev->ib_dev;
|
|
|
|
ibev.element.port_num = 1;
|
|
|
|
ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
|
|
|
|
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
|
|
|
|
|
|
|
|
ib_dispatch_event(&ibev);
|
|
|
|
}
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
enum mlx4_dev_event event, unsigned long param)
|
2007-05-09 09:00:38 +08:00
|
|
|
{
|
|
|
|
struct ib_event ibev;
|
2008-10-23 06:38:42 +08:00
|
|
|
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
struct mlx4_eqe *eqe = NULL;
|
|
|
|
struct ib_event_work *ew;
|
2012-08-03 16:40:42 +08:00
|
|
|
int p = 0;
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
|
2015-02-03 22:48:37 +08:00
|
|
|
if (mlx4_is_bonded(dev) &&
|
|
|
|
((event == MLX4_DEV_EVENT_PORT_UP) ||
|
|
|
|
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
|
|
|
|
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
|
|
|
|
if (!ew)
|
|
|
|
return;
|
|
|
|
INIT_WORK(&ew->work, handle_bonded_port_state_event);
|
|
|
|
ew->ib_dev = ibdev;
|
|
|
|
queue_work(wq, &ew->work);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
|
|
|
|
eqe = (struct mlx4_eqe *)param;
|
|
|
|
else
|
2012-08-03 16:40:42 +08:00
|
|
|
p = (int) param;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
switch (event) {
|
2008-04-17 12:01:08 +08:00
|
|
|
case MLX4_DEV_EVENT_PORT_UP:
|
2012-08-03 16:40:42 +08:00
|
|
|
if (p > ibdev->num_ports)
|
|
|
|
return;
|
2012-08-03 16:40:49 +08:00
|
|
|
if (mlx4_is_master(dev) &&
|
|
|
|
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
|
|
|
|
IB_LINK_LAYER_INFINIBAND) {
|
|
|
|
mlx4_ib_invalidate_all_guid_record(ibdev, p);
|
|
|
|
}
|
2008-04-17 12:01:08 +08:00
|
|
|
ibev.event = IB_EVENT_PORT_ACTIVE;
|
2007-05-09 09:00:38 +08:00
|
|
|
break;
|
|
|
|
|
2008-04-17 12:01:08 +08:00
|
|
|
case MLX4_DEV_EVENT_PORT_DOWN:
|
2012-08-03 16:40:42 +08:00
|
|
|
if (p > ibdev->num_ports)
|
|
|
|
return;
|
2008-04-17 12:01:08 +08:00
|
|
|
ibev.event = IB_EVENT_PORT_ERR;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
|
2009-09-06 11:24:50 +08:00
|
|
|
ibdev->ib_active = false;
|
2007-05-09 09:00:38 +08:00
|
|
|
ibev.event = IB_EVENT_DEVICE_FATAL;
|
2015-02-08 17:49:34 +08:00
|
|
|
mlx4_ib_handle_catas_error(ibdev);
|
2007-05-09 09:00:38 +08:00
|
|
|
break;
|
|
|
|
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
|
|
|
|
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
|
|
|
|
if (!ew) {
|
|
|
|
pr_err("failed to allocate memory for events work\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
|
|
|
|
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
|
|
|
|
ew->ib_dev = ibdev;
|
2012-08-03 16:40:54 +08:00
|
|
|
/* need to queue only for port owner, which uses GEN_EQE */
|
|
|
|
if (mlx4_is_master(dev))
|
|
|
|
queue_work(wq, &ew->work);
|
|
|
|
else
|
|
|
|
handle_port_mgmt_change_event(&ew->work);
|
mlx4: Use port management change event instead of smp_snoop
The port management change event can replace smp_snoop. If the
capability bit for this event is set in dev-caps, the event is used
(by the driver setting the PORT_MNG_CHG_EVENT bit in the async event
mask in the MAP_EQ fw command). In this case, when the driver passes
incoming SMP PORT_INFO SET mads to the FW, the FW generates port
management change events to signal any changes to the driver.
If the FW generates these events, smp_snoop shouldn't be invoked in
ib_process_mad(), or duplicate events will occur (once from the
FW-generated event, and once from smp_snoop).
In the case where the FW does not generate port management change
events smp_snoop needs to be invoked to create these events. The flow
in smp_snoop has been modified to make use of the same procedures as
in the fw-generated-event case to generate the port management
events (LID change, Client-rereg, Pkey change, and/or GID change).
Port management change event handling required changing the
mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument
(last argument) had to be changed to unsigned long in order to
accommodate passing the EQE pointer.
We also needed to move the definition of struct mlx4_eqe from
net/mlx4.h to file device.h -- to make it available to the IB driver,
to handle port management change events.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2012-06-19 16:21:40 +08:00
|
|
|
return;
|
|
|
|
|
2012-08-03 16:40:42 +08:00
|
|
|
case MLX4_DEV_EVENT_SLAVE_INIT:
|
|
|
|
/* here, p is the slave id */
|
|
|
|
do_slave_init(ibdev, p, 1);
|
2015-03-03 23:28:49 +08:00
|
|
|
if (mlx4_is_master(dev)) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i <= ibdev->num_ports; i++) {
|
|
|
|
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
|
|
|
|
== IB_LINK_LAYER_INFINIBAND)
|
|
|
|
mlx4_ib_slave_alias_guid_event(ibdev,
|
|
|
|
p, i,
|
|
|
|
1);
|
|
|
|
}
|
|
|
|
}
|
2012-08-03 16:40:42 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
|
2015-03-03 23:28:49 +08:00
|
|
|
if (mlx4_is_master(dev)) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i <= ibdev->num_ports; i++) {
|
|
|
|
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
|
|
|
|
== IB_LINK_LAYER_INFINIBAND)
|
|
|
|
mlx4_ib_slave_alias_guid_event(ibdev,
|
|
|
|
p, i,
|
|
|
|
0);
|
|
|
|
}
|
|
|
|
}
|
2012-08-03 16:40:42 +08:00
|
|
|
/* here, p is the slave id */
|
|
|
|
do_slave_init(ibdev, p, 0);
|
|
|
|
return;
|
|
|
|
|
2007-05-09 09:00:38 +08:00
|
|
|
default:
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ibev.device = ibdev_ptr;
|
2015-02-03 22:48:37 +08:00
|
|
|
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
|
2007-05-09 09:00:38 +08:00
|
|
|
|
|
|
|
ib_dispatch_event(&ibev);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct mlx4_interface mlx4_ib_interface = {
|
2010-10-25 12:08:52 +08:00
|
|
|
.add = mlx4_ib_add,
|
|
|
|
.remove = mlx4_ib_remove,
|
|
|
|
.event = mlx4_ib_event,
|
2015-02-03 22:48:37 +08:00
|
|
|
.protocol = MLX4_PROT_IB_IPV6,
|
|
|
|
.flags = MLX4_INTFF_BONDING
|
2007-05-09 09:00:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Module load entry point.
 *
 * Creates the "mlx4_ib" single-threaded workqueue, runs mlx4_ib_mcg_init(),
 * and then hands mlx4_ib_interface to mlx4_register_interface().  If a later
 * step fails, the steps that already completed are undone in reverse order.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
 * error code reported by the step that failed.
 */
static int __init mlx4_ib_init(void)
{
	int ret;

	wq = create_singlethread_workqueue("mlx4_ib");
	if (wq == NULL)
		return -ENOMEM;

	ret = mlx4_ib_mcg_init();
	if (ret != 0)
		goto err_destroy_wq;

	ret = mlx4_register_interface(&mlx4_ib_interface);
	if (ret == 0)
		return 0;

	/* Registration failed: undo mcg init, then release the workqueue. */
	mlx4_ib_mcg_destroy();

err_destroy_wq:
	destroy_workqueue(wq);
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Module unload entry point.
 *
 * Undoes mlx4_ib_init() in reverse order: the interface is unregistered
 * first, then mlx4_ib_mcg_destroy() runs, and the workqueue is destroyed
 * last.
 */
static void __exit mlx4_ib_cleanup(void)
{
	mlx4_unregister_interface(&mlx4_ib_interface);

	mlx4_ib_mcg_destroy();

	destroy_workqueue(wq);
}
|
|
|
|
|
|
|
|
/* Hook mlx4_ib_init() and mlx4_ib_cleanup() up as the module's init and exit routines. */
module_init(mlx4_ib_init);
|
|
|
|
module_exit(mlx4_ib_cleanup);
|