InfiniBand/RDMA updates for 4.1:

 - IPoIB fixes from Doug Ledford and Erez Shitrit
 - iSER updates from Sagi Grimberg
 - mlx4 GUID handling changes from Yishai Hadas
 - other misc fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABCAAGBQJVN9SzAAoJEENa44ZhAt0hWq4QAJRFrwoe9ubTextSHeTU0FkY
 CydiQtGWrhyAHTX/KtdB1Uv9FzGHc6gqkAOXImouacYTM9ffypMF6Oj4xIYIMQtz
 MvNlNm07KOtQYlubiaZWcP5BjdLfMZjQxb03/9smygLTBjm80dAEt5X1znx7YrqI
 ZfE+ibPdvRqVEvFZKfT2U0kGU6oEVKrbJEiUCoJPwwcghDZQl18YmGOxt5qdI2uO
 V+71ozwozT8utSIl7S2YTJZBdkJ7tLrqrX2D/D2jUAmh1rqHIDrsXXiZ44UJj82i
 oXuwqmHXfq1LfuC9kxCX5JJpGeLE7E3OoxM1zIev31710zPA0v57rNKKweCi2Tj6
 Z36B0SIRV4ipWr/sBhVDr1Ffc/uap3DOIEU9Z+t8rwhELCEVuxmNaNb0K1e5nPiy
 YOQYp/ctC0NslM4mqQJLhGMVl6H8PjodbM1whnYZLsF1+8clNvdtLYzy/cA5fGbO
 tngUGXu0YZGdwvfuQhi5FB45XLaErJaPcMH0QRI5G0JgtjvbzXiMlqWtekTUBi7W
 DJNQlVRI4S1RYRBYkq709ymXiWwTeh3rhH+ZJpM+aY8b0NR/lx+dNyesNG+7GBJH
 y5UOOUck0w+JbQzZo264I6a5e8pXq3kMi3BH8pF4Jbo5WvxSF6uriXb6Q1JzfH20
 Jn0J6W9ghCSfrhMI1zgQ
 =v1jB
 -----END PGP SIGNATURE-----

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull InfiniBand/RDMA updates from Roland Dreier:

 - IPoIB fixes from Doug Ledford and Erez Shitrit

 - iSER updates from Sagi Grimberg

 - mlx4 GUID handling changes from Yishai Hadas

 - other misc fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (51 commits)
  mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures
  IB/iser: Rewrite bounce buffer code path
  IB/iser: Bump version to 1.6
  IB/iser: Remove code duplication for a single DMA entry
  IB/iser: Pass struct iser_mem_reg to iser_fast_reg_mr and iser_reg_sig_mr
  IB/iser: Modify struct iser_mem_reg members
  IB/iser: Make fastreg pool cache friendly
  IB/iser: Move PI context alloc/free to routines
  IB/iser: Move fastreg descriptor pool get/put to helper functions
  IB/iser: Merge build page-vec into register page-vec
  IB/iser: Get rid of struct iser_rdma_regd
  IB/iser: Remove redundant assignments in iser_reg_page_vec
  IB/iser: Move memory reg/dereg routines to iser_memory.c
  IB/iser: Don't pass ib_device to fall_to_bounce_buff routine
  IB/iser: Remove a redundant struct iser_data_buf
  IB/iser: Remove redundant cmd_data_len calculation
  IB/iser: Fix wrong calculation of protection buffer length
  IB/iser: Handle fastreg/local_inv completion errors
  IB/iser: Fix unload during ep_poll wrong dereference
  ib_srpt: convert printk's to pr_* functions
  ...
Linus Torvalds 2015-04-22 11:50:05 -07:00
commit 7c034dfd58
28 changed files with 1519 additions and 1140 deletions


@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
   To further test the InfiniBand software stack, use IPoIB (this
   assumes you have two IB hosts named host1 and host2):
 
-  host1$ ifconfig ib0 a.b.c.x
-  host2$ ifconfig ib0 a.b.c.y
+  host1$ ip link set dev ib0 up
+  host1$ ip address add dev ib0 a.b.c.x
+  host2$ ip link set dev ib0 up
+  host2$ ip address add dev ib0 a.b.c.y
 
   host1$ ping a.b.c.y
   host2$ ping a.b.c.x
@@ -229,7 +231,8 @@ NFS/RDMA Setup
   $ modprobe ib_mthca
   $ modprobe ib_ipoib
 
-  $ ifconfig ib0 a.b.c.d
+  $ ip li set dev ib0 up
+  $ ip addr add dev ib0 a.b.c.d
 
   NOTE: use unique addresses for the client and server


@@ -8803,6 +8803,15 @@ W: http://www.emulex.com
 S: Supported
 F: drivers/net/ethernet/emulex/benet/
 
+EMULEX ONECONNECT ROCE DRIVER
+M: Selvin Xavier <selvin.xavier@emulex.com>
+M: Devesh Sharma <devesh.sharma@emulex.com>
+M: Mitesh Ahuja <mitesh.ahuja@emulex.com>
+L: linux-rdma@vger.kernel.org
+W: http://www.emulex.com
+S: Supported
+F: drivers/infiniband/hw/ocrdma/
+
 SFC NETWORK DRIVER
 M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
 M: Shradha Shah <sshah@solarflare.com>


@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
     if (dmasync)
         dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
+    if (!size)
+        return ERR_PTR(-EINVAL);
+
     /*
      * If the combination of the addr and size requested for this memory
      * region causes an integer overflow, return error.
      */
-    if ((PAGE_ALIGN(addr + size) <= size) ||
-        (PAGE_ALIGN(addr + size) <= addr))
+    if (((addr + size) < addr) ||
+        PAGE_ALIGN(addr + size) < (addr + size))
         return ERR_PTR(-EINVAL);
 
     if (!can_do_mlock())
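As a reading aid for the overflow check added above (a self-contained userspace sketch, not code from the patch: PAGE_SIZE/PAGE_ALIGN stand in for the usual page-size constant and round-up-to-page macro, and the helper name is made up):

    #include <stdbool.h>
    #include <stddef.h>

    #define PAGE_SIZE     4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Mirrors the validation now done in ib_umem_get(): reject a zero
     * length, a wrapping addr + size, and a sum whose round-up to a
     * page boundary wraps. */
    static bool umem_range_invalid(unsigned long addr, size_t size)
    {
        if (size == 0)
            return true;                      /* matches the new !size test */
        if (addr + size < addr)
            return true;                      /* addr + size overflowed */
        if (PAGE_ALIGN(addr + size) < addr + size)
            return true;                      /* page rounding overflowed */
        return false;
    }

For example, on a 64-bit build umem_range_invalid(0xfffffffffffff000UL, 0x2000) is true because addr + size wraps, while umem_range_invalid(0x1000, 0x2000) is false.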


@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
         kfree(uqp);
     }
 
+    list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+        struct ib_srq *srq = uobj->object;
+        struct ib_uevent_object *uevent =
+            container_of(uobj, struct ib_uevent_object, uobject);
+
+        idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+        ib_destroy_srq(srq);
+        ib_uverbs_release_uevent(file, uevent);
+        kfree(uevent);
+    }
+
     list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
         struct ib_cq *cq = uobj->object;
         struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
         kfree(ucq);
     }
 
-    list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-        struct ib_srq *srq = uobj->object;
-        struct ib_uevent_object *uevent =
-            container_of(uobj, struct ib_uevent_object, uobject);
-
-        idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-        ib_destroy_srq(srq);
-        ib_uverbs_release_uevent(file, uevent);
-        kfree(uevent);
-    }
-
     list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
         struct ib_mr *mr = uobj->object;


@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
     int query_id;
     struct list_head list;
     int block_num;
+    ib_sa_comp_mask guid_indexes;
+    u8 method;
 };
 
 struct mlx4_next_alias_guid_work {
     u8 port;
     u8 block_num;
+    u8 method;
     struct mlx4_sriov_alias_guid_info_rec_det rec_det;
 };
 
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                     int *resched_delay_sec);
+
 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
                                          u8 port_num, u8 *p_data)
@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
return IB_SA_COMP_MASK(4 + index); return IB_SA_COMP_MASK(4 + index);
} }
void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
int port, int slave_init)
{
__be64 curr_guid, required_guid;
int record_num = slave / 8;
int index = slave % 8;
int port_index = port - 1;
unsigned long flags;
int do_work = 0;
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
GUID_STATE_NEED_PORT_INIT)
goto unlock;
if (!slave_init) {
curr_guid = *(__be64 *)&dev->sriov.
alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * index];
if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
!curr_guid)
goto unlock;
required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
} else {
required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
goto unlock;
}
*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * index] = required_guid;
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].guid_indexes
|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE;
/* set to run immediately */
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].time_to_run = 0;
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
guids_retry_schedule[index] = 0;
do_work = 1;
unlock:
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
if (do_work)
mlx4_ib_init_alias_guid_work(dev, port_index);
}
/* /*
* Whenever new GUID is set/unset (guid table change) create event and * Whenever new GUID is set/unset (guid table change) create event and
* notify the relevant slave (master also should be notified). * notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
     enum slave_port_state prev_state;
     __be64 tmp_cur_ag, form_cache_ag;
     enum slave_port_gen_event gen_event;
+    struct mlx4_sriov_alias_guid_info_rec_det *rec;
+    unsigned long flags;
+    __be64 required_value;
 
     if (!mlx4_is_master(dev->dev))
         return;
 
+    rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+            all_rec_per_port[block_num];
     guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
                                ports_guid[port_num - 1].
                                all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
          */
         if (tmp_cur_ag != form_cache_ag)
             continue;
-        mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
 
+        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+        required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+        if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+            required_value = 0;
+
+        if (tmp_cur_ag == required_value) {
+            rec->guid_indexes = rec->guid_indexes &
+                ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+        } else {
+            /* may notify port down if value is 0 */
+            if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+                spin_unlock_irqrestore(&dev->sriov.
+                    alias_guid.ag_work_lock, flags);
+                continue;
+            }
+        }
+        spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+                               flags);
+        mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
         /*2 cases: Valid GUID, and Invalid Guid*/
         if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
             set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
                                           MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
                                           &gen_event);
-            pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
-                     slave_id, port_num);
-            mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
-                                           MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+            if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+                pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+                         slave_id, port_num);
+                mlx4_gen_port_state_change_eqe(dev->dev,
+                                               slave_id,
+                                               port_num,
+                                               MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+            }
         }
     }
 }
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
     int i;
     struct mlx4_sriov_alias_guid_info_rec_det *rec;
     unsigned long flags, flags1;
+    ib_sa_comp_mask declined_guid_indexes = 0;
+    ib_sa_comp_mask applied_guid_indexes = 0;
+    unsigned int resched_delay_sec = 0;
 
     if (!context)
         return;
@@ -216,9 +303,9 @@
         all_rec_per_port[cb_ctx->block_num];
 
     if (status) {
-        rec->status = MLX4_GUID_INFO_STATUS_IDLE;
         pr_debug("(port: %d) failed: status = %d\n",
                  cb_ctx->port, status);
+        rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
         goto out;
     }
@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
rec = &dev->sriov.alias_guid.ports_guid[port_index]. rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[guid_rec->block_num]; all_rec_per_port[guid_rec->block_num];
rec->status = MLX4_GUID_INFO_STATUS_SET; spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
rec->method = MLX4_GUID_INFO_RECORD_SET;
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag; __be64 sm_response, required_val;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
*/
if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
"block_num: %d was declined by SM, "
"ownership by %d (0 = driver, 1=sysAdmin,"
" 2=None)\n", __func__, i,
guid_rec->block_num, rec->ownership);
if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
/* if it is driver assign, asks for new GUID from SM*/
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
MLX4_NOT_SET_GUID;
/* Mark the record as not assigned, and let it if (!(cb_ctx->guid_indexes &
* be sent again in the next work sched.*/ mlx4_ib_get_aguid_comp_mask_from_ix(i)))
rec->status = MLX4_GUID_INFO_STATUS_IDLE; continue;
rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i); sm_response = *(__be64 *)&guid_rec->guid_info_list
} [i * GUID_REC_SIZE];
required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
if (required_val ==
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
goto next_entry;
/* A new value was set till we got the response */
pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
be64_to_cpu(required_val),
i, guid_rec->block_num);
goto entry_declined;
}
/* check if the SM didn't assign one of the records.
* if it didn't, re-ask for.
*/
if (sm_response == MLX4_NOT_SET_GUID) {
if (rec->guids_retry_schedule[i] == 0)
mlx4_ib_warn(&dev->ib_dev,
"%s:Record num %d in block_num: %d was declined by SM\n",
__func__, i,
guid_rec->block_num);
goto entry_declined;
} else { } else {
/* properly assigned record. */ /* properly assigned record. */
/* We save the GUID we just got from the SM in the /* We save the GUID we just got from the SM in the
* admin_guid in order to be persistent, and in the * admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */ * request from the sm the process will ask for the same GUID */
if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && if (required_val &&
tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { sm_response != required_val) {
/* the sysadmin assignment failed.*/ /* Warn only on first retry */
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" if (rec->guids_retry_schedule[i] == 0)
" admin guid after SysAdmin " mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
"configuration. " " admin guid after SysAdmin "
"Record num %d in block_num:%d " "configuration. "
"was declined by SM, " "Record num %d in block_num:%d "
"new val(0x%llx) was kept\n", "was declined by SM, "
__func__, i, "new val(0x%llx) was kept, SM returned (0x%llx)\n",
guid_rec->block_num, __func__, i,
be64_to_cpu(*(__be64 *) & guid_rec->block_num,
rec->all_recs[i * GUID_REC_SIZE])); be64_to_cpu(required_val),
be64_to_cpu(sm_response));
goto entry_declined;
} else { } else {
memcpy(&rec->all_recs[i * GUID_REC_SIZE], *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
&guid_rec->guid_info_list[i * GUID_REC_SIZE], sm_response;
GUID_REC_SIZE); if (required_val == 0)
mlx4_set_admin_guid(dev->dev,
sm_response,
(guid_rec->block_num
* NUM_ALIAS_GUID_IN_REC) + i,
cb_ctx->port);
goto next_entry;
} }
} }
entry_declined:
declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
rec->guids_retry_schedule[i] =
(rec->guids_retry_schedule[i] == 0) ? 1 :
min((unsigned int)60,
rec->guids_retry_schedule[i] * 2);
/* using the minimum value among all entries in that record */
resched_delay_sec = (resched_delay_sec == 0) ?
rec->guids_retry_schedule[i] :
min(resched_delay_sec,
rec->guids_retry_schedule[i]);
continue;
next_entry:
rec->guids_retry_schedule[i] = 0;
} }
applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes;
if (declined_guid_indexes ||
rec->guid_indexes & ~(applied_guid_indexes)) {
pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
guid_rec->block_num,
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
rec->time_to_run = ktime_get_real_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
/* /*
The func is call here to close the cases when the The func is call here to close the cases when the
sm doesn't send smp, so in the sa response the driver sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
 out:
     spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
     spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
-    if (!dev->sriov.is_going_down)
+    if (!dev->sriov.is_going_down) {
+        get_low_record_time_index(dev, port_index, &resched_delay_sec);
         queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
                            &dev->sriov.alias_guid.ports_guid[port_index].
-                           alias_guid_work, 0);
+                           alias_guid_work,
+                           msecs_to_jiffies(resched_delay_sec * 1000));
+    }
     if (cb_ctx->sa_query) {
         list_del(&cb_ctx->list);
         kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
     ib_sa_comp_mask comp_mask = 0;
 
     dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
-        = MLX4_GUID_INFO_STATUS_IDLE;
-    dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
-        = MLX4_GUID_INFO_RECORD_SET;
+        = MLX4_GUID_INFO_STATUS_SET;
 
     /* calculate the comp_mask for that record.*/
     for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
            need to assign GUIDs, then don't put it up for assignment.
         */
         if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
-            (!index && !i) ||
-            MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
-                ports_guid[port - 1].all_rec_per_port[index].ownership)
+            (!index && !i))
             continue;
         comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
     }
     dev->sriov.alias_guid.ports_guid[port - 1].
-        all_rec_per_port[index].guid_indexes = comp_mask;
+        all_rec_per_port[index].guid_indexes |= comp_mask;
+    if (dev->sriov.alias_guid.ports_guid[port - 1].
+        all_rec_per_port[index].guid_indexes)
+        dev->sriov.alias_guid.ports_guid[port - 1].
+            all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
 }
 
 static int set_guid_rec(struct ib_device *ibdev,
-                        u8 port, int index,
-                        struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+                        struct mlx4_next_alias_guid_work *rec)
 {
     int err;
     struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
     struct ib_port_attr attr;
     struct mlx4_alias_guid_work_context *callback_context;
     unsigned long resched_delay, flags, flags1;
+    u8 port = rec->port + 1;
+    int index = rec->block_num;
+    struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
     struct list_head *head =
         &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
 
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
     callback_context->port = port;
     callback_context->dev = dev;
     callback_context->block_num = index;
+    callback_context->guid_indexes = rec_det->guid_indexes;
+    callback_context->method = rec->method;
 
     memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
@@ -399,7 +538,7 @@
     callback_context->query_id =
         ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
                                   ibdev, port, &guid_info_rec,
-                                  comp_mask, rec_det->method, 1000,
+                                  comp_mask, rec->method, 1000,
                                   GFP_KERNEL, aliasguid_query_handler,
                                   callback_context,
                                   &callback_context->sa_query);
@ -434,6 +573,30 @@ static int set_guid_rec(struct ib_device *ibdev,
return err; return err;
} }
static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
{
int j, k, entry;
__be64 guid;
/*Check if the SM doesn't need to assign the GUIDs*/
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
entry = j * NUM_ALIAS_GUID_IN_REC + k;
/* no request for the 0 entry (hw guid) */
if (!entry || entry > dev->dev->persist->num_vfs ||
!mlx4_is_slave_active(dev->dev, entry))
continue;
guid = mlx4_get_admin_guid(dev->dev, entry, port);
*(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[j].all_recs
[GUID_REC_SIZE * k] = guid;
pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
entry,
be64_to_cpu(guid),
port);
}
}
}
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
{ {
int i; int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
     spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
     spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
 
+    if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
+        GUID_STATE_NEED_PORT_INIT) {
+        mlx4_ib_guid_port_init(dev, port);
+        dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
+            (~GUID_STATE_NEED_PORT_INIT);
+    }
+
     for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
         invalidate_guid_record(dev, port, i);
@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
} }
static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
struct mlx4_next_alias_guid_work *next_rec,
int record_index)
{
int i;
int lowset_time_entry = -1;
int lowest_time = 0;
ib_sa_comp_mask delete_guid_indexes = 0;
ib_sa_comp_mask set_guid_indexes = 0;
struct mlx4_sriov_alias_guid_info_rec_det *rec =
&dev->sriov.alias_guid.ports_guid[port].
all_rec_per_port[record_index];
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
if (!(rec->guid_indexes &
mlx4_ib_get_aguid_comp_mask_from_ix(i)))
continue;
if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
delete_guid_indexes |=
mlx4_ib_get_aguid_comp_mask_from_ix(i);
else
set_guid_indexes |=
mlx4_ib_get_aguid_comp_mask_from_ix(i);
if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
lowest_time) {
lowset_time_entry = i;
lowest_time = rec->guids_retry_schedule[i];
}
}
memcpy(&next_rec->rec_det, rec, sizeof(*rec));
next_rec->port = port;
next_rec->block_num = record_index;
if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
next_rec->rec_det.guid_indexes = delete_guid_indexes;
next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
} else {
next_rec->rec_det.guid_indexes = set_guid_indexes;
next_rec->method = MLX4_GUID_INFO_RECORD_SET;
}
}
/* return index of record that should be updated based on lowest
* rescheduled time
*/
static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
int *resched_delay_sec)
{
int record_index = -1;
u64 low_record_time = 0;
struct mlx4_sriov_alias_guid_info_rec_det rec;
int j;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
rec = dev->sriov.alias_guid.ports_guid[port].
all_rec_per_port[j];
if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
rec.guid_indexes) {
if (record_index == -1 ||
rec.time_to_run < low_record_time) {
record_index = j;
low_record_time = rec.time_to_run;
}
}
}
if (resched_delay_sec) {
u64 curr_time = ktime_get_real_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
}
return record_index;
}
/* The function returns the next record that was /* The function returns the next record that was
* not configured (or failed to be configured) */ * not configured (or failed to be configured) */
static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
struct mlx4_next_alias_guid_work *rec) struct mlx4_next_alias_guid_work *rec)
{ {
int j;
unsigned long flags; unsigned long flags;
int record_index;
int ret = 0;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); record_index = get_low_record_time_index(dev, port, NULL);
if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
MLX4_GUID_INFO_STATUS_IDLE) { if (record_index < 0) {
memcpy(&rec->rec_det, ret = -ENOENT;
&dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], goto out;
sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
rec->port = port;
rec->block_num = j;
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
MLX4_GUID_INFO_STATUS_PENDING;
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
return 0;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
} }
return -ENOENT;
}
static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, set_required_record(dev, port, rec, record_index);
int rec_index, out:
struct mlx4_sriov_alias_guid_info_rec_det *rec_det) spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
{ return ret;
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
rec_det->guid_indexes;
memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
rec_det->status;
}
static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
{
int j;
struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
IB_SA_GUIDINFO_REC_GID7;
rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
set_administratively_guid_record(dev, port, j, &rec_det);
}
} }
static void alias_guid_work(struct work_struct *work) static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
         goto out;
     }
 
-    set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
-                 &rec->rec_det);
+    set_guid_rec(&dev->ib_dev, rec);
 
 out:
     kfree(rec);
 }
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
     spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
     spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
     if (!dev->sriov.is_going_down) {
+        /* If there is pending one should cancell then run, otherwise
+         * won't run till previous one is ended as same work
+         * struct is used.
+         */
+        cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
+                            alias_guid_work);
         queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
                            &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
     }
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
 {
     char alias_wq_name[15];
     int ret = 0;
-    int i, j, k;
+    int i, j;
     union ib_gid gid;
 
     if (!mlx4_is_master(dev->dev))
@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
for (i = 0 ; i < dev->num_ports; i++) { for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0, memset(&dev->sriov.alias_guid.ports_guid[i], 0,
sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
/*Check if the SM doesn't need to assign the GUIDs*/ dev->sriov.alias_guid.ports_guid[i].state_flags |=
GUID_STATE_NEED_PORT_INIT;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
if (mlx4_ib_sm_guid_assign) { /* mark each val as it was deleted */
dev->sriov.alias_guid.ports_guid[i]. memset(dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j]. all_rec_per_port[j].all_recs, 0xFF,
ownership = MLX4_GUID_DRIVER_ASSIGN; sizeof(dev->sriov.alias_guid.ports_guid[i].
continue; all_rec_per_port[j].all_recs));
}
dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
ownership = MLX4_GUID_NONE_ASSIGN;
/*mark each val as it was deleted,
till the sysAdmin will give it valid val*/
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
*(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
}
} }
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
/*prepare the records, set them to be allocated by sm*/ /*prepare the records, set them to be allocated by sm*/
if (mlx4_ib_sm_guid_assign)
for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j); invalidate_guid_record(dev, i + 1, j);
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i; dev->sriov.alias_guid.ports_guid[i].port = i;
if (mlx4_ib_sm_guid_assign)
set_all_slaves_guids(dev, i);
snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq = dev->sriov.alias_guid.ports_guid[i].wq =


@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                               tun_qp->ring[i].addr,
                               rx_buf_size,
                               DMA_FROM_DEVICE);
+        if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+            kfree(tun_qp->ring[i].addr);
+            goto err;
+        }
     }
 
     for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                               tun_qp->tx_ring[i].buf.addr,
                               tx_buf_size,
                               DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(ctx->ib_dev,
+                                 tun_qp->tx_ring[i].buf.map)) {
+            kfree(tun_qp->tx_ring[i].buf.addr);
+            goto tx_err;
+        }
         tun_qp->tx_ring[i].ah = NULL;
     }
     spin_lock_init(&tun_qp->tx_lock);


@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
 
 static const char mlx4_ib_version[] =
     DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
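The hunk above flips the sm_guid_assign default from 1 to 0, so SM-driven alias GUID assignment is now opt-in rather than on by default. If the old behaviour is wanted, the parameter can presumably still be set at module load time, in the same style as the setup commands quoted earlier in this log (illustrative invocation only; packaging details vary by distribution):

    $ modprobe mlx4_ib sm_guid_assign=1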
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
     case MLX4_DEV_EVENT_SLAVE_INIT:
         /* here, p is the slave id */
         do_slave_init(ibdev, p, 1);
+        if (mlx4_is_master(dev)) {
+            int i;
+
+            for (i = 1; i <= ibdev->num_ports; i++) {
+                if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                    == IB_LINK_LAYER_INFINIBAND)
+                    mlx4_ib_slave_alias_guid_event(ibdev,
+                                                   p, i,
+                                                   1);
+            }
+        }
         return;
 
     case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+        if (mlx4_is_master(dev)) {
+            int i;
+
+            for (i = 1; i <= ibdev->num_ports; i++) {
+                if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                    == IB_LINK_LAYER_INFINIBAND)
+                    mlx4_ib_slave_alias_guid_event(ibdev,
+                                                   p, i,
+                                                   0);
+            }
+        }
         /* here, p is the slave id */
         do_slave_init(ibdev, p, 0);
         return;


@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
 enum mlx4_guid_alias_rec_status {
     MLX4_GUID_INFO_STATUS_IDLE,
     MLX4_GUID_INFO_STATUS_SET,
-    MLX4_GUID_INFO_STATUS_PENDING,
 };
 
-enum mlx4_guid_alias_rec_ownership {
-    MLX4_GUID_DRIVER_ASSIGN,
-    MLX4_GUID_SYSADMIN_ASSIGN,
-    MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
 
 enum mlx4_guid_alias_rec_method {
     MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
     u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
     ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
     enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-    u8 method; /*set or delete*/
-    enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+    unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+    u64 time_to_run;
 };
 
 struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
     struct workqueue_struct *wq;
     struct delayed_work alias_guid_work;
     u8 port;
+    u32 state_flags;
     struct mlx4_sriov_alias_guid *parent;
     struct list_head cb_list;
 };
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
 void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
                              struct attribute *attr);
 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                    int port, int slave_init);
 
 int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;


@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
             ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
                               sizeof (struct mlx4_ib_proxy_sqp_hdr),
                               DMA_FROM_DEVICE);
+        if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+            kfree(qp->sqp_proxy_rcv[i].addr);
+            goto err;
+        }
     }
     return 0;
 
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 
     memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
 
-    *lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-                               wr->wr.ud.hlen);
+    *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
     *lso_seg_len = halign;
     return 0;
 }


@@ -46,21 +46,17 @@
 static ssize_t show_admin_alias_guid(struct device *dev,
                                      struct device_attribute *attr, char *buf)
 {
-    int record_num;/*0-15*/
-    int guid_index_in_rec; /*0 - 7*/
     struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
         container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
     struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
     struct mlx4_ib_dev *mdev = port->dev;
+    __be64 sysadmin_ag_val;
 
-    record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
-    guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+    sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+                                          mlx4_ib_iov_dentry->entry_num,
+                                          port->num);
 
-    return sprintf(buf, "%llx\n",
-                   be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
-                               ports_guid[port->num - 1].
-                               all_rec_per_port[record_num].
-                               all_recs[8 * guid_index_in_rec]));
+    return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
 }
 
 /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
     struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
     struct mlx4_ib_dev *mdev = port->dev;
     u64 sysadmin_ag_val;
+    unsigned long flags;
 
     record_num = mlx4_ib_iov_dentry->entry_num / 8;
     guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
         pr_err("GUID 0 block 0 is RO\n");
         return count;
     }
+    spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
     sscanf(buf, "%llx", &sysadmin_ag_val);
     *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
         all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
     /* Change the state to be pending for update */
     mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
         = MLX4_GUID_INFO_STATUS_IDLE ;
-
-    mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-        = MLX4_GUID_INFO_RECORD_SET;
-
-    switch (sysadmin_ag_val) {
-    case MLX4_GUID_FOR_DELETE_VAL:
-        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-            = MLX4_GUID_INFO_RECORD_DELETE;
-        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-            = MLX4_GUID_SYSADMIN_ASSIGN;
-        break;
-    /* The sysadmin requests the SM to re-assign */
-    case MLX4_NOT_SET_GUID:
-        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-            = MLX4_GUID_DRIVER_ASSIGN;
-        break;
-    /* The sysadmin requests a specific value.*/
-    default:
-        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-            = MLX4_GUID_SYSADMIN_ASSIGN;
-        break;
-    }
+    mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+                        mlx4_ib_iov_dentry->entry_num,
+                        port->num);
 
     /* set the record index */
     mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
-        = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+        |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
 
+    spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
     mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
 
     return count;


@@ -87,7 +87,6 @@ enum {
     IPOIB_FLAG_ADMIN_UP = 2,
     IPOIB_PKEY_ASSIGNED = 3,
     IPOIB_FLAG_SUBINTERFACE = 5,
-    IPOIB_MCAST_RUN = 6,
     IPOIB_STOP_REAPER = 7,
     IPOIB_FLAG_ADMIN_CM = 9,
     IPOIB_FLAG_UMCAST = 10,
@@ -98,9 +97,15 @@ enum {
     IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
     IPOIB_MCAST_FLAG_SENDONLY = 1,
-    IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+    /*
+     * For IPOIB_MCAST_FLAG_BUSY
+     * When set, in flight join and mcast->mc is unreliable
+     * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+     *   haven't started yet
+     * When clear and mcast->mc is valid pointer, join was successful
+     */
+    IPOIB_MCAST_FLAG_BUSY = 2,
     IPOIB_MCAST_FLAG_ATTACHED = 3,
-    IPOIB_MCAST_JOIN_STARTED = 4,
 
     MAX_SEND_CQE = 16,
     IPOIB_CM_COPYBREAK = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
 
     unsigned long created;
     unsigned long backoff;
+    unsigned long delay_until;
 
     unsigned long flags;
     unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
     struct completion deleted;
 };
 
+struct ipoib_qp_state_validate {
+    struct work_struct work;
+    struct ipoib_dev_priv *priv;
+};
+
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
     struct list_head multicast_list;
     struct rb_root multicast_tree;
 
+    struct workqueue_struct *wq;
     struct delayed_work mcast_task;
     struct work_struct carrier_on_task;
     struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
 #define IPOIB_UD_MTU(ib_mtu)        (ib_mtu - IPOIB_ENCAP_LEN)
 #define IPOIB_UD_BUF_SIZE(ib_mtu)   (ib_mtu + IB_GRH_BYTES)
 
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
-    return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
-
 void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
 static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
 {
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
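The IPOIB_MCAST_FLAG_BUSY comment in the hunk above describes three distinct states for a multicast group. Purely as an illustration of those rules (hypothetical helper, not part of the patch; it assumes the usual kernel test_bit() and IS_ERR_OR_NULL() helpers and the existing mcast->mc / mcast->flags fields):

    /* Illustrative only: encodes the state rules spelled out in the
     * IPOIB_MCAST_FLAG_BUSY comment above. */
    static inline bool ipoib_mcast_join_finished(struct ipoib_mcast *mcast)
    {
        if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
            return false;               /* join in flight, mcast->mc unreliable */
        if (IS_ERR_OR_NULL(mcast->mc))
            return false;               /* not started yet, or needs a restart */
        return true;                    /* join completed successfully */
    }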


@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
     }
 
     spin_lock_irq(&priv->lock);
-    queue_delayed_work(ipoib_workqueue,
+    queue_delayed_work(priv->wq,
                        &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
     /* Add this entry to passive ids list head, but do not re-add it
      * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
         spin_lock_irqsave(&priv->lock, flags);
         list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
         ipoib_cm_start_rx_drain(priv);
-        queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+        queue_work(priv->wq, &priv->cm.rx_reap_task);
         spin_unlock_irqrestore(&priv->lock, flags);
     } else
         ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
         spin_lock_irqsave(&priv->lock, flags);
         list_move(&p->list, &priv->cm.rx_reap_list);
         spin_unlock_irqrestore(&priv->lock, flags);
-        queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+        queue_work(priv->wq, &priv->cm.rx_reap_task);
     }
     return;
 }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
         if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
             list_move(&tx->list, &priv->cm.reap_list);
-            queue_work(ipoib_workqueue, &priv->cm.reap_task);
+            queue_work(priv->wq, &priv->cm.reap_task);
         }
 
         clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
         if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
             list_move(&tx->list, &priv->cm.reap_list);
-            queue_work(ipoib_workqueue, &priv->cm.reap_task);
+            queue_work(priv->wq, &priv->cm.reap_task);
         }
 
         spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
     tx->dev = dev;
     list_add(&tx->list, &priv->cm.start_list);
     set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-    queue_work(ipoib_workqueue, &priv->cm.start_task);
+    queue_work(priv->wq, &priv->cm.start_task);
     return tx;
 }
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
     if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
         spin_lock_irqsave(&priv->lock, flags);
         list_move(&tx->list, &priv->cm.reap_list);
-        queue_work(ipoib_workqueue, &priv->cm.reap_task);
+        queue_work(priv->wq, &priv->cm.reap_task);
         ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                   tx->neigh->daddr + 4);
         tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
     skb_queue_tail(&priv->cm.skb_queue, skb);
     if (e)
-        queue_work(ipoib_workqueue, &priv->cm.skb_task);
+        queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
     }
 
     if (!list_empty(&priv->cm.passive_ids))
-        queue_delayed_work(ipoib_workqueue,
+        queue_delayed_work(priv->wq,
                            &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
     spin_unlock_irq(&priv->lock);
 }


@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
u64 mapping[IPOIB_UD_RX_SG]) u64 mapping[IPOIB_UD_RX_SG])
{ {
if (ipoib_ud_need_sg(priv->max_ib_mtu)) { ib_dma_unmap_single(priv->ca, mapping[0],
ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
DMA_FROM_DEVICE);
} else
ib_dma_unmap_single(priv->ca, mapping[0],
IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
DMA_FROM_DEVICE);
}
static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
struct sk_buff *skb,
unsigned int length)
{
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
unsigned int size;
/*
* There is only two buffers needed for max_payload = 4K,
* first buf size is IPOIB_UD_HEAD_SIZE
*/
skb->tail += IPOIB_UD_HEAD_SIZE;
skb->len += length;
size = length - IPOIB_UD_HEAD_SIZE;
skb_frag_size_set(frag, size);
skb->data_len += size;
skb->truesize += PAGE_SIZE;
} else
skb_put(skb, length);
} }
static int ipoib_ib_post_receive(struct net_device *dev, int id) static int ipoib_ib_post_receive(struct net_device *dev, int id)
@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb; struct sk_buff *skb;
int buf_size; int buf_size;
int tailroom;
u64 *mapping; u64 *mapping;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) { buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
buf_size = IPOIB_UD_HEAD_SIZE;
tailroom = 128; /* reserve some tailroom for IP/TCP headers */
} else {
buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
tailroom = 0;
}
skb = dev_alloc_skb(buf_size + tailroom + 4); skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; return NULL;
@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
goto error; goto error;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
struct page *page = alloc_page(GFP_ATOMIC);
if (!page)
goto partial_error;
skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
mapping[1] =
ib_dma_map_page(priv->ca, page,
0, PAGE_SIZE, DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
goto partial_error;
}
priv->rx_ring[id].skb = skb; priv->rx_ring[id].skb = skb;
return skb; return skb;
partial_error:
ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
error: error:
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
return NULL; return NULL;
@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wc->byte_len, wc->slid); wc->byte_len, wc->slid);
ipoib_ud_dma_unmap_rx(priv, mapping); ipoib_ud_dma_unmap_rx(priv, mapping);
ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
skb_put(skb, wc->byte_len);
/* First byte of dgid signals multicast when 0xff */ /* First byte of dgid signals multicast when 0xff */
dgid = &((struct ib_grh *)skb->data)->dgid; dgid = &((struct ib_grh *)skb->data)->dgid;
@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN); skb_pull(skb, IPOIB_ENCAP_LEN);
skb->truesize = SKB_TRUESIZE(skb->len);
++dev->stats.rx_packets; ++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len; dev->stats.rx_bytes += skb->len;
@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
} }
} }
/*
* As the result of a completion error the QP Can be transferred to SQE states.
* The function checks if the (send)QP is in SQE state and
* moves it back to RTS state, that in order to have it functional again.
*/
static void ipoib_qp_state_validate_work(struct work_struct *work)
{
struct ipoib_qp_state_validate *qp_work =
container_of(work, struct ipoib_qp_state_validate, work);
struct ipoib_dev_priv *priv = qp_work->priv;
struct ib_qp_attr qp_attr;
struct ib_qp_init_attr query_init_attr;
int ret;
ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
if (ret) {
ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
__func__, ret);
goto free_res;
}
pr_info("%s: QP: 0x%x is in state: %d\n",
__func__, priv->qp->qp_num, qp_attr.qp_state);
/* currently support only in SQE->RTS transition*/
if (qp_attr.qp_state == IB_QPS_SQE) {
qp_attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
if (ret) {
pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
ret, priv->qp->qp_num);
goto free_res;
}
pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
__func__, priv->qp->qp_num);
} else {
pr_warn("QP (%d) will stay in state: %d\n",
priv->qp->qp_num, qp_attr.qp_state);
}
free_res:
kfree(qp_work);
}
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_wake_queue(dev); netif_wake_queue(dev);
if (wc->status != IB_WC_SUCCESS && if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR) wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_qp_state_validate *qp_work;
ipoib_warn(priv, "failed send event " ipoib_warn(priv, "failed send event "
"(status=%d, wrid=%d vend_err %x)\n", "(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err); wc->status, wr_id, wc->vendor_err);
qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
if (!qp_work) {
ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
__func__, priv->qp->qp_num);
return;
}
INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
qp_work->priv = priv;
queue_work(priv->wq, &qp_work->work);
}
} }
static int poll_tx(struct ipoib_dev_priv *priv) static int poll_tx(struct ipoib_dev_priv *priv)
@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev); __ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ)); round_jiffies_relative(HZ));
} }
static void ipoib_flush_ah(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
ipoib_reap_ah(&priv->ah_reap_task.work);
}
static void ipoib_stop_ah(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
static void ipoib_ib_tx_timer_func(unsigned long ctx) static void ipoib_ib_tx_timer_func(unsigned long ctx)
{ {
drain_tx_cq((struct net_device *)ctx); drain_tx_cq((struct net_device *)ctx);
} }
int ipoib_ib_dev_open(struct net_device *dev, int flush) int ipoib_ib_dev_open(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret; int ret;
@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
} }
clear_bit(IPOIB_STOP_REAPER, &priv->flags); clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ)); round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop: dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi); napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev, flush); ipoib_ib_dev_stop(dev);
return -1; return -1;
} }
@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev); return ipoib_mcast_start_thread(dev);
} }
int ipoib_ib_dev_down(struct net_device *dev, int flush) int ipoib_ib_dev_down(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev); netif_carrier_off(dev);
ipoib_mcast_stop_thread(dev, flush); ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev); ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable(); local_bh_enable();
} }
int ipoib_ib_dev_stop(struct net_device *dev, int flush) int ipoib_ib_dev_stop(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr; struct ib_qp_attr qp_attr;
@ -877,24 +902,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n"); ipoib_warn(priv, "Failed to modify QP to RESET state\n");
/* Wait for all AHs to be reaped */ ipoib_flush_ah(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
if (flush)
flush_workqueue(ipoib_workqueue);
begin = jiffies;
while (!list_empty(&priv->dead_ahs)) {
__ipoib_reap_ah(dev);
if (time_after(jiffies, begin + HZ)) {
ipoib_warn(priv, "timing out; will leak address handles\n");
break;
}
msleep(1);
}
ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev); (unsigned long) dev);
if (dev->flags & IFF_UP) { if (dev->flags & IFF_UP) {
if (ipoib_ib_dev_open(dev, 1)) { if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev); ipoib_transport_dev_cleanup(dev);
return -ENODEV; return -ENODEV;
} }
@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (level == IPOIB_FLUSH_LIGHT) { if (level == IPOIB_FLUSH_LIGHT) {
ipoib_mark_paths_invalid(dev); ipoib_mark_paths_invalid(dev);
ipoib_mcast_dev_flush(dev); ipoib_mcast_dev_flush(dev);
ipoib_flush_ah(dev);
} }
if (level >= IPOIB_FLUSH_NORMAL) if (level >= IPOIB_FLUSH_NORMAL)
ipoib_ib_dev_down(dev, 0); ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) { if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_stop(dev);
if (ipoib_ib_dev_open(dev, 0) != 0) if (ipoib_ib_dev_open(dev) != 0)
return; return;
if (netif_queue_stopped(dev)) if (netif_queue_stopped(dev))
netif_start_queue(dev); netif_start_queue(dev);
@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/ */
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
ipoib_mcast_stop_thread(dev, 1); ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev); ipoib_mcast_dev_flush(dev);
/*
* None of our ah references are released until after
* ipoib_mcast_dev_flush(), ipoib_flush_paths() and the neighbor
* garbage collection have been stopped and reaped.
* That should all be done by now, so make a final ah flush.
*/
ipoib_stop_ah(dev);
ipoib_transport_dev_cleanup(dev); ipoib_transport_dev_cleanup(dev);
} }
@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_ib_dev_open(dev, 1)) { if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0; return 0;
goto err_disable; goto err_disable;
@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0; return 0;
err_stop: err_stop:
ipoib_ib_dev_stop(dev, 1); ipoib_ib_dev_stop(dev);
err_disable: err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev); netif_stop_queue(dev);
ipoib_ib_dev_down(dev, 1); ipoib_ib_dev_down(dev);
ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv; struct ipoib_dev_priv *cpriv;
@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path)) if (!path->query && path_rec_start(dev, path))
goto err_path; goto err_path;
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
__skb_queue_tail(&neigh->queue, skb); __skb_queue_tail(&neigh->queue, skb);
else
goto err_drop;
} }
spin_unlock_irqrestore(&priv->lock, flags); spin_unlock_irqrestore(&priv->lock, flags);
@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
new_path = 1; new_path = 1;
} }
if (path) { if (path) {
__skb_queue_tail(&path->queue, skb); if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (!path->query && path_rec_start(dev, path)) { if (!path->query && path_rec_start(dev, path)) {
spin_unlock_irqrestore(&priv->lock, flags); spin_unlock_irqrestore(&priv->lock, flags);
@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return; return;
} }
queue_work(ipoib_workqueue, &priv->restart_task); queue_work(priv->wq, &priv->restart_task);
} }
static int ipoib_get_iflink(const struct net_device *dev) static int ipoib_get_iflink(const struct net_device *dev)
@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv); __ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval); arp_tbl.gc_interval);
} }
@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */ /* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval); arp_tbl.gc_interval);
return 0; return 0;
@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
if (ipoib_neigh_hash_init(priv) < 0)
goto out;
/* Allocate RX/TX "rings" to hold queued skbs */ /* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL); GFP_KERNEL);
if (!priv->rx_ring) { if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size); ca->name, ipoib_recvq_size);
goto out_neigh_hash_cleanup; goto out;
} }
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port)) if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup; goto out_tx_ring_cleanup;
/*
* Must be after ipoib_ib_dev_init so we can allocate a per
* device wq there and use it here
*/
if (ipoib_neigh_hash_init(priv) < 0)
goto out_dev_uninit;
return 0; return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
out_tx_ring_cleanup: out_tx_ring_cleanup:
vfree(priv->tx_ring); vfree(priv->tx_ring);
out_rx_ring_cleanup: out_rx_ring_cleanup:
kfree(priv->rx_ring); kfree(priv->rx_ring);
out_neigh_hash_cleanup:
ipoib_neigh_hash_uninit(dev);
out: out:
return -ENOMEM; return -ENOMEM;
} }
@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
} }
unregister_netdevice_many(&head); unregister_netdevice_many(&head);
/*
* Must be before ipoib_ib_dev_cleanup or we delete an in use
* work queue
*/
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev); ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring); kfree(priv->rx_ring);
@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL; priv->rx_ring = NULL;
priv->tx_ring = NULL; priv->tx_ring = NULL;
ipoib_neigh_hash_uninit(dev);
} }
static const struct header_ops ipoib_header_ops = { static const struct header_ops ipoib_header_ops = {
@ -1646,10 +1663,11 @@ static struct net_device *ipoib_add_port(const char *format,
register_failed: register_failed:
ib_unregister_event_handler(&priv->event_handler); ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
/* Stop GC if started before flush */ /* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task); cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue); flush_workqueue(priv->wq);
event_failed: event_failed:
ipoib_dev_cleanup(priv->dev); ipoib_dev_cleanup(priv->dev);
@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) { list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler); ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
rtnl_lock(); rtnl_lock();
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */ /* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task); cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue); flush_workqueue(priv->wq);
unregister_netdev(priv->dev); unregister_netdev(priv->dev);
free_netdev(priv->dev); free_netdev(priv->dev);
@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
return ret; return ret;
/* /*
* We create our own workqueue mainly because we want to be * We create a global workqueue here that is used for all flush
* able to flush it when devices are being removed. We can't * operations. However, if you attempt to flush a workqueue
* use schedule_work()/flush_scheduled_work() because both * from a task on that same workqueue, it deadlocks the system.
* unregister_netdev() and linkwatch_event take the rtnl lock, * We want to be able to flush the tasks associated with a
* so flush_scheduled_work() can deadlock during device * specific net device, so we also create a workqueue for each
* removal. * netdevice. We queue up the tasks for that device only on
* its private workqueue, and we only queue up flush events
* on our global flush workqueue. This avoids the deadlocks.
*/ */
ipoib_workqueue = create_singlethread_workqueue("ipoib"); ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) { if (!ipoib_workqueue) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_fs; goto err_fs;
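
A minimal sketch of the pattern described in the comment in ipoib_init_module above: one global workqueue carries only flush operations, while each device gets its own single-threaded workqueue for everything else. The names here (my_flush_wq, struct my_dev, my_queue_task, my_flush_device) are hypothetical and only illustrate the queueing discipline; they are not part of the IPoIB code:

	#include <linux/workqueue.h>

	static struct workqueue_struct *my_flush_wq;	/* global, flush work only */

	struct my_dev {
		struct workqueue_struct *wq;	/* per-device, regular tasks */
		struct work_struct task;
		struct work_struct flush;
	};

	static void my_queue_task(struct my_dev *dev)
	{
		/* normal device work always lands on the device's own WQ */
		queue_work(dev->wq, &dev->task);
	}

	static void my_flush_device(struct my_dev *dev)
	{
		/*
		 * The flush handler runs on the global WQ, so it may call
		 * flush_workqueue(dev->wq) without ever being queued on
		 * dev->wq itself, which is what would deadlock.
		 */
		queue_work(my_flush_wq, &dev->flush);
	}
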
@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
"Enable multicast debug tracing if > 0"); "Enable multicast debug tracing if > 0");
#endif #endif
static DEFINE_MUTEX(mcast_mutex);
struct ipoib_mcast_iter { struct ipoib_mcast_iter {
struct net_device *dev; struct net_device *dev;
union ib_gid mgid; union ib_gid mgid;
@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
unsigned int send_only; unsigned int send_only;
}; };
/*
* This should be called with the priv->lock held
*/
static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
struct ipoib_mcast *mcast,
bool delay)
{
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
/*
* We will be scheduling *something*, so cancel whatever is
* currently scheduled first
*/
cancel_delayed_work(&priv->mcast_task);
if (mcast && delay) {
/*
* We had a failure and want to schedule a retry later
*/
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
mcast->delay_until = jiffies + (mcast->backoff * HZ);
/*
* Mark this mcast for its delay, but restart the
* task immediately. The join task will make sure to
* clear out all entries without delays, and then
* schedule itself to run again when the earliest
* delay expires
*/
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
} else if (delay) {
/*
* Special case of retrying after a failure to
* allocate the broadcast multicast group, wait
* 1 second and try again
*/
queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
} else
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
static void ipoib_mcast_free(struct ipoib_mcast *mcast) static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{ {
struct net_device *dev = mcast->dev; struct net_device *dev = mcast->dev;
@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
mcast->dev = dev; mcast->dev = dev;
mcast->created = jiffies; mcast->created = jiffies;
mcast->delay_until = jiffies;
mcast->backoff = 1; mcast->backoff = 1;
INIT_LIST_HEAD(&mcast->list); INIT_LIST_HEAD(&mcast->list);
@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);
return -EAGAIN; return -EAGAIN;
} }
priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); /* update priv members according to the new mcast */
priv->broadcast->mcmember.qkey = mcmember->qkey;
priv->broadcast->mcmember.mtu = mcmember->mtu;
priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
priv->broadcast->mcmember.rate = mcmember->rate;
priv->broadcast->mcmember.sl = mcmember->sl;
priv->broadcast->mcmember.flow_label = mcmember->flow_label;
priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
/* assume that if the admin MTU and the mcast MTU are currently the same, both may be changed */
if (priv->mcast_mtu == priv->admin_mtu)
priv->admin_mtu =
priv->mcast_mtu =
IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
else
priv->mcast_mtu =
IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey; priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1; set_qkey = 1;
if (!ipoib_cm_admin_enabled(dev)) {
rtnl_lock();
dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
rtnl_unlock();
}
} }
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0; return 0;
} }
static int
ipoib_mcast_sendonly_join_complete(int status,
struct ib_sa_multicast *multicast)
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
/* We trap for port events ourselves. */
if (status == -ENETRESET)
return 0;
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (status) {
if (mcast->logcount++ < 20)
ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
/* Flush out any queued packets */
netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_unlock_bh(dev);
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
&mcast->flags);
}
return status;
}
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_sa_mcmember_rec rec = {
#if 0 /* Some SMs don't support send-only yet */
.join_state = 4
#else
.join_state = 1
#endif
};
int ret = 0;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
return -ENODEV;
}
if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
return -EBUSY;
}
rec.mgid = mcast->mcmember.mgid;
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
priv->port, &rec,
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY |
IB_SA_MCMEMBER_REC_JOIN_STATE,
GFP_ATOMIC,
ipoib_mcast_sendonly_join_complete,
mcast);
if (IS_ERR(mcast->mc)) {
ret = PTR_ERR(mcast->mc);
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
ret);
} else {
ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
mcast->mcmember.mgid.raw);
}
return ret;
}
void ipoib_mcast_carrier_on_task(struct work_struct *work) void ipoib_mcast_carrier_on_task(struct work_struct *work)
{ {
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task); carrier_on_task);
struct ib_port_attr attr; struct ib_port_attr attr;
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
* removed.
*/
if (ib_query_port(priv->ca, priv->port, &attr) || if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) { attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return; return;
} }
rtnl_lock(); /*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
* removed. However, ipoib_stop() will attempt to flush
* the workqueue while holding the rtnl lock, so loop
* on trylock until either we get the lock or we see
* FLAG_OPER_UP go away as that signals that we are bailing
* and can safely ignore the carrier on work.
*/
while (!rtnl_trylock()) {
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
else
msleep(20);
}
if (!ipoib_cm_admin_enabled(priv->dev))
dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
netif_carrier_on(priv->dev); netif_carrier_on(priv->dev);
rtnl_unlock(); rtnl_unlock();
} }
@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
struct net_device *dev = mcast->dev; struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
"sendonly " : "",
mcast->mcmember.mgid.raw, status); mcast->mcmember.mgid.raw, status);
/* We trap for port events ourselves. */ /* We trap for port events ourselves. */
@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
if (!status) { if (!status) {
mcast->backoff = 1; mcast->backoff = 1;
mutex_lock(&mcast_mutex); mcast->delay_until = jiffies;
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
/* /*
* Defer carrier on work to ipoib_workqueue to avoid a * Defer carrier on work to priv->wq to avoid a
* deadlock on rtnl_lock here. * deadlock on rtnl_lock here. Requeue our multicast
* work too, which will end up happening right after
* our carrier on task work and will allow us to
* send out all of the non-broadcast joins
*/ */
if (mcast == priv->broadcast) if (mcast == priv->broadcast) {
queue_work(ipoib_workqueue, &priv->carrier_on_task); spin_lock_irq(&priv->lock);
queue_work(priv->wq, &priv->carrier_on_task);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
goto out_locked;
}
} else {
if (mcast->logcount++ < 20) {
if (status == -ETIMEDOUT || status == -EAGAIN) {
ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
mcast->mcmember.mgid.raw, status);
} else {
ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
mcast->mcmember.mgid.raw, status);
}
}
status = 0; if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
goto out; mcast->backoff >= 2) {
} /*
* We only retry sendonly joins once before we drop
if (mcast->logcount++ < 20) { * the packet and quit trying to deal with the
if (status == -ETIMEDOUT || status == -EAGAIN) { * group. However, we leave the group in the
ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", * mcast list as an unjoined group. If we want to
mcast->mcmember.mgid.raw, status); * try joining again, we simply queue up a packet
* and restart the join thread. The empty queue
* is why the join thread ignores this group.
*/
mcast->backoff = 1;
netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_unlock_bh(dev);
} else { } else {
ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", spin_lock_irq(&priv->lock);
mcast->mcmember.mgid.raw, status); /* Requeue this join task with a backoff delay */
__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
goto out_locked;
} }
} }
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
out: out:
spin_lock_irq(&priv->lock);
out_locked:
/*
* Make sure to set mcast->mc before we clear the busy flag to avoid
* racing with code that checks for BUSY before checking mcast->mc
*/
if (status)
mcast->mc = NULL;
else
mcast->mc = multicast;
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
spin_unlock_irq(&priv->lock);
complete(&mcast->done); complete(&mcast->done);
return status; return status;
} }
@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
int create) int create)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = { struct ib_sa_mcmember_rec rec = {
.join_state = 1 .join_state = 1
}; };
@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit; rec.hop_limit = priv->broadcast->mcmember.hop_limit;
} }
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
init_completion(&mcast->done);
set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL, &rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast); ipoib_mcast_join_complete, mcast);
if (IS_ERR(mcast->mc)) { if (IS_ERR(multicast)) {
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); ret = PTR_ERR(multicast);
complete(&mcast->done);
ret = PTR_ERR(mcast->mc);
ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
spin_lock_irq(&priv->lock);
mcast->backoff *= 2; /* Requeue this join task with a backoff delay */
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
spin_unlock_irq(&priv->lock);
mutex_lock(&mcast_mutex); complete(&mcast->done);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task,
mcast->backoff * HZ);
mutex_unlock(&mcast_mutex);
} }
} }
@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
container_of(work, struct ipoib_dev_priv, mcast_task.work); container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct net_device *dev = priv->dev; struct net_device *dev = priv->dev;
struct ib_port_attr port_attr; struct ib_port_attr port_attr;
unsigned long delay_until = 0;
struct ipoib_mcast *mcast = NULL;
int create = 1;
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return; return;
if (ib_query_port(priv->ca, priv->port, &port_attr) || if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
else else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
spin_lock_irq(&priv->lock);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
goto out;
if (!priv->broadcast) { if (!priv->broadcast) {
struct ipoib_mcast *broadcast; struct ipoib_mcast *broadcast;
if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) broadcast = ipoib_mcast_alloc(dev, 0);
return;
broadcast = ipoib_mcast_alloc(dev, 1);
if (!broadcast) { if (!broadcast) {
ipoib_warn(priv, "failed to allocate broadcast group\n"); ipoib_warn(priv, "failed to allocate broadcast group\n");
mutex_lock(&mcast_mutex); /*
if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) * Restart us after a 1 second delay to retry
queue_delayed_work(ipoib_workqueue, * creating our broadcast group and attaching to
&priv->mcast_task, HZ); * it. Until this succeeds, this ipoib dev is
mutex_unlock(&mcast_mutex); * completely stalled (multicast wise).
return; */
__ipoib_mcast_schedule_join_thread(priv, NULL, 1);
goto out;
} }
spin_lock_irq(&priv->lock);
memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
sizeof (union ib_gid)); sizeof (union ib_gid));
priv->broadcast = broadcast; priv->broadcast = broadcast;
__ipoib_mcast_add(dev, priv->broadcast); __ipoib_mcast_add(dev, priv->broadcast);
spin_unlock_irq(&priv->lock);
} }
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
ipoib_mcast_join(dev, priv->broadcast, 0); !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
return; mcast = priv->broadcast;
} create = 0;
if (mcast->backoff > 1 &&
while (1) { time_before(jiffies, mcast->delay_until)) {
struct ipoib_mcast *mcast = NULL; delay_until = mcast->delay_until;
mcast = NULL;
spin_lock_irq(&priv->lock);
list_for_each_entry(mcast, &priv->multicast_list, list) {
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
&& !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
&& !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
/* Found the next unjoined group */
break;
} }
} }
spin_unlock_irq(&priv->lock); goto out;
if (&mcast->list == &priv->multicast_list) {
/* All done */
break;
}
ipoib_mcast_join(dev, mcast, 1);
return;
} }
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); /*
* We'll never get here until the broadcast group is both allocated
* and attached
*/
list_for_each_entry(mcast, &priv->multicast_list, list) {
if (IS_ERR_OR_NULL(mcast->mc) &&
!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
(!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
!skb_queue_empty(&mcast->pkt_queue))) {
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
init_completion(&mcast->done);
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
create = 0;
else
create = 1;
spin_unlock_irq(&priv->lock);
ipoib_mcast_join(dev, mcast, create);
spin_lock_irq(&priv->lock);
} else if (!delay_until ||
time_before(mcast->delay_until, delay_until))
delay_until = mcast->delay_until;
}
}
clear_bit(IPOIB_MCAST_RUN, &priv->flags); mcast = NULL;
ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
out:
if (delay_until) {
cancel_delayed_work(&priv->mcast_task);
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
if (mcast) {
init_completion(&mcast->done);
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
}
spin_unlock_irq(&priv->lock);
if (mcast)
ipoib_mcast_join(dev, mcast, create);
} }
int ipoib_mcast_start_thread(struct net_device *dev) int ipoib_mcast_start_thread(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n"); ipoib_dbg_mcast(priv, "starting multicast thread\n");
mutex_lock(&mcast_mutex); spin_lock_irqsave(&priv->lock, flags);
if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); spin_unlock_irqrestore(&priv->lock, flags);
mutex_unlock(&mcast_mutex);
return 0; return 0;
} }
int ipoib_mcast_stop_thread(struct net_device *dev, int flush) int ipoib_mcast_stop_thread(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n"); ipoib_dbg_mcast(priv, "stopping multicast thread\n");
mutex_lock(&mcast_mutex); spin_lock_irqsave(&priv->lock, flags);
clear_bit(IPOIB_MCAST_RUN, &priv->flags);
cancel_delayed_work(&priv->mcast_task); cancel_delayed_work(&priv->mcast_task);
mutex_unlock(&mcast_mutex); spin_unlock_irqrestore(&priv->lock, flags);
if (flush) flush_workqueue(priv->wq);
flush_workqueue(ipoib_workqueue);
return 0; return 0;
} }
@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
int ret = 0; int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
if (!IS_ERR_OR_NULL(mcast->mc))
ib_sa_free_multicast(mcast->mc); ib_sa_free_multicast(mcast->mc);
if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
be16_to_cpu(mcast->mcmember.mlid)); be16_to_cpu(mcast->mcmember.mlid));
if (ret) if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
ipoib_dbg(priv, "leaving with no mcmember but not a "
"SENDONLY join\n");
return 0; return 0;
} }
@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
} }
mcast = __ipoib_mcast_find(dev, mgid); mcast = __ipoib_mcast_find(dev, mgid);
if (!mcast) { if (!mcast || !mcast->ah) {
/* Let's create a new send only group now */
ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
mgid);
mcast = ipoib_mcast_alloc(dev, 0);
if (!mcast) { if (!mcast) {
ipoib_warn(priv, "unable to allocate memory for " /* Let's create a new send only group now */
"multicast structure\n"); ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
++dev->stats.tx_dropped; mgid);
dev_kfree_skb_any(skb);
goto out; mcast = ipoib_mcast_alloc(dev, 0);
if (!mcast) {
ipoib_warn(priv, "unable to allocate memory "
"for multicast structure\n");
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
goto unlock;
}
set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
memcpy(mcast->mcmember.mgid.raw, mgid,
sizeof (union ib_gid));
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
} }
set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
}
if (!mcast->ah) {
if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
skb_queue_tail(&mcast->pkt_queue, skb); skb_queue_tail(&mcast->pkt_queue, skb);
else { else {
++dev->stats.tx_dropped; ++dev->stats.tx_dropped;
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
} }
if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
ipoib_dbg_mcast(priv, "no address vector, " }
"but multicast join already started\n"); } else {
else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
ipoib_mcast_sendonly_join(mcast);
/*
* If lookup completes between here and out:, don't
* want to send packet twice.
*/
mcast = NULL;
}
out:
if (mcast && mcast->ah) {
struct ipoib_neigh *neigh; struct ipoib_neigh *neigh;
spin_unlock_irqrestore(&priv->lock, flags); spin_unlock_irqrestore(&priv->lock, flags);
@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags); spin_unlock_irqrestore(&priv->lock, flags);
/* seperate between the wait to the leave*/ /*
* make sure the in-flight joins have finished before we attempt
* to leave
*/
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags)) if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done); wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
unsigned long flags; unsigned long flags;
struct ib_sa_mcmember_rec rec; struct ib_sa_mcmember_rec rec;
ipoib_dbg_mcast(priv, "restarting multicast task\n"); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
/*
* shortcut...on shutdown flush is called next, just
* let it do all the work
*/
return;
ipoib_mcast_stop_thread(dev, 0); ipoib_dbg_mcast(priv, "restarting multicast task\n");
local_irq_save(flags); local_irq_save(flags);
netif_addr_lock(dev); netif_addr_lock(dev);
@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
netif_addr_unlock(dev); netif_addr_unlock(dev);
local_irq_restore(flags); local_irq_restore(flags);
/* We have to cancel outside of the spinlock */ /*
* make sure the in-flight joins have finished before we attempt
* to leave
*/
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(mcast->dev, mcast); ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast); ipoib_mcast_free(mcast);
} }
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) /*
ipoib_mcast_start_thread(dev); * Double check that we are still up
*/
if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
spin_lock_irqsave(&priv->lock, flags);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
spin_unlock_irqrestore(&priv->lock, flags);
}
} }
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd; goto out_free_pd;
} }
/*
* the various IPoIB tasks assume they will never race against
* themselves, so always use a single thread workqueue
*/
priv->wq = create_singlethread_workqueue("ipoib_wq");
if (!priv->wq) {
printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
goto out_free_mr;
}
size = ipoib_recvq_size + 1; size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev); ret = ipoib_cm_dev_init(dev);
if (!ret) { if (!ret) {
@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else else
size += ipoib_recvq_size * ipoib_max_conn_qp; size += ipoib_recvq_size * ipoib_max_conn_qp;
} } else
goto out_free_wq;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) { if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr; goto out_cm_dev_cleanup;
} }
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->tx_wr.send_flags = IB_SEND_SIGNALED; priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey; priv->rx_sge[0].lkey = priv->mr->lkey;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE; priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_sge[1].length = PAGE_SIZE; priv->rx_wr.num_sge = 1;
priv->rx_sge[1].lkey = priv->mr->lkey;
priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
} else {
priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
}
priv->rx_wr.next = NULL; priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge; priv->rx_wr.sg_list = priv->rx_sge;
@ -236,12 +242,19 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
out_free_recv_cq: out_free_recv_cq:
ib_destroy_cq(priv->recv_cq); ib_destroy_cq(priv->recv_cq);
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
out_free_wq:
destroy_workqueue(priv->wq);
priv->wq = NULL;
out_free_mr: out_free_mr:
ib_dereg_mr(priv->mr); ib_dereg_mr(priv->mr);
ipoib_cm_dev_cleanup(dev);
out_free_pd: out_free_pd:
ib_dealloc_pd(priv->pd); ib_dealloc_pd(priv->pd);
return -ENODEV; return -ENODEV;
} }
@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
ipoib_cm_dev_cleanup(dev); ipoib_cm_dev_cleanup(dev);
if (priv->wq) {
flush_workqueue(priv->wq);
destroy_workqueue(priv->wq);
priv->wq = NULL;
}
if (ib_dereg_mr(priv->mr)) if (ib_dereg_mr(priv->mr))
ipoib_warn(priv, "ib_dereg_mr failed\n"); ipoib_warn(priv, "ib_dereg_mr failed\n");
if (ib_dealloc_pd(priv->pd)) if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n"); ipoib_warn(priv, "ib_dealloc_pd failed\n");
} }
void ipoib_event(struct ib_event_handler *handler, void ipoib_event(struct ib_event_handler *handler,
@ -69,7 +69,7 @@
#define DRV_NAME "iser" #define DRV_NAME "iser"
#define PFX DRV_NAME ": " #define PFX DRV_NAME ": "
#define DRV_VER "1.5" #define DRV_VER "1.6"
#define iser_dbg(fmt, arg...) \ #define iser_dbg(fmt, arg...) \
do { \ do { \
@ -218,22 +218,21 @@ enum iser_data_dir {
/** /**
* struct iser_data_buf - iSER data buffer * struct iser_data_buf - iSER data buffer
* *
* @buf: pointer to the sg list * @sg: pointer to the sg list
* @size: num entries of this sg * @size: num entries of this sg
* @data_len: total buffer byte len * @data_len: total buffer byte len
* @dma_nents: returned by dma_map_sg * @dma_nents: returned by dma_map_sg
* @copy_buf: allocated copy buf for SGs unaligned * @orig_sg: pointer to the original sg list (in case
* for rdma which are copied * we used a copy)
* @sg_single: SG-ified clone of a non SG SC or * @orig_size: num entries of orig sg list
* unaligned SG
*/ */
struct iser_data_buf { struct iser_data_buf {
void *buf; struct scatterlist *sg;
unsigned int size; unsigned int size;
unsigned long data_len; unsigned long data_len;
unsigned int dma_nents; unsigned int dma_nents;
char *copy_buf; struct scatterlist *orig_sg;
struct scatterlist sg_single; unsigned int orig_size;
}; };
/* fwd declarations */ /* fwd declarations */
@ -244,35 +243,14 @@ struct iscsi_endpoint;
/** /**
* struct iser_mem_reg - iSER memory registration info * struct iser_mem_reg - iSER memory registration info
* *
* @lkey: MR local key * @sge: memory region sg element
* @rkey: MR remote key * @rkey: memory region remote key
* @va: MR start address (buffer va)
* @len: MR length
* @mem_h: pointer to registration context (FMR/Fastreg) * @mem_h: pointer to registration context (FMR/Fastreg)
*/ */
struct iser_mem_reg { struct iser_mem_reg {
u32 lkey; struct ib_sge sge;
u32 rkey; u32 rkey;
u64 va; void *mem_h;
u64 len;
void *mem_h;
};
/**
* struct iser_regd_buf - iSER buffer registration desc
*
* @reg: memory registration info
* @virt_addr: virtual address of buffer
* @device: reference to iser device
* @direction: dma direction (for dma_unmap)
* @data_size: data buffer size in bytes
*/
struct iser_regd_buf {
struct iser_mem_reg reg;
void *virt_addr;
struct iser_device *device;
enum dma_data_direction direction;
unsigned int data_size;
}; };
enum iser_desc_type { enum iser_desc_type {
@ -534,11 +512,9 @@ struct iser_conn {
* @sc: link to scsi command * @sc: link to scsi command
* @command_sent: indicate if command was sent * @command_sent: indicate if command was sent
* @dir: iser data direction * @dir: iser data direction
* @rdma_regd: task rdma registration desc * @rdma_reg: task rdma registration desc
* @data: iser data buffer desc * @data: iser data buffer desc
* @data_copy: iser data copy buffer desc (bounce buffer)
* @prot: iser protection buffer desc * @prot: iser protection buffer desc
* @prot_copy: iser protection copy buffer desc (bounce buffer)
*/ */
struct iscsi_iser_task { struct iscsi_iser_task {
struct iser_tx_desc desc; struct iser_tx_desc desc;
@ -547,11 +523,9 @@ struct iscsi_iser_task {
struct scsi_cmnd *sc; struct scsi_cmnd *sc;
int command_sent; int command_sent;
int dir[ISER_DIRS_NUM]; int dir[ISER_DIRS_NUM];
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM]; struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
struct iser_data_buf data[ISER_DIRS_NUM]; struct iser_data_buf data[ISER_DIRS_NUM];
struct iser_data_buf data_copy[ISER_DIRS_NUM];
struct iser_data_buf prot[ISER_DIRS_NUM]; struct iser_data_buf prot[ISER_DIRS_NUM];
struct iser_data_buf prot_copy[ISER_DIRS_NUM];
}; };
struct iser_page_vec { struct iser_page_vec {
@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem, struct iser_data_buf *mem,
struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir); enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *dst_addr, struct sockaddr *dst_addr,
int non_blocking); int non_blocking);
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir); enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
void iser_free_fastreg_pool(struct ib_conn *ib_conn); void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector); enum iser_data_dir cmd_dir, sector_t *sector);
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn);
void
iser_reg_desc_put(struct ib_conn *ib_conn,
struct fast_reg_descriptor *desc);
#endif #endif
@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{ {
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
int err; int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
iser_err("Failed to set up Data-IN RDMA\n"); iser_err("Failed to set up Data-IN RDMA\n");
return err; return err;
} }
regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
hdr->flags |= ISER_RSV; hdr->flags |= ISER_RSV;
hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->read_stag = cpu_to_be32(mem_reg->rkey);
hdr->read_va = cpu_to_be64(regd_buf->reg.va); hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
task->itt, regd_buf->reg.rkey, task->itt, mem_reg->rkey,
(unsigned long long)regd_buf->reg.va); (unsigned long long)mem_reg->sge.addr);
return 0; return 0;
} }
@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
{ {
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
int err; int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err; return err;
} }
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
if (unsol_sz < edtl) { if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV; hdr->flags |= ISER_WSV;
hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->write_stag = cpu_to_be32(mem_reg->rkey);
hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz); hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n", "VA:%#llX + unsol:%d\n",
task->itt, regd_buf->reg.rkey, task->itt, mem_reg->rkey,
(unsigned long long)regd_buf->reg.va, unsol_sz); (unsigned long long)mem_reg->sge.addr, unsol_sz);
} }
if (imm_sz > 0) { if (imm_sz > 0) {
iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
task->itt, imm_sz); task->itt, imm_sz);
tx_dsg->addr = regd_buf->reg.va; tx_dsg->addr = mem_reg->sge.addr;
tx_dsg->length = imm_sz; tx_dsg->length = imm_sz;
tx_dsg->lkey = regd_buf->reg.lkey; tx_dsg->lkey = mem_reg->sge.lkey;
iser_task->desc.num_sge = 2; iser_task->desc.num_sge = 2;
} }
@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
} }
if (scsi_sg_count(sc)) { /* using a scatter list */ if (scsi_sg_count(sc)) { /* using a scatter list */
data_buf->buf = scsi_sglist(sc); data_buf->sg = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc); data_buf->size = scsi_sg_count(sc);
} }
data_buf->data_len = scsi_bufflen(sc); data_buf->data_len = scsi_bufflen(sc);
if (scsi_prot_sg_count(sc)) { if (scsi_prot_sg_count(sc)) {
prot_buf->buf = scsi_prot_sglist(sc); prot_buf->sg = scsi_prot_sglist(sc);
prot_buf->size = scsi_prot_sg_count(sc); prot_buf->size = scsi_prot_sg_count(sc);
prot_buf->data_len = data_buf->data_len >> prot_buf->data_len = (data_buf->data_len >>
ilog2(sc->device->sector_size) * 8; ilog2(sc->device->sector_size)) * 8;
} }
if (hdr->flags & ISCSI_FLAG_CMD_READ) { if (hdr->flags & ISCSI_FLAG_CMD_READ) {
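
The hunk above also corrects the operator precedence in the protection buffer length calculation. A small worked example of the difference, assuming 512-byte logical sectors and the 8 bytes of protection information per sector implied by the "* 8" factor (the numbers are illustrative only):

	void prot_len_example(void)
	{
		/* 4096 bytes of data, 512-byte sectors, ilog2(512) == 9 */
		unsigned long old_len = 4096 >> 9 * 8;		/* parsed as 4096 >> 72: shift >= type width, bogus result */
		unsigned long new_len = (4096 >> 9) * 8;	/* 8 sectors * 8 bytes each = 64 bytes of protection data */
	}
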
@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iser_conn *iser_conn = conn->dd_data; struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL; struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
unsigned long buf_offset; unsigned long buf_offset;
unsigned long data_seg_len; unsigned long data_seg_len;
uint32_t itt; uint32_t itt;
@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
/* build the tx desc */ /* build the tx desc */
iser_initialize_task_headers(task, tx_desc); iser_initialize_task_headers(task, tx_desc);
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1]; tx_dsg = &tx_desc->tx_sg[1];
tx_dsg->addr = regd_buf->reg.va + buf_offset; tx_dsg->addr = mem_reg->sge.addr + buf_offset;
tx_dsg->length = data_seg_len; tx_dsg->length = data_seg_len;
tx_dsg->lkey = regd_buf->reg.lkey; tx_dsg->lkey = mem_reg->sge.lkey;
tx_desc->num_sge = 2; tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0; iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0; iser_task->prot[ISER_DIR_OUT].data_len = 0;
memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
sizeof(struct iser_regd_buf)); sizeof(struct iser_mem_reg));
memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
sizeof(struct iser_regd_buf)); sizeof(struct iser_mem_reg));
} }
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
/* if we were reading, copy back to unaligned sglist, /* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy * anyway dma_unmap and free the copy
*/ */
if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { if (iser_task->data[ISER_DIR_IN].orig_sg) {
is_rdma_data_aligned = 0; is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_IN], &iser_task->data[ISER_DIR_IN],
&iser_task->data_copy[ISER_DIR_IN],
ISER_DIR_IN); ISER_DIR_IN);
} }
if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { if (iser_task->data[ISER_DIR_OUT].orig_sg) {
is_rdma_data_aligned = 0; is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_OUT], &iser_task->data[ISER_DIR_OUT],
&iser_task->data_copy[ISER_DIR_OUT],
ISER_DIR_OUT); ISER_DIR_OUT);
} }
if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { if (iser_task->prot[ISER_DIR_IN].orig_sg) {
is_rdma_prot_aligned = 0; is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_IN], &iser_task->prot[ISER_DIR_IN],
&iser_task->prot_copy[ISER_DIR_IN],
ISER_DIR_IN); ISER_DIR_IN);
} }
if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
is_rdma_prot_aligned = 0; is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_OUT], &iser_task->prot[ISER_DIR_OUT],
&iser_task->prot_copy[ISER_DIR_OUT],
ISER_DIR_OUT); ISER_DIR_OUT);
} }
@ -39,68 +39,173 @@
#include "iscsi_iser.h" #include "iscsi_iser.h"
#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ static void
iser_free_bounce_sg(struct iser_data_buf *data)
{
struct scatterlist *sg;
int count;
for_each_sg(data->sg, sg, data->size, count)
__free_page(sg_page(sg));
kfree(data->sg);
data->sg = data->orig_sg;
data->size = data->orig_size;
data->orig_sg = NULL;
data->orig_size = 0;
}
static int
iser_alloc_bounce_sg(struct iser_data_buf *data)
{
struct scatterlist *sg;
struct page *page;
unsigned long length = data->data_len;
int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
if (!sg)
goto err;
sg_init_table(sg, nents);
while (length) {
u32 page_len = min_t(u32, length, PAGE_SIZE);
page = alloc_page(GFP_ATOMIC);
if (!page)
goto err;
sg_set_page(&sg[i], page, page_len, 0);
length -= page_len;
i++;
}
data->orig_sg = data->sg;
data->orig_size = data->size;
data->sg = sg;
data->size = nents;
return 0;
err:
for (; i > 0; i--)
__free_page(sg_page(&sg[i - 1]));
kfree(sg);
return -ENOMEM;
}
static void
iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
{
struct scatterlist *osg, *bsg = data->sg;
void *oaddr, *baddr;
unsigned int left = data->data_len;
unsigned int bsg_off = 0;
int i;
for_each_sg(data->orig_sg, osg, data->orig_size, i) {
unsigned int copy_len, osg_off = 0;
oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
copy_len = min(left, osg->length);
while (copy_len) {
unsigned int len = min(copy_len, bsg->length - bsg_off);
baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
if (to_buffer)
memcpy(baddr + bsg_off, oaddr + osg_off, len);
else
memcpy(oaddr + osg_off, baddr + bsg_off, len);
kunmap_atomic(baddr - bsg->offset);
osg_off += len;
bsg_off += len;
copy_len -= len;
if (bsg_off >= bsg->length) {
bsg = sg_next(bsg);
bsg_off = 0;
}
}
kunmap_atomic(oaddr - osg->offset);
left -= osg_off;
}
}
static inline void
iser_copy_from_bounce(struct iser_data_buf *data)
{
iser_copy_bounce(data, false);
}
static inline void
iser_copy_to_bounce(struct iser_data_buf *data)
{
iser_copy_bounce(data, true);
}
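
The three helpers above replace the old single-allocation bounce buffer with a page-by-page scatterlist plus a two-cursor copy loop. As a rough, userspace-only illustration of that copy loop (plain buffers stand in for kmap_atomic()-mapped scatterlist pages; copy_to_bounce(), struct seg and PAGE_SZ are invented for the example and are not kernel names):

/*
 * Walk variable-length source segments with one cursor and fixed-size
 * bounce "pages" with a second cursor, copying whichever chunk runs
 * out first -- the same shape as iser_copy_bounce(data, true).
 */
#include <stdio.h>
#include <string.h>

#define PAGE_SZ 8       /* tiny "page" so the chunking is visible */

struct seg { const char *buf; size_t len; };

static void copy_to_bounce(const struct seg *orig, int nsegs,
                           char pages[][PAGE_SZ])
{
        size_t page_off = 0;
        int page = 0;

        for (int i = 0; i < nsegs; i++) {
                size_t seg_off = 0, left = orig[i].len;

                while (left) {
                        size_t n = left < PAGE_SZ - page_off ?
                                   left : PAGE_SZ - page_off;

                        memcpy(&pages[page][page_off],
                               orig[i].buf + seg_off, n);
                        seg_off += n;
                        page_off += n;
                        left -= n;
                        if (page_off == PAGE_SZ) {      /* bounce page full */
                                page++;
                                page_off = 0;
                        }
                }
        }
}

int main(void)
{
        struct seg orig[] = {
                { "hello ", 6 }, { "bounce ", 7 }, { "world", 5 },
        };
        char pages[3][PAGE_SZ] = { { 0 } };

        copy_to_bounce(orig, 3, pages);
        printf("%.8s|%.8s|%.8s\n", pages[0], pages[1], pages[2]);
        return 0;
}

The kernel version does the same chunking in both directions and maps each scatterlist page with kmap_atomic() before touching it.
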
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
struct fast_reg_descriptor *desc;
unsigned long flags;
spin_lock_irqsave(&ib_conn->lock, flags);
desc = list_first_entry(&ib_conn->fastreg.pool,
struct fast_reg_descriptor, list);
list_del(&desc->list);
spin_unlock_irqrestore(&ib_conn->lock, flags);
return desc;
}
void
iser_reg_desc_put(struct ib_conn *ib_conn,
struct fast_reg_descriptor *desc)
{
unsigned long flags;
spin_lock_irqsave(&ib_conn->lock, flags);
list_add(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_irqrestore(&ib_conn->lock, flags);
}
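
iser_reg_desc_get() and iser_reg_desc_put() centralize what the fastreg code paths previously open-coded: take the connection lock, pop a descriptor off the free list or push it back. A minimal userspace sketch of that pool pattern, with a pthread mutex standing in for the spinlock and a hand-rolled singly linked list standing in for list_head (desc_get(), desc_put() and struct desc are illustrative names, not kernel APIs):

/* Lock-protected free-list pool, roughly the shape used above. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct desc {
        int id;
        struct desc *next;
};

static struct desc *pool;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static struct desc *desc_get(void)
{
        struct desc *d;

        pthread_mutex_lock(&pool_lock);
        d = pool;                       /* take the first free descriptor */
        if (d)
                pool = d->next;
        pthread_mutex_unlock(&pool_lock);
        return d;
}

static void desc_put(struct desc *d)
{
        pthread_mutex_lock(&pool_lock);
        d->next = pool;                 /* push back onto the free list */
        pool = d;
        pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
        for (int i = 0; i < 3; i++) {   /* pre-populate the pool */
                struct desc *d = malloc(sizeof(*d));

                if (!d)
                        return 1;
                d->id = i;
                desc_put(d);
        }

        struct desc *d = desc_get();

        printf("got descriptor %d\n", d->id);
        desc_put(d);
        while ((d = desc_get()))        /* drain and free on shutdown */
                free(d);
        return 0;
}

Note that the kernel get path uses list_first_entry() without an empty check, which relies on the pool being created with enough descriptors for every task that can be in flight; the sketch returns NULL instead.
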
/** /**
* iser_start_rdma_unaligned_sg * iser_start_rdma_unaligned_sg
*/ */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data, struct iser_data_buf *data,
struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir) enum iser_data_dir cmd_dir)
{ {
struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
struct scatterlist *sgl = (struct scatterlist *)data->buf; int rc;
struct scatterlist *sg;
char *mem = NULL;
unsigned long cmd_data_len = 0;
int dma_nents, i;
for_each_sg(sgl, sg, data->size, i) rc = iser_alloc_bounce_sg(data);
cmd_data_len += ib_sg_dma_len(dev, sg); if (rc) {
iser_err("Failed to allocate bounce for data len %lu\n",
if (cmd_data_len > ISER_KMALLOC_THRESHOLD) data->data_len);
mem = (void *)__get_free_pages(GFP_ATOMIC, return rc;
ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
else
mem = kmalloc(cmd_data_len, GFP_ATOMIC);
if (mem == NULL) {
iser_err("Failed to allocate mem size %d %d for copying sglist\n",
data->size, (int)cmd_data_len);
return -ENOMEM;
} }
if (cmd_dir == ISER_DIR_OUT) { if (cmd_dir == ISER_DIR_OUT)
/* copy the unaligned sg the buffer which is used for RDMA */ iser_copy_to_bounce(data);
char *p, *from;
sgl = (struct scatterlist *)data->buf; data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
p = mem; (cmd_dir == ISER_DIR_OUT) ?
for_each_sg(sgl, sg, data->size, i) { DMA_TO_DEVICE : DMA_FROM_DEVICE);
from = kmap_atomic(sg_page(sg)); if (!data->dma_nents) {
memcpy(p, iser_err("Got dma_nents %d, something went wrong...\n",
from + sg->offset, data->dma_nents);
sg->length); rc = -ENOMEM;
kunmap_atomic(from); goto err;
p += sg->length;
}
} }
sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
data_copy->buf = &data_copy->sg_single;
data_copy->size = 1;
data_copy->copy_buf = mem;
dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE);
BUG_ON(dma_nents == 0);
data_copy->dma_nents = dma_nents;
data_copy->data_len = cmd_data_len;
return 0; return 0;
err:
iser_free_bounce_sg(data);
return rc;
} }
/** /**
@ -109,51 +214,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data, struct iser_data_buf *data,
struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir) enum iser_data_dir cmd_dir)
{ {
struct ib_device *dev; struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
unsigned long cmd_data_len;
dev = iser_task->iser_conn->ib_conn.device->ib_device; ib_dma_unmap_sg(dev, data->sg, data->size,
ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ? (cmd_dir == ISER_DIR_OUT) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE); DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (cmd_dir == ISER_DIR_IN) { if (cmd_dir == ISER_DIR_IN)
char *mem; iser_copy_from_bounce(data);
struct scatterlist *sgl, *sg;
unsigned char *p, *to;
unsigned int sg_size;
int i;
/* copy back read RDMA to unaligned sg */ iser_free_bounce_sg(data);
mem = data_copy->copy_buf;
sgl = (struct scatterlist *)data->buf;
sg_size = data->size;
p = mem;
for_each_sg(sgl, sg, sg_size, i) {
to = kmap_atomic(sg_page(sg));
memcpy(to + sg->offset,
p,
sg->length);
kunmap_atomic(to);
p += sg->length;
}
}
cmd_data_len = data->data_len;
if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
free_pages((unsigned long)data_copy->copy_buf,
ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
else
kfree(data_copy->copy_buf);
data_copy->copy_buf = NULL;
} }
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
@ -175,7 +247,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct ib_device *ibdev, u64 *pages, struct ib_device *ibdev, u64 *pages,
int *offset, int *data_size) int *offset, int *data_size)
{ {
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf; struct scatterlist *sg, *sgl = data->sg;
u64 start_addr, end_addr, page, chunk_start = 0; u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0; unsigned long total_sz = 0;
unsigned int dma_len; unsigned int dma_len;
@ -227,14 +299,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
static int iser_data_buf_aligned_len(struct iser_data_buf *data, static int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev) struct ib_device *ibdev)
{ {
struct scatterlist *sgl, *sg, *next_sg = NULL; struct scatterlist *sg, *sgl, *next_sg = NULL;
u64 start_addr, end_addr; u64 start_addr, end_addr;
int i, ret_len, start_check = 0; int i, ret_len, start_check = 0;
if (data->dma_nents == 1) if (data->dma_nents == 1)
return 1; return 1;
sgl = (struct scatterlist *)data->buf; sgl = data->sg;
start_addr = ib_sg_dma_address(ibdev, sgl); start_addr = ib_sg_dma_address(ibdev, sgl);
for_each_sg(sgl, sg, data->dma_nents, i) { for_each_sg(sgl, sg, data->dma_nents, i) {
@ -266,11 +338,10 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
static void iser_data_buf_dump(struct iser_data_buf *data, static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev) struct ib_device *ibdev)
{ {
struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg; struct scatterlist *sg;
int i; int i;
for_each_sg(sgl, sg, data->dma_nents, i) for_each_sg(data->sg, sg, data->dma_nents, i)
iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n", "off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg), i, (unsigned long)ib_sg_dma_address(ibdev, sg),
@ -288,31 +359,6 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
} }
static void iser_page_vec_build(struct iser_data_buf *data,
struct iser_page_vec *page_vec,
struct ib_device *ibdev)
{
int page_vec_len = 0;
page_vec->length = 0;
page_vec->offset = 0;
iser_dbg("Translating sg sz: %d\n", data->dma_nents);
page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
&page_vec->offset,
&page_vec->data_size);
iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
page_vec->length = page_vec_len;
if (page_vec_len * SIZE_4K < page_vec->data_size) {
iser_err("page_vec too short to hold this SG\n");
iser_data_buf_dump(data, ibdev);
iser_dump_page_vec(page_vec);
BUG();
}
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data, struct iser_data_buf *data,
enum iser_data_dir iser_dir, enum iser_data_dir iser_dir,
@ -323,7 +369,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
iser_task->dir[iser_dir] = 1; iser_task->dir[iser_dir] = 1;
dev = iser_task->iser_conn->ib_conn.device->ib_device; dev = iser_task->iser_conn->ib_conn.device->ib_device;
data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
if (data->dma_nents == 0) { if (data->dma_nents == 0) {
iser_err("dma_map_sg failed!!!\n"); iser_err("dma_map_sg failed!!!\n");
return -EINVAL; return -EINVAL;
@ -338,24 +384,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
struct ib_device *dev; struct ib_device *dev;
dev = iser_task->iser_conn->ib_conn.device->ib_device; dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, data->buf, data->size, dir); ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
struct iser_mem_reg *reg)
{
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->mr->lkey;
reg->rkey = device->mr->rkey;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
" length=0x%x\n", reg->sge.lkey, reg->rkey,
reg->sge.addr, reg->sge.length);
return 0;
} }
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
struct ib_device *ibdev,
struct iser_data_buf *mem, struct iser_data_buf *mem,
struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir, enum iser_data_dir cmd_dir,
int aligned_len) int aligned_len)
{ {
struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
iscsi_conn->fmr_unalign_cnt++; iscsi_conn->fmr_unalign_cnt++;
iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
aligned_len, mem->size); aligned_len, mem->size);
if (iser_debug_level > 0) if (iser_debug_level > 0)
iser_data_buf_dump(mem, ibdev); iser_data_buf_dump(mem, device->ib_device);
/* unmap the command data before accessing it */ /* unmap the command data before accessing it */
iser_dma_unmap_task_data(iser_task, mem, iser_dma_unmap_task_data(iser_task, mem,
@ -364,12 +427,94 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
/* allocate copy buf, if we are writing, copy the */ /* allocate copy buf, if we are writing, copy the */
/* unaligned scatterlist, dma map the copy */ /* unaligned scatterlist, dma map the copy */
if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
return -ENOMEM; return -ENOMEM;
return 0; return 0;
} }
/**
* iser_reg_page_vec - Register physical memory
*
* returns: 0 on success, errno code on failure
*/
static
int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_pool_fmr *fmr;
int ret, plen;
plen = iser_sg_to_page_vec(mem, device->ib_device,
page_vec->pages,
&page_vec->offset,
&page_vec->data_size);
page_vec->length = plen;
if (plen * SIZE_4K < page_vec->data_size) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
iser_dump_page_vec(page_vec);
return -EINVAL;
}
fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_vec->pages,
page_vec->length,
page_vec->pages[0]);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
return ret;
}
mem_reg->sge.lkey = fmr->fmr->lkey;
mem_reg->rkey = fmr->fmr->rkey;
mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
mem_reg->sge.length = page_vec->data_size;
mem_reg->mem_h = fmr;
return 0;
}
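
iser_reg_page_vec() now folds the old iser_page_vec_build() step into the FMR path: flatten the scatterlist into 4K page addresses, then hand the page array to ib_fmr_pool_map_phys(). A toy model of that flattening step, with plain integers in place of DMA addresses and none of the chunk-contiguity or offset bookkeeping the real iser_sg_to_page_vec() performs (struct dma_seg and seg_to_page_vec() are invented for the example):

/* Expand (address, length) segments into the 4K pages they cover. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_4K ((uint64_t)4096)

struct dma_seg { uint64_t addr; uint64_t len; };

static int seg_to_page_vec(const struct dma_seg *segs, int nsegs,
                           uint64_t *pages, int max_pages)
{
        int n = 0;

        for (int i = 0; i < nsegs; i++) {
                uint64_t first, last;

                if (!segs[i].len)
                        continue;
                first = segs[i].addr & ~(PAGE_4K - 1);
                last = (segs[i].addr + segs[i].len - 1) & ~(PAGE_4K - 1);

                for (uint64_t p = first; p <= last; p += PAGE_4K) {
                        if (n == max_pages)
                                return -1;      /* page vector too short */
                        pages[n++] = p;
                }
        }
        return n;
}

int main(void)
{
        struct dma_seg segs[] = {
                { 0x10000, 8192 },      /* two full pages  */
                { 0x20000, 100 },       /* short tail page */
        };
        uint64_t pages[8];
        int n = seg_to_page_vec(segs, 2, pages, 8);

        for (int i = 0; i < n; i++)
                printf("page[%d] = 0x%llx\n", i,
                       (unsigned long long)pages[i]);
        return 0;
}

The registered region then starts at pages[0] plus the first segment's intra-page offset, which is what the sge.addr assignment above expresses.
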
/**
* Unregister (previously registered using FMR) memory. * Unregister (previously registered using FMR) memory.
* If memory is non-FMR does nothing.
*/
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
if (ret)
iser_err("ib_fmr_pool_unmap failed %d\n", ret);
reg->mem_h = NULL;
}
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
if (!reg->mem_h)
return;
iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
reg->mem_h);
reg->mem_h = NULL;
}
/** /**
* iser_reg_rdma_mem_fmr - Registers memory intended for RDMA, * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
* using FMR (if possible) obtaining rkey and va * using FMR (if possible) obtaining rkey and va
@ -383,45 +528,29 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
struct iser_device *device = ib_conn->device; struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device; struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_data_buf *mem = &iser_task->data[cmd_dir];
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
int aligned_len; int aligned_len;
int err; int err;
int i; int i;
struct scatterlist *sg;
regd_buf = &iser_task->rdma_regd[cmd_dir]; mem_reg = &iser_task->rdma_reg[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev); aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) { if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev, mem, err = fall_to_bounce_buf(iser_task, mem,
&iser_task->data_copy[cmd_dir],
cmd_dir, aligned_len); cmd_dir, aligned_len);
if (err) { if (err) {
iser_err("failed to allocate bounce buffer\n"); iser_err("failed to allocate bounce buffer\n");
return err; return err;
} }
mem = &iser_task->data_copy[cmd_dir];
} }
/* if there is a single dma entry, FMR is not needed */ /* if there is a single dma entry, FMR is not needed */
if (mem->dma_nents == 1) { if (mem->dma_nents == 1) {
sg = (struct scatterlist *)mem->buf; return iser_reg_dma(device, mem, mem_reg);
regd_buf->reg.lkey = device->mr->lkey;
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
(unsigned int)regd_buf->reg.lkey,
(unsigned int)regd_buf->reg.rkey,
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */ } else { /* use FMR for multiple dma entries */
iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, mem_reg);
&regd_buf->reg);
if (err && err != -EAGAIN) { if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev); iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
@ -519,8 +648,10 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
static int static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct fast_reg_descriptor *desc, struct ib_sge *data_sge, struct fast_reg_descriptor *desc,
struct ib_sge *prot_sge, struct ib_sge *sig_sge) struct iser_mem_reg *data_reg,
struct iser_mem_reg *prot_reg,
struct iser_mem_reg *sig_reg)
{ {
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_pi_context *pi_ctx = desc->pi_ctx; struct iser_pi_context *pi_ctx = desc->pi_ctx;
@ -544,12 +675,12 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
memset(&sig_wr, 0, sizeof(sig_wr)); memset(&sig_wr, 0, sizeof(sig_wr));
sig_wr.opcode = IB_WR_REG_SIG_MR; sig_wr.opcode = IB_WR_REG_SIG_MR;
sig_wr.wr_id = ISER_FASTREG_LI_WRID; sig_wr.wr_id = ISER_FASTREG_LI_WRID;
sig_wr.sg_list = data_sge; sig_wr.sg_list = &data_reg->sge;
sig_wr.num_sge = 1; sig_wr.num_sge = 1;
sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
if (scsi_prot_sg_count(iser_task->sc)) if (scsi_prot_sg_count(iser_task->sc))
sig_wr.wr.sig_handover.prot = prot_sge; sig_wr.wr.sig_handover.prot = &prot_reg->sge;
sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE; IB_ACCESS_REMOTE_WRITE;
@ -566,27 +697,26 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
} }
desc->reg_indicators &= ~ISER_SIG_KEY_VALID; desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
sig_sge->lkey = pi_ctx->sig_mr->lkey; sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
sig_sge->addr = 0; sig_reg->rkey = pi_ctx->sig_mr->rkey;
sig_sge->length = scsi_transfer_length(iser_task->sc); sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n", iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
sig_sge->addr, sig_sge->length, sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
sig_sge->lkey); sig_reg->sge.length);
err: err:
return ret; return ret;
} }
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_regd_buf *regd_buf,
struct iser_data_buf *mem, struct iser_data_buf *mem,
struct fast_reg_descriptor *desc,
enum iser_reg_indicator ind, enum iser_reg_indicator ind,
struct ib_sge *sge) struct iser_mem_reg *reg)
{ {
struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device; struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl; struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr fastreg_wr, inv_wr;
@ -594,17 +724,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
int ret, offset, size, plen; int ret, offset, size, plen;
/* if there is a single dma entry, dma mr suffices */ /* if there is a single dma entry, dma mr suffices */
if (mem->dma_nents == 1) { if (mem->dma_nents == 1)
struct scatterlist *sg = (struct scatterlist *)mem->buf; return iser_reg_dma(device, mem, reg);
sge->lkey = device->mr->lkey;
sge->addr = ib_sg_dma_address(ibdev, &sg[0]);
sge->length = ib_sg_dma_len(ibdev, &sg[0]);
iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
sge->lkey, sge->addr, sge->length);
return 0;
}
if (ind == ISER_DATA_KEY_VALID) { if (ind == ISER_DATA_KEY_VALID) {
mr = desc->data_mr; mr = desc->data_mr;
@ -652,9 +773,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
} }
desc->reg_indicators &= ~ind; desc->reg_indicators &= ~ind;
sge->lkey = mr->lkey; reg->sge.lkey = mr->lkey;
sge->addr = frpl->page_list[0] + offset; reg->rkey = mr->rkey;
sge->length = size; reg->sge.addr = frpl->page_list[0] + offset;
reg->sge.length = size;
return ret; return ret;
} }
@ -672,93 +794,66 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
struct iser_device *device = ib_conn->device; struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device; struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_data_buf *mem = &iser_task->data[cmd_dir];
struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = NULL; struct fast_reg_descriptor *desc = NULL;
struct ib_sge data_sge;
int err, aligned_len; int err, aligned_len;
unsigned long flags;
aligned_len = iser_data_buf_aligned_len(mem, ibdev); aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) { if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev, mem, err = fall_to_bounce_buf(iser_task, mem,
&iser_task->data_copy[cmd_dir],
cmd_dir, aligned_len); cmd_dir, aligned_len);
if (err) { if (err) {
iser_err("failed to allocate bounce buffer\n"); iser_err("failed to allocate bounce buffer\n");
return err; return err;
} }
mem = &iser_task->data_copy[cmd_dir];
} }
if (mem->dma_nents != 1 || if (mem->dma_nents != 1 ||
scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
spin_lock_irqsave(&ib_conn->lock, flags); desc = iser_reg_desc_get(ib_conn);
desc = list_first_entry(&ib_conn->fastreg.pool, mem_reg->mem_h = desc;
struct fast_reg_descriptor, list);
list_del(&desc->list);
spin_unlock_irqrestore(&ib_conn->lock, flags);
regd_buf->reg.mem_h = desc;
} }
err = iser_fast_reg_mr(iser_task, regd_buf, mem, err = iser_fast_reg_mr(iser_task, mem, desc,
ISER_DATA_KEY_VALID, &data_sge); ISER_DATA_KEY_VALID, mem_reg);
if (err) if (err)
goto err_reg; goto err_reg;
if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
struct ib_sge prot_sge, sig_sge; struct iser_mem_reg prot_reg;
memset(&prot_sge, 0, sizeof(prot_sge)); memset(&prot_reg, 0, sizeof(prot_reg));
if (scsi_prot_sg_count(iser_task->sc)) { if (scsi_prot_sg_count(iser_task->sc)) {
mem = &iser_task->prot[cmd_dir]; mem = &iser_task->prot[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev); aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) { if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev, mem, err = fall_to_bounce_buf(iser_task, mem,
&iser_task->prot_copy[cmd_dir],
cmd_dir, aligned_len); cmd_dir, aligned_len);
if (err) { if (err) {
iser_err("failed to allocate bounce buffer\n"); iser_err("failed to allocate bounce buffer\n");
return err; return err;
} }
mem = &iser_task->prot_copy[cmd_dir];
} }
err = iser_fast_reg_mr(iser_task, regd_buf, mem, err = iser_fast_reg_mr(iser_task, mem, desc,
ISER_PROT_KEY_VALID, &prot_sge); ISER_PROT_KEY_VALID, &prot_reg);
if (err) if (err)
goto err_reg; goto err_reg;
} }
err = iser_reg_sig_mr(iser_task, desc, &data_sge, err = iser_reg_sig_mr(iser_task, desc, mem_reg,
&prot_sge, &sig_sge); &prot_reg, mem_reg);
if (err) { if (err) {
iser_err("Failed to register signature mr\n"); iser_err("Failed to register signature mr\n");
return err; return err;
} }
desc->reg_indicators |= ISER_FASTREG_PROTECTED; desc->reg_indicators |= ISER_FASTREG_PROTECTED;
regd_buf->reg.lkey = sig_sge.lkey;
regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
regd_buf->reg.va = sig_sge.addr;
regd_buf->reg.len = sig_sge.length;
} else {
if (desc)
regd_buf->reg.rkey = desc->data_mr->rkey;
else
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.lkey = data_sge.lkey;
regd_buf->reg.va = data_sge.addr;
regd_buf->reg.len = data_sge.length;
} }
return 0; return 0;
err_reg: err_reg:
if (desc) { if (desc)
spin_lock_irqsave(&ib_conn->lock, flags); iser_reg_desc_put(ib_conn, desc);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_irqrestore(&ib_conn->lock, flags);
}
return err; return err;
} }


@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
ib_conn->fmr.page_vec = NULL; ib_conn->fmr.page_vec = NULL;
} }
static int
iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
struct fast_reg_descriptor *desc)
{
struct iser_pi_context *pi_ctx = NULL;
struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
.flags = IB_MR_SIGNATURE_EN};
int ret = 0;
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx)
return -ENOMEM;
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
return ret;
}
static void
iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
{
ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
ib_dereg_mr(pi_ctx->prot_mr);
ib_destroy_mr(pi_ctx->sig_mr);
kfree(pi_ctx);
}
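
iser_alloc_pi_ctx() and iser_free_pi_ctx() pull the protection-information setup out of iser_create_fastreg_desc(), keeping the usual goto-unwind error handling local to one helper. The shape of that unwind pattern, with plain malloc() calls standing in for the MR and fast-reg page-list allocations (ctx_alloc() and struct ctx are illustrative only):

/* Each failing step releases only what the earlier steps set up. */
#include <stdio.h>
#include <stdlib.h>

struct ctx {
        void *a;        /* think: prot_frpl */
        void *b;        /* think: prot_mr   */
        void *c;        /* think: sig_mr    */
};

static struct ctx *ctx_alloc(void)
{
        struct ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return NULL;

        ctx->a = malloc(64);
        if (!ctx->a)
                goto err_ctx;

        ctx->b = malloc(64);
        if (!ctx->b)
                goto err_a;

        ctx->c = malloc(64);
        if (!ctx->c)
                goto err_b;

        return ctx;

err_b:
        free(ctx->b);
err_a:
        free(ctx->a);
err_ctx:
        free(ctx);
        return NULL;
}

int main(void)
{
        struct ctx *ctx = ctx_alloc();

        printf("ctx_alloc %s\n", ctx ? "succeeded" : "failed");
        if (ctx) {
                free(ctx->c);
                free(ctx->b);
                free(ctx->a);
                free(ctx);
        }
        return 0;
}

As in the kernel helper, each error label undoes the allocations in reverse order and the caller only ever sees a fully built context or NULL.
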
static int static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
bool pi_enable, struct fast_reg_descriptor *desc) bool pi_enable, struct fast_reg_descriptor *desc)
@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
desc->reg_indicators |= ISER_DATA_KEY_VALID; desc->reg_indicators |= ISER_DATA_KEY_VALID;
if (pi_enable) { if (pi_enable) {
struct ib_mr_init_attr mr_init_attr = {0}; ret = iser_alloc_pi_ctx(ib_device, pd, desc);
struct iser_pi_context *pi_ctx = NULL; if (ret)
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx) {
iser_err("Failed to allocate pi context\n");
ret = -ENOMEM;
goto pi_ctx_alloc_failure; goto pi_ctx_alloc_failure;
}
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
iser_err("Failed to allocate prot frpl ret=%d\n",
ret);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
iser_err("Failed to allocate prot frmr ret=%d\n",
ret);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
mr_init_attr.max_reg_descriptors = 2;
mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
iser_err("Failed to allocate signature enabled mr err=%d\n",
ret);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
} }
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
iser_dbg("Create fr_desc %p page_list %p\n",
desc, desc->data_frpl->page_list);
return 0; return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
pi_ctx_alloc_failure: pi_ctx_alloc_failure:
ib_dereg_mr(desc->data_mr); ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure: fast_reg_mr_failure:
@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_del(&desc->list); list_del(&desc->list);
ib_free_fast_reg_page_list(desc->data_frpl); ib_free_fast_reg_page_list(desc->data_frpl);
ib_dereg_mr(desc->data_mr); ib_dereg_mr(desc->data_mr);
if (desc->pi_ctx) { if (desc->pi_ctx)
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); iser_free_pi_ctx(desc->pi_ctx);
ib_dereg_mr(desc->pi_ctx->prot_mr);
ib_destroy_mr(desc->pi_ctx->sig_mr);
kfree(desc->pi_ctx);
}
kfree(desc); kfree(desc);
++i; ++i;
} }
@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
struct iser_conn *iser_conn; struct iser_conn *iser_conn;
iser_conn = (struct iser_conn *)cma_id->context; iser_conn = (struct iser_conn *)cma_id->context;
iser_conn->state = ISER_CONN_DOWN; iser_conn->state = ISER_CONN_TERMINATING;
} }
/** /**
@ -992,93 +1000,6 @@ int iser_connect(struct iser_conn *iser_conn,
return err; return err;
} }
/**
* iser_reg_page_vec - Register physical memory
*
* returns: 0 on success, errno code on failure
*/
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
struct ib_pool_fmr *mem;
u64 io_addr;
u64 *page_list;
int status;
page_list = page_vec->pages;
io_addr = page_list[0];
mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_list,
page_vec->length,
io_addr);
if (IS_ERR(mem)) {
status = (int)PTR_ERR(mem);
iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
return status;
}
mem_reg->lkey = mem->fmr->lkey;
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
mem_reg->len = page_vec->data_size;
iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
"entry[0]: (0x%08lx,%ld)] -> "
"[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
page_vec, page_vec->length,
(unsigned long)page_vec->pages[0],
(unsigned long)page_vec->data_size,
(unsigned int)mem_reg->lkey, mem_reg->mem_h,
(unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
return 0;
}
/**
* Unregister (previously registered using FMR) memory.
* If memory is non-FMR does nothing.
*/
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
if (ret)
iser_err("ib_fmr_pool_unmap failed %d\n", ret);
reg->mem_h = NULL;
}
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
struct iser_conn *iser_conn = iser_task->iser_conn;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!desc)
return;
reg->mem_h = NULL;
spin_lock_bh(&ib_conn->lock);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
}
int iser_post_recvl(struct iser_conn *iser_conn) int iser_post_recvl(struct iser_conn *iser_conn)
{ {
struct ib_recv_wr rx_wr, *rx_wr_failed; struct ib_recv_wr rx_wr, *rx_wr_failed;
@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn, iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED); ISCSI_ERR_CONN_FAILED);
if (wc->wr_id == ISER_FASTREG_LI_WRID)
return;
if (is_iser_tx_desc(iser_conn, wr_id)) { if (is_iser_tx_desc(iser_conn, wr_id)) {
struct iser_tx_desc *desc = wr_id; struct iser_tx_desc *desc = wr_id;
@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
else else
iser_dbg("flush error: wr id %llx\n", wc->wr_id); iser_dbg("flush error: wr id %llx\n", wc->wr_id);
if (wc->wr_id != ISER_FASTREG_LI_WRID &&
wc->wr_id != ISER_BEACON_WRID)
iser_handle_comp_error(ib_conn, wc);
/* complete in case all flush errors were consumed */
if (wc->wr_id == ISER_BEACON_WRID) if (wc->wr_id == ISER_BEACON_WRID)
/* all flush errors were consumed */
complete(&ib_conn->flush_comp); complete(&ib_conn->flush_comp);
else
iser_handle_comp_error(ib_conn, wc);
} }
} }
@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector) enum iser_data_dir cmd_dir, sector_t *sector)
{ {
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = reg->mem_h; struct fast_reg_descriptor *desc = reg->mem_h;
unsigned long sector_size = iser_task->sc->device->sector_size; unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status; struct ib_mr_status mr_status;


@ -40,6 +40,7 @@
#include <linux/parser.h> #include <linux/parser.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h> #include <linux/atomic.h>
@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
if (!attr) if (!attr)
return -ENOMEM; return -ENOMEM;
ret = ib_find_pkey(target->srp_host->srp_dev->dev, ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port, target->srp_host->port,
be16_to_cpu(target->pkey), be16_to_cpu(target->pkey),
&attr->pkey_index); &attr->pkey_index);
if (ret) if (ret)
goto out; goto out;


@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
} }
break; break;
default: default:
printk(KERN_ERR "received unrecognized IB event %d\n", pr_err("received unrecognized IB event %d\n",
event->event); event->event);
break; break;
} }
@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
*/ */
static void srpt_srq_event(struct ib_event *event, void *ctx) static void srpt_srq_event(struct ib_event *event, void *ctx)
{ {
printk(KERN_INFO "SRQ event %d\n", event->event); pr_info("SRQ event %d\n", event->event);
} }
/** /**
@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
ch->sess_name, srpt_get_ch_state(ch)); ch->sess_name, srpt_get_ch_state(ch));
break; break;
default: default:
printk(KERN_ERR "received unrecognized IB QP event %d\n", pr_err("received unrecognized IB QP event %d\n", event->event);
event->event);
break; break;
} }
} }
@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
sport = &sdev->port[i - 1]; sport = &sdev->port[i - 1];
WARN_ON(sport->port != i); WARN_ON(sport->port != i);
if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0) if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
printk(KERN_ERR "disabling MAD processing failed.\n"); pr_err("disabling MAD processing failed.\n");
if (sport->mad_agent) { if (sport->mad_agent) {
ib_unregister_mad_agent(sport->mad_agent); ib_unregister_mad_agent(sport->mad_agent);
sport->mad_agent = NULL; sport->mad_agent = NULL;
@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
ret = -ENOMEM; ret = -ENOMEM;
if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) { if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
printk(KERN_WARNING "IB send queue full (needed 1)\n"); pr_warn("IB send queue full (needed 1)\n");
goto out; goto out;
} }
@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (ioctx->n_rbuf > if (ioctx->n_rbuf >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
printk(KERN_ERR "received unsupported SRP_CMD request" pr_err("received unsupported SRP_CMD request"
" type (%u out + %u in != %u / %zu)\n", " type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt, srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt, srp_cmd->data_in_desc_cnt,
@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0); transport_generic_free_cmd(&ioctx->cmd, 0);
} else { } else {
printk(KERN_ERR "IB completion has been received too late for" pr_err("IB completion has been received too late for"
" wr_id = %u.\n", ioctx->ioctx.index); " wr_id = %u.\n", ioctx->ioctx.index);
} }
} }
@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
SRPT_STATE_DATA_IN)) SRPT_STATE_DATA_IN))
target_execute_cmd(&ioctx->cmd); target_execute_cmd(&ioctx->cmd);
else else
printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, pr_err("%s[%d]: wrong state = %d\n", __func__,
__LINE__, srpt_get_cmd_state(ioctx)); __LINE__, srpt_get_cmd_state(ioctx));
} else if (opcode == SRPT_RDMA_ABORT) { } else if (opcode == SRPT_RDMA_ABORT) {
ioctx->rdma_aborted = true; ioctx->rdma_aborted = true;
@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
switch (opcode) { switch (opcode) {
case SRPT_RDMA_READ_LAST: case SRPT_RDMA_READ_LAST:
if (ioctx->n_rdma <= 0) { if (ioctx->n_rdma <= 0) {
printk(KERN_ERR "Received invalid RDMA read" pr_err("Received invalid RDMA read"
" error completion with idx %d\n", " error completion with idx %d\n",
ioctx->ioctx.index); ioctx->ioctx.index);
break; break;
@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
if (state == SRPT_STATE_NEED_DATA) if (state == SRPT_STATE_NEED_DATA)
srpt_abort_cmd(ioctx); srpt_abort_cmd(ioctx);
else else
printk(KERN_ERR "%s[%d]: wrong state = %d\n", pr_err("%s[%d]: wrong state = %d\n",
__func__, __LINE__, state); __func__, __LINE__, state);
break; break;
case SRPT_RDMA_WRITE_LAST: case SRPT_RDMA_WRITE_LAST:
break; break;
default: default:
printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
__LINE__, opcode);
break; break;
} }
} }
@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
if (sense_data_len > max_sense_len) { if (sense_data_len > max_sense_len) {
printk(KERN_WARNING "truncated sense data from %d to %d" pr_warn("truncated sense data from %d to %d"
" bytes\n", sense_data_len, max_sense_len); " bytes\n", sense_data_len, max_sense_len);
sense_data_len = max_sense_len; sense_data_len = max_sense_len;
} }
@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
int addressing_method; int addressing_method;
if (unlikely(len < 2)) { if (unlikely(len < 2)) {
printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or " pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
"more", len); len);
goto out; goto out;
} }
@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
default: default:
printk(KERN_ERR "Unimplemented LUN addressing method %u", pr_err("Unimplemented LUN addressing method %u\n",
addressing_method); addressing_method);
break; break;
} }
@ -1672,8 +1670,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
return res; return res;
out_err: out_err:
printk(KERN_ERR "Support for multi-level LUNs has not yet been" pr_err("Support for multi-level LUNs has not yet been implemented\n");
" implemented");
goto out; goto out;
} }
@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
} }
if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n", pr_err("0x%llx: parsing SRP descriptor table failed.\n",
srp_cmd->tag); srp_cmd->tag);
ret = TCM_INVALID_CDB_FIELD; ret = TCM_INVALID_CDB_FIELD;
goto send_sense; goto send_sense;
@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
break; break;
case SRP_I_LOGOUT: case SRP_I_LOGOUT:
printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n"); pr_err("Not yet implemented: SRP_I_LOGOUT\n");
break; break;
case SRP_CRED_RSP: case SRP_CRED_RSP:
pr_debug("received SRP_CRED_RSP\n"); pr_debug("received SRP_CRED_RSP\n");
@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
pr_debug("received SRP_AER_RSP\n"); pr_debug("received SRP_AER_RSP\n");
break; break;
case SRP_RSP: case SRP_RSP:
printk(KERN_ERR "Received SRP_RSP\n"); pr_err("Received SRP_RSP\n");
break; break;
default: default:
printk(KERN_ERR "received IU with unknown opcode 0x%x\n", pr_err("received IU with unknown opcode 0x%x\n",
srp_cmd->opcode); srp_cmd->opcode);
break; break;
} }
@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
req_lim = atomic_dec_return(&ch->req_lim); req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0)) if (unlikely(req_lim < 0))
printk(KERN_ERR "req_lim = %d < 0\n", req_lim); pr_err("req_lim = %d < 0\n", req_lim);
ioctx = sdev->ioctx_ring[index]; ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL); srpt_handle_new_iu(ch, ioctx, NULL);
} else { } else {
printk(KERN_INFO "receiving failed for idx %u with status %d\n", pr_info("receiving failed for idx %u with status %d\n",
index, wc->status); index, wc->status);
} }
} }
@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
} }
} else { } else {
if (opcode == SRPT_SEND) { if (opcode == SRPT_SEND) {
printk(KERN_INFO "sending response for idx %u failed" pr_info("sending response for idx %u failed"
" with status %d\n", index, wc->status); " with status %d\n", index, wc->status);
srpt_handle_send_err_comp(ch, wc->wr_id); srpt_handle_send_err_comp(ch, wc->wr_id);
} else if (opcode != SRPT_RDMA_MID) { } else if (opcode != SRPT_RDMA_MID) {
printk(KERN_INFO "RDMA t %d for idx %u failed with" pr_info("RDMA t %d for idx %u failed with"
" status %d", opcode, index, wc->status); " status %d\n", opcode, index, wc->status);
srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
} }
} }
@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
ch = arg; ch = arg;
BUG_ON(!ch); BUG_ON(!ch);
printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n", pr_info("Session %s: kernel thread %s (PID %d) started\n",
ch->sess_name, ch->thread->comm, current->pid); ch->sess_name, ch->thread->comm, current->pid);
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
wait_event_interruptible(ch->wait_queue, wait_event_interruptible(ch->wait_queue,
(srpt_process_completion(ch->cq, ch), (srpt_process_completion(ch->cq, ch),
kthread_should_stop())); kthread_should_stop()));
} }
printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n", pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
ch->sess_name, ch->thread->comm, current->pid); ch->sess_name, ch->thread->comm, current->pid);
return 0; return 0;
} }
@ -2097,7 +2094,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
ch->rq_size + srp_sq_size, 0); ch->rq_size + srp_sq_size, 0);
if (IS_ERR(ch->cq)) { if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq); ret = PTR_ERR(ch->cq);
printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n", pr_err("failed to create CQ cqe= %d ret= %d\n",
ch->rq_size + srp_sq_size, ret); ch->rq_size + srp_sq_size, ret);
goto out; goto out;
} }
@ -2123,7 +2120,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto retry; goto retry;
} }
} }
printk(KERN_ERR "failed to create_qp ret= %d\n", ret); pr_err("failed to create_qp ret= %d\n", ret);
goto err_destroy_cq; goto err_destroy_cq;
} }
@ -2143,7 +2140,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
if (IS_ERR(ch->thread)) { if (IS_ERR(ch->thread)) {
printk(KERN_ERR "failed to create kernel thread %ld\n", pr_err("failed to create kernel thread %ld\n",
PTR_ERR(ch->thread)); PTR_ERR(ch->thread));
ch->thread = NULL; ch->thread = NULL;
goto err_destroy_qp; goto err_destroy_qp;
@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
/* fall through */ /* fall through */
case CH_LIVE: case CH_LIVE:
if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
printk(KERN_ERR "sending CM DREQ failed.\n"); pr_err("sending CM DREQ failed.\n");
break; break;
case CH_DISCONNECTING: case CH_DISCONNECTING:
break; break;
@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
ret = srpt_ch_qp_err(ch); ret = srpt_ch_qp_err(ch);
if (ret < 0) if (ret < 0)
printk(KERN_ERR "Setting queue pair in error state" pr_err("Setting queue pair in error state"
" failed: %d\n", ret); " failed: %d\n", ret);
} }
} }
@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
it_iu_len = be32_to_cpu(req->req_it_iu_len); it_iu_len = be32_to_cpu(req->req_it_iu_len);
printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx," pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
" t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d" " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
" (guid=0x%llx:0x%llx)\n", " (guid=0x%llx:0x%llx)\n",
be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]), be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]), be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
be64_to_cpu(*(__be64 *)&req->target_port_id[0]), be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
be64_to_cpu(*(__be64 *)&req->target_port_id[8]), be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
it_iu_len, it_iu_len,
param->port, param->port,
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
rsp = kzalloc(sizeof *rsp, GFP_KERNEL); rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
rej = kzalloc(sizeof *rej, GFP_KERNEL); rej = kzalloc(sizeof *rej, GFP_KERNEL);
@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
ret = -EINVAL; ret = -EINVAL;
printk(KERN_ERR "rejected SRP_LOGIN_REQ because its" pr_err("rejected SRP_LOGIN_REQ because its"
" length (%d bytes) is out of range (%d .. %d)\n", " length (%d bytes) is out of range (%d .. %d)\n",
it_iu_len, 64, srp_max_req_size); it_iu_len, 64, srp_max_req_size);
goto reject; goto reject;
@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
ret = -EINVAL; ret = -EINVAL;
printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port" pr_err("rejected SRP_LOGIN_REQ because the target port"
" has not yet been enabled\n"); " has not yet been enabled\n");
goto reject; goto reject;
} }
@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
ret = -ENOMEM; ret = -ENOMEM;
printk(KERN_ERR "rejected SRP_LOGIN_REQ because it" pr_err("rejected SRP_LOGIN_REQ because it"
" has an invalid target port identifier.\n"); " has an invalid target port identifier.\n");
goto reject; goto reject;
} }
@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (!ch) { if (!ch) {
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n"); pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
ret = -ENOMEM; ret = -ENOMEM;
goto reject; goto reject;
} }
@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (ret) { if (ret) {
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating" pr_err("rejected SRP_LOGIN_REQ because creating"
" a new RDMA channel failed.\n"); " a new RDMA channel failed.\n");
goto free_ring; goto free_ring;
} }
@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (ret) { if (ret) {
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling" pr_err("rejected SRP_LOGIN_REQ because enabling"
" RTR failed (error code = %d)\n", ret); " RTR failed (error code = %d)\n", ret);
goto destroy_ib; goto destroy_ib;
} }
@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
nacl = srpt_lookup_acl(sport, ch->i_port_id); nacl = srpt_lookup_acl(sport, ch->i_port_id);
if (!nacl) { if (!nacl) {
printk(KERN_INFO "Rejected login because no ACL has been" pr_info("Rejected login because no ACL has been"
" configured yet for initiator %s.\n", ch->sess_name); " configured yet for initiator %s.\n", ch->sess_name);
rej->reason = __constant_cpu_to_be32( rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
goto destroy_ib; goto destroy_ib;
@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ret = ib_send_cm_rep(cm_id, rep_param); ret = ib_send_cm_rep(cm_id, rep_param);
if (ret) { if (ret) {
printk(KERN_ERR "sending SRP_LOGIN_REQ response failed" pr_err("sending SRP_LOGIN_REQ response failed"
" (error code = %d)\n", ret); " (error code = %d)\n", ret);
goto release_channel; goto release_channel;
} }
@ -2679,7 +2676,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
{ {
printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id); pr_info("Received IB REJ for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id); srpt_drain_channel(cm_id);
} }
@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
{ {
printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id); pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id); srpt_drain_channel(cm_id);
} }
static void srpt_cm_rep_error(struct ib_cm_id *cm_id) static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
{ {
printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id); pr_info("Received IB REP error for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id); srpt_drain_channel(cm_id);
} }
@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
if (send_drep) { if (send_drep) {
if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
printk(KERN_ERR "Sending IB DREP failed.\n"); pr_err("Sending IB DREP failed.\n");
printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n", pr_info("Received DREQ and sent DREP for session %s.\n",
ch->sess_name); ch->sess_name);
} }
} }
@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
*/ */
static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
{ {
printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n", pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
cm_id);
srpt_drain_channel(cm_id); srpt_drain_channel(cm_id);
} }
@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
srpt_cm_rep_error(cm_id); srpt_cm_rep_error(cm_id);
break; break;
case IB_CM_DREQ_ERROR: case IB_CM_DREQ_ERROR:
printk(KERN_INFO "Received IB DREQ ERROR event.\n"); pr_info("Received IB DREQ ERROR event.\n");
break; break;
case IB_CM_MRA_RECEIVED: case IB_CM_MRA_RECEIVED:
printk(KERN_INFO "Received IB MRA event\n"); pr_info("Received IB MRA event\n");
break; break;
default: default:
printk(KERN_ERR "received unrecognized IB CM event %d\n", pr_err("received unrecognized IB CM event %d\n", event->event);
event->event);
break; break;
} }
@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
ret = -ENOMEM; ret = -ENOMEM;
sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail); sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
if (sq_wr_avail < 0) { if (sq_wr_avail < 0) {
printk(KERN_WARNING "IB send queue full (needed %d)\n", pr_warn("IB send queue full (needed %d)\n",
n_rdma); n_rdma);
goto out; goto out;
} }
} }
@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
} }
if (ret) if (ret)
printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d", pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma); __func__, __LINE__, ret, i, n_rdma);
if (ret && i > 0) { if (ret && i > 0) {
wr.num_sge = 0; wr.num_sge = 0;
@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
wr.send_flags = IB_SEND_SIGNALED; wr.send_flags = IB_SEND_SIGNALED;
while (ch->state == CH_LIVE && while (ch->state == CH_LIVE &&
 	    ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
-		printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+		pr_info("Trying to abort failed RDMA transfer [%d]\n",
 			ioctx->ioctx.index);
 		msleep(1000);
 	}
 	while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-		printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+		pr_info("Waiting until RDMA abort finished [%d]\n",
 			ioctx->ioctx.index);
 		msleep(1000);
 	}
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
 	ret = srpt_map_sg_to_ib_sge(ch, ioctx);
 	if (ret) {
-		printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
+		pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
 		goto out;
 	}
 	ret = srpt_perform_rdmas(ch, ioctx);
 	if (ret) {
 		if (ret == -EAGAIN || ret == -ENOMEM)
-			printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
+			pr_info("%s[%d] queue full -- ret=%d\n",
 				__func__, __LINE__, ret);
 		else
-			printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
+			pr_err("%s[%d] fatal error -- ret=%d\n",
 				__func__, __LINE__, ret);
 		goto out_unmap;
 	}
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	    !ioctx->queue_status_only) {
 		ret = srpt_xfer_data(ch, ioctx);
 		if (ret) {
-			printk(KERN_ERR "xfer_data failed for tag %llu\n",
+			pr_err("xfer_data failed for tag %llu\n",
 				ioctx->tag);
 			return;
 		}
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	}
 	ret = srpt_post_send(ch, ioctx, resp_len);
 	if (ret) {
-		printk(KERN_ERR "sending cmd response failed for tag %llu\n",
+		pr_err("sending cmd response failed for tag %llu\n",
 			ioctx->tag);
 		srpt_unmap_sg_to_ib_sge(ch, ioctx);
 		srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
 	res = wait_event_interruptible(sdev->ch_releaseQ,
 				       srpt_ch_list_empty(sdev));
 	if (res)
-		printk(KERN_ERR "%s: interrupted.\n", __func__);
+		pr_err("%s: interrupted.\n", __func__);
 	return 0;
 }
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
 		spin_lock_init(&sport->port_acl_lock);
 		if (srpt_refresh_port(sport)) {
-			printk(KERN_ERR "MAD registration failed for %s-%d.\n",
+			pr_err("MAD registration failed for %s-%d.\n",
 			       srpt_sdev_name(sdev), i);
 			goto err_ring;
 		}
@@ -3330,7 +3325,7 @@ static void srpt_add_one(struct ib_device *device)
 	kfree(sdev);
 err:
 	sdev = NULL;
-	printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
+	pr_info("%s(%s) failed.\n", __func__, device->name);
 	goto out;
 }
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
 	sdev = ib_get_client_data(device, &srpt_client);
 	if (!sdev) {
-		printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
-		       device->name);
+		pr_info("%s(%s): nothing to do.\n", __func__, device->name);
 		return;
 	}
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
 	nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
 	if (!nacl) {
-		printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
+		pr_err("Unable to allocate struct srpt_node_acl\n");
 		return NULL;
 	}
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
 	u8 i_port_id[16];
 	if (srpt_parse_i_port_id(i_port_id, name) < 0) {
-		printk(KERN_ERR "invalid initiator port ID %s\n", name);
+		pr_err("invalid initiator port ID %s\n", name);
 		ret = -EINVAL;
 		goto err;
 	}
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
 	ret = kstrtoul(page, 0, &tmp);
 	if (ret < 0) {
-		printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
+		pr_err("Unable to extract srpt_tpg_store_enable\n");
 		return -EINVAL;
 	}
 	if ((tmp != 0) && (tmp != 1)) {
-		printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
+		pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
 		return -EINVAL;
 	}
 	if (tmp == 1)
@@ -3980,7 +3974,7 @@ static int __init srpt_init_module(void)
 	ret = -EINVAL;
 	if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
-		printk(KERN_ERR "invalid value %d for kernel module parameter"
+		pr_err("invalid value %d for kernel module parameter"
 		       " srp_max_req_size -- must be at least %d.\n",
 		       srp_max_req_size, MIN_MAX_REQ_SIZE);
 		goto out;
@@ -3988,7 +3982,7 @@ static int __init srpt_init_module(void)
 	if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
 	    || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
-		printk(KERN_ERR "invalid value %d for kernel module parameter"
+		pr_err("invalid value %d for kernel module parameter"
 		       " srpt_srq_size -- must be in the range [%d..%d].\n",
 		       srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
 		goto out;
@@ -3996,7 +3990,7 @@ static int __init srpt_init_module(void)
 	srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
 	if (IS_ERR(srpt_target)) {
-		printk(KERN_ERR "couldn't register\n");
+		pr_err("couldn't register\n");
 		ret = PTR_ERR(srpt_target);
 		goto out;
 	}
@@ -4018,13 +4012,13 @@ static int __init srpt_init_module(void)
 	ret = target_fabric_configfs_register(srpt_target);
 	if (ret < 0) {
-		printk(KERN_ERR "couldn't register\n");
+		pr_err("couldn't register\n");
 		goto out_free_target;
 	}
 	ret = ib_register_client(&srpt_client);
 	if (ret) {
-		printk(KERN_ERR "couldn't register IB client\n");
+		pr_err("couldn't register IB client\n");
 		goto out_unregister_target;
 	}
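
Note on the conversion above: pr_err()/pr_info() are thin wrappers around printk() that pick up the file's pr_fmt() prefix, so a single #define gives every message a consistent module tag instead of open-coded KERN_* strings. A minimal, self-contained sketch of the idiom (illustrative only, not code from this patch):

/* Illustrative module: how pr_* applies a per-file prefix. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* must precede the includes */

#include <linux/module.h>
#include <linux/printk.h>

static int __init example_init(void)
{
	pr_info("loaded\n");			/* logged as "<module>: loaded" */
	pr_err("bad parameter: %d\n", -22);	/* logged at KERN_ERR severity */
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");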

View File

@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 				return err;
 			}
 			if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
-				/* compute slave's gid block */
-				smp->attr_mod = cpu_to_be32(slave / 8);
-				/* execute cmd */
-				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					    vhcr->in_modifier, opcode_modifier,
-					    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
-				if (!err) {
-					/* if needed, move slave gid to index 0 */
-					if (slave % 8)
-						memcpy(outsmp->data,
-						       outsmp->data + (slave % 8) * 8, 8);
-					/* delete all other gids */
-					memset(outsmp->data + 8, 0, 56);
-				}
-				return err;
+				__be64 guid = mlx4_get_admin_guid(dev, slave,
+								  port);
+
+				/* set the PF admin guid to the FW/HW burned
+				 * GUID, if it wasn't yet set
+				 */
+				if (slave == 0 && guid == 0) {
+					smp->attr_mod = 0;
+					err = mlx4_cmd_box(dev,
+							   inbox->dma,
+							   outbox->dma,
+							   vhcr->in_modifier,
+							   opcode_modifier,
+							   vhcr->op,
+							   MLX4_CMD_TIME_CLASS_C,
+							   MLX4_CMD_NATIVE);
+					if (err)
+						return err;
+					mlx4_set_admin_guid(dev,
+							    *(__be64 *)outsmp->
+							    data, slave, port);
+				} else {
+					memcpy(outsmp->data, &guid, 8);
+				}
+
+				/* clean all other gids */
+				memset(outsmp->data + 8, 0, 56);
+				return 0;
 			}
 			if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 			oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
 			vf_oper->vport[port].vlan_idx = NO_INDX;
 			vf_oper->vport[port].mac_idx = NO_INDX;
+			mlx4_set_random_admin_guid(dev, i, port);
 		}
 		spin_lock_init(&s_state->lock);
 	}

View File

@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 				priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
 			}
 			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+			mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
+					    flr_slave);
 			queue_work(priv->mfunc.master.comm_wq,
 				   &priv->mfunc.master.slave_flr_event_work);
 			break;
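
The two added lines make the FLR (function level reset) handler raise MLX4_DEV_EVENT_SLAVE_SHUTDOWN before the FLR work is queued, so registered interface drivers can react to the slave going away. As a rough sketch of the consumer side (assumed shape only, not code from this series; the function name and message are made up), an mlx4 interface driver would observe it in its event callback:

/* Sketch: event callback as registered through struct mlx4_interface. */
static void example_mlx4_event(struct mlx4_dev *dev, void *context,
			       enum mlx4_dev_event event, unsigned long param)
{
	if (event == MLX4_DEV_EVENT_SLAVE_SHUTDOWN)
		pr_info("slave %lu is going through FLR\n", param);
}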

View File

@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_counter_free);
 
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
+
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	return priv->mfunc.master.vf_admin[entry].vport[port].guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
+
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	__be64 guid;
+
+	/* hw GUID */
+	if (entry == 0)
+		return;
+
+	get_random_bytes((char *)&guid, sizeof(guid));
+	guid &= ~(cpu_to_be64(1ULL << 56));
+	guid |= cpu_to_be64(1ULL << 57);
+	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+
 static int mlx4_setup_hca(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
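
The masking in mlx4_set_random_admin_guid() clears bit 56 and sets bit 57 of the big-endian value, i.e. it clears the individual/group (multicast) bit and sets the universal/local bit in the first EUI-64 octet, so the random GUID is a unicast, locally administered identifier. A small stand-alone illustration of the effect on that octet (plain userspace C, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* pretend this is the most significant octet of a random GUID */
	uint8_t first_octet = 0xff;

	first_octet &= ~0x01;	/* clear I/G bit: not a multicast GUID    */
	first_octet |= 0x02;	/* set U/L bit: locally administered GUID */

	printf("first octet: 0x%02x\n", first_octet);	/* prints 0xfe */
	return 0;
}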

View File

@@ -499,6 +499,7 @@ struct mlx4_vport_state {
 	bool spoofchk;
 	u32 link_state;
 	u8 qos_vport;
+	__be64 guid;
 };
 
 struct mlx4_vf_admin_state {

View File

@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
 	return 0;
 }
 
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
 static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 {
 	struct fw_page *fwp;
 	int n;
 
-	fwp = find_fw_page(dev, addr & PAGE_MASK);
+	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
 	if (!fwp) {
 		mlx5_core_warn(dev, "page not found\n");
 		return;
 	}
 
-	n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+	n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
 	fwp->free_count++;
 	set_bit(n, &fwp->bitmask);
 	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
 		rb_erase(&fwp->rb_node, &dev->priv.page_root);
 		if (fwp->free_count != 1)
 			list_del(&fwp->list);
-		dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
+		dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
 		__free_page(fwp->page);
 		kfree(fwp);
 	} else if (fwp->free_count == 1) {
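
The fix above matters when CONFIG_ARCH_DMA_ADDR_T_64BIT is enabled on a 32-bit architecture: PAGE_MASK is built from an unsigned long, so it is only a 32-bit mask there, and "addr & PAGE_MASK" silently drops the upper half of a 64-bit DMA address, while the new 64-bit MLX5_U64_4K_PAGE_MASK preserves it. A stand-alone illustration (userspace C, values chosen for the example):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* a DMA address above 4 GiB, as seen with a 64-bit dma_addr_t */
	uint64_t addr = 0x00000001fffff234ULL;

	/* what a 32-bit unsigned long PAGE_MASK amounts to */
	uint32_t mask32 = ~((1U << PAGE_SHIFT) - 1);
	/* the 64-bit replacement introduced by the patch */
	uint64_t mask64 = (~(uint64_t)0U) << PAGE_SHIFT;

	printf("32-bit mask: 0x%llx\n",
	       (unsigned long long)(addr & mask32));	/* 0xfffff000, high bits lost */
	printf("64-bit mask: 0x%llx\n",
	       (unsigned long long)(addr & mask64));	/* 0x1fffff000 */
	return 0;
}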

View File

@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
+			 int port);
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
 
 int mlx4_flow_attach(struct mlx4_dev *dev,
 		     struct mlx4_net_trans_rule *rule, u64 *reg_id);
 int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
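
Taken together, the three prototypes above form the admin GUID interface used by the GUID handling changes in this pull: the core assigns a random GUID per VF at init, an administrative request can overwrite it, and the GUID_INFO MAD path reads it back. A hypothetical caller (the helper below is made up for illustration, not part of the patch set) would combine them like this:

/* Hypothetical helper, for illustration only. */
static void example_admin_guid_flow(struct mlx4_dev *dev, int vf, int port,
				    __be64 requested_guid)
{
	__be64 guid;

	/* give the VF a random, locally administered GUID to start with */
	mlx4_set_random_admin_guid(dev, vf, port);

	/* an administrative request may later pin a specific GUID */
	mlx4_set_admin_guid(dev, requested_guid, vf, port);

	/* the GUID_INFO MAD wrapper reads the current value back */
	guid = mlx4_get_admin_guid(dev, vf, port);
	(void)guid;
}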