From ef95a90ae6f4f21990e1f7ced6719784a409e811 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Tue, 10 Apr 2018 10:26:23 -0400 Subject: [PATCH 01/52] RDMA/ucma: ucma_context reference leak in error path Validating input parameters should be done before getting the cm_id otherwise it can leak a cm_id reference. Fixes: 6a21dfc0d0db ("RDMA/ucma: Limit possible option size") Signed-off-by: Shamir Rabinovitch Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 74329483af6d..680b3536ad3e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1316,13 +1316,13 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) - return -EINVAL; - optval = memdup_user(u64_to_user_ptr(cmd.optval), cmd.optlen); if (IS_ERR(optval)) { From 25d0e2db3d8f4f79fddd436dcc848d912e98b485 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 14 Apr 2018 21:16:54 -0400 Subject: [PATCH 02/52] IB/mlx5: remove duplicate header file The header file fs_helpers.h is included twice. So it should be removed. Fixes: 802c2125689d ("IB/mlx5: Add IPsec support for egress and ingress") CC: Srinivas Eeda CC: Junxiao Bi Signed-off-by: Zhu Yanjun Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index daa919e5a442..6a749c02b14c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include From b3fe6c62bc66868c45b5bb16050e6bcb333af337 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Apr 2018 18:51:50 -0700 Subject: [PATCH 03/52] infiniband: mlx5: fix build errors when INFINIBAND_USER_ACCESS=m Fix build errors when INFINIBAND_USER_ACCESS=m and MLX5_INFINIBAND=y. The build error occurs when the mlx5 driver code attempts to use USER_ACCESS interfaces, which are built as a loadable module. Fixes these build errors: drivers/infiniband/hw/mlx5/main.o: In function `populate_specs_root': ../drivers/infiniband/hw/mlx5/main.c:4982: undefined reference to `uverbs_default_get_objects' ../drivers/infiniband/hw/mlx5/main.c:4994: undefined reference to `uverbs_alloc_spec_tree' drivers/infiniband/hw/mlx5/main.o: In function `depopulate_specs_root': ../drivers/infiniband/hw/mlx5/main.c:5001: undefined reference to `uverbs_free_spec_tree' Build-tested with multiple config combinations. Fixes: 8c84660bb437 ("IB/mlx5: Initialize the parsing tree root without the help of uverbs") Cc: stable@vger.kernel.org # reported against 4.16 Reported-by: kbuild test robot Signed-off-by: Randy Dunlap Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index bce263b92821..fb4d77be019b 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,7 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). From e33514f2e930ad800fa52db19e889bb0fba25419 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Apr 2018 16:52:47 +0200 Subject: [PATCH 04/52] IB/uverbs: Add missing braces in anonymous union initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-4.1.2: drivers/infiniband/core/uverbs_std_types_flow_action.c:366: error: unknown field ‘ptr’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:367: error: unknown field ‘type’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:367: warning: missing braces around initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:367: warning: (near initialization for ‘uverbs_flow_action_esp_keymat[0]..’) drivers/infiniband/core/uverbs_std_types_flow_action.c:368: error: unknown field ‘min_len’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:368: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:368: warning: (near initialization for ‘uverbs_flow_action_esp_keymat[0].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:368: error: unknown field ‘len’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:368: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:368: warning: (near initialization for ‘uverbs_flow_action_esp_keymat[0].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:369: error: unknown field ‘flags’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:369: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:369: warning: (near initialization for ‘uverbs_flow_action_esp_keymat[0].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:376: error: unknown field ‘ptr’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:377: error: unknown field ‘type’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:377: warning: missing braces around initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:377: warning: (near initialization for ‘uverbs_flow_action_esp_replay[0]..’) drivers/infiniband/core/uverbs_std_types_flow_action.c:379: error: unknown field ‘len’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:379: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:379: warning: (near initialization for ‘uverbs_flow_action_esp_replay[0].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:383: error: unknown field ‘ptr’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:384: error: unknown field ‘type’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:385: error: unknown field ‘min_len’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:385: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:385: warning: (near initialization for ‘uverbs_flow_action_esp_replay[1].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:385: error: unknown field ‘len’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:385: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:385: warning: (near initialization for ‘uverbs_flow_action_esp_replay[1].’) drivers/infiniband/core/uverbs_std_types_flow_action.c:386: error: unknown field ‘flags’ specified in initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:386: warning: excess elements in union initializer drivers/infiniband/core/uverbs_std_types_flow_action.c:386: warning: (near initialization for ‘uverbs_flow_action_esp_replay[1].’) Add the missing braces to fix this. Fixes: 2eb9beaee5d7 ("IB/uverbs: Add flow_action create and destroy verbs") Fixes: 7d12f8d5a164 ("IB/uverbs: Add modify ESP flow_action") Signed-off-by: Geert Uytterhoeven Signed-off-by: Jason Gunthorpe --- .../infiniband/core/uverbs_std_types_flow_action.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index cbcec3da12f6..b4f016dfa23d 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -363,28 +363,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - }, + } }, }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, /* No need to specify any data */ .len = 0, - } + } } }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } + } } }, }; From 8b77586bd8fe600d97f922c79f7222c46f37c118 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Apr 2018 21:00:01 -0600 Subject: [PATCH 05/52] RDMA/ucma: Check for a cm_id->device in all user calls that need it This is done by auditing all callers of ucma_get_ctx and switching the ones that unconditionally touch ->device to ucma_get_ctx_dev. This covers a little less than half of the call sites. The 11 remaining call sites to ucma_get_ctx() were manually audited. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 36 ++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 680b3536ad3e..0efa0e2a3cd3 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -159,6 +159,23 @@ static void ucma_put_ctx(struct ucma_context *ctx) complete(&ctx->comp); } +/* + * Same as ucm_get_ctx but requires that ->cm_id->device is valid, eg that the + * CM_ID is bound. + */ +static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id) +{ + struct ucma_context *ctx = ucma_get_ctx(file, id); + + if (IS_ERR(ctx)) + return ctx; + if (!ctx->cm_id->device) { + ucma_put_ctx(ctx); + return ERR_PTR(-EINVAL); + } + return ctx; +} + static void ucma_close_event_id(struct work_struct *work) { struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); @@ -734,7 +751,7 @@ static ssize_t ucma_resolve_route(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1050,7 +1067,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, if (!cmd.conn_param.valid) return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1092,7 +1109,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1120,7 +1137,7 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1139,7 +1156,7 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1167,15 +1184,10 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, if (cmd.qp_state > IB_QPS_ERR) return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (!ctx->cm_id->device) { - ret = -EINVAL; - goto out; - } - resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; @@ -1384,7 +1396,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, else return -EINVAL; - ctx = ucma_get_ctx(file, cmd->id); + ctx = ucma_get_ctx_dev(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); From 09abfe7b5b2f442a85f4c4d59ecf582ad76088d7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 19 Apr 2018 08:28:11 -0700 Subject: [PATCH 06/52] RDMA/ucma: Allow resolving address w/o specifying source address The RDMA CM will select a source device and address by consulting the routing table if no source address is passed into rdma_resolve_address(). Userspace will ask for this by passing an all-zero source address in the RESOLVE_IP command. Unfortunately the new check for non-zero address size rejects this with EINVAL, which breaks valid userspace applications. Fix this by explicitly allowing a zero address family for the source. Fixes: 2975d5de6428 ("RDMA/ucma: Check AF family prior resolving address") Cc: Signed-off-by: Roland Dreier Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 0efa0e2a3cd3..eab43b17e9cf 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -700,7 +700,7 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - if (!rdma_addr_size_in6(&cmd.src_addr) || + if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || !rdma_addr_size_in6(&cmd.dst_addr)) return -EINVAL; From d50e14abe2d0024aa527b89c7990147df5d531a5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 20 Apr 2018 09:49:10 -0600 Subject: [PATCH 07/52] uapi: Fix SPDX tags for files referring to the 'OpenIB.org' license Based on discussion with Kate Stewart this license is not a BSD-2-Clause, but is now formally identified as Linux-OpenIB by SPDX. The key difference between the licenses is in the 'warranty' paragraph. if_infiniband.h refers to the 'OpenIB.org' license, but does not include the text, instead it links to an obsolete web site that contains a license that matches the BSD-2-Clause SPX. There is no 'three clause' version of the OpenIB.org license. Signed-off-by: Jason Gunthorpe Acked-by: David S. Miller Signed-off-by: Doug Ledford --- include/uapi/linux/if_infiniband.h | 2 +- include/uapi/linux/rds.h | 2 +- include/uapi/linux/tls.h | 2 +- include/uapi/rdma/cxgb3-abi.h | 2 +- include/uapi/rdma/cxgb4-abi.h | 2 +- include/uapi/rdma/hns-abi.h | 2 +- include/uapi/rdma/ib_user_cm.h | 2 +- include/uapi/rdma/ib_user_ioctl_verbs.h | 2 +- include/uapi/rdma/ib_user_mad.h | 2 +- include/uapi/rdma/ib_user_sa.h | 2 +- include/uapi/rdma/ib_user_verbs.h | 2 +- include/uapi/rdma/mlx4-abi.h | 2 +- include/uapi/rdma/mlx5-abi.h | 2 +- include/uapi/rdma/mthca-abi.h | 2 +- include/uapi/rdma/nes-abi.h | 2 +- include/uapi/rdma/qedr-abi.h | 2 +- include/uapi/rdma/rdma_user_cm.h | 2 +- include/uapi/rdma/rdma_user_ioctl.h | 2 +- include/uapi/rdma/rdma_user_rxe.h | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/if_infiniband.h b/include/uapi/linux/if_infiniband.h index 050b92dcf8cf..0fc33bf30e45 100644 --- a/include/uapi/linux/if_infiniband.h +++ b/include/uapi/linux/if_infiniband.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index a66b213de3d7..20c6bd0b0007 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2008 Oracle. All rights reserved. * diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index c6633e97eca4..ff02287495ac 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h index 9acb4b7a6246..85aed672f43e 100644 --- a/include/uapi/rdma/cxgb3-abi.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2006 Chelsio, Inc. All rights reserved. * diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index 1fefd0140c26..a159ba8dcf8f 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. * diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 7092c8de4bd8..78613b609fa8 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2016 Hisilicon Limited. * diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h index 4a8f9562f7cd..e2709bb8cb18 100644 --- a/include/uapi/rdma/ib_user_cm.h +++ b/include/uapi/rdma/ib_user_cm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 04e46ea517d3..625545d862d7 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved. * diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h index ef92118dad97..90c0cf228020 100644 --- a/include/uapi/rdma/ib_user_mad.h +++ b/include/uapi/rdma/ib_user_mad.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. diff --git a/include/uapi/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h index 0d2607f0cd20..435155d6e1c6 100644 --- a/include/uapi/rdma/ib_user_sa.h +++ b/include/uapi/rdma/ib_user_sa.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2005 Intel Corporation. All rights reserved. * diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 9be07394fdbe..6aeb03315b0b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 04f64bc4045f..f74557528175 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index cb4a02c4a1ce..fdaf00e20649 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h index ac756cd9e807..91b12e1a6f43 100644 --- a/include/uapi/rdma/mthca-abi.h +++ b/include/uapi/rdma/mthca-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. diff --git a/include/uapi/rdma/nes-abi.h b/include/uapi/rdma/nes-abi.h index 35bfd4015d07..f80495baa969 100644 --- a/include/uapi/rdma/nes-abi.h +++ b/include/uapi/rdma/nes-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 8ba098900e9a..24c658b3c790 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* QLogic qedr NIC Driver * Copyright (c) 2015-2016 QLogic Corporation * diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index e1269024af47..0d1e78ebad05 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index d223f4164a0f..d92d2721b28c 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved. * diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index 1f8a9e7daea4..44ef6a3b7afc 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * From a66ed149b0da5b97fa7486ee2bcd00f3f9df5442 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 23 Apr 2018 16:58:17 +0300 Subject: [PATCH 08/52] IB/core: Don't allow default GID addition at non reseved slots Default GIDs are marked reserved at the start of the GID table at index 0 and 1 by gid_table_reserve_default(). Currently when default GID is requested, it can still allocates an empty slot which was not marked as RESERVED for default GID, which is incorrect. At least in current code flow of roce_gid_mgmt.c, in theory we can still request to allocate more than one/two default GIDs depending on how upper devices are setup. Therefore, it is better for cache layer to only allow our reserved slots to be used by default GID allocation requests. Fixes: 598ff6bae689 ("IB/core: Refactor GID modify code for RoCE") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e337b08de2ff..92ec845f9c40 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -291,14 +291,18 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID) { - /* Found an invalid (free) entry; allocate it */ - if (data->props & GID_TABLE_ENTRY_DEFAULT) { - if (default_gid) - empty = curr_index; - } else { - empty = curr_index; - } + if (data->props & GID_TABLE_ENTRY_INVALID && + (default_gid == + !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + /* + * Found an invalid (free) entry; allocate it. + * If default GID is requested, then our + * found slot must be one of the DEFAULT + * reserved slots or we fail. + * This ensures that only DEFAULT reserved + * slots are used for default property GIDs. + */ + empty = curr_index; } } From 22c01ee4b8a8c000c490dfc479e175404e64167b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 23 Apr 2018 16:58:18 +0300 Subject: [PATCH 09/52] IB/core: Fix to avoid deleting IPv6 look alike default GIDs When IPv6 link local address is removed, if it matches with the default GID, default GID(s)s gets removed which may not be a desired behavior. This behavior is introduced by refactor work in Fixes tag. When IPv6 link address is removed, removing its equivalent RoCEv2 GID which exactly matches with default RoCEv2 GID, is right thing to do. However achieving it correctly requires lot more changes, likely in roce_gid_mgmt.c and core/cache.c. This should be done as independent patch. Therefore, this patch preserves behavior of not deleteing default GIDs. This is done by providing explicit hint to consider default GID property using mask and default_gid; similar to add_gid(). Fixes: 598ff6bae68 ("IB/core: Refactor GID modify code for RoCE") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 92ec845f9c40..8e6a4f05f3ea 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -424,8 +424,10 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, return ret; } -int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, - union ib_gid *gid, struct ib_gid_attr *attr) +static int +_ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr, + bool default_gid) { struct ib_gid_table *table; int ret = 0; @@ -435,9 +437,10 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); - ix = find_gid(table, gid, attr, false, + ix = find_gid(table, gid, attr, default_gid, GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT | GID_ATTR_FIND_MASK_NETDEV, NULL); if (ix < 0) { @@ -456,6 +459,12 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, return ret; } +int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr) +{ + return _ib_cache_gid_del(ib_dev, port, gid, attr, false); +} + int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, struct net_device *ndev) { @@ -756,7 +765,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, __ib_cache_gid_add(ib_dev, port, &gid, &gid_attr, mask, true); } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) { - ib_cache_gid_del(ib_dev, port, &gid, &gid_attr); + _ib_cache_gid_del(ib_dev, port, &gid, &gid_attr, true); } } } From dc5640f294e4ff6b89047cb4a0dfa931d5f0cd1f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 23 Apr 2018 16:58:19 +0300 Subject: [PATCH 10/52] IB/core: Fix deleting default GIDs when changing mac adddress Before [1], When MAC address of the netdevice is changed, default GID is supposed to get deleted and added back which affects the node and/or port GUID in below sequence. netdevice_event() -> NETDEV_CHANGEADDR default_del_cmd() del_netdev_default_ips() bond_delete_netdev_default_gids() ib_cache_gid_set_default_gid() ib_cache_gid_del() add_cmd() [..] However, ib_cache_gid_del() was not getting invoked in non bonding scenarios because event_ndev and rdma_ndev are same. Therefore, fix such condition to ignore checking upper device when event ndev and rdma_dev are same; similar to bond_set_netdev_default_gids(). Which this fix ib_cache_gid_del() is invoked correctly; however ib_cache_gid_del() doesn't find the default GID for deletion because find_gid() was given default_gid = false with GID_ATTR_FIND_MASK_DEFAULT set. But it was getting overwritten by ib_cache_gid_set_default_gid() later on as part of add_cmd(). Therefore, mac address change used to work for default GID. With refactor series [1], this incorrect behavior is detected. Therefore, when deleting default GID, set default_gid and set MASK flag. when deleting IP based GID, clear default_gid and set MASK flag. [1] https://patchwork.kernel.org/patch/10319151/ Fixes: 238fdf48f2b5 ("IB/core: Add RoCE table bonding support") Fixes: 598ff6bae689 ("IB/core: Refactor GID modify code for RoCE") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 28 +++++++++++++------------ drivers/infiniband/core/roce_gid_mgmt.c | 28 +++++++++++++------------ 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 8e6a4f05f3ea..fb2d347f760f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -427,7 +427,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, static int _ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr, - bool default_gid) + unsigned long mask, bool default_gid) { struct ib_gid_table *table; int ret = 0; @@ -437,12 +437,7 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); - ix = find_gid(table, gid, attr, default_gid, - GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_DEFAULT | - GID_ATTR_FIND_MASK_NETDEV, - NULL); + ix = find_gid(table, gid, attr, default_gid, mask, NULL); if (ix < 0) { ret = -EINVAL; goto out_unlock; @@ -462,7 +457,12 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port, int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { - return _ib_cache_gid_del(ib_dev, port, gid, attr, false); + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT | + GID_ATTR_FIND_MASK_NETDEV; + + return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false); } int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, @@ -741,7 +741,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode) { - union ib_gid gid; + union ib_gid gid = { }; struct ib_gid_attr gid_attr; struct ib_gid_table *table; unsigned int gid_type; @@ -749,7 +749,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; - make_default_gid(ndev, &gid); + mask = GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT | + GID_ATTR_FIND_MASK_NETDEV; memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; @@ -760,12 +762,12 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, gid_attr.gid_type = gid_type; if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { - mask = GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_DEFAULT; + make_default_gid(ndev, &gid); __ib_cache_gid_add(ib_dev, port, &gid, &gid_attr, mask, true); } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) { - _ib_cache_gid_del(ib_dev, port, &gid, &gid_attr, true); + _ib_cache_gid_del(ib_dev, port, &gid, + &gid_attr, mask, true); } } } diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index cc2966380c0c..c0e4fd55e2cc 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -255,6 +255,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, struct net_device *rdma_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); + unsigned long gid_type_mask; if (!rdma_ndev) return; @@ -264,21 +265,22 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, rcu_read_lock(); - if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) && - is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == - BONDING_SLAVE_STATE_INACTIVE) { - unsigned long gid_type_mask; - - rcu_read_unlock(); - - gid_type_mask = roce_gid_type_mask_support(ib_dev, port); - - ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, - gid_type_mask, - IB_CACHE_GID_DEFAULT_MODE_DELETE); - } else { + if (((rdma_ndev != event_ndev && + !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || + is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) + == + BONDING_SLAVE_STATE_INACTIVE)) { rcu_read_unlock(); + return; } + + rcu_read_unlock(); + + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + + ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, + gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_DELETE); } static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, From b4bd701ac469075d94ed9699a28755f2862252b9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 23 Apr 2018 17:01:52 +0300 Subject: [PATCH 11/52] RDMA/mlx5: Fix multiple NULL-ptr deref errors in rereg_mr flow Failure in rereg MR releases UMEM but leaves the MR to be destroyed by the user. As a result the following scenario may happen: "create MR -> rereg MR with failure -> call to rereg MR again" and hit "NULL-ptr deref or user memory access" errors. Ensure that rereg MR is only performed on a non-dead MR. Cc: syzkaller Cc: # 4.5 Fixes: 395a8e4c32ea ("IB/mlx5: Refactoring register MR code") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1520a2f20f98..90a9c461cedc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -866,25 +866,28 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *u; int err; - *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - err = PTR_ERR_OR_ZERO(*umem); + *umem = NULL; + + u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + err = PTR_ERR_OR_ZERO(u); if (err) { - *umem = NULL; - mlx5_ib_err(dev, "umem get failed (%d)\n", err); + mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); return err; } - mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(*umem); + ib_umem_release(u); return -EINVAL; } + *umem = u; + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); @@ -1458,13 +1461,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; - u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; - u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; int page_shift = 0; int upd_flags = 0; int npages = 0; int ncont = 0; int order = 0; + u64 addr, len; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", @@ -1472,6 +1474,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + if (!mr->umem) + return -EINVAL; + + if (flags & IB_MR_REREG_TRANS) { + addr = virt_addr; + len = length; + } else { + addr = mr->umem->address; + len = mr->umem->length; + } + if (flags != IB_MR_REREG_PD) { /* * Replace umem. This needs to be done whether or not UMR is @@ -1479,6 +1492,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); + mr->umem = NULL; err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err) From 002bf2282b2d7318e444dca9ffcb994afc5d5f15 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 23 Apr 2018 17:01:53 +0300 Subject: [PATCH 12/52] RDMA/mlx5: Protect from shift operand overflow Ensure that user didn't supply values too large that can cause overflow. UBSAN: Undefined behaviour in drivers/infiniband/hw/mlx5/qp.c:263:23 shift exponent -2147483648 is negative CPU: 0 PID: 292 Comm: syzkaller612609 Not tainted 4.16.0-rc1+ #131 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ubsan_epilogue+0xe/0x81 set_rq_size+0x7c2/0xa90 create_qp_common+0xc18/0x43c0 mlx5_ib_create_qp+0x379/0x1ca0 create_qp.isra.5+0xc94/0x2260 ib_uverbs_create_qp+0x21b/0x2a0 ib_uverbs_write+0xc2c/0x1010 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 do_syscall_64+0x1aa/0x740 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x433569 RSP: 002b:00007ffc6e62f448 EFLAGS: 00000217 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00000000004002f8 RCX: 0000000000433569 RDX: 0000000000000070 RSI: 00000000200042c0 RDI: 0000000000000003 RBP: 00000000006d5018 R08: 00000000004002f8 R09: 00000000004002f8 R10: 00000000004002f8 R11: 0000000000000217 R12: 0000000000000000 R13: 000000000040c9f0 R14: 000000000040ca80 R15: 0000000000000006 Cc: # 3.10 Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Cc: syzkaller Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7ed4b70f6447..e6219a5f1f37 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -259,7 +259,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; + if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift)) + return -EINVAL; qp->rq.wqe_shift = ucmd->rq_wqe_shift; + if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig) + return -EINVAL; qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { From 4f32ac2e452c2180cd2df581cbadac183e27ecd0 Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Mon, 23 Apr 2018 17:01:54 +0300 Subject: [PATCH 13/52] IB/mlx5: Use unlimited rate when static rate is not supported Before the change, if the user passed a static rate value different than zero and the FW doesn't support static rate, it would end up configuring rate of 2.5 GBps. Fix this by using rate 0; unlimited, in cases where FW doesn't support static rate configuration. Cc: # 3.10 Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Reviewed-by: Majd Dibbiny Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e6219a5f1f37..87b7c1be2a11 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2455,18 +2455,18 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - if (rate == IB_RATE_PORT_CURRENT) { + if (rate == IB_RATE_PORT_CURRENT) return 0; - } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { - return -EINVAL; - } else { - while (rate != IB_RATE_2_5_GBPS && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) - --rate; - } - return rate + MLX5_STAT_RATE_OFFSET; + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) + return -EINVAL; + + while (rate != IB_RATE_PORT_CURRENT && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + --rate; + + return rate ? rate + MLX5_STAT_RATE_OFFSET : rate; } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, From 84a6a7a99c0ac2f67366288c0625c9fba176b264 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 23 Apr 2018 17:01:55 +0300 Subject: [PATCH 14/52] IB/mlx5: Fix represent correct netdevice in dual port RoCE In commit bcf87f1dbbec ("IB/mlx5: Listen to netdev register/unresiter events in switchdev mode") incorrectly mapped primary device's netdevice to 2nd port netdevice. It always represented primary port's netdevice for 2nd port netdevice when ib representors were not used. This results into failing to process CM request arriving on 2nd port due to incorrect mapping of netdevice. This fix corrects it by considering the right mdev. Cc: # 4.16 Fixes: bcf87f1dbbec ("IB/mlx5: Listen to netdev register/unresiter events in switchdev mode") Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6a749c02b14c..78a4b2797057 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -179,7 +179,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if (rep_ndev == ndev) roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) { + } else if (ndev->dev.parent == &mdev->pdev->dev) { roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; } From 444261ca6ff201fa03de97a5041237e67a9d8d31 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 23 Apr 2018 17:01:56 +0300 Subject: [PATCH 15/52] RDMA/mlx5: Properly check return value of mlx5_get_uars_page Starting from commit 72f36be06138 ("net/mlx5: Fix mlx5_get_uars_page to return error code") the mlx5_get_uars_page() call returns error in case of failure, but it was mistakenly overlooked in the merge commit. Fixes: e7996a9a77fc ("Merge tag v4.15 of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git") Reported-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 78a4b2797057..a42c6b1cdb5a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5426,9 +5426,7 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev) { dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); - if (!dev->mdev->priv.uar) - return -ENOMEM; - return 0; + return PTR_ERR_OR_ZERO(dev->mdev->priv.uar); } static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) From 3af7a156bdc356946098e13180be66b6420619bf Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:30 -0700 Subject: [PATCH 16/52] nvme: depend on INFINIBAND_ADDR_TRANS NVME_RDMA code depends on INFINIBAND_ADDR_TRANS provided symbols. So declare the kconfig dependency. This is necessary to allow for enabling INFINIBAND without INFINIBAND_ADDR_TRANS. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Signed-off-by: Doug Ledford --- drivers/nvme/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b979cf3bce65..88a8b5916624 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -27,7 +27,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" - depends on INFINIBAND && BLOCK + depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL From d6fc6a22fc7d3df987666725496ed5dd2dd30f23 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:31 -0700 Subject: [PATCH 17/52] nvmet-rdma: depend on INFINIBAND_ADDR_TRANS NVME_TARGET_RDMA code depends on INFINIBAND_ADDR_TRANS provided symbols. So declare the kconfig dependency. This is necessary to allow for enabling INFINIBAND without INFINIBAND_ADDR_TRANS. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Signed-off-by: Doug Ledford --- drivers/nvme/target/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 5f4f8b16685f..3c7b61ddb0d1 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -27,7 +27,7 @@ config NVME_TARGET_LOOP config NVME_TARGET_RDMA tristate "NVMe over Fabrics RDMA target support" - depends on INFINIBAND + depends on INFINIBAND && INFINIBAND_ADDR_TRANS depends on NVME_TARGET select SGL_ALLOC help From 346a47b65d10e450778ec0d21e4a9409f25daaa8 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:32 -0700 Subject: [PATCH 18/52] ib_srpt: depend on INFINIBAND_ADDR_TRANS INFINIBAND_SRPT code depends on INFINIBAND_ADDR_TRANS provided symbols. So declare the kconfig dependency. This is necessary to allow for enabling INFINIBAND without INFINIBAND_ADDR_TRANS. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig index 31ee83d528d9..fb8b7182f05e 100644 --- a/drivers/infiniband/ulp/srpt/Kconfig +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRPT tristate "InfiniBand SCSI RDMA Protocol target support" - depends on INFINIBAND && TARGET_CORE + depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE ---help--- Support for the SCSI RDMA Protocol (SRP) Target driver. The From 3c6b03d18df657d677808d7090b4d03bc6026efd Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:33 -0700 Subject: [PATCH 19/52] cifs: smbd: depend on INFINIBAND_ADDR_TRANS CIFS_SMB_DIRECT code depends on INFINIBAND_ADDR_TRANS provided symbols. So declare the kconfig dependency. This is necessary to allow for enabling INFINIBAND without INFINIBAND_ADDR_TRANS. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Reviewed-by: Long Li Signed-off-by: Doug Ledford --- fs/cifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 741749a98614..5f132d59dfc2 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -197,7 +197,7 @@ config CIFS_SMB311 config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" - depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y + depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y help Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, From 5a3bc8a4abbd2d553430218d3a320400dce811b7 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:34 -0700 Subject: [PATCH 20/52] ib_srp: depend on INFINIBAND_ADDR_TRANS INFINIBAND_SRP code depends on INFINIBAND_ADDR_TRANS provided symbols. So declare the kconfig dependency. This is necessary to allow for enabling INFINIBAND without INFINIBAND_ADDR_TRANS. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig index c74ee9633041..99db8fe5173a 100644 --- a/drivers/infiniband/ulp/srp/Kconfig +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRP tristate "InfiniBand SCSI RDMA Protocol" - depends on SCSI + depends on SCSI && INFINIBAND_ADDR_TRANS select SCSI_SRP_ATTRS ---help--- Support for the SCSI RDMA Protocol over InfiniBand. This From f7cb7b85be55a4906b4b4b30596db1043dae6335 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 26 Apr 2018 11:19:35 -0700 Subject: [PATCH 21/52] IB: make INFINIBAND_ADDR_TRANS configurable Allow INFINIBAND without INFINIBAND_ADDR_TRANS because fuzzing has been finding fair number of CM bugs. So provide option to disable it. Signed-off-by: Greg Thelen Cc: Tarick Bedeir Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index ee270e065ba9..2a972ed6851b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -61,9 +61,12 @@ config INFINIBAND_ON_DEMAND_PAGING pages on demand instead. config INFINIBAND_ADDR_TRANS - bool + bool "RDMA/CM" depends on INFINIBAND default y + ---help--- + Support for RDMA communication manager (CM). + This allows for a generic connection abstraction over RDMA. config INFINIBAND_ADDR_TRANS_CONFIGFS bool From 26bff1bd74a4f7417509a83295614e9dab995b2a Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 23 Apr 2018 21:42:37 +0530 Subject: [PATCH 22/52] RDMA/cxgb4: release hw resources on device removal The c4iw_rdev_close() logic was not releasing all the hw resources (PBL and RQT memory) during the device removal event (driver unload / system reboot). This can cause panic in gen_pool_destroy(). The module remove function will wait for all the hw resources to be released during the device removal event. Fixes c12a67fe(iw_cxgb4: free EQ queue memory on last deref) Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Cc: stable@vger.kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 9 ++++++++- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 4 ++++ drivers/infiniband/hw/cxgb4/resource.c | 26 ++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index feeb8ee6f4a2..44161ca4d2a8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -875,6 +875,11 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->db_off = 0; + init_completion(&rdev->rqt_compl); + init_completion(&rdev->pbl_compl); + kref_init(&rdev->rqt_kref); + kref_init(&rdev->pbl_kref); + return 0; err_free_status_page_and_wr_log: if (c4iw_wr_log && rdev->wr_log) @@ -893,13 +898,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) static void c4iw_rdev_close(struct c4iw_rdev *rdev) { - destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); c4iw_release_dev_ucontext(rdev, &rdev->uctx); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); + wait_for_completion(&rdev->pbl_compl); + wait_for_completion(&rdev->rqt_compl); c4iw_ocqp_pool_destroy(rdev); + destroy_workqueue(rdev->free_workq); c4iw_destroy_resource(&rdev->resource); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index cc929002c05e..a60def23e9ef 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -185,6 +185,10 @@ struct c4iw_rdev { struct wr_log_entry *wr_log; int wr_log_size; struct workqueue_struct *free_workq; + struct completion rqt_compl; + struct completion pbl_compl; + struct kref rqt_kref; + struct kref pbl_kref; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 3cf25997ed2b..0ef25ae05e6f 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -260,12 +260,22 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); if (rdev->stats.pbl.cur > rdev->stats.pbl.max) rdev->stats.pbl.max = rdev->stats.pbl.cur; + kref_get(&rdev->pbl_kref); } else rdev->stats.pbl.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_pblpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, pbl_kref); + gen_pool_destroy(rdev->pbl_pool); + complete(&rdev->pbl_compl); +} + void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size); @@ -273,6 +283,7 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); + kref_put(&rdev->pbl_kref, destroy_pblpool); } int c4iw_pblpool_create(struct c4iw_rdev *rdev) @@ -310,7 +321,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev) void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->pbl_pool); + kref_put(&rdev->pbl_kref, destroy_pblpool); } /* @@ -331,12 +342,22 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); if (rdev->stats.rqt.cur > rdev->stats.rqt.max) rdev->stats.rqt.max = rdev->stats.rqt.cur; + kref_get(&rdev->rqt_kref); } else rdev->stats.rqt.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_rqtpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, rqt_kref); + gen_pool_destroy(rdev->rqt_pool); + complete(&rdev->rqt_compl); +} + void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size << 6); @@ -344,6 +365,7 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } int c4iw_rqtpool_create(struct c4iw_rdev *rdev) @@ -380,7 +402,7 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev) void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->rqt_pool); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } /* From f604db645a66b7ba4f21c426fe73253928dada41 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 24 Apr 2018 08:15:20 +0000 Subject: [PATCH 23/52] IB/uverbs: Fix validating mandatory attributes Previously, if a method contained mandatory attributes in a namespace that wasn't given by the user, these attributes weren't validated. Fixing this by iterating over all specification namespaces. Fixes: fac9658cabb9 ("IB/core: Add new ioctl interface") Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8c93970dc8f1..8d32c4ae368c 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -234,6 +234,15 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met return -EINVAL; } + for (; i < method_spec->num_buckets; i++) { + struct uverbs_attr_spec_hash *attr_spec_bucket = + method_spec->attr_buckets[i]; + + if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, + attr_spec_bucket->num_attrs)) + return -EINVAL; + } + return 0; } From 2918c1a900252b4a0c730715ec205437c7daf79d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 24 Apr 2018 20:13:45 +0300 Subject: [PATCH 24/52] RDMA/cma: Fix use after destroy access to net namespace for IPoIB There are few issues with validation of netdevice and listen id lookup for IB (IPoIB) while processing incoming CM request as below. 1. While performing lookup of bind_list in cma_ps_find(), net namespace of the netdevice can get deleted in cma_exit_net(), resulting in use after free access of idr and/or net namespace structures. This lookup occurs from the workqueue context (and not userspace context where net namespace is always valid). CPU0 CPU1 ==== ==== bind_list = cma_ps_find(); move netdevice to new namespace delete net namespace cma_exit_net() idr_destroy(idr); [..] cma_find_listener(bind_list, ..); 2. While netdevice is validated for IP address in given net namespace, netdevice's net namespace and/or ifindex can change in cma_get_net_dev() and cma_match_net_dev(). Above issues are overcome by using rcu lock along with netdevice UP/DOWN state as described below. When a net namespace is getting deleted, netdevice is closed and shutdown before moving it back to init_net namespace. change_net_namespace() synchronizes with any existing use of netdevice before changing the netdev properties such as net or ifindex. Once netdevice IFF_UP flags is cleared, such fields are not guaranteed to be valid. Therefore, rcu lock along with netdevice state check ensures that, while route lookup and cm_id lookup is in progress, netdevice of interest won't migrate to any other net namespace. This ensures that associated net namespace of netdevice won't get deleted while rcu lock is held for netdevice which is in IFF_UP state. Fixes: fa20105e09e9 ("IB/cma: Add support for network namespaces") Fixes: 4be74b42a6d0 ("IB/cma: Separate port allocation to network namespaces") Fixes: f887f2ac87c2 ("IB/cma: Validate routing of incoming requests") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 53 ++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 51a641002e10..8364223422d0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -382,6 +382,8 @@ struct cma_hdr { #define CMA_VERSION 0x00 struct cma_req_info { + struct sockaddr_storage listen_addr_storage; + struct sockaddr_storage src_addr_storage; struct ib_device *device; int port; union ib_gid local_gid; @@ -1340,11 +1342,11 @@ static bool validate_net_dev(struct net_device *net_dev, } static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, - const struct cma_req_info *req) + struct cma_req_info *req) { - struct sockaddr_storage listen_addr_storage, src_addr_storage; - struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, - *src_addr = (struct sockaddr *)&src_addr_storage; + struct sockaddr *listen_addr = + (struct sockaddr *)&req->listen_addr_storage; + struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage; struct net_device *net_dev; const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; int err; @@ -1359,11 +1361,6 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, if (!net_dev) return ERR_PTR(-ENODEV); - if (!validate_net_dev(net_dev, listen_addr, src_addr)) { - dev_put(net_dev); - return ERR_PTR(-EHOSTUNREACH); - } - return net_dev; } @@ -1490,15 +1487,51 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, } } + /* + * Net namespace might be getting deleted while route lookup, + * cm_id lookup is in progress. Therefore, perform netdevice + * validation, cm_id lookup under rcu lock. + * RCU lock along with netdevice state check, synchronizes with + * netdevice migrating to different net namespace and also avoids + * case where net namespace doesn't get deleted while lookup is in + * progress. + * If the device state is not IFF_UP, its properties such as ifindex + * and nd_net cannot be trusted to remain valid without rcu lock. + * net/core/dev.c change_net_namespace() ensures to synchronize with + * ongoing operations on net device after device is closed using + * synchronize_net(). + */ + rcu_read_lock(); + if (*net_dev) { + /* + * If netdevice is down, it is likely that it is administratively + * down or it might be migrating to different namespace. + * In that case avoid further processing, as the net namespace + * or ifindex may change. + */ + if (((*net_dev)->flags & IFF_UP) == 0) { + id_priv = ERR_PTR(-EHOSTUNREACH); + goto err; + } + + if (!validate_net_dev(*net_dev, + (struct sockaddr *)&req.listen_addr_storage, + (struct sockaddr *)&req.src_addr_storage)) { + id_priv = ERR_PTR(-EHOSTUNREACH); + goto err; + } + } + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); +err: + rcu_read_unlock(); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; } - return id_priv; } From c192a12ce82ca0a951bd5449a21d53c5f3f88697 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:15:45 +0200 Subject: [PATCH 25/52] IB/nes: fix nes_netdev_start_xmit()'s return type The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Fix this by returning 'netdev_tx_t' in this driver too. Signed-off-by: Luc Van Oostenryck Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 0a75164cedea..007d5e8a0121 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -461,7 +461,7 @@ static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev) /** * nes_netdev_start_xmit */ -static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; From 47a3968a985e1686f41a55b4099fd1b5e16a5969 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:15:47 +0200 Subject: [PATCH 26/52] IB/ipoib: fix ipoib_start_xmit()'s return type The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Fix this by returning 'netdev_tx_t' in this driver too. Signed-off-by: Luc Van Oostenryck Reviewed-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 161ba8c76285..cf291f90b58f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1094,7 +1094,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_unlock_irqrestore(&priv->lock, flags); } -static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rdma_netdev *rn = netdev_priv(dev); From f96416cea7bce9afe619c15e87fced70f93f9098 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 25 Apr 2018 17:24:04 +0100 Subject: [PATCH 27/52] RDMA/iwpm: fix memory leak on map_info In the cases where iwpm_hash_bucket is NULL and where function get_mapinfo_hash_bucket returns NULL then the map_info is never added to hash_bucket_head and hence there is a leak of map_info. Fix this by nullifying hash_bucket_head and if that is null we know that that map_info was not added to hash_bucket_head and hence map_info should be free'd. Detected by CoverityScan, CID#1222481 ("Resource Leak") Fixes: 30dc5e63d6a5 ("RDMA/core: Add support for iWARP Port Mapper user space service") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 9821ae900f6d..da12da1c36f6 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -114,7 +114,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_sockaddr, u8 nl_client) { - struct hlist_head *hash_bucket_head; + struct hlist_head *hash_bucket_head = NULL; struct iwpm_mapping_info *map_info; unsigned long flags; int ret = -EINVAL; @@ -142,6 +142,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); + + if (!hash_bucket_head) + kfree(map_info); return ret; } From 2da36d44a9d54a2c6e1f8da1f7ccc26b0bc6cfec Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 26 Apr 2018 11:52:39 +0800 Subject: [PATCH 28/52] IB/rxe: add RXE_START_MASK for rxe_opcode IB_OPCODE_RC_SEND_ONLY_INV w/o RXE_START_MASK, the last_psn of IB_OPCODE_RC_SEND_ONLY_INV will not be updated in update_wqe_psn, and the corresponding wqe will not be acked in rxe_completer due to its last_psn is zero. Finally, the other wqe will also not be able to be acked, because the wqe of IB_OPCODE_RC_SEND_ONLY_INV with last_psn 0 is still there. This causes large amount of io timeout when nvmeof is over rxe. Add RXE_START_MASK for IB_OPCODE_RC_SEND_ONLY_INV to fix this. Signed-off-by: Jianchao Wang Reviewed-by: Zhu Yanjun Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_opcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 61927c165b59..4cf11063e0b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -390,7 +390,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { .name = "IB_OPCODE_RC_SEND_ONLY_INV", .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_END_MASK, + | RXE_END_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, From 9fd4350ba8953804f05215999e11a6cfb7b41f2b Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 26 Apr 2018 00:41:10 -0400 Subject: [PATCH 29/52] IB/rxe: avoid double kfree_skb When skb is sent, it will pass the following functions in soft roce. rxe_send [rdma_rxe] ip_local_out __ip_local_out ip_output ip_finish_output ip_finish_output2 dev_queue_xmit __dev_queue_xmit dev_hard_start_xmit In the above functions, if error occurs in the above functions or iptables rules drop skb after ip_local_out, kfree_skb will be called. So it is not necessary to call kfree_skb in soft roce module again. Or else crash will occur. The steps to reproduce: server client --------- --------- |1.1.1.1|<----rxe-channel--->|1.1.1.2| --------- --------- On server: rping -s -a 1.1.1.1 -v -C 10000 -S 512 On client: rping -c -a 1.1.1.1 -v -C 10000 -S 512 The kernel configs CONFIG_DEBUG_KMEMLEAK and CONFIG_DEBUG_OBJECTS are enabled on both server and client. When rping runs, run the following command in server: iptables -I OUTPUT -p udp --dport 4791 -j DROP Without this patch, crash will occur. CC: Srinivas Eeda CC: Junxiao Bi Signed-off-by: Zhu Yanjun Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 1 - drivers/infiniband/sw/rxe/rxe_resp.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 7bdaf71b8221..785199990457 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -728,7 +728,6 @@ int rxe_requester(void *arg) rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { - kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index a65c9969f7fc..955ff3b6da9c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -742,7 +742,6 @@ static enum resp_states read_reply(struct rxe_qp *qp, err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); if (err) { pr_err("Failed sending RDMA reply.\n"); - kfree_skb(skb); return RESPST_ERR_RNR; } @@ -954,10 +953,8 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, } err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); - if (err) { + if (err) pr_err_ratelimited("Failed sending ack\n"); - kfree_skb(skb); - } err1: return err; @@ -1141,7 +1138,6 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); rxe_drop_ref(qp); - kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; } From 215a8c09e5e2aa6ae1fbcef87f8f27d65d5d1ca4 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:15 +0800 Subject: [PATCH 30/52] RDMA/hns: Bugfix for init hem table During init hem table, type should be used instead of table->type which is finally initializaed with type. Signed-off-by: Lijun Ou Signed-off-by: Yixian Liu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0eeabfbee192..0d8c113083ad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -912,7 +912,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; bt_chunk_num = bt_chunk_size / 8; - if (table->type >= HEM_TYPE_MTT) + if (type >= HEM_TYPE_MTT) num_bt_l0 = bt_chunk_num; table->hem = kcalloc(num_hem, sizeof(*table->hem), @@ -920,7 +920,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, if (!table->hem) goto err_kcalloc_hem_buf; - if (check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_3(type, hop_num)) { unsigned long num_bt_l1; num_bt_l1 = (num_hem + bt_chunk_num - 1) / @@ -939,8 +939,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, goto err_kcalloc_l1_dma; } - if (check_whether_bt_num_2(table->type, hop_num) || - check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_2(type, hop_num) || + check_whether_bt_num_3(type, hop_num)) { table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0), GFP_KERNEL); if (!table->bt_l0) From 328d405b3d4c8dd1f06bfd77f498e23281ae348c Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:16 +0800 Subject: [PATCH 31/52] RDMA/hns: Intercept illegal RDMA operation when use inline data RDMA read operation is not supported inline data. If user cofigures issue a RDMA read and use inline data, it will happen a hardware error. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8b84ab7800d8..aa5f9b3e1c10 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -71,6 +71,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, return -EINVAL; } + if (wr->opcode == IB_WR_RDMA_READ) { + dev_err(hr_dev->dev, "Not support inline data!\n"); + return -EINVAL; + } + for (i = 0; i < wr->num_sge; i++) { memcpy(wqe, ((void *)wr->sg_list[i].addr), wr->sg_list[i].length); From 6e1a70943cecdca9bb13b601b1a9772a7bdcc2c3 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:17 +0800 Subject: [PATCH 32/52] RDMA/hns: Fix the qp context state diagram According to RoCE protocol, it is possible to transition from error to error state for modifying qp in hip08. This patch fix it. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index aa5f9b3e1c10..a4eea701d19d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3166,7 +3166,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) { + (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { /* Nothing */ ; } else { From 6852af86627c7bd8de11c9ad3eb5cca7d99e5884 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:18 +0800 Subject: [PATCH 33/52] RDMA/hns: Only assign mtu if IB_QP_PATH_MTU bit is set Only when the IB_QP_PATH_MTU flag of attr_mask is set it is valid to assign the mtu field of qp context when qp type is not GSI and UD. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a4eea701d19d..9a3148103a5f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2850,7 +2850,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); - else + else if (attr_mask & IB_QP_PATH_MTU) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, attr->path_mtu); From 734f38638d7b91c173ddfe46ceadb0382fae9b1f Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:19 +0800 Subject: [PATCH 34/52] RDMA/hns: Remove some unnecessary attr_mask judgement This patch deletes some unnecessary attr_mask if condition in hip08 according to the IB protocol. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9a3148103a5f..97850f41a633 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2655,8 +2655,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) { + if (attr_mask & IB_QP_ALT_PATH) { dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); return -EINVAL; } @@ -2927,11 +2926,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; } - /* If exist optional param, return error */ - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) || - (attr_mask & IB_QP_CUR_STATE) || - (attr_mask & IB_QP_MIN_RNR_TIMER)) { + /* Not support alternate path and path migration */ + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_PATH_MIG_STATE)) { dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); return -EINVAL; } From b6dd9b34838506f5307850d8d320d7b1ac6761d3 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:20 +0800 Subject: [PATCH 35/52] RDMA/hns: Only assign dqpn if IB_QP_PATH_DEST_QPN bit is set Only when the IB_QP_PATH_DEST_QPN flag of attr_mask is set is it valid to assign the dqpn field of qp context Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 97850f41a633..51ec60a9cfd5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2597,10 +2597,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, 0); - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, @@ -2804,10 +2806,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_140_RR_MAX_S, 0); } - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } /* Configure GID index */ port_num = rdma_ah_get_port_num(&attr->ah_attr); From ae25db00285bf664fbd78222491b29aac6242902 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:21 +0800 Subject: [PATCH 36/52] RDMA/hns: Adjust the order of cleanup hem table This patch update the order of cleaning hem table for trrl_table and irrl_table as well as mtt_cqe_table and mtt_table. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0d8c113083ad..63b5b3edabcb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1039,14 +1039,14 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_cqe_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } From 634f63902277700d0a02eaa101cc685958c35aa4 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:22 +0800 Subject: [PATCH 37/52] RDMA/hns: Update assignment method for owner field of send wqe When posting a work reqeust, it need to update the owner bit of send wqe. This patch mainly fix the bug when posting multiply work request. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 51ec60a9cfd5..8ea5470303ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -187,7 +187,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; - owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1; + owner_bit = + ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { From 137ae3208416278aabef3b71e0ea1052940ca362 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:23 +0800 Subject: [PATCH 38/52] RDMA/hns: Submit bad wr When generated bad work reqeust, it needs to report to user. This patch mainly fixes it. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8ea5470303ee..38a0b6b3446e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -153,7 +153,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); - *bad_wr = NULL; + *bad_wr = wr; return -EOPNOTSUPP; } @@ -462,6 +462,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; return -EOPNOTSUPP; } } From ab17884903f97054c719a3c68017513b922efe43 Mon Sep 17 00:00:00 2001 From: oulijun Date: Thu, 26 Apr 2018 14:46:24 +0800 Subject: [PATCH 39/52] RDMA/hns: Fix a couple misspellings This patch fixes two spelling errors. Signed-off-by: Lijun Ou Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 38a0b6b3446e..25916e8522ed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4487,7 +4487,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) { - dev_err(dev, "[mailbox cmd] creat eqc failed.\n"); + dev_err(dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e289a924e789..d4aad34c21e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -620,7 +620,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, to_hr_ucontext(ib_pd->uobject->context), ucmd.db_addr, &hr_qp->rdb); if (ret) { - dev_err(dev, "rp record doorbell map failed!\n"); + dev_err(dev, "rq record doorbell map failed!\n"); goto err_mtt; } } From 4f9ca2d8686ecfdd40ca4f0294a3d94f83f05cea Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 26 Apr 2018 15:37:48 +0300 Subject: [PATCH 40/52] RDMA/mlx4: Add missed RSS hash inner header flag Despite being advertised to user space application, the RSS inner header flag was filtered by checks at the beginning of QP creation routine. Cc: # 4.15 Fixes: 4d02ebd9bbbd ("IB/mlx4: Fix RSS hash fields restrictions") Fixes: 07d84f7b6adf ("IB/mlx4: Add support to RSS hash for inner headers") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 50af8915e7ec..199648adac74 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -673,7 +673,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, MLX4_IB_RX_HASH_SRC_PORT_TCP | MLX4_IB_RX_HASH_DST_PORT_TCP | MLX4_IB_RX_HASH_SRC_PORT_UDP | - MLX4_IB_RX_HASH_DST_PORT_UDP)) { + MLX4_IB_RX_HASH_DST_PORT_UDP | + MLX4_IB_RX_HASH_INNER)) { pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n", ucmd->rx_hash_fields_mask); return (-EOPNOTSUPP); From 5ccbf63f87a39c279729670d2e31166844a335c6 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 26 Apr 2018 15:42:54 +0300 Subject: [PATCH 41/52] IB/uverbs: Prevent reregistration of DM_MR to regular MR This patch adds a check in the ib_uverbs_rereg_mr flow to make sure there's no attempt to rereg a device memory MR to regular MR. In such case the command will fail with -EINVAL status. fixes: be934cca9e98 ("IB/uverbs: Add device memory registration ioctl support") Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 13cb5e4deb86..1837924415c9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -765,6 +765,11 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, mr = uobj->object; + if (mr->dm) { + ret = -EINVAL; + goto put_uobjs; + } + if (cmd.flags & IB_MR_REREG_ACCESS) { ret = ib_check_mr_access(cmd.access_flags); if (ret) From 54e7e48b13c85d9a730b989fe7dc5250199a4f81 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 26 Apr 2018 15:42:55 +0300 Subject: [PATCH 42/52] IB/uverbs: Fix kernel crash during MR deregistration flow This patch fixes a crash that happens due to access to an uninitialized DM pointer within the MR object. The change makes sure the DM pointer in the MR object is set to NULL during a non-DM MR creation to prevent a false indication that this MR is related to a DM in the dereg flow. Fixes: be934cca9e98 ("IB/uverbs: Add device memory registration ioctl support") Reported-by: Lijun Ou Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 1 + drivers/infiniband/core/verbs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1837924415c9..21a887c9523b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -691,6 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->device = pd->device; mr->pd = pd; + mr->dm = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7eff3aeffe01..6ddfb1fade79 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1656,6 +1656,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; + mr->dm = NULL; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; From 2df19e19ae90d94fd8724083f161f368a2797537 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Fri, 27 Apr 2018 16:41:16 +0530 Subject: [PATCH 43/52] iw_cxgb4: Atomically flush per QP HW CQEs When a CQ is shared by multiple QPs, c4iw_flush_hw_cq() needs to acquire corresponding QP lock before moving the CQEs into its corresponding SW queue and accessing the SQ contents for completing a WR. Ignore CQEs if corresponding QP is already flushed. Cc: stable@vger.kernel.org Signed-off-by: Potnuri Bharat Teja Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 11 ++++++++++- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6f2b26126c64..2be2e1ac1b5f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -315,7 +315,7 @@ static void advance_oldest_read(struct t4_wq *wq) * Deal with out-of-order and/or completions that complete * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct c4iw_cq *chp) +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp) { struct t4_cqe *hw_cqe, *swcqe, read_cqe; struct c4iw_qp *qhp; @@ -339,6 +339,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (qhp == NULL) goto next_cqe; + if (flush_qhp != qhp) { + spin_lock(&qhp->lock); + + if (qhp->wq.flushed == 1) + goto next_cqe; + } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) goto next_cqe; @@ -390,6 +397,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) next_cqe: t4_hwcq_consume(&chp->cq); ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + if (qhp && flush_qhp != qhp) + spin_unlock(&qhp->lock); } } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index a60def23e9ef..831027717121 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1053,7 +1053,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); -void c4iw_flush_hw_cq(struct c4iw_cq *chp); +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index de77b6027d69..ae167b686608 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1343,12 +1343,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq); - c4iw_flush_hw_cq(rchp); + c4iw_flush_hw_cq(rchp, qhp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); if (schp != rchp) - c4iw_flush_hw_cq(schp); + c4iw_flush_hw_cq(schp, qhp); sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); From db82476f37413eaeff5f836a9d8b022d6544accf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Wed, 18 Apr 2018 16:24:50 +0200 Subject: [PATCH 44/52] IB/core: Make ib_mad_client_id atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the kernel protects access to the agent ID allocator on a per port basis using a spinlock, so it is impossible for two apps/threads on the same port to get the same TID, but it is entirely possible for two threads on different ports to end up with the same TID. As this can be confusing (regardless of it being legal according to the IB Spec 1.3, C13-18.1.1, in section 13.4.6.4 - TransactionID usage), and as the rdma-core user space API for /dev/umad devices implies unique TIDs even across ports, make the TID an atomic type so that no two allocations, regardless of port number, will be the same. Signed-off-by: Håkon Bugge Reviewed-by: Jack Morgenstein Reviewed-by: Ira Weiny Reviewed-by: Zhu Yanjun Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c50596f7f98a..b28452a55a08 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -59,7 +59,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); static struct list_head ib_mad_port_list; -static u32 ib_mad_client_id = 0; +static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -377,7 +377,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; + mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); /* * Make sure MAD registration (if supplied) From f59fb9e05109b836230813e45f71c9ecc2d5dbe6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 1 May 2018 05:35:36 -0700 Subject: [PATCH 45/52] IB/hfi1: Fix handling of FECN marked multicast packet The code for handling a marked UD packet unconditionally returns the dlid in the header of the FECN marked packet. This is not correct for multicast packets where the DLID is in the multicast range. The subsequent attempt to send the CNP with the multicast lid will cause the chip to halt the ack send context because the source lid doesn't match the chip programming. The send context will be halted and flush any other pending packets in the pio ring causing the CNP to not be sent. A part of investigating the fix, it was determined that the 16B work broke the FECN routine badly with inconsistent use of 16 bit and 32 bits types for lids and pkeys. Since the port's source lid was correctly 32 bits the type mixmatches need to be dealt with at the same time as fixing the CNP header issue. Fix these issues by: - Using the ports lid for as the SLID for responding to FECN marked UD packets - Insure pkey is always 16 bit in this and subordinate routines - Insure lids are 32 bits in this and subordinate routines Cc: # 4.14.x Fixes: 88733e3b8450 ("IB/hfi1: Add 16B UD support") Reviewed-by: Don Hiatt Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 19 +++++++++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 8 ++++---- drivers/infiniband/hw/hfi1/ud.c | 4 ++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 46d1475b2154..bd837a048bf4 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -433,31 +433,43 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; - u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr); + u16 pkey; + u32 rlid, slid, dlid = 0; u8 hdr_type, sc, svc_type; bool is_mcast = false; + /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); + dlid = hfi1_16B_get_dlid(pkt->hdr); + slid = hfi1_16B_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_16B; } else { is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); + dlid = ib_get_dlid(pkt->hdr); + slid = ib_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_9B; } switch (qp->ibqp.qp_type) { + case IB_QPT_UD: + dlid = ppd->lid; + rlid = slid; + rqpn = ib_get_sqpn(pkt->ohdr); + svc_type = IB_CC_SVCTYPE_UD; + break; case IB_QPT_SMI: case IB_QPT_GSI: - case IB_QPT_UD: - rlid = ib_get_slid(pkt->hdr); + rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; break; @@ -482,7 +494,6 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, dlid, rlid, sc, grh); if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 32c48265405e..cac2c62bc42d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1537,13 +1537,13 @@ void set_link_ipg(struct hfi1_pportdata *ppd); void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type); void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); #define PKEY_CHECK_INVALID -1 @@ -2437,7 +2437,7 @@ static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT); lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) | ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT); - lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT); lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4; hdr->lrh[0] = lrh0; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index bcf3b0bebac8..69c17a5ef038 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -628,7 +628,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) } void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; @@ -687,7 +687,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, } void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; From 5da9e742be44d9b7c68b1bf6e1aaf46a1aa7a52b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 1 May 2018 05:35:43 -0700 Subject: [PATCH 46/52] IB/hfi1 Use correct type for num_user_context The module parameter num_user_context is defined as 'int' and defaults to -1. The module_param_named() says that it is uint. Correct module_param_named() type information and update the modinfo text to reflect the default value. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 33eba2356742..c45cca556942 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -88,9 +88,9 @@ * pio buffers per ctxt, etc.) Zero means use one user context per CPU. */ int num_user_contexts = -1; -module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO); +module_param_named(num_user_contexts, num_user_contexts, int, 0444); MODULE_PARM_DESC( - num_user_contexts, "Set max number of user contexts to use"); + num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)"); uint krcvqs[RXE_NUM_DATA_VL]; int krcvqsset; From 0a0bcb046b2f0c15b89f8c1b08ad3de601a83c66 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 1 May 2018 05:35:51 -0700 Subject: [PATCH 47/52] IB/hfi1: Fix loss of BECN with AHG AHG may be armed to use the stored header, which by design is limited to edits in the PSN/A 32 bit word (bth2). When the code is trying to send a BECN, the use of the stored header will lose the BECN bit. Fix by avoiding AHG when getting ready to send a BECN. This is accomplished by always claiming the packet is not a middle packet which is an AHG precursor. BECNs are not a normal case and this should not hurt AHG optimizations. Cc: # 4.14.x Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 50 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3daa94bdae3a..c0071ca4147a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -733,6 +733,20 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, ohdr->bth[2] = cpu_to_be32(bth2); } +/** + * hfi1_make_ruc_header_16B - build a 16B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -777,6 +791,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + becn = true; + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -784,11 +804,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - becn = true; - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); if (!ppd->lid) @@ -806,6 +821,20 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, pkey, becn, 0, l4, priv->s_sc); } +/** + * hfi1_make_ruc_header_9B - build a 9B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -839,6 +868,12 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -846,11 +881,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, From 45d924571a5e1329580811f2419da61b07ac3613 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 1 May 2018 05:35:58 -0700 Subject: [PATCH 48/52] IB/hfi1: Fix NULL pointer dereference when invalid num_vls is used When an invalid num_vls is used as a module parameter, the code execution follows an exception path where the macro dd_dev_err() expects dd->pcidev->dev not to be NULL in hfi1_init_dd(). This causes a NULL pointer dereference. Fix hfi1_init_dd() by initializing dd->pcidev and dd->pcidev->dev earlier in the code. If a dd exists, then dd->pcidev and dd->pcidev->dev always exists. BUG: unable to handle kernel NULL pointer dereference at 00000000000000f0 IP: __dev_printk+0x15/0x90 Workqueue: events work_for_cpu_fn RIP: 0010:__dev_printk+0x15/0x90 Call Trace: dev_err+0x6c/0x90 ? hfi1_init_pportdata+0x38d/0x3f0 [hfi1] hfi1_init_dd+0xdd/0x2530 [hfi1] ? pci_conf1_read+0xb2/0xf0 ? pci_read_config_word.part.9+0x64/0x80 ? pci_conf1_write+0xb0/0xf0 ? pcie_capability_clear_and_set_word+0x57/0x80 init_one+0x141/0x490 [hfi1] local_pci_probe+0x3f/0xa0 work_for_cpu_fn+0x10/0x20 process_one_work+0x152/0x350 worker_thread+0x1cf/0x3e0 kthread+0xf5/0x130 ? max_active_store+0x80/0x80 ? kthread_bind+0x10/0x10 ? do_syscall_64+0x6e/0x1a0 ? SyS_exit_group+0x10/0x10 ret_from_fork+0x35/0x40 Cc: # 4.9.x Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 ++ drivers/infiniband/hw/hfi1/pcie.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index c45cca556942..b417e3b40e4a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1265,6 +1265,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) return ERR_PTR(-ENOMEM); dd->num_pports = nports; dd->pport = (struct hfi1_pportdata *)(dd + 1); + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 83d66e862207..c1c982908b4b 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -163,9 +163,6 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) resource_size_t addr; int ret = 0; - dd->pcidev = pdev; - pci_set_drvdata(pdev, dd); - addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); From e9777ad4399c26c70318c4945f94efac2ed95391 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 1 May 2018 05:36:06 -0700 Subject: [PATCH 49/52] IB/{hfi1, rdmavt}: Fix memory leak in hfi1_alloc_devdata() upon failure When allocating device data, if there's an allocation failure, the already allocated memory won't be freed such as per-cpu counters. Fix memory leaks in exception path by creating a common reentrant clean up function hfi1_clean_devdata() to be used at driver unload time and device data allocation failure. To accomplish this, free_platform_config() and clean_up_i2c() are changed to be reentrant to remove dependencies when they are called in different order. This helps avoid NULL pointer dereferences introduced by this patch if those two functions weren't reentrant. In addition, set dd->int_counter, dd->rcv_limit, dd->send_schedule and dd->tx_opstats to NULL after they're freed in hfi1_clean_devdata(), so that hfi1_clean_devdata() is fully reentrant. Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 37 +++++++++++++++++++-------- drivers/infiniband/hw/hfi1/platform.c | 1 + drivers/infiniband/hw/hfi1/qsfp.c | 2 ++ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b417e3b40e4a..6309edf811df 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1209,30 +1209,49 @@ static void finalize_asic_data(struct hfi1_devdata *dd, kfree(ad); } -static void __hfi1_free_devdata(struct kobject *kobj) +/** + * hfi1_clean_devdata - cleans up per-unit data structure + * @dd: pointer to a valid devdata structure + * + * It cleans up all data structures set up by + * by hfi1_alloc_devdata(). + */ +static void hfi1_clean_devdata(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd = - container_of(kobj, struct hfi1_devdata, kobj); struct hfi1_asic_data *ad; unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); - idr_remove(&hfi1_unit_table, dd->unit); - list_del(&dd->list); + if (!list_empty(&dd->list)) { + idr_remove(&hfi1_unit_table, dd->unit); + list_del_init(&dd->list); + } ad = release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); - if (ad) - finalize_asic_data(dd, ad); + + finalize_asic_data(dd, ad); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); free_percpu(dd->tx_opstats); + dd->int_counter = NULL; + dd->rcv_limit = NULL; + dd->send_schedule = NULL; + dd->tx_opstats = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } +static void __hfi1_free_devdata(struct kobject *kobj) +{ + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); + + hfi1_clean_devdata(dd); +} + static struct kobj_type hfi1_devdata_type = { .release = __hfi1_free_devdata, }; @@ -1333,9 +1352,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) return dd; bail: - if (!list_empty(&dd->list)) - list_del_init(&dd->list); - rvt_dealloc_device(&dd->verbs_dev.rdi); + hfi1_clean_devdata(dd); return ERR_PTR(ret); } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index d486355880cb..cbf7faa5038c 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -199,6 +199,7 @@ void free_platform_config(struct hfi1_devdata *dd) { /* Release memory allocated for eprom or fallback file read. */ kfree(dd->platform_config.data); + dd->platform_config.data = NULL; } void get_port_type(struct hfi1_pportdata *ppd) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 1869f639c3ae..b5966991d647 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -204,6 +204,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus) void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { + if (!ad) + return; clean_i2c_bus(ad->i2c_bus0); ad->i2c_bus0 = NULL; clean_i2c_bus(ad->i2c_bus1); From 59482a14918b282ca2a98f38c69da5ebeb1107d2 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 1 May 2018 05:36:13 -0700 Subject: [PATCH 50/52] IB/hfi1: Fix memory leak in exception path in get_irq_affinity() When IRQ affinity is set and the interrupt type is unknown, a cpu mask allocated within the function is never freed. Fix this memory leak by allocating memory within the scope where it is used. Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a97055dd4fbd..b5fab55cc275 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -412,7 +412,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) static int get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { - int ret; cpumask_var_t diff; struct hfi1_affinity_node *entry; struct cpu_mask_set *set = NULL; @@ -424,10 +423,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, extra[0] = '\0'; cpumask_clear(&msix->mask); - ret = zalloc_cpumask_var(&diff, GFP_KERNEL); - if (!ret) - return -ENOMEM; - entry = node_affinity_lookup(dd->node); switch (msix->type) { @@ -458,6 +453,9 @@ static int get_irq_affinity(struct hfi1_devdata *dd, * finds its CPU here. */ if (cpu == -1 && set) { + if (!zalloc_cpumask_var(&diff, GFP_KERNEL)) + return -ENOMEM; + if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -469,6 +467,8 @@ static int get_irq_affinity(struct hfi1_devdata *dd, cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); + + free_cpumask_var(diff); } cpumask_set_cpu(cpu, &msix->mask); @@ -482,7 +482,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, hfi1_setup_sdma_notifier(msix); } - free_cpumask_var(diff); return 0; } From b03bcde962606d2ee59a4e9dd470db9ad53c5418 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 2 May 2018 13:04:25 +0300 Subject: [PATCH 51/52] IB/mlx4: Fix integer overflow when calculating optimal MTT size When the kernel was compiled using the UBSAN option, we saw the following stack trace: [ 1184.827917] UBSAN: Undefined behaviour in drivers/infiniband/hw/mlx4/mr.c:349:27 [ 1184.828114] signed integer overflow: [ 1184.828247] -2147483648 - 1 cannot be represented in type 'int' The problem was caused by calling round_up in procedure mlx4_ib_umem_calc_optimal_mtt_size (on line 349, as noted in the stack trace) with the second parameter (1 << block_shift) (which is an int). The second parameter should have been (1ULL << block_shift) (which is an unsigned long long). (1 << block_shift) is treated by the compiler as an int (because 1 is an integer). Now, local variable block_shift is initialized to 31. If block_shift is 31, 1 << block_shift is 1 << 31 = 0x80000000=-214748368. This is the most negative int value. Inside the round_up macro, there is a cast applied to ((1 << 31) - 1). However, this cast is applied AFTER ((1 << 31) - 1) is calculated. Since (1 << 31) is treated as an int, we get the negative overflow identified by UBSAN in the process of calculating ((1 << 31) - 1). The fix is to change (1 << block_shift) to (1ULL << block_shift) on line 349. Fixes: 9901abf58368 ("IB/mlx4: Use optimal numbers of MTT entries") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 17f4f151a97f..61d8b06375bb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -346,7 +346,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, /* Add to the first block the misalignment that it suffers from. */ total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); last_block_end = current_block_start + current_block_len; - last_block_aligned_end = round_up(last_block_end, 1 << block_shift); + last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); total_len += (last_block_aligned_end - last_block_end); if (total_len & ((1ULL << block_shift) - 1ULL)) From 9aa169213d1166d30ae357a44abbeae93459339d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 2 May 2018 13:18:59 +0300 Subject: [PATCH 52/52] RDMA/cma: Do not query GID during QP state transition to RTR When commit [1] was added, SGID was queried to derive the SMAC address. Then, later on during a refactor [2], SMAC was no longer needed. However, the now useless GID query remained. Then during additional code changes later on, the GID query was being done in such a way that it caused iWARP queries to start breaking. Remove the useless GID query and resolve the iWARP breakage at the same time. This is discussed in [3]. [1] commit dd5f03beb4f7 ("IB/core: Ethernet L2 attributes in verbs/cm structures") [2] commit 5c266b2304fb ("IB/cm: Remove the usage of smac and vid of qp_attr and cm_av") [3] https://www.spinics.net/lists/linux-rdma/msg63951.html Suggested-by: Shiraz Saleem Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8364223422d0..a693fcd4c513 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -868,7 +868,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; - union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { @@ -891,12 +890,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, if (ret) goto out; - ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index, - &sgid, NULL); - if (ret) - goto out; - BUG_ON(id_priv->cma_dev->device != id_priv->id.device); if (conn_param)