Merge branch 'net-smc-introduce-SMC-Dv2-support'

Karsten Graul says:

====================
net/smc: introduce SMC-Dv2 support

SMC-Dv2 support (see https://www.ibm.com/support/pages/node/6326337)
provides multi-subnet support for SMC-D, eliminating the current
same-subnet restriction. The new version detects if any of the virtual
ISM devices are on the same system and can therefore be used for an
SMC-Dv2 connection. Furthermore, SMC-Dv2 eliminates the need for
PNET IDs on s390.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-09-28 15:19:03 -07:00
commit be589d0428
14 changed files with 1206 additions and 262 deletions

View File

@ -16,6 +16,7 @@
#define ISM_DMB_WORD_OFFSET 1
#define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32)
#define ISM_NR_DMBS 1920
#define ISM_IDENT_MASK 0x00FFFF
#define ISM_REG_SBA 0x1
#define ISM_REG_IEQ 0x2
@ -206,6 +207,12 @@ struct ism_dev {
#define ISM_CREATE_REQ(dmb, idx, sf, offset) \
((dmb) | (idx) << 24 | (sf) << 23 | (offset))
/* Layout of the system EID exposed by the ISM driver: three adjacent
 * character fields, 32 bytes in total. The driver hands out a pointer to
 * the first field as the start of the whole EID.
 */
struct ism_systemeid {
/* fixed identifier text, not NUL-terminated */
u8 seid_string[24];
/* four hex digits, filled from the low 16 bits of the CPU ident */
u8 serial_number[4];
/* four hex digits, filled from the CPU machine type */
u8 type[4];
};
static inline void __ism_read_cmd(struct ism_dev *ism, void *data,
unsigned long offset, unsigned long len)
{

View File

@ -13,6 +13,8 @@
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/processor.h>
#include <net/smc.h>
#include <asm/debug.h>
@ -387,6 +389,42 @@ static int ism_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
return 0;
}
/* System EID announced for SMC-Dv2. The string part is a fixed identifier;
 * serial_number and type start out as "0000" placeholders and are replaced
 * by ism_create_system_eid() once a V2 capable device has been found.
 * ism_dev_exit() relies on the placeholders to tell whether that happened.
 *
 * The identifier must read "ISMSEID" (not "IBMSEID"): peers compare the
 * complete EID byte by byte, so a deviating string prevents SMC-Dv2
 * connections with other implementations.
 */
static struct ism_systemeid SYSTEM_EID = {
	.seid_string = "IBM-SYSZ-ISMSEID00000000",
	.serial_number = "0000",
	.type = "0000",
};
static void ism_create_system_eid(void)
{
struct cpuid id;
u16 ident_tail;
char tmp[5];
get_cpu_id(&id);
ident_tail = (u16)(id.ident & ISM_IDENT_MASK);
snprintf(tmp, 5, "%04X", ident_tail);
memcpy(&SYSTEM_EID.serial_number, tmp, 4);
snprintf(tmp, 5, "%04X", id.machine);
memcpy(&SYSTEM_EID.type, tmp, 4);
}
/* smcd_ops callback: hand out a pointer to the driver-wide system EID.
 * @smcd is not used, the EID is the same for every ISM device.
 */
static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
{
	*eid = SYSTEM_EID.seid_string;
}
/* smcd_ops callback: return the PCHID of the PCI function behind @smcd,
 * or 0 if no ISM device or no PCI device is attached to it.
 */
static u16 ism_get_chid(struct smcd_dev *smcd)
{
	struct ism_dev *ism = (struct ism_dev *)smcd->priv;

	if (ism && ism->pdev)
		return to_zpci(ism->pdev)->pchid;

	return 0;
}
static void ism_handle_event(struct ism_dev *ism)
{
struct smcd_event *entry;
@ -443,6 +481,8 @@ static const struct smcd_ops ism_ops = {
.reset_vlan_required = ism_reset_vlan_required,
.signal_event = ism_signal_ieq,
.move_data = ism_move,
.get_system_eid = ism_get_system_eid,
.get_chid = ism_get_chid,
};
static int ism_dev_init(struct ism_dev *ism)
@ -471,6 +511,10 @@ static int ism_dev_init(struct ism_dev *ism)
if (ret)
goto unreg_ieq;
if (!ism_add_vlan_id(ism->smcd, ISM_RESERVED_VLANID))
/* hardware is V2 capable */
ism_create_system_eid();
ret = smcd_register_dev(ism->smcd);
if (ret)
goto unreg_ieq;
@ -550,6 +594,9 @@ static void ism_dev_exit(struct ism_dev *ism)
struct pci_dev *pdev = ism->pdev;
smcd_unregister_dev(ism->smcd);
if (SYSTEM_EID.serial_number[0] != '0' ||
SYSTEM_EID.type[0] != '0')
ism_del_vlan_id(ism->smcd, ISM_RESERVED_VLANID);
unregister_ieq(ism);
unregister_sba(ism);
free_irq(pci_irq_vector(pdev, 0), ism);

View File

@ -37,6 +37,8 @@ struct smcd_dmb {
#define ISM_EVENT_GID 1
#define ISM_EVENT_SWR 2
#define ISM_RESERVED_VLANID 0x1FFF
#define ISM_ERROR 0xFFFF
struct smcd_event {
@ -63,6 +65,8 @@ struct smcd_ops {
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
void (*get_system_eid)(struct smcd_dev *dev, u8 **eid);
u16 (*get_chid)(struct smcd_dev *dev);
};
struct smcd_dev {

View File

@ -26,6 +26,7 @@
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>
#include <net/sock.h>
#include <net/tcp.h>
@ -448,6 +449,16 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
/* Return true if all SMC_MAX_HOSTNAME_LEN bytes of @hostname are ASCII.
 * The whole fixed-size field is inspected; a NUL byte does not end the scan.
 */
static bool smc_isascii(char *hostname)
{
	char *end = hostname + SMC_MAX_HOSTNAME_LEN;
	char *pos;

	for (pos = hostname; pos < end; pos++) {
		if (!isascii(*pos))
			return false;
	}
	return true;
}
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@ -459,6 +470,22 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
if (clc->hdr.version > SMC_V1 &&
(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce =
(struct smc_clc_first_contact_ext *)
(((u8 *)clc_v2) + sizeof(*clc_v2));
memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
SMC_MAX_EID_LEN);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
SMC_MAX_HOSTNAME_LEN);
}
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@ -504,7 +531,8 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
}
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
u8 version)
{
int rc;
@ -514,7 +542,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
return reason_code;
}
if (reason_code != SMC_CLC_DECL_PEERDECL) {
rc = smc_clc_send_decline(smc, reason_code);
rc = smc_clc_send_decline(smc, reason_code, version);
if (rc < 0) {
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
@ -564,47 +592,121 @@ static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
if (!ini->ism_dev)
if (!ini->ism_dev[0])
return SMC_CLC_DECL_NOSMCDDEV;
else
ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
return 0;
}
/* Client side: collect the ISM devices that can be offered for SMC-Dv2.
 * A device qualifies if it has no PNETID at all, or if its PNETID matches a
 * net_device in the socket's network namespace. Slot 0 of ini->ism_dev[] is
 * left to the V1 device, the V2 candidates are stored from slot 1 onwards.
 *
 * Returns 0 if at least one V2 device was found or SMC-D V1 is already
 * indicated, SMC_CLC_DECL_NOSMCDDEV otherwise.
 */
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
				       struct smc_init_info *ini)
{
	struct smcd_dev *cand;
	int slot = 1;
	int rc;

	/* with V1 already initialized a missing V2 device is no error */
	rc = smcd_indicated(ini->smc_type_v1) ? 0 : SMC_CLC_DECL_NOSMCDDEV;

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(cand, &smcd_dev_list.list, list) {
		if (cand->going_away || cand == ini->ism_dev[0])
			continue;
		/* a PNETID that is set has to match a local net_device */
		if (smc_pnet_is_pnetid_set(cand->pnetid) &&
		    !smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), cand->pnetid))
			continue;

		ini->ism_dev[slot] = cand;
		ini->ism_chid[slot] = smc_ism_get_chid(cand);
		ini->is_smcd = true;
		rc = 0;
		if (++slot > SMC_MAX_ISM_DEVS)
			break;
	}
	mutex_unlock(&smcd_dev_list.mutex);

	ini->ism_offered_cnt = slot - 1;
	/* neither a V1 nor a V2 device: no SMC-D version left to offer */
	if (!ini->ism_dev[0] && !ini->ism_dev[1])
		ini->smcd_version = 0;

	return rc;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
struct smc_init_info *ini)
{
if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_ISMVLANERR;
return 0;
}
/* Client side: determine which devices can be proposed to the peer and
 * narrow ini->smc_type_v1 / ini->smc_type_v2 accordingly.
 *
 * Returns 0 if at least one SMC mode is left to propose,
 * SMC_CLC_DECL_NOSMCDEV if neither ISM nor RDMA can be used.
 */
static int smc_find_proposal_devices(struct smc_sock *smc,
				     struct smc_init_info *ini)
{
	if (ini->smcd_version & SMC_V1) {
		/* no usable V1 ISM device: "both" shrinks to RDMA only */
		if (smc_find_ism_device(smc, ini) ||
		    smc_connect_ism_vlan_setup(smc, ini))
			ini->smc_type_v1 = ini->smc_type_v1 == SMC_TYPE_B ?
					   SMC_TYPE_R : SMC_TYPE_N;

		/* no usable RDMA device: "both" shrinks to ISM only */
		if (smc_find_rdma_device(smc, ini))
			ini->smc_type_v1 = ini->smc_type_v1 == SMC_TYPE_B ?
					   SMC_TYPE_D : SMC_TYPE_N;
	}

	if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini))
		ini->smc_type_v2 = SMC_TYPE_N;

	/* if neither ISM nor RDMA are supported, fallback */
	if (!smcr_indicated(ini->smc_type_v1) &&
	    ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
		return SMC_CLC_DECL_NOSMCDEV;

	return 0;
}
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
struct smc_init_info *ini)
{
if (!is_smcd)
if (!smcd_indicated(ini->smc_type_v1))
return 0;
if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
sizeof(struct smc_clc_first_contact_ext) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
struct smc_clc_msg_accept_confirm *aclc,
static int smc_connect_clc(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc2,
struct smc_init_info *ini)
{
int rc = 0;
/* do inband token exchange */
rc = smc_clc_send_proposal(smc, smc_type, ini);
rc = smc_clc_send_proposal(smc, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
CLC_WAIT_TIME);
return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
/* setup for RDMA connection of client */
@ -618,7 +720,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
ini->is_smcd = false;
ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.flag;
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
@ -678,7 +780,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
smc_rmb_sync_sg_for_device(&smc->conn);
reason_code = smc_clc_send_confirm(smc);
reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
SMC_V1);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->first_contact_local);
@ -704,6 +807,25 @@ static int smc_connect_rdma(struct smc_sock *smc,
return 0;
}
/* The server has chosen one of the proposed ISM devices for the communication.
* Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
*/
static int
smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
struct smc_init_info *ini)
{
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
if (ini->ism_chid[i] == ntohs(aclc->chid)) {
ini->ism_selected = i;
return 0;
}
}
return -EPROTO;
}
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
@ -712,8 +834,17 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
ini->is_smcd = true;
ini->ism_peer_gid = aclc->d0.gid;
ini->first_contact_peer = aclc->hdr.flag;
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
if (aclc->hdr.version == SMC_V2) {
struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
if (rc)
return rc;
}
ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
@ -736,7 +867,8 @@ static int smc_connect_ism(struct smc_sock *smc,
smc_rx_init(smc);
smc_tx_init(smc);
rc = smc_clc_send_confirm(smc);
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
aclc->hdr.version);
if (rc)
return smc_connect_abort(smc, rc, ini->first_contact_local);
mutex_unlock(&smc_server_lgr_pending);
@ -749,13 +881,32 @@ static int smc_connect_ism(struct smc_sock *smc,
return 0;
}
/* check if received accept type and version matches a proposed one */
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v1)) ||
(aclc->hdr.typev1 == SMC_TYPE_D &&
((!smcd_indicated(ini->smc_type_v1) &&
!smcd_indicated(ini->smc_type_v2)) ||
(aclc->hdr.version == SMC_V1 &&
!smcd_indicated(ini->smc_type_v1)) ||
(aclc->hdr.version == SMC_V2 &&
!smcd_indicated(ini->smc_type_v2)))))
return SMC_CLC_DECL_MODEUNSUPP;
return 0;
}
/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
bool ism_supported = false, rdma_supported = false;
struct smc_clc_msg_accept_confirm aclc;
struct smc_init_info ini = {0};
int smc_type;
u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
struct smc_clc_msg_accept_confirm_v2 *aclc2;
struct smc_clc_msg_accept_confirm *aclc;
struct smc_init_info *ini = NULL;
u8 *buf = NULL;
int rc = 0;
if (smc->use_fallback)
@ -765,58 +916,73 @@ static int __smc_connect(struct smc_sock *smc)
if (!tcp_sk(smc->clcsock->sk)->syn_smc)
return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
/* IPSec connections opt out of SMC-R optimizations */
/* IPSec connections opt out of SMC optimizations */
if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
version);
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
version);
ini->smcd_version = SMC_V1;
ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0;
ini->smc_type_v1 = SMC_TYPE_B;
ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N;
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
return smc_connect_decline_fallback(smc,
SMC_CLC_DECL_GETVLANERR);
/* check if there is an ism device available */
if (!smc_find_ism_device(smc, &ini) &&
!smc_connect_ism_vlan_setup(smc, &ini)) {
/* ISM is supported for this connection */
ism_supported = true;
smc_type = SMC_TYPE_D;
if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
ini->smcd_version &= ~SMC_V1;
ini->smc_type_v1 = SMC_TYPE_N;
if (!ini->smcd_version) {
rc = SMC_CLC_DECL_GETVLANERR;
goto fallback;
}
}
/* check if there is a rdma device available */
if (!smc_find_rdma_device(smc, &ini)) {
/* RDMA is supported for this connection */
rdma_supported = true;
if (ism_supported)
smc_type = SMC_TYPE_B; /* both */
else
smc_type = SMC_TYPE_R; /* only RDMA */
}
rc = smc_find_proposal_devices(smc, ini);
if (rc)
goto fallback;
/* if neither ISM nor RDMA are supported, fallback */
if (!rdma_supported && !ism_supported)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
if (!buf) {
rc = SMC_CLC_DECL_MEM;
goto fallback;
}
aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
/* perform CLC handshake */
rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
if (rc) {
smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
rc = smc_connect_clc(smc, aclc2, ini);
if (rc)
goto vlan_cleanup;
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version;
if (rc)
goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */
if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
rc = smc_connect_rdma(smc, &aclc, &ini);
else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
rc = smc_connect_ism(smc, &aclc, &ini);
else
rc = SMC_CLC_DECL_MODEUNSUPP;
if (rc) {
smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
if (aclc->hdr.typev1 == SMC_TYPE_R)
rc = smc_connect_rdma(smc, aclc, ini);
else if (aclc->hdr.typev1 == SMC_TYPE_D)
rc = smc_connect_ism(smc, aclc, ini);
if (rc)
goto vlan_cleanup;
smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
smc_connect_ism_vlan_cleanup(smc, ini);
kfree(buf);
kfree(ini);
return 0;
vlan_cleanup:
smc_connect_ism_vlan_cleanup(smc, ini);
kfree(buf);
fallback:
kfree(ini);
return smc_connect_decline_fallback(smc, rc, version);
}
static void smc_connect_work(struct work_struct *work)
@ -1132,10 +1298,10 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
bool local_first)
struct smc_init_info *ini, u8 version)
{
/* RDMA setup failed, switch back to TCP */
if (local_first)
if (ini->first_contact_local)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
@ -1146,7 +1312,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code) < 0) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
return;
}
@ -1154,6 +1320,47 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
smc_listen_out_connected(new_smc);
}
/* listen worker: version checking */
static int smc_listen_v2_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
struct smc_clc_v2_extension *pclc_v2_ext;
ini->smc_type_v1 = pclc->hdr.typev1;
ini->smc_type_v2 = pclc->hdr.typev2;
ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
if (pclc->hdr.version > SMC_V1)
ini->smcd_version |=
ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
if (!smc_ism_v2_capable) {
ini->smcd_version &= ~SMC_V2;
goto out;
}
pclc_v2_ext = smc_get_clc_v2_ext(pclc);
if (!pclc_v2_ext) {
ini->smcd_version &= ~SMC_V2;
goto out;
}
pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
if (!pclc_smcd_v2_ext)
ini->smcd_version &= ~SMC_V2;
out:
if (!ini->smcd_version) {
if (pclc->hdr.typev1 == SMC_TYPE_B ||
pclc->hdr.typev2 == SMC_TYPE_B)
return SMC_CLC_DECL_NOSMCDEV;
if (pclc->hdr.typev1 == SMC_TYPE_D ||
pclc->hdr.typev2 == SMC_TYPE_D)
return SMC_CLC_DECL_NOSMCDDEV;
return SMC_CLC_DECL_NOSMCRDEV;
}
return 0;
}
/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
@ -1161,6 +1368,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
if (pclc->hdr.typev1 == SMC_TYPE_N)
return 0;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
return SMC_CLC_DECL_DIFFPREFIX;
@ -1188,7 +1397,6 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
int rc;
@ -1211,6 +1419,125 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
return 0;
}
/* Return true if @smcd is already stored in one of the first @matches
 * slots of ini->ism_dev[].
 */
static bool smc_is_already_selected(struct smcd_dev *smcd,
				    struct smc_init_info *ini,
				    int matches)
{
	int slot = 0;

	while (slot < matches) {
		if (ini->ism_dev[slot] == smcd)
			return true;
		slot++;
	}
	return false;
}
/* check for ISM devices matching proposed ISM devices
 *
 * Walk the SMC-D device list for a device that is not going away, has not
 * been selected before, reports @proposed_chid and for which
 * smc_ism_cantalk() returns 0 for @proposed_gid. The first such device is
 * stored in slot *@matches together with the peer GID, and *@matches is
 * incremented. At most one device is added per call.
 */
static void smc_check_ism_v2_match(struct smc_init_info *ini,
				   u16 proposed_chid, u64 proposed_gid,
				   unsigned int *matches)
{
	struct smcd_dev *dev;

	list_for_each_entry(dev, &smcd_dev_list.list, list) {
		if (dev->going_away)
			continue;
		if (smc_is_already_selected(dev, ini, *matches))
			continue;
		if (smc_ism_get_chid(dev) != proposed_chid)
			continue;
		if (smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, dev))
			continue;

		ini->ism_peer_gid[*matches] = proposed_gid;
		ini->ism_dev[*matches] = dev;
		(*matches)++;
		break;
	}
}
/* Server side: look for a local ISM device usable for an SMC-Dv2
 * connection with the peer that sent @pclc.
 *
 * The GID/CHID pairs of the proposal are matched against the local SMC-D
 * devices; matches are collected in ini->ism_dev[] / ini->ism_peer_gid[].
 * The system EID from the proposal must equal the local one. Connection
 * setup is then attempted on each match in turn.
 *
 * On success ini->ism_selected names the chosen slot, ini->smcd_version is
 * SMC_V2 and ini->is_smcd is true. On failure SMC_V2 is cleared from
 * ini->smcd_version, ini->ism_dev[0] is reset to NULL and ini->is_smcd is
 * false, so the caller can test ini->ism_dev[0].
 */
static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_smcd_v2_extension *smcd_v2_ext;
struct smc_clc_v2_extension *smc_v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
unsigned int matches = 0;
u8 *eid = NULL;
int i;
/* nothing to do unless V2 is still possible and SMC-D was proposed for it */
if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
return;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
if (!smcd_v2_ext ||
!smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */
goto not_found;
/* the device list is walked by smc_check_ism_v2_match() under this lock */
mutex_lock(&smcd_dev_list.mutex);
/* a CHID of zero in the base SMC-D part means no device is given there */
if (pclc_smcd->ism.chid)
/* check for ISM device matching proposed native ISM device */
smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
ntohll(pclc_smcd->ism.gid), &matches);
/* i counts from 1, the gidchid[] array is indexed from 0 */
for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
/* check for ISM devices matching proposed non-native ISM
* devices
*/
smc_check_ism_v2_match(ini,
ntohs(smcd_v2_ext->gidchid[i - 1].chid),
ntohll(smcd_v2_ext->gidchid[i - 1].gid),
&matches);
}
mutex_unlock(&smcd_dev_list.mutex);
/* the EID is fetched through the first match and compared over its full length */
if (ini->ism_dev[0]) {
smc_ism_get_system_eid(ini->ism_dev[0], &eid);
if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN))
goto not_found;
} else {
goto not_found;
}
/* separate - outside the smcd_dev_list.lock */
for (i = 0; i < matches; i++) {
ini->smcd_version = SMC_V2;
ini->is_smcd = true;
ini->ism_selected = i;
if (smc_listen_ism_init(new_smc, ini))
/* try next active ISM device */
continue;
return; /* matching and usable V2 ISM device found */
}
/* also reached when every matched device failed to initialize */
not_found:
ini->smcd_version &= ~SMC_V2;
ini->ism_dev[0] = NULL;
ini->is_smcd = false;
}
/* Server side: try to set up an SMC-D V1 connection for the proposal.
 * On success ini->ism_dev[0] holds the device and ini->is_smcd is true;
 * otherwise ini->ism_dev[0] is NULL and ini->is_smcd is false.
 */
static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);

	/* check if ISM V1 is available */
	if ((ini->smcd_version & SMC_V1) &&
	    smcd_indicated(ini->smc_type_v1)) {
		ini->is_smcd = true; /* prepare ISM check */
		ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
		if (!smc_find_ism_device(new_smc, ini)) {
			ini->ism_selected = 0;
			if (!smc_listen_ism_init(new_smc, ini))
				return; /* V1 ISM device found */
		}
	}

	/* no usable V1 ISM device: undo the preparation */
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
{
@ -1225,6 +1552,67 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
return 0;
}
/* Server side: find an RDMA device for the proposal, initialize the
 * connection on it and register the buffers.
 *
 * Returns 0 on success. Returns SMC_CLC_DECL_NOSMCDEV if RDMA was not
 * proposed, or if no RDMA device exists and the peer had proposed both
 * modes; otherwise the return code of the failing step is passed on.
 */
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	int err;

	if (!smcr_indicated(ini->smc_type_v1))
		return SMC_CLC_DECL_NOSMCDEV;

	/* prepare RDMA check */
	ini->ib_lcl = &pclc->lcl;
	err = smc_find_rdma_device(new_smc, ini);
	if (err) {
		/* no RDMA device found; with "both" proposed this means
		 * neither ISM nor RDMA device found
		 */
		return ini->smc_type_v1 == SMC_TYPE_B ?
		       SMC_CLC_DECL_NOSMCDEV : err;
	}

	err = smc_listen_rdma_init(new_smc, ini);
	if (!err)
		err = smc_listen_rdma_reg(new_smc, ini->first_contact_local);

	return err;
}
/* determine the local device matching to proposal
 *
 * Candidates are tried in this order: SMC-D V2, SMC-D V1, RDMA. The V1
 * prefix and VLAN checks are only made once V2 has been ruled out.
 * Returns 0 when a device was set up, a decline reason code otherwise.
 */
static int smc_listen_find_device(struct smc_sock *new_smc,
				  struct smc_clc_msg_proposal *pclc,
				  struct smc_init_info *ini)
{
	int err;

	/* check for ISM device matching V2 proposed device */
	smc_find_ism_v2_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	/* V2 failed and V1 is not an option */
	if (!(ini->smcd_version & SMC_V1))
		return SMC_CLC_DECL_NOSMCDEV;

	/* check for matching IP prefix and subnet length */
	err = smc_listen_prfx_check(new_smc, pclc);
	if (err)
		return err;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
		return SMC_CLC_DECL_GETVLANERR;

	/* check for ISM device matching V1 proposed device */
	smc_find_ism_v1_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	/* ISM only proposed: skip RDMA and decline, else try RDMA */
	return pclc->hdr.typev1 == SMC_TYPE_D ?
	       SMC_CLC_DECL_NOSMCDDEV :
	       smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
}
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
@ -1250,17 +1638,18 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
return reason_code;
}
/* setup for RDMA connection of server */
/* setup for connection of server */
static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_accept_confirm_v2 *cclc2;
struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc;
struct smc_init_info ini = {0};
bool ism_supported = false;
struct smc_init_info *ini = NULL;
int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@ -1292,101 +1681,76 @@ static void smc_listen_work(struct work_struct *work)
SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc)
goto out_decl;
version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
/* IPSec connections opt out of SMC-R optimizations */
/* IPSec connections opt out of SMC optimizations */
if (using_ipsec(new_smc)) {
rc = SMC_CLC_DECL_IPSEC;
goto out_decl;
}
/* check for matching IP prefix and subnet length */
rc = smc_listen_prfx_check(new_smc, pclc);
if (rc)
goto out_decl;
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
rc = SMC_CLC_DECL_GETVLANERR;
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini) {
rc = SMC_CLC_DECL_MEM;
goto out_decl;
}
/* initial version checking */
rc = smc_listen_v2_check(new_smc, pclc, ini);
if (rc)
goto out_decl;
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
/* check if ISM is available */
if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
ini.is_smcd = true; /* prepare ISM check */
ini.ism_peer_gid = pclc_smcd->gid;
rc = smc_find_ism_device(new_smc, &ini);
if (!rc)
rc = smc_listen_ism_init(new_smc, pclc, &ini);
if (!rc)
ism_supported = true;
else if (pclc->hdr.path == SMC_TYPE_D)
goto out_unlock; /* skip RDMA and decline */
}
/* check if RDMA is available */
if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
/* prepare RDMA check */
ini.is_smcd = false;
ini.ism_dev = NULL;
ini.ib_lcl = &pclc->lcl;
rc = smc_find_rdma_device(new_smc, &ini);
if (rc) {
/* no RDMA device found */
if (pclc->hdr.path == SMC_TYPE_B)
/* neither ISM nor RDMA device found */
rc = SMC_CLC_DECL_NOSMCDEV;
goto out_unlock;
}
rc = smc_listen_rdma_init(new_smc, &ini);
if (rc)
goto out_unlock;
rc = smc_listen_rdma_reg(new_smc, ini.first_contact_local);
if (rc)
goto out_unlock;
}
/* determine ISM or RoCE device used for connection */
rc = smc_listen_find_device(new_smc, pclc, ini);
if (rc)
goto out_unlock;
/* send SMC Accept CLC message */
rc = smc_clc_send_accept(new_smc, ini.first_contact_local);
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1);
if (rc)
goto out_unlock;
/* SMC-D does not need this lock any more */
if (ism_supported)
if (ini->is_smcd)
mutex_unlock(&smc_server_lgr_pending);
/* receive SMC Confirm CLC message */
rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
cclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
cclc = (struct smc_clc_msg_accept_confirm *)cclc2;
memset(buf, 0, sizeof(struct smc_clc_msg_proposal_area));
rc = smc_clc_wait_msg(new_smc, cclc2,
sizeof(struct smc_clc_msg_proposal_area),
SMC_CLC_CONFIRM, CLC_WAIT_TIME);
if (rc) {
if (!ism_supported)
if (!ini->is_smcd)
goto out_unlock;
goto out_decl;
}
/* finish worker */
if (!ism_supported) {
rc = smc_listen_rdma_finish(new_smc, &cclc,
ini.first_contact_local);
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
ini->first_contact_local);
if (rc)
goto out_unlock;
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, &cclc);
smc_conn_save_peer_info(new_smc, cclc);
smc_listen_out_connected(new_smc);
goto out_free;
out_unlock:
mutex_unlock(&smc_server_lgr_pending);
out_decl:
smc_listen_decline(new_smc, rc, ini.first_contact_local);
smc_listen_decline(new_smc, rc, ini, version);
out_free:
kfree(ini);
kfree(buf);
}
@ -2092,6 +2456,9 @@ static int __init smc_init(void)
if (rc)
return rc;
smc_ism_init();
smc_clc_init();
rc = smc_pnet_init();
if (rc)
goto out_pernet_subsys;

View File

@ -19,10 +19,19 @@
#include "smc_ib.h"
#define SMC_V1 1 /* SMC version V1 */
#define SMC_V2 2 /* SMC version V2 */
#define SMC_RELEASE 0
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
* devices
*/
#define SMC_MAX_HOSTNAME_LEN 32
#define SMC_MAX_EID_LEN 32
extern struct proto smc_proto;
extern struct proto smc_proto6;
@ -246,6 +255,9 @@ extern struct workqueue_struct *smc_close_wq; /* wq for close work */
extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
#define ntohll(x) be64_to_cpu(x)
#define htonll(x) cpu_to_be64(x)
/* convert an u32 value into network byte order, store it into a 3 byte field */
static inline void hton24(u8 *net, u32 host)
{

View File

@ -14,6 +14,8 @@
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <net/addrconf.h>
#include <net/sock.h>
@ -27,6 +29,7 @@
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
#define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */
@ -34,13 +37,88 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
/* eye catcher "SMCD" EBCDIC for CLC messages */
static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
/* check arriving CLC proposal */
static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_smcd_v2_extension *smcd_v2_ext;
struct smc_clc_msg_hdr *hdr = &pclc->hdr;
struct smc_clc_v2_extension *v2_ext;
v2_ext = smc_get_clc_v2_ext(pclc);
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (hdr->version == SMC_V1) {
if (hdr->typev1 == SMC_TYPE_N)
return false;
if (ntohs(hdr->length) !=
sizeof(*pclc) + ntohs(pclc->iparea_offset) +
sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
sizeof(struct smc_clc_ipv6_prefix) +
sizeof(struct smc_clc_msg_trail))
return false;
} else {
if (ntohs(hdr->length) !=
sizeof(*pclc) +
sizeof(struct smc_clc_msg_smcd) +
(hdr->typev1 != SMC_TYPE_N ?
sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
sizeof(struct smc_clc_ipv6_prefix) : 0) +
(hdr->typev2 != SMC_TYPE_N ?
sizeof(*v2_ext) +
v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
(smcd_indicated(hdr->typev2) ?
sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
sizeof(struct smc_clc_smcd_gid_chid) :
0) +
sizeof(struct smc_clc_msg_trail))
return false;
}
return true;
}
/* check arriving CLC accept or confirm */
static bool
smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
{
struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
return false;
if (hdr->version == SMC_V1) {
if ((hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
(hdr->typev1 == SMC_TYPE_D &&
ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
return false;
} else {
if (hdr->typev1 == SMC_TYPE_D &&
ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
(ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
sizeof(struct smc_clc_first_contact_ext)))
return false;
}
return true;
}
/* Build the first contact extension in @fce (OS type, SMC release and
 * local hostname) and add its size to the message length in *@len.
 */
static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
{
	/* start from all zeroes so fields not set below stay zero */
	memset(fce, 0, sizeof(*fce));

	memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
	fce->release = SMC_RELEASE;
	fce->os_type = SMC_CLC_OS_LINUX;

	*len += sizeof(*fce);
}
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_msg_accept_confirm_v2 *clc_v2;
struct smc_clc_msg_proposal *pclc;
struct smc_clc_msg_decline *dclc;
struct smc_clc_msg_trail *trl;
@ -51,29 +129,19 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
switch (clcm->type) {
case SMC_CLC_PROPOSAL:
pclc = (struct smc_clc_msg_proposal *)clcm;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (ntohs(pclc->hdr.length) <
sizeof(*pclc) + ntohs(pclc->iparea_offset) +
sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
sizeof(struct smc_clc_ipv6_prefix) +
sizeof(*trl))
if (!smc_clc_msg_prop_valid(pclc))
return false;
trl = (struct smc_clc_msg_trail *)
((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
break;
case SMC_CLC_ACCEPT:
case SMC_CLC_CONFIRM:
if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
return false;
clc = (struct smc_clc_msg_accept_confirm *)clcm;
if ((clcm->path == SMC_TYPE_R &&
ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
(clcm->path == SMC_TYPE_D &&
ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
if (!smc_clc_msg_acc_conf_valid(clc_v2))
return false;
trl = (struct smc_clc_msg_trail *)
((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
sizeof(*trl));
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
@ -327,9 +395,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
goto out;
}
if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N)
reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
if (datlen > buflen) {
@ -365,7 +430,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
dclc = (struct smc_clc_msg_decline *)clcm;
reason_code = SMC_CLC_DECL_PEERDECL;
smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
SMC_FIRST_CONTACT_MASK) {
smc->conn.lgr->sync_err = 1;
smc_lgr_terminate_sched(smc->conn.lgr);
}
@ -377,7 +443,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
/* send CLC DECLINE message across internal TCP socket */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
{
struct smc_clc_msg_decline dclc;
struct msghdr msg;
@ -388,8 +454,10 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_V1;
dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
dclc.hdr.version = version;
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
SMC_FIRST_CONTACT_MASK : 0;
if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
memcpy(dclc.id_for_peer, local_systemid,
@ -408,18 +476,20 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
}
/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_init_info *ini)
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_clc_smcd_v2_extension *smcd_v2_ext;
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_msg_proposal *pclc_base;
struct smc_clc_smcd_gid_chid *gidchids;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[5];
struct kvec vec[8];
struct msghdr msg;
pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
@ -430,56 +500,121 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
pclc_smcd = &pclc->pclc_smcd;
pclc_prfx = &pclc->pclc_prfx;
ipv6_prfx = pclc->pclc_prfx_ipv6;
v2_ext = &pclc->pclc_v2_ext;
smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
gidchids = pclc->pclc_gidchids;
trl = &pclc->pclc_trl;
pclc_base->hdr.version = SMC_V2;
pclc_base->hdr.typev1 = ini->smc_type_v1;
pclc_base->hdr.typev2 = ini->smc_type_v2;
plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
/* retrieve ip prefixes for CLC proposal msg */
rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
if (rc) {
kfree(pclc);
return SMC_CLC_DECL_CNFERR; /* configuration error */
if (ini->smc_type_v1 != SMC_TYPE_N) {
rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
if (rc) {
if (ini->smc_type_v2 == SMC_TYPE_N) {
kfree(pclc);
return SMC_CLC_DECL_CNFERR;
}
pclc_base->hdr.typev1 = SMC_TYPE_N;
} else {
pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
plen += sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
}
}
/* send SMC Proposal CLC message */
plen = sizeof(*pclc_base) + sizeof(*pclc_prfx) +
(pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
sizeof(*trl);
/* build SMC Proposal CLC message */
memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
pclc_base->hdr.type = SMC_CLC_PROPOSAL;
pclc_base->hdr.version = SMC_V1; /* SMC version */
pclc_base->hdr.path = smc_type;
if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
if (smcr_indicated(ini->smc_type_v1)) {
/* add SMC-R specifics */
memcpy(pclc_base->lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
ETH_ALEN);
pclc_base->iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
if (smcd_indicated(ini->smc_type_v1)) {
/* add SMC-D specifics */
plen += sizeof(*pclc_smcd);
pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
pclc_smcd->gid = ini->ism_dev->local_gid;
if (ini->ism_dev[0]) {
pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
pclc_smcd->ism.chid =
htons(smc_ism_get_chid(ini->ism_dev[0]));
}
}
if (ini->smc_type_v2 == SMC_TYPE_N) {
pclc_smcd->v2_ext_offset = 0;
} else {
u16 v2_ext_offset;
u8 *eid = NULL;
v2_ext_offset = sizeof(*pclc_smcd) -
offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
if (ini->smc_type_v1 != SMC_TYPE_N)
v2_ext_offset += sizeof(*pclc_prfx) +
pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
v2_ext->hdr.eid_cnt = 0;
v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
v2_ext->hdr.flag.release = SMC_RELEASE;
v2_ext->hdr.flag.seid = 1;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) +
v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
if (ini->ism_dev[0])
smc_ism_get_system_eid(ini->ism_dev[0], &eid);
else
smc_ism_get_system_eid(ini->ism_dev[1], &eid);
if (eid)
memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
gidchids[i - 1].gid =
htonll(ini->ism_dev[i]->local_gid);
gidchids[i - 1].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
}
plen += ini->ism_offered_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
}
}
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
/* send SMC Proposal CLC message */
memset(&msg, 0, sizeof(msg));
i = 0;
vec[i].iov_base = pclc_base;
vec[i++].iov_len = sizeof(*pclc_base);
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
vec[i].iov_base = pclc_smcd;
vec[i++].iov_len = sizeof(*pclc_smcd);
vec[i].iov_base = pclc_smcd;
vec[i++].iov_len = sizeof(*pclc_smcd);
if (ini->smc_type_v1 != SMC_TYPE_N) {
vec[i].iov_base = pclc_prfx;
vec[i++].iov_len = sizeof(*pclc_prfx);
if (pclc_prfx->ipv6_prefixes_cnt > 0) {
vec[i].iov_base = ipv6_prfx;
vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
}
}
vec[i].iov_base = pclc_prfx;
vec[i++].iov_len = sizeof(*pclc_prfx);
if (pclc_prfx->ipv6_prefixes_cnt > 0) {
vec[i].iov_base = ipv6_prfx;
vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
if (ini->smc_type_v2 != SMC_TYPE_N) {
vec[i].iov_base = v2_ext;
vec[i++].iov_len = sizeof(*v2_ext);
vec[i].iov_base = smcd_v2_ext;
vec[i++].iov_len = sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
vec[i].iov_base = gidchids;
vec[i++].iov_len = ini->ism_offered_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
}
}
vec[i].iov_base = trl;
vec[i++].iov_len = sizeof(*trl);
@ -499,29 +634,47 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc,
int first_contact)
struct smc_clc_msg_accept_confirm_v2 *clc_v2,
int first_contact, u8 version)
{
struct smc_connection *conn = &smc->conn;
struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_first_contact_ext fce;
struct smc_clc_msg_trail trl;
struct kvec vec[3];
struct msghdr msg;
struct kvec vec;
int i, len;
/* send SMC Confirm CLC msg */
clc->hdr.version = SMC_V1; /* SMC version */
clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
clc->hdr.version = version; /* SMC version */
if (first_contact)
clc->hdr.flag = 1;
clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
if (conn->lgr->is_smcd) {
/* SMC-D specific settings */
memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
clc->hdr.path = SMC_TYPE_D;
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
clc->hdr.typev1 = SMC_TYPE_D;
clc->d0.gid = conn->lgr->smcd->local_gid;
clc->d0.token = conn->rmb_desc->token;
clc->d0.dmbe_size = conn->rmbe_size_short;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(clc->d0.smcd_trl.eyecatcher, SMCD_EYECATCHER,
if (version == SMC_V1) {
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
} else {
u8 *eid = NULL;
clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
smc_ism_get_system_eid(conn->lgr->smcd, &eid);
if (eid)
memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact)
smc_clc_fill_fce(&fce, &len);
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
struct smc_link *link = conn->lnk;
@ -530,7 +683,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
link = conn->lnk;
memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
clc->hdr.path = SMC_TYPE_R;
clc->hdr.typev1 = SMC_TYPE_R;
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(clc->r0.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
@ -554,29 +707,43 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
memcpy(clc->r0.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
vec.iov_base = clc;
vec.iov_len = ntohs(clc->hdr.length);
return kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
i = 0;
vec[i].iov_base = clc_v2;
if (version > SMC_V1)
vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
else
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN :
SMCR_CLC_ACCEPT_CONFIRM_LEN) -
sizeof(trl);
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
vec[i++].iov_len = sizeof(fce);
}
vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl);
return kernel_sendmsg(smc->clcsock, &msg, vec, 1,
ntohs(clc->hdr.length));
}
/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc)
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version)
{
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_accept_confirm_v2 cclc_v2;
int reason_code = 0;
int len;
/* send SMC Confirm CLC msg */
memset(&cclc, 0, sizeof(cclc));
cclc.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc, 0);
if (len < ntohs(cclc.hdr.length)) {
memset(&cclc_v2, 0, sizeof(cclc_v2));
cclc_v2.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
version);
if (len < ntohs(cclc_v2.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
@ -589,16 +756,28 @@ int smc_clc_send_confirm(struct smc_sock *smc)
}
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact)
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
u8 version)
{
struct smc_clc_msg_accept_confirm aclc;
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
memset(&aclc, 0, sizeof(aclc));
aclc.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact);
if (len < ntohs(aclc.hdr.length))
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
version);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
/* Initialize smc_hostname: fill it with blanks, then copy in the node name,
 * truncated to the buffer size if necessary (no terminating NUL is added).
 */
void __init smc_clc_init(void)
{
	struct new_utsname *uts = utsname();
	size_t name_len = min_t(size_t, strlen(uts->nodename),
				sizeof(smc_hostname));

	memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
	memcpy(smc_hostname, uts->nodename, name_len);
}

View File

@ -54,19 +54,19 @@
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
u8 type; /* proposal / accept / confirm / decline */
__be16 length;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
rsvd : 1,
path : 2;
typev2 : 2,
typev1 : 2;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 path : 2,
rsvd : 1,
flag : 1,
u8 typev1 : 2,
typev2 : 2,
version : 4;
#endif
} __packed; /* format defined in RFC7609 */
@ -81,8 +81,6 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
#define SMC_CLC_MAX_V6_PREFIX 8
/* Struct would be 4 byte aligned, but it is used in an array that is sent
* to peers and must conform to RFC7609, hence we need to use packed here.
*/
@ -91,6 +89,44 @@ struct smc_clc_ipv6_prefix {
u8 prefix_len;
} __packed; /* format defined in RFC7609 */
/* Flag byte used inside struct smc_clnt_opts_area_hdr: a 4-bit release value,
 * 3 reserved bits and a 1-bit seid indicator. The struct is declared once per
 * bitfield endianness, with the members listed in opposite order.
 */
#if defined(__BIG_ENDIAN_BITFIELD)
struct smc_clc_v2_flag {
u8 release : 4,
rsvd : 3,
seid : 1;
};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
struct smc_clc_v2_flag {
u8 seid : 1,
rsvd : 3,
release : 4;
};
#endif
/* Header of the V2 extension in a CLC proposal (first member of
 * struct smc_clc_v2_extension).
 */
struct smc_clnt_opts_area_hdr {
u8 eid_cnt; /* number of user defined EIDs */
u8 ism_gid_cnt; /* number of ISMv2 GIDs */
u8 reserved1;
struct smc_clc_v2_flag flag; /* release / seid bits */
u8 reserved2[2];
/* smc_get_clc_smcd_v2_ext() counts this offset from the end of this field */
__be16 smcd_v2_ext_offset; /* SMC-Dv2 Extension Offset */
};
/* One GID/CHID pair as carried in CLC messages; both values are big endian
 * and the struct is packed.
 */
struct smc_clc_smcd_gid_chid {
__be64 gid; /* ISM GID */
__be16 chid; /* ISMv2 CHID */
} __packed; /* format defined in
* IBM Shared Memory Communications Version 2
* (https://www.ibm.com/support/pages/node/6326337)
*/
/* V2 extension of a CLC proposal: fixed header and fields followed by a
 * variable number of user defined EIDs.
 */
struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
u8 reserved[16];
/* variable part: hdr.eid_cnt entries of SMC_MAX_EID_LEN bytes each */
u8 user_eids[0][SMC_MAX_EID_LEN];
};
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
@ -99,8 +135,15 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
} __aligned(4);
struct smc_clc_msg_smcd { /* SMC-D GID information */
u64 gid; /* ISM GID of requestor */
u8 res[32];
struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
u8 reserved[28];
};
/* SMC-Dv2 extension of a CLC proposal: the system EID followed by a variable
 * number of GID/CHID pairs.
 */
struct smc_clc_smcd_v2_extension {
u8 system_eid[SMC_MAX_EID_LEN];
u8 reserved[16];
/* variable part: one entry per ISM device offered (ism_gid_cnt in the
 * V2 extension header)
 */
struct smc_clc_smcd_gid_chid gidchid[0];
};
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
@ -109,11 +152,16 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
__be16 iparea_offset; /* offset to IP address information area */
} __aligned(4);
#define SMC_CLC_MAX_V6_PREFIX 8
/* Work area holding every possible part of an outgoing CLC proposal, each
 * sized for its maximum. smc_clc_send_proposal() fills the parts it needs and
 * sends only those, so this struct is not itself the on-wire layout.
 */
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_v2_extension pclc_v2_ext;
struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS];
struct smc_clc_msg_trail pclc_trl;
};
@ -134,11 +182,9 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
struct smc_clc_msg_trail smcr_trl;
/* eye catcher "SMCR" EBCDIC */
} __packed;
struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */
struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
@ -150,26 +196,63 @@ struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */
dmbe_size : 4;
#endif
u16 reserved4;
u32 linkid; /* Link identifier */
u32 reserved5[3];
struct smc_clc_msg_trail smcd_trl;
/* eye catcher "SMCD" EBCDIC */
__be32 linkid; /* Link identifier */
} __packed;
#define SMC_CLC_OS_ZOS 1
#define SMC_CLC_OS_LINUX 2
#define SMC_CLC_OS_AIX 3
/* First contact extension: sender's OS type, SMC release and hostname.
 * An incoming V2 SMC-D accept/confirm may be longer by exactly
 * sizeof(this struct).
 */
struct smc_clc_first_contact_ext {
u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 os_type : 4, /* one of the SMC_CLC_OS_* values */
release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 release : 4,
os_type : 4; /* one of the SMC_CLC_OS_* values */
#endif
u8 reserved2[2];
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
struct smcd_clc_msg_accept_confirm d0; /* SMC-D */
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
u32 reserved5[3];
};
};
} __packed; /* format defined in RFC7609 */
/* V2 variant of the accept/confirm message. It shares the header and the
 * SMC-R / common SMC-D parts with struct smc_clc_msg_accept_confirm; the
 * SMC-D branch additionally carries a CHID and an EID.
 */
struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
__be16 chid; /* CHID of the sender's ISM device */
u8 eid[SMC_MAX_EID_LEN]; /* filled with the system EID on send */
u8 reserved5[8];
};
};
};
struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_hdr hdr;
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
__be32 peer_diagnosis; /* diagnosis information */
u8 reserved2[4];
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 os_type : 4,
reserved : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved : 4,
os_type : 4;
#endif
u8 reserved2[3];
struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
/* determine start of the prefix area within the proposal message */
@ -180,16 +263,58 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
/* true if smc_type includes SMC-R, i.e. it is SMC_TYPE_R or SMC_TYPE_B */
static inline bool smcr_indicated(int smc_type)
{
	switch (smc_type) {
	case SMC_TYPE_R:
	case SMC_TYPE_B:
		return true;
	default:
		return false;
	}
}
/* true if smc_type includes SMC-D, i.e. it is SMC_TYPE_D or SMC_TYPE_B */
static inline bool smcd_indicated(int smc_type)
{
	switch (smc_type) {
	case SMC_TYPE_D:
	case SMC_TYPE_B:
		return true;
	default:
		return false;
	}
}
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
{
if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
if (smcd_indicated(prop->hdr.typev1) &&
ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
return NULL;
return (struct smc_clc_msg_smcd *)(prop + 1);
}
static inline struct smc_clc_v2_extension *
smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop)
{
struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop);
if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset))
return NULL;
return (struct smc_clc_v2_extension *)
((u8 *)prop_smcd +
offsetof(struct smc_clc_msg_smcd, v2_ext_offset) +
sizeof(prop_smcd->v2_ext_offset) +
ntohs(prop_smcd->v2_ext_offset));
}
static inline struct smc_clc_smcd_v2_extension *
smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
{
if (!prop_v2ext)
return NULL;
if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset))
return NULL;
return (struct smc_clc_smcd_v2_extension *)
((u8 *)prop_v2ext +
offsetof(struct smc_clc_v2_extension, hdr) +
offsetof(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) +
sizeof(prop_v2ext->hdr.smcd_v2_ext_offset) +
ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
}
struct smcd_dev;
struct smc_init_info;
@ -197,10 +322,12 @@ int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type, unsigned long timeout);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version);
void smc_clc_init(void) __init;
#endif

View File

@ -375,7 +375,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
int i;
if (ini->is_smcd && ini->vlan_id) {
if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
ini->vlan_id)) {
rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
}
@ -412,13 +413,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_peer_gid;
lgr->smcd = ini->ism_dev;
lgr_list = &ini->ism_dev->lgr_list;
get_device(&ini->ism_dev[ini->ism_selected]->dev);
lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
lgr->smcd = ini->ism_dev[ini->ism_selected];
lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
lgr->smc_version = ini->smcd_version;
lgr->peer_shutdown = 0;
atomic_inc(&ini->ism_dev->lgr_cnt);
atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
@ -449,7 +451,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
kfree(lgr);
ism_put_vlan:
if (ini->is_smcd && ini->vlan_id)
smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
if (rc < 0) {
if (rc == -ENOMEM)
@ -1288,8 +1290,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
spinlock_t *lgr_lock;
int rc = 0;
lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
&smc_lgr_list.list;
lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
&smc_lgr_list.lock;
ini->first_contact_local = 1;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->first_contact_peer)
@ -1301,7 +1305,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_peer_gid) :
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == ini->vlan_id &&

View File

@ -231,6 +231,11 @@ struct smc_link_group {
u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
u8 smc_version;
u8 negotiated_eid[SMC_MAX_EID_LEN];
u8 peer_os; /* peer operating system */
u8 peer_smc_release;
u8 peer_hostname[SMC_MAX_HOSTNAME_LEN];
union {
struct { /* SMC-R */
enum smc_lgr_role role;
@ -291,6 +296,8 @@ struct smc_clc_msg_local;
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
u8 smc_type_v2;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@ -301,8 +308,12 @@ struct smc_init_info {
u8 ib_port;
u32 ib_clcqpn;
/* SMC-D */
u64 ism_peer_gid;
struct smcd_dev *ism_dev;
u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
u16 ism_chid[SMC_MAX_ISM_DEVS + 1];
u8 ism_offered_cnt; /* # of ISM devices offered */
u8 ism_selected; /* index of selected ISM dev*/
u8 smcd_version;
};
/* Find the connection associated with the given alert token in the link group.

View File

@ -21,7 +21,9 @@ struct smcd_dev_list smcd_dev_list = {
.mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex)
};
/* Test if an ISM communication is possible. */
bool smc_ism_v2_capable;
/* Test if an ISM communication is possible - same CPC */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
@ -39,6 +41,16 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
return rc < 0 ? rc : 0;
}
/* Query the system EID through the device's get_system_eid callback.
 * @smcd: ISM device whose ops are used
 * @eid:  passed through to the callback unchanged; callers in this file
 *        pre-set *eid to NULL and check it afterwards
 */
void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
{
smcd->ops->get_system_eid(smcd, eid);
}
/* Return the CHID of an ISM device as reported by its get_chid callback. */
u16 smc_ism_get_chid(struct smcd_dev *smcd)
{
return smcd->ops->get_chid(smcd);
}
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
@ -319,7 +331,18 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev);
int smcd_register_dev(struct smcd_dev *smcd)
{
mutex_lock(&smcd_dev_list.mutex);
list_add_tail(&smcd->list, &smcd_dev_list.list);
if (list_empty(&smcd_dev_list.list)) {
u8 *system_eid = NULL;
smc_ism_get_system_eid(smcd, &system_eid);
if ((*system_eid) + 24 != '0' || (*system_eid) + 28 != '0')
smc_ism_v2_capable = true;
}
/* sort list: devices without pnetid before devices with pnetid */
if (smcd->pnetid[0])
list_add_tail(&smcd->list, &smcd_dev_list.list);
else
list_add(&smcd->list, &smcd_dev_list.list);
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
@ -399,3 +422,8 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
spin_unlock_irqrestore(&smcd->lock, flags);
}
EXPORT_SYMBOL_GPL(smcd_handle_irq);
/* Init-time setup for the ISM code: start with ISM V2 capability cleared. */
void __init smc_ism_init(void)
{
smc_ism_v2_capable = false;
}

View File

@ -19,7 +19,10 @@ struct smcd_dev_list { /* List of SMCD devices */
struct mutex mutex; /* Protects list of devices */
};
extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus
* System EID is defined
*/
struct smc_ism_vlanid { /* VLAN id set on ISM device */
struct list_head list;
@ -47,4 +50,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
void smc_ism_init(void);
#endif

View File

@ -16,5 +16,6 @@ extern unsigned int smc_net_id;
/* per-network namespace private data */
struct smc_net {
struct smc_pnettable pnettable;
struct smc_pnetids_ndev pnetids_ndev;
};
#endif

View File

@ -29,8 +29,7 @@
#include "smc_ism.h"
#include "smc_core.h"
#define SMC_ASCII_BLANK 32
static struct net_device *__pnet_find_base_ndev(struct net_device *ndev);
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
@ -73,14 +72,22 @@ struct smc_pnetentry {
};
};
/* Check if the pnetid is set: only the first byte is inspected, and a pnetid
 * starting with 0 or with _S counts as not set.
 */
bool smc_pnet_is_pnetid_set(u8 *pnetid)
{
	u8 first = pnetid[0];

	return first != 0 && first != _S;
}
/* Check if two given pnetids match */
static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
{
int i;
for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
(pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
if ((pnetid1[i] == 0 || pnetid1[i] == _S) &&
(pnetid2[i] == 0 || pnetid2[i] == _S))
break;
if (pnetid1[i] != pnetid2[i])
return false;
@ -238,11 +245,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
char *pnet_name)
{
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
mutex_lock(&smc_ib_devices.mutex);
if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) {
memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
SMC_MAX_PNETID_LEN);
ib_dev->pnetid_by_user[ib_port - 1] = true;
@ -256,11 +262,10 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
*/
static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
{
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
mutex_lock(&smcd_dev_list.mutex);
if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) {
memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = true;
applied = true;
@ -708,10 +713,115 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = {
.n_ops = ARRAY_SIZE(smc_pnet_ops)
};
/* Check whether the given pnetid matches an entry in the list of netdevice
 * pnetids of the net namespace. The list is walked under the read lock.
 */
bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid)
{
	struct smc_net *sn = net_generic(net, smc_net_id);
	struct smc_pnetids_ndev_entry *entry;
	bool found = false;

	read_lock(&sn->pnetids_ndev.lock);
	list_for_each_entry(entry, &sn->pnetids_ndev.list, list) {
		found = smc_pnet_match(pnetid, entry->pnetid);
		if (found)
			break;
	}
	read_unlock(&sn->pnetids_ndev.lock);

	return found;
}
/* Add a pnetid to the list of netdevice pnetids of a net namespace.
 * If a matching entry already exists only its reference count is increased;
 * otherwise a new entry with a reference count of 1 is appended.
 *
 * Returns 0 on success (new entry or existing one reused), -ENOMEM if the
 * new entry could not be allocated.
 */
static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid)
{
	struct smc_net *sn = net_generic(net, smc_net_id);
	struct smc_pnetids_ndev_entry *pe, *pi;

	/* allocate before taking the lock so nothing is allocated under it */
	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
	if (!pe)
		return -ENOMEM;

	write_lock(&sn->pnetids_ndev.lock);
	list_for_each_entry(pi, &sn->pnetids_ndev.list, list) {
		/* compare against the existing list entry (pi), not against
		 * the still zeroed new entry (pe)
		 */
		if (smc_pnet_match(pnetid, pi->pnetid)) {
			refcount_inc(&pi->refcnt);
			kfree(pe);
			goto unlock;
		}
	}
	refcount_set(&pe->refcnt, 1);
	memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN);
	list_add_tail(&pe->list, &sn->pnetids_ndev.list);

unlock:
	write_unlock(&sn->pnetids_ndev.lock);
	return 0;
}
/* Drop one reference on the first list entry matching the given pnetid.
 * The entry is unlinked and freed when its last reference goes away.
 * Nothing happens if no entry matches.
 */
static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid)
{
	struct smc_net *sn = net_generic(net, smc_net_id);
	struct smc_pnetids_ndev_entry *cur, *next;

	write_lock(&sn->pnetids_ndev.lock);
	list_for_each_entry_safe(cur, next, &sn->pnetids_ndev.list, list) {
		if (!smc_pnet_match(pnetid, cur->pnetid))
			continue;

		/* only the first match is handled */
		if (refcount_dec_and_test(&cur->refcnt)) {
			list_del(&cur->list);
			kfree(cur);
		}
		break;
	}
	write_unlock(&sn->pnetids_ndev.lock);
}
/* Look up the base device of @dev and, if it is up and a pnetid can be
 * determined for it, add that pnetid to the namespace's pnetid list.
 * @ndev_pnetid is a caller provided buffer that receives the pnetid.
 */
static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev,
				     u8 *ndev_pnetid)
{
	struct net_device *base_dev = __pnet_find_base_ndev(dev);

	if (!(base_dev->flags & IFF_UP))
		return;

	/* a non-zero return means no pnetid was found for the base device */
	if (smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port,
				   ndev_pnetid))
		return;

	/* add to PNETIDs list */
	smc_pnet_add_pnetid(net, ndev_pnetid);
}
/* create initial list of netdevice pnetids: walk all net devices of the
 * namespace under the rtnl lock and let smc_pnet_add_base_pnetid() add an
 * entry for each one that qualifies
 */
static void smc_pnet_create_pnetids_list(struct net *net)
{
/* scratch buffer, overwritten for every device */
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct net_device *dev;
rtnl_lock();
for_each_netdev(net, dev)
smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
rtnl_unlock();
}
/* clean up list of netdevice pnetids: unlink and free every entry under the
 * write lock, regardless of its reference count
 */
static void smc_pnet_destroy_pnetids_list(struct net *net)
{
struct smc_net *sn = net_generic(net, smc_net_id);
struct smc_pnetids_ndev_entry *pe, *temp_pe;
write_lock(&sn->pnetids_ndev.lock);
/* _safe variant: the current entry is freed inside the loop */
list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) {
list_del(&pe->list);
kfree(pe);
}
write_unlock(&sn->pnetids_ndev.lock);
}
static int smc_pnet_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(event_dev);
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
switch (event) {
case NETDEV_REBOOT:
@ -721,6 +831,17 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
smc_pnet_add_by_ndev(event_dev);
return NOTIFY_OK;
case NETDEV_UP:
smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid);
return NOTIFY_OK;
case NETDEV_DOWN:
event_dev = __pnet_find_base_ndev(event_dev);
if (!smc_pnetid_by_dev_port(event_dev->dev.parent,
event_dev->dev_port, ndev_pnetid)) {
/* remove from PNETIDs list */
smc_pnet_remove_pnetid(net, ndev_pnetid);
}
return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
@ -735,9 +856,14 @@ int smc_pnet_net_init(struct net *net)
{
struct smc_net *sn = net_generic(net, smc_net_id);
struct smc_pnettable *pnettable = &sn->pnettable;
struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
rwlock_init(&pnettable->lock);
INIT_LIST_HEAD(&pnetids_ndev->list);
rwlock_init(&pnetids_ndev->lock);
smc_pnet_create_pnetids_list(net);
return 0;
}
@ -752,6 +878,7 @@ int __init smc_pnet_init(void)
rc = register_netdevice_notifier(&smc_netdev_notifier);
if (rc)
genl_unregister_family(&smc_pnet_nl_family);
return rc;
}
@ -760,6 +887,7 @@ void smc_pnet_net_exit(struct net *net)
{
/* flush pnet table */
smc_pnet_remove_by_pnetid(net, NULL);
smc_pnet_destroy_pnetids_list(net);
}
void smc_pnet_exit(void)
@ -768,16 +896,11 @@ void smc_pnet_exit(void)
genl_unregister_family(&smc_pnet_nl_family);
}
/* Determine one base device for stacked net devices.
* If the lower device level contains more than one device
* (for instance with bonding slaves), just the first device
* is used to reach a base device.
*/
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
static struct net_device *__pnet_find_base_ndev(struct net_device *ndev)
{
int i, nest_lvl;
rtnl_lock();
ASSERT_RTNL();
nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
@ -787,6 +910,18 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
lower = lower->next;
ndev = netdev_lower_get_next(ndev, &lower);
}
return ndev;
}
/* Determine one base device for stacked net devices.
 * If the lower device level contains more than one device
 * (for instance with bonding slaves), just the first device
 * is used to reach a base device.
 * The lookup itself is done by __pnet_find_base_ndev(); this wrapper
 * takes and releases the RTNL lock around it.
 */
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
{
	struct net_device *base;

	rtnl_lock();
	base = __pnet_find_base_ndev(ndev);
	rtnl_unlock();

	return base;
}
@ -929,10 +1064,10 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away &&
(!ini->ism_peer_gid ||
!smc_ism_cantalk(ini->ism_peer_gid, ini->vlan_id,
(!ini->ism_peer_gid[0] ||
!smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
ismdev))) {
ini->ism_dev = ismdev;
ini->ism_dev[0] = ismdev;
break;
}
}
@ -966,7 +1101,7 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
ini->ism_dev = NULL;
ini->ism_dev[0] = NULL;
if (!dst)
goto out;
if (!dst->dev)

View File

@ -12,6 +12,8 @@
#ifndef _SMC_PNET_H
#define _SMC_PNET_H
#include <net/smc.h>
#if IS_ENABLED(CONFIG_HAVE_PNETID)
#include <asm/pnet.h>
#endif
@ -31,6 +33,17 @@ struct smc_pnettable {
struct list_head pnetlist;
};
struct smc_pnetids_ndev { /* list of pnetids for net devices in UP state */
	struct list_head list; /* holds struct smc_pnetids_ndev_entry items */
	rwlock_t lock; /* taken for read/write around accesses to list */
};
/* one PNETID kept in the list of struct smc_pnetids_ndev */
struct smc_pnetids_ndev_entry {
	struct list_head list; /* linkage into smc_pnetids_ndev.list */
	u8 pnetid[SMC_MAX_PNETID_LEN]; /* the PNETID this entry stands for */
	refcount_t refcnt; /* users of this PNETID; entry is freed at zero */
};
static inline int smc_pnetid_by_dev_port(struct device *dev,
unsigned short port, u8 *pnetid)
{
@ -52,4 +65,6 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
struct smc_init_info *ini,
struct smc_ib_device *known_dev);
bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid);
bool smc_pnet_is_pnetid_set(u8 *pnetid);
#endif