mirror of https://gitee.com/openkylin/linux.git
smc: socket closing and linkgroup cleanup
smc_shutdown() and smc_release() handling delayed linkgroup cleanup for linkgroups without connections Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
952310ccf2
commit
b38d732477
|
@ -1,3 +1,3 @@
|
|||
obj-$(CONFIG_SMC) += smc.o
|
||||
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
|
||||
smc-y += smc_cdc.o smc_tx.o smc_rx.o
|
||||
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
|
||||
|
|
127
net/smc/af_smc.c
127
net/smc/af_smc.c
|
@ -39,6 +39,7 @@
|
|||
#include "smc_pnet.h"
|
||||
#include "smc_tx.h"
|
||||
#include "smc_rx.h"
|
||||
#include "smc_close.h"
|
||||
|
||||
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
|
||||
* creation
|
||||
|
@ -70,14 +71,29 @@ static int smc_release(struct socket *sock)
|
|||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct smc_sock *smc;
|
||||
int rc = 0;
|
||||
|
||||
if (!sk)
|
||||
goto out;
|
||||
|
||||
smc = smc_sk(sk);
|
||||
lock_sock(sk);
|
||||
sock_hold(sk);
|
||||
if (sk->sk_state == SMC_LISTEN)
|
||||
/* smc_close_non_accepted() is called and acquires
|
||||
* sock lock for child sockets again
|
||||
*/
|
||||
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
|
||||
else
|
||||
lock_sock(sk);
|
||||
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
if (smc->use_fallback) {
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
sk->sk_state_change(sk);
|
||||
} else {
|
||||
rc = smc_close_active(smc);
|
||||
sock_set_flag(sk, SOCK_DEAD);
|
||||
sk->sk_shutdown |= SHUTDOWN_MASK;
|
||||
}
|
||||
if (smc->clcsock) {
|
||||
sock_release(smc->clcsock);
|
||||
smc->clcsock = NULL;
|
||||
|
@ -86,11 +102,18 @@ static int smc_release(struct socket *sock)
|
|||
/* detach socket */
|
||||
sock_orphan(sk);
|
||||
sock->sk = NULL;
|
||||
if (smc->use_fallback) {
|
||||
schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
|
||||
} else if (sk->sk_state == SMC_CLOSED) {
|
||||
smc_conn_free(&smc->conn);
|
||||
schedule_delayed_work(&smc->sock_put_work,
|
||||
SMC_CLOSE_SOCK_PUT_DELAY);
|
||||
}
|
||||
release_sock(sk);
|
||||
|
||||
sock_put(sk);
|
||||
out:
|
||||
return 0;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void smc_destruct(struct sock *sk)
|
||||
|
@ -120,6 +143,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
|
|||
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
|
||||
INIT_LIST_HEAD(&smc->accept_q);
|
||||
spin_lock_init(&smc->accept_q_lock);
|
||||
INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
|
||||
sk_refcnt_debug_inc(sk);
|
||||
|
||||
return sk;
|
||||
|
@ -417,7 +441,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
|
|||
|
||||
out_connected:
|
||||
smc_copy_sock_settings_to_clc(smc);
|
||||
smc->sk.sk_state = SMC_ACTIVE;
|
||||
if (smc->sk.sk_state == SMC_INIT)
|
||||
smc->sk.sk_state = SMC_ACTIVE;
|
||||
|
||||
return rc ? rc : local_contact;
|
||||
|
||||
|
@ -559,8 +584,8 @@ static void smc_accept_unlink(struct sock *sk)
|
|||
/* remove a sock from the accept queue to bind it to a new socket created
|
||||
* for a socket accept call from user space
|
||||
*/
|
||||
static struct sock *smc_accept_dequeue(struct sock *parent,
|
||||
struct socket *new_sock)
|
||||
struct sock *smc_accept_dequeue(struct sock *parent,
|
||||
struct socket *new_sock)
|
||||
{
|
||||
struct smc_sock *isk, *n;
|
||||
struct sock *new_sk;
|
||||
|
@ -581,11 +606,17 @@ static struct sock *smc_accept_dequeue(struct sock *parent,
|
|||
}
|
||||
|
||||
/* clean up for a created but never accepted sock */
|
||||
static void smc_close_non_accepted(struct sock *sk)
|
||||
void smc_close_non_accepted(struct sock *sk)
|
||||
{
|
||||
struct smc_sock *smc = smc_sk(sk);
|
||||
|
||||
sock_hold(sk);
|
||||
lock_sock(sk);
|
||||
if (!sk->sk_lingertime)
|
||||
/* wait for peer closing */
|
||||
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
|
||||
if (!smc->use_fallback)
|
||||
smc_close_active(smc);
|
||||
if (smc->clcsock) {
|
||||
struct socket *tcp;
|
||||
|
||||
|
@ -593,7 +624,16 @@ static void smc_close_non_accepted(struct sock *sk)
|
|||
smc->clcsock = NULL;
|
||||
sock_release(tcp);
|
||||
}
|
||||
/* more closing stuff to be added with socket closing patch */
|
||||
sock_set_flag(sk, SOCK_DEAD);
|
||||
sk->sk_shutdown |= SHUTDOWN_MASK;
|
||||
if (smc->use_fallback) {
|
||||
schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
|
||||
} else {
|
||||
smc_conn_free(&smc->conn);
|
||||
schedule_delayed_work(&smc->sock_put_work,
|
||||
SMC_CLOSE_SOCK_PUT_DELAY);
|
||||
}
|
||||
release_sock(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
|
@ -761,11 +801,12 @@ static void smc_listen_work(struct work_struct *work)
|
|||
|
||||
out_connected:
|
||||
sk_refcnt_debug_inc(newsmcsk);
|
||||
newsmcsk->sk_state = SMC_ACTIVE;
|
||||
if (newsmcsk->sk_state == SMC_INIT)
|
||||
newsmcsk->sk_state = SMC_ACTIVE;
|
||||
enqueue:
|
||||
if (local_contact == SMC_FIRST_CONTACT)
|
||||
mutex_unlock(&smc_create_lgr_pending);
|
||||
lock_sock(&lsmc->sk);
|
||||
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
|
||||
if (lsmc->sk.sk_state == SMC_LISTEN) {
|
||||
smc_accept_enqueue(&lsmc->sk, newsmcsk);
|
||||
} else { /* no longer listening */
|
||||
|
@ -791,6 +832,7 @@ static void smc_listen_work(struct work_struct *work)
|
|||
|
||||
out_err:
|
||||
newsmcsk->sk_state = SMC_CLOSED;
|
||||
smc_conn_free(&new_smc->conn);
|
||||
goto enqueue; /* queue new sock with sk_err set */
|
||||
}
|
||||
|
||||
|
@ -911,7 +953,8 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
|
|||
{
|
||||
struct smc_sock *smc;
|
||||
|
||||
if (peer && (sock->sk->sk_state != SMC_ACTIVE))
|
||||
if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
|
||||
(sock->sk->sk_state != SMC_APPCLOSEWAIT1))
|
||||
return -ENOTCONN;
|
||||
|
||||
smc = smc_sk(sock->sk);
|
||||
|
@ -927,7 +970,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
|
|||
|
||||
smc = smc_sk(sk);
|
||||
lock_sock(sk);
|
||||
if (sk->sk_state != SMC_ACTIVE)
|
||||
if ((sk->sk_state != SMC_ACTIVE) &&
|
||||
(sk->sk_state != SMC_APPCLOSEWAIT1) &&
|
||||
(sk->sk_state != SMC_INIT))
|
||||
goto out;
|
||||
if (smc->use_fallback)
|
||||
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
|
||||
|
@ -947,13 +992,21 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
|
|||
|
||||
smc = smc_sk(sk);
|
||||
lock_sock(sk);
|
||||
if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
|
||||
if ((sk->sk_state == SMC_INIT) ||
|
||||
(sk->sk_state == SMC_LISTEN) ||
|
||||
(sk->sk_state == SMC_CLOSED))
|
||||
goto out;
|
||||
|
||||
if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
|
||||
rc = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (smc->use_fallback)
|
||||
rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
|
||||
else
|
||||
rc = smc_rx_recvmsg(smc, msg, len, flags);
|
||||
|
||||
out:
|
||||
release_sock(sk);
|
||||
return rc;
|
||||
|
@ -1013,7 +1066,8 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
|
|||
mask |= smc_accept_poll(sk);
|
||||
if (sk->sk_err)
|
||||
mask |= POLLERR;
|
||||
if (atomic_read(&smc->conn.sndbuf_space)) {
|
||||
if (atomic_read(&smc->conn.sndbuf_space) ||
|
||||
(sk->sk_shutdown & SEND_SHUTDOWN)) {
|
||||
mask |= POLLOUT | POLLWRNORM;
|
||||
} else {
|
||||
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
||||
|
@ -1021,7 +1075,14 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
|
|||
}
|
||||
if (atomic_read(&smc->conn.bytes_to_rcv))
|
||||
mask |= POLLIN | POLLRDNORM;
|
||||
/* for now - to be enhanced in follow-on patch */
|
||||
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
|
||||
(sk->sk_state == SMC_CLOSED))
|
||||
mask |= POLLHUP;
|
||||
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
||||
mask |= POLLIN | POLLRDNORM | POLLRDHUP;
|
||||
if (sk->sk_state == SMC_APPCLOSEWAIT1)
|
||||
mask |= POLLIN;
|
||||
|
||||
}
|
||||
|
||||
return mask;
|
||||
|
@ -1032,31 +1093,53 @@ static int smc_shutdown(struct socket *sock, int how)
|
|||
struct sock *sk = sock->sk;
|
||||
struct smc_sock *smc;
|
||||
int rc = -EINVAL;
|
||||
int rc1 = 0;
|
||||
|
||||
smc = smc_sk(sk);
|
||||
|
||||
if ((how < SHUT_RD) || (how > SHUT_RDWR))
|
||||
goto out_err;
|
||||
return rc;
|
||||
|
||||
lock_sock(sk);
|
||||
|
||||
rc = -ENOTCONN;
|
||||
if (sk->sk_state == SMC_CLOSED)
|
||||
if ((sk->sk_state != SMC_LISTEN) &&
|
||||
(sk->sk_state != SMC_ACTIVE) &&
|
||||
(sk->sk_state != SMC_PEERCLOSEWAIT1) &&
|
||||
(sk->sk_state != SMC_PEERCLOSEWAIT2) &&
|
||||
(sk->sk_state != SMC_APPCLOSEWAIT1) &&
|
||||
(sk->sk_state != SMC_APPCLOSEWAIT2) &&
|
||||
(sk->sk_state != SMC_APPFINCLOSEWAIT))
|
||||
goto out;
|
||||
if (smc->use_fallback) {
|
||||
rc = kernel_sock_shutdown(smc->clcsock, how);
|
||||
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
|
||||
if (sk->sk_shutdown == SHUTDOWN_MASK)
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
} else {
|
||||
rc = sock_no_shutdown(sock, how);
|
||||
goto out;
|
||||
}
|
||||
switch (how) {
|
||||
case SHUT_RDWR: /* shutdown in both directions */
|
||||
rc = smc_close_active(smc);
|
||||
break;
|
||||
case SHUT_WR:
|
||||
rc = smc_close_shutdown_write(smc);
|
||||
break;
|
||||
case SHUT_RD:
|
||||
if (sk->sk_state == SMC_LISTEN)
|
||||
rc = smc_close_active(smc);
|
||||
else
|
||||
rc = 0;
|
||||
/* nothing more to do because peer is not involved */
|
||||
break;
|
||||
}
|
||||
rc1 = kernel_sock_shutdown(smc->clcsock, how);
|
||||
/* map sock_shutdown_cmd constants to sk_shutdown value range */
|
||||
sk->sk_shutdown |= how + 1;
|
||||
|
||||
out:
|
||||
release_sock(sk);
|
||||
|
||||
out_err:
|
||||
return rc;
|
||||
return rc ? rc : rc1;
|
||||
}
|
||||
|
||||
static int smc_setsockopt(struct socket *sock, int level, int optname,
|
||||
|
|
|
@ -30,6 +30,16 @@ enum smc_state { /* possible states of an SMC socket */
|
|||
SMC_INIT = 2,
|
||||
SMC_CLOSED = 7,
|
||||
SMC_LISTEN = 10,
|
||||
/* normal close */
|
||||
SMC_PEERCLOSEWAIT1 = 20,
|
||||
SMC_PEERCLOSEWAIT2 = 21,
|
||||
SMC_APPFINCLOSEWAIT = 24,
|
||||
SMC_APPCLOSEWAIT1 = 22,
|
||||
SMC_APPCLOSEWAIT2 = 23,
|
||||
SMC_PEERFINCLOSEWAIT = 25,
|
||||
/* abnormal close */
|
||||
SMC_PEERABORTWAIT = 26,
|
||||
SMC_PROCESSABORT = 27,
|
||||
};
|
||||
|
||||
struct smc_link_group;
|
||||
|
@ -164,7 +174,13 @@ struct smc_sock { /* smc sock container */
|
|||
struct work_struct smc_listen_work;/* prepare new accept socket */
|
||||
struct list_head accept_q; /* sockets to be accepted */
|
||||
spinlock_t accept_q_lock; /* protects accept_q */
|
||||
struct delayed_work sock_put_work; /* final socket freeing */
|
||||
bool use_fallback; /* fallback to tcp */
|
||||
u8 wait_close_tx_prepared : 1;
|
||||
/* shutdown wr or close
|
||||
* started, waiting for unsent
|
||||
* data to be sent
|
||||
*/
|
||||
};
|
||||
|
||||
static inline struct smc_sock *smc_sk(const struct sock *sk)
|
||||
|
@ -250,5 +266,7 @@ void smc_conn_free(struct smc_connection *conn);
|
|||
int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
|
||||
struct smc_ib_device *smcibdev, u8 ibport,
|
||||
struct smc_clc_msg_local *lcl, int srv_first_contact);
|
||||
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
|
||||
void smc_close_non_accepted(struct sock *sk);
|
||||
|
||||
#endif /* __SMC_H */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "smc_cdc.h"
|
||||
#include "smc_tx.h"
|
||||
#include "smc_rx.h"
|
||||
#include "smc_close.h"
|
||||
|
||||
/********************************** send *************************************/
|
||||
|
||||
|
@ -55,6 +56,9 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
|
|||
cdcpend->conn);
|
||||
}
|
||||
smc_tx_sndbuf_nonfull(smc);
|
||||
if (smc->sk.sk_state != SMC_ACTIVE)
|
||||
/* wake up smc_close_wait_tx_pends() */
|
||||
smc->sk.sk_state_change(&smc->sk);
|
||||
bh_unlock_sock(&smc->sk);
|
||||
}
|
||||
|
||||
|
@ -149,6 +153,14 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
|
|||
(unsigned long)conn);
|
||||
}
|
||||
|
||||
bool smc_cdc_tx_has_pending(struct smc_connection *conn)
|
||||
{
|
||||
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
|
||||
|
||||
return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
|
||||
smc_cdc_tx_filter, (unsigned long)conn);
|
||||
}
|
||||
|
||||
/********************************* receive ***********************************/
|
||||
|
||||
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
|
||||
|
@ -201,15 +213,20 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
|
|||
smc->sk.sk_data_ready(&smc->sk);
|
||||
}
|
||||
|
||||
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
|
||||
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
|
||||
smc->sk.sk_err = ECONNRESET;
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
|
||||
}
|
||||
if (smc_cdc_rxed_any_close_or_senddone(conn))
|
||||
/* subsequent patch: terminate connection */
|
||||
smc_close_passive_received(smc);
|
||||
|
||||
/* piggy backed tx info */
|
||||
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
|
||||
if (diff_cons && smc_tx_prepared_sends(conn))
|
||||
if (diff_cons && smc_tx_prepared_sends(conn)) {
|
||||
smc_tx_sndbuf_nonempty(conn);
|
||||
/* trigger socket release if connection closed */
|
||||
smc_close_wake_tx_prepared(smc);
|
||||
}
|
||||
|
||||
/* subsequent patch: trigger socket release if connection closed */
|
||||
|
||||
|
|
|
@ -212,6 +212,7 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
|
|||
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
|
||||
struct smc_cdc_tx_pend *pend);
|
||||
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
|
||||
bool smc_cdc_tx_has_pending(struct smc_connection *conn);
|
||||
int smc_cdc_init(void) __init;
|
||||
|
||||
#endif /* SMC_CDC_H */
|
||||
|
|
|
@ -0,0 +1,441 @@
|
|||
/*
|
||||
* Shared Memory Communications over RDMA (SMC-R) and RoCE
|
||||
*
|
||||
* Socket Closing - normal and abnormal
|
||||
*
|
||||
* Copyright IBM Corp. 2016
|
||||
*
|
||||
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/workqueue.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
#include "smc.h"
|
||||
#include "smc_tx.h"
|
||||
#include "smc_cdc.h"
|
||||
#include "smc_close.h"
|
||||
|
||||
#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
|
||||
|
||||
static void smc_close_cleanup_listen(struct sock *parent)
|
||||
{
|
||||
struct sock *sk;
|
||||
|
||||
/* Close non-accepted connections */
|
||||
while ((sk = smc_accept_dequeue(parent, NULL)))
|
||||
smc_close_non_accepted(sk);
|
||||
}
|
||||
|
||||
static void smc_close_wait_tx_pends(struct smc_sock *smc)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
struct sock *sk = &smc->sk;
|
||||
signed long timeout;
|
||||
|
||||
timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
|
||||
add_wait_queue(sk_sleep(sk), &wait);
|
||||
while (!signal_pending(current) && timeout) {
|
||||
int rc;
|
||||
|
||||
rc = sk_wait_event(sk, &timeout,
|
||||
!smc_cdc_tx_has_pending(&smc->conn),
|
||||
&wait);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
}
|
||||
|
||||
/* wait for sndbuf data being transmitted */
|
||||
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
|
||||
{
|
||||
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
||||
struct sock *sk = &smc->sk;
|
||||
|
||||
if (!timeout)
|
||||
return;
|
||||
|
||||
if (!smc_tx_prepared_sends(&smc->conn))
|
||||
return;
|
||||
|
||||
smc->wait_close_tx_prepared = 1;
|
||||
add_wait_queue(sk_sleep(sk), &wait);
|
||||
while (!signal_pending(current) && timeout) {
|
||||
int rc;
|
||||
|
||||
rc = sk_wait_event(sk, &timeout,
|
||||
!smc_tx_prepared_sends(&smc->conn) ||
|
||||
(sk->sk_err == ECONNABORTED) ||
|
||||
(sk->sk_err == ECONNRESET),
|
||||
&wait);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
remove_wait_queue(sk_sleep(sk), &wait);
|
||||
smc->wait_close_tx_prepared = 0;
|
||||
}
|
||||
|
||||
void smc_close_wake_tx_prepared(struct smc_sock *smc)
|
||||
{
|
||||
if (smc->wait_close_tx_prepared)
|
||||
/* wake up socket closing */
|
||||
smc->sk.sk_state_change(&smc->sk);
|
||||
}
|
||||
|
||||
static int smc_close_wr(struct smc_connection *conn)
|
||||
{
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
|
||||
|
||||
return smc_cdc_get_slot_and_msg_send(conn);
|
||||
}
|
||||
|
||||
static int smc_close_final(struct smc_connection *conn)
|
||||
{
|
||||
if (atomic_read(&conn->bytes_to_rcv))
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
|
||||
else
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
|
||||
|
||||
return smc_cdc_get_slot_and_msg_send(conn);
|
||||
}
|
||||
|
||||
static int smc_close_abort(struct smc_connection *conn)
|
||||
{
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
|
||||
|
||||
return smc_cdc_get_slot_and_msg_send(conn);
|
||||
}
|
||||
|
||||
/* terminate smc socket abnormally - active abort
|
||||
* RDMA communication no longer possible
|
||||
*/
|
||||
void smc_close_active_abort(struct smc_sock *smc)
|
||||
{
|
||||
struct smc_cdc_conn_state_flags *txflags =
|
||||
&smc->conn.local_tx_ctrl.conn_state_flags;
|
||||
|
||||
bh_lock_sock(&smc->sk);
|
||||
smc->sk.sk_err = ECONNABORTED;
|
||||
if (smc->clcsock && smc->clcsock->sk) {
|
||||
smc->clcsock->sk->sk_err = ECONNABORTED;
|
||||
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
|
||||
}
|
||||
switch (smc->sk.sk_state) {
|
||||
case SMC_INIT:
|
||||
smc->sk.sk_state = SMC_PEERABORTWAIT;
|
||||
break;
|
||||
case SMC_APPCLOSEWAIT1:
|
||||
case SMC_APPCLOSEWAIT2:
|
||||
txflags->peer_conn_abort = 1;
|
||||
sock_release(smc->clcsock);
|
||||
if (!smc_cdc_rxed_any_close(&smc->conn))
|
||||
smc->sk.sk_state = SMC_PEERABORTWAIT;
|
||||
else
|
||||
smc->sk.sk_state = SMC_CLOSED;
|
||||
break;
|
||||
case SMC_PEERCLOSEWAIT1:
|
||||
case SMC_PEERCLOSEWAIT2:
|
||||
if (!txflags->peer_conn_closed) {
|
||||
smc->sk.sk_state = SMC_PEERABORTWAIT;
|
||||
txflags->peer_conn_abort = 1;
|
||||
sock_release(smc->clcsock);
|
||||
} else {
|
||||
smc->sk.sk_state = SMC_CLOSED;
|
||||
}
|
||||
break;
|
||||
case SMC_PROCESSABORT:
|
||||
case SMC_APPFINCLOSEWAIT:
|
||||
if (!txflags->peer_conn_closed) {
|
||||
txflags->peer_conn_abort = 1;
|
||||
sock_release(smc->clcsock);
|
||||
}
|
||||
smc->sk.sk_state = SMC_CLOSED;
|
||||
break;
|
||||
case SMC_PEERFINCLOSEWAIT:
|
||||
case SMC_PEERABORTWAIT:
|
||||
case SMC_CLOSED:
|
||||
break;
|
||||
}
|
||||
|
||||
sock_set_flag(&smc->sk, SOCK_DEAD);
|
||||
bh_unlock_sock(&smc->sk);
|
||||
smc->sk.sk_state_change(&smc->sk);
|
||||
}
|
||||
|
||||
int smc_close_active(struct smc_sock *smc)
|
||||
{
|
||||
struct smc_cdc_conn_state_flags *txflags =
|
||||
&smc->conn.local_tx_ctrl.conn_state_flags;
|
||||
long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
|
||||
struct smc_connection *conn = &smc->conn;
|
||||
struct sock *sk = &smc->sk;
|
||||
int old_state;
|
||||
int rc = 0;
|
||||
|
||||
if (sock_flag(sk, SOCK_LINGER) &&
|
||||
!(current->flags & PF_EXITING))
|
||||
timeout = sk->sk_lingertime;
|
||||
|
||||
again:
|
||||
old_state = sk->sk_state;
|
||||
switch (old_state) {
|
||||
case SMC_INIT:
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
if (smc->smc_listen_work.func)
|
||||
flush_work(&smc->smc_listen_work);
|
||||
sock_put(sk);
|
||||
break;
|
||||
case SMC_LISTEN:
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
sk->sk_state_change(sk); /* wake up accept */
|
||||
if (smc->clcsock && smc->clcsock->sk) {
|
||||
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
|
||||
/* wake up kernel_accept of smc_tcp_listen_worker */
|
||||
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
|
||||
}
|
||||
release_sock(sk);
|
||||
smc_close_cleanup_listen(sk);
|
||||
flush_work(&smc->tcp_listen_work);
|
||||
lock_sock(sk);
|
||||
break;
|
||||
case SMC_ACTIVE:
|
||||
smc_close_stream_wait(smc, timeout);
|
||||
release_sock(sk);
|
||||
cancel_work_sync(&conn->tx_work);
|
||||
lock_sock(sk);
|
||||
if (sk->sk_state == SMC_ACTIVE) {
|
||||
/* send close request */
|
||||
rc = smc_close_final(conn);
|
||||
sk->sk_state = SMC_PEERCLOSEWAIT1;
|
||||
} else {
|
||||
/* peer event has changed the state */
|
||||
goto again;
|
||||
}
|
||||
break;
|
||||
case SMC_APPFINCLOSEWAIT:
|
||||
/* socket already shutdown wr or both (active close) */
|
||||
if (txflags->peer_done_writing &&
|
||||
!txflags->peer_conn_closed) {
|
||||
/* just shutdown wr done, send close request */
|
||||
rc = smc_close_final(conn);
|
||||
}
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
smc_close_wait_tx_pends(smc);
|
||||
break;
|
||||
case SMC_APPCLOSEWAIT1:
|
||||
case SMC_APPCLOSEWAIT2:
|
||||
if (!smc_cdc_rxed_any_close(conn))
|
||||
smc_close_stream_wait(smc, timeout);
|
||||
release_sock(sk);
|
||||
cancel_work_sync(&conn->tx_work);
|
||||
lock_sock(sk);
|
||||
if (sk->sk_err != ECONNABORTED) {
|
||||
/* confirm close from peer */
|
||||
rc = smc_close_final(conn);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
if (smc_cdc_rxed_any_close(conn))
|
||||
/* peer has closed the socket already */
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
else
|
||||
/* peer has just issued a shutdown write */
|
||||
sk->sk_state = SMC_PEERFINCLOSEWAIT;
|
||||
smc_close_wait_tx_pends(smc);
|
||||
break;
|
||||
case SMC_PEERCLOSEWAIT1:
|
||||
case SMC_PEERCLOSEWAIT2:
|
||||
case SMC_PEERFINCLOSEWAIT:
|
||||
/* peer sending PeerConnectionClosed will cause transition */
|
||||
break;
|
||||
case SMC_PROCESSABORT:
|
||||
cancel_work_sync(&conn->tx_work);
|
||||
smc_close_abort(conn);
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
smc_close_wait_tx_pends(smc);
|
||||
break;
|
||||
case SMC_PEERABORTWAIT:
|
||||
case SMC_CLOSED:
|
||||
/* nothing to do, add tracing in future patch */
|
||||
break;
|
||||
}
|
||||
|
||||
if (old_state != sk->sk_state)
|
||||
sk->sk_state_change(&smc->sk);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void smc_close_passive_abort_received(struct smc_sock *smc)
|
||||
{
|
||||
struct smc_cdc_conn_state_flags *txflags =
|
||||
&smc->conn.local_tx_ctrl.conn_state_flags;
|
||||
struct sock *sk = &smc->sk;
|
||||
|
||||
switch (sk->sk_state) {
|
||||
case SMC_ACTIVE:
|
||||
case SMC_APPFINCLOSEWAIT:
|
||||
case SMC_APPCLOSEWAIT1:
|
||||
case SMC_APPCLOSEWAIT2:
|
||||
smc_close_abort(&smc->conn);
|
||||
sk->sk_state = SMC_PROCESSABORT;
|
||||
break;
|
||||
case SMC_PEERCLOSEWAIT1:
|
||||
case SMC_PEERCLOSEWAIT2:
|
||||
if (txflags->peer_done_writing &&
|
||||
!txflags->peer_conn_closed) {
|
||||
/* just shutdown, but not yet closed locally */
|
||||
smc_close_abort(&smc->conn);
|
||||
sk->sk_state = SMC_PROCESSABORT;
|
||||
} else {
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
}
|
||||
break;
|
||||
case SMC_PEERFINCLOSEWAIT:
|
||||
case SMC_PEERABORTWAIT:
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
break;
|
||||
case SMC_INIT:
|
||||
case SMC_PROCESSABORT:
|
||||
/* nothing to do, add tracing in future patch */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
|
||||
* or peer_done_writing.
|
||||
* Called under tasklet context.
|
||||
*/
|
||||
void smc_close_passive_received(struct smc_sock *smc)
|
||||
{
|
||||
struct smc_cdc_conn_state_flags *rxflags =
|
||||
&smc->conn.local_rx_ctrl.conn_state_flags;
|
||||
struct sock *sk = &smc->sk;
|
||||
int old_state;
|
||||
|
||||
sk->sk_shutdown |= RCV_SHUTDOWN;
|
||||
if (smc->clcsock && smc->clcsock->sk)
|
||||
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
|
||||
sock_set_flag(&smc->sk, SOCK_DONE);
|
||||
|
||||
old_state = sk->sk_state;
|
||||
|
||||
if (rxflags->peer_conn_abort) {
|
||||
smc_close_passive_abort_received(smc);
|
||||
goto wakeup;
|
||||
}
|
||||
|
||||
switch (sk->sk_state) {
|
||||
case SMC_INIT:
|
||||
if (atomic_read(&smc->conn.bytes_to_rcv) ||
|
||||
(rxflags->peer_done_writing &&
|
||||
!rxflags->peer_conn_closed))
|
||||
sk->sk_state = SMC_APPCLOSEWAIT1;
|
||||
else
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
break;
|
||||
case SMC_ACTIVE:
|
||||
sk->sk_state = SMC_APPCLOSEWAIT1;
|
||||
break;
|
||||
case SMC_PEERCLOSEWAIT1:
|
||||
if (rxflags->peer_done_writing)
|
||||
sk->sk_state = SMC_PEERCLOSEWAIT2;
|
||||
/* fall through to check for closing */
|
||||
case SMC_PEERCLOSEWAIT2:
|
||||
case SMC_PEERFINCLOSEWAIT:
|
||||
if (!smc_cdc_rxed_any_close(&smc->conn))
|
||||
break;
|
||||
if (sock_flag(sk, SOCK_DEAD) &&
|
||||
(sk->sk_shutdown == SHUTDOWN_MASK)) {
|
||||
/* smc_release has already been called locally */
|
||||
sk->sk_state = SMC_CLOSED;
|
||||
} else {
|
||||
/* just shutdown, but not yet closed locally */
|
||||
sk->sk_state = SMC_APPFINCLOSEWAIT;
|
||||
}
|
||||
break;
|
||||
case SMC_APPCLOSEWAIT1:
|
||||
case SMC_APPCLOSEWAIT2:
|
||||
case SMC_APPFINCLOSEWAIT:
|
||||
case SMC_PEERABORTWAIT:
|
||||
case SMC_PROCESSABORT:
|
||||
case SMC_CLOSED:
|
||||
/* nothing to do, add tracing in future patch */
|
||||
break;
|
||||
}
|
||||
|
||||
wakeup:
|
||||
if (old_state != sk->sk_state)
|
||||
sk->sk_state_change(sk);
|
||||
sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
|
||||
sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
|
||||
|
||||
if ((sk->sk_state == SMC_CLOSED) &&
|
||||
(sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
|
||||
smc_conn_free(&smc->conn);
|
||||
schedule_delayed_work(&smc->sock_put_work,
|
||||
SMC_CLOSE_SOCK_PUT_DELAY);
|
||||
}
|
||||
}
|
||||
|
||||
void smc_close_sock_put_work(struct work_struct *work)
|
||||
{
|
||||
struct smc_sock *smc = container_of(to_delayed_work(work),
|
||||
struct smc_sock,
|
||||
sock_put_work);
|
||||
|
||||
sock_put(&smc->sk);
|
||||
}
|
||||
|
||||
int smc_close_shutdown_write(struct smc_sock *smc)
|
||||
{
|
||||
struct smc_connection *conn = &smc->conn;
|
||||
long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
|
||||
struct sock *sk = &smc->sk;
|
||||
int old_state;
|
||||
int rc = 0;
|
||||
|
||||
if (sock_flag(sk, SOCK_LINGER))
|
||||
timeout = sk->sk_lingertime;
|
||||
|
||||
again:
|
||||
old_state = sk->sk_state;
|
||||
switch (old_state) {
|
||||
case SMC_ACTIVE:
|
||||
smc_close_stream_wait(smc, timeout);
|
||||
release_sock(sk);
|
||||
cancel_work_sync(&conn->tx_work);
|
||||
lock_sock(sk);
|
||||
/* send close wr request */
|
||||
rc = smc_close_wr(conn);
|
||||
if (sk->sk_state == SMC_ACTIVE)
|
||||
sk->sk_state = SMC_PEERCLOSEWAIT1;
|
||||
else
|
||||
goto again;
|
||||
break;
|
||||
case SMC_APPCLOSEWAIT1:
|
||||
/* passive close */
|
||||
if (!smc_cdc_rxed_any_close(conn))
|
||||
smc_close_stream_wait(smc, timeout);
|
||||
release_sock(sk);
|
||||
cancel_work_sync(&conn->tx_work);
|
||||
lock_sock(sk);
|
||||
/* confirm close from peer */
|
||||
rc = smc_close_wr(conn);
|
||||
sk->sk_state = SMC_APPCLOSEWAIT2;
|
||||
break;
|
||||
case SMC_APPCLOSEWAIT2:
|
||||
case SMC_PEERFINCLOSEWAIT:
|
||||
case SMC_PEERCLOSEWAIT1:
|
||||
case SMC_PEERCLOSEWAIT2:
|
||||
case SMC_APPFINCLOSEWAIT:
|
||||
case SMC_PROCESSABORT:
|
||||
case SMC_PEERABORTWAIT:
|
||||
/* nothing to do, add tracing in future patch */
|
||||
break;
|
||||
}
|
||||
|
||||
if (old_state != sk->sk_state)
|
||||
sk->sk_state_change(&smc->sk);
|
||||
return rc;
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Shared Memory Communications over RDMA (SMC-R) and RoCE
|
||||
*
|
||||
* Socket Closing
|
||||
*
|
||||
* Copyright IBM Corp. 2016
|
||||
*
|
||||
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef SMC_CLOSE_H
|
||||
#define SMC_CLOSE_H
|
||||
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
#include "smc.h"
|
||||
|
||||
#define SMC_MAX_STREAM_WAIT_TIMEOUT (2 * HZ)
|
||||
#define SMC_CLOSE_SOCK_PUT_DELAY HZ
|
||||
|
||||
void smc_close_wake_tx_prepared(struct smc_sock *smc);
|
||||
void smc_close_active_abort(struct smc_sock *smc);
|
||||
int smc_close_active(struct smc_sock *smc);
|
||||
void smc_close_passive_received(struct smc_sock *smc);
|
||||
void smc_close_sock_put_work(struct work_struct *work);
|
||||
int smc_close_shutdown_write(struct smc_sock *smc);
|
||||
|
||||
#endif /* SMC_CLOSE_H */
|
|
@ -23,6 +23,7 @@
|
|||
#include "smc_wr.h"
|
||||
#include "smc_llc.h"
|
||||
#include "smc_cdc.h"
|
||||
#include "smc_close.h"
|
||||
|
||||
#define SMC_LGR_NUM_INCR 256
|
||||
#define SMC_LGR_FREE_DELAY (600 * HZ)
|
||||
|
@ -295,6 +296,7 @@ void smc_lgr_free(struct smc_link_group *lgr)
|
|||
void smc_lgr_terminate(struct smc_link_group *lgr)
|
||||
{
|
||||
struct smc_connection *conn;
|
||||
struct smc_sock *smc;
|
||||
struct rb_node *node;
|
||||
|
||||
spin_lock_bh(&smc_lgr_list.lock);
|
||||
|
@ -311,11 +313,14 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
|
|||
node = rb_first(&lgr->conns_all);
|
||||
while (node) {
|
||||
conn = rb_entry(node, struct smc_connection, alert_node);
|
||||
smc = container_of(conn, struct smc_sock, conn);
|
||||
sock_hold(&smc->sk);
|
||||
__smc_lgr_unregister_conn(conn);
|
||||
smc_close_active_abort(smc);
|
||||
sock_put(&smc->sk);
|
||||
node = rb_first(&lgr->conns_all);
|
||||
}
|
||||
write_unlock_bh(&lgr->conns_lock);
|
||||
schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
|
||||
}
|
||||
|
||||
/* Determine vlan of internal TCP socket.
|
||||
|
|
|
@ -139,6 +139,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
|
|||
if (sk->sk_state == SMC_INIT)
|
||||
return -ENOTCONN;
|
||||
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
|
||||
(smc->sk.sk_err == ECONNABORTED) ||
|
||||
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
|
||||
return -EPIPE;
|
||||
if (smc_cdc_rxed_any_close(conn))
|
||||
|
@ -392,6 +393,13 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
|
|||
&pend);
|
||||
if (rc < 0) {
|
||||
if (rc == -EBUSY) {
|
||||
struct smc_sock *smc =
|
||||
container_of(conn, struct smc_sock, conn);
|
||||
|
||||
if (smc->sk.sk_err == ECONNABORTED) {
|
||||
rc = sock_error(&smc->sk);
|
||||
goto out_unlock;
|
||||
}
|
||||
rc = 0;
|
||||
schedule_work(&conn->tx_work);
|
||||
}
|
||||
|
|
|
@ -81,6 +81,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
|
|||
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
|
||||
return;
|
||||
if (wc->status) {
|
||||
struct smc_link_group *lgr;
|
||||
|
||||
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
|
||||
/* clear full struct smc_wr_tx_pend including .priv */
|
||||
memset(&link->wr_tx_pends[i], 0,
|
||||
|
@ -89,9 +91,10 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
|
|||
sizeof(link->wr_tx_bufs[i]));
|
||||
clear_bit(i, link->wr_tx_mask);
|
||||
}
|
||||
/* tbd in future patch: terminate connections of this link
|
||||
* group abnormally
|
||||
*/
|
||||
/* terminate connections of this link group abnormally */
|
||||
lgr = container_of(link, struct smc_link_group,
|
||||
lnk[SMC_SINGLE_LINK]);
|
||||
smc_lgr_terminate(lgr);
|
||||
}
|
||||
if (pnd_snd.handler)
|
||||
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
|
||||
|
@ -176,9 +179,12 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
|
|||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
|
||||
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
|
||||
if (!rc) {
|
||||
/* tbd in future patch: timeout - terminate connections
|
||||
* of this link group abnormally
|
||||
*/
|
||||
/* timeout - terminate connections */
|
||||
struct smc_link_group *lgr;
|
||||
|
||||
lgr = container_of(link, struct smc_link_group,
|
||||
lnk[SMC_SINGLE_LINK]);
|
||||
smc_lgr_terminate(lgr);
|
||||
return -EPIPE;
|
||||
}
|
||||
if (rc == -ERESTARTSYS)
|
||||
|
@ -256,6 +262,24 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
|
|||
}
|
||||
}
|
||||
|
||||
bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
|
||||
smc_wr_tx_filter filter, unsigned long data)
|
||||
{
|
||||
struct smc_wr_tx_pend_priv *tx_pend;
|
||||
struct smc_wr_rx_hdr *wr_rx;
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
|
||||
wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
|
||||
if (wr_rx->type != wr_rx_hdr_type)
|
||||
continue;
|
||||
tx_pend = &link->wr_tx_pends[i].priv;
|
||||
if (filter(tx_pend, data))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/****************************** receive queue ********************************/
|
||||
|
||||
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
|
||||
|
@ -310,14 +334,19 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
|
|||
smc_wr_rx_demultiplex(&wc[i]);
|
||||
smc_wr_rx_post(link); /* refill WR RX */
|
||||
} else {
|
||||
struct smc_link_group *lgr;
|
||||
|
||||
/* handle status errors */
|
||||
switch (wc[i].status) {
|
||||
case IB_WC_RETRY_EXC_ERR:
|
||||
case IB_WC_RNR_RETRY_EXC_ERR:
|
||||
case IB_WC_WR_FLUSH_ERR:
|
||||
/* tbd in future patch: terminate connections of this
|
||||
* link group abnormally
|
||||
*/
|
||||
/* terminate connections of this link group
|
||||
* abnormally
|
||||
*/
|
||||
lgr = container_of(link, struct smc_link_group,
|
||||
lnk[SMC_SINGLE_LINK]);
|
||||
smc_lgr_terminate(lgr);
|
||||
break;
|
||||
default:
|
||||
smc_wr_rx_post(link); /* refill WR RX */
|
||||
|
|
|
@ -92,6 +92,8 @@ int smc_wr_tx_put_slot(struct smc_link *link,
|
|||
int smc_wr_tx_send(struct smc_link *link,
|
||||
struct smc_wr_tx_pend_priv *wr_pend_priv);
|
||||
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
|
||||
bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
|
||||
smc_wr_tx_filter filter, unsigned long data);
|
||||
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
|
||||
smc_wr_tx_filter filter,
|
||||
smc_wr_tx_dismisser dismisser,
|
||||
|
|
Loading…
Reference in New Issue