fs: dlm: implement tcp graceful shutdown
During my code inspection I saw there is no implementation of a graceful shutdown for tcp. This patch will introduce a graceful shutdown for tcp connections. The shutdown is implemented synchronized as dlm_lowcomms_stop() is called to end all dlm communication. After shutdown is done, a lot of flush and closing functionality will be called. However I don't see a problem with that. The waitqueue for synchronize the shutdown has a timeout of 10 seconds, if timeout a force close will be exectued. Signed-off-by: Alexander Aring <aahringo@redhat.com> Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
parent
ba3ab3ca68
commit
055923bf6b
|
@ -63,6 +63,7 @@
|
|||
|
||||
/* Number of messages to send before rescheduling */
|
||||
#define MAX_SEND_MSG_COUNT 25
|
||||
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)
|
||||
|
||||
struct cbuf {
|
||||
unsigned int base;
|
||||
|
@ -110,10 +111,12 @@ struct connection {
|
|||
#define CF_CLOSE 6
|
||||
#define CF_APP_LIMITED 7
|
||||
#define CF_CLOSING 8
|
||||
#define CF_SHUTDOWN 9
|
||||
struct list_head writequeue; /* List of outgoing writequeue_entries */
|
||||
spinlock_t writequeue_lock;
|
||||
int (*rx_action) (struct connection *); /* What to do when active */
|
||||
void (*connect_action) (struct connection *); /* What to do to connect */
|
||||
void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
|
||||
struct page *rx_page;
|
||||
struct cbuf cb;
|
||||
int retries;
|
||||
|
@ -122,6 +125,7 @@ struct connection {
|
|||
struct connection *othercon;
|
||||
struct work_struct rwork; /* Receive workqueue */
|
||||
struct work_struct swork; /* Send workqueue */
|
||||
wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
|
||||
};
|
||||
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
|
||||
|
||||
|
@ -218,6 +222,7 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
|
|||
spin_lock_init(&con->writequeue_lock);
|
||||
INIT_WORK(&con->swork, process_send_sockets);
|
||||
INIT_WORK(&con->rwork, process_recv_sockets);
|
||||
init_waitqueue_head(&con->shutdown_wait);
|
||||
|
||||
/* Setup action pointers for child sockets */
|
||||
if (con->nodeid) {
|
||||
|
@ -619,6 +624,54 @@ static void close_connection(struct connection *con, bool and_other,
|
|||
clear_bit(CF_CLOSING, &con->flags);
|
||||
}
|
||||
|
||||
static void shutdown_connection(struct connection *con)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (cancel_work_sync(&con->swork)) {
|
||||
log_print("canceled swork for node %d", con->nodeid);
|
||||
clear_bit(CF_WRITE_PENDING, &con->flags);
|
||||
}
|
||||
|
||||
mutex_lock(&con->sock_mutex);
|
||||
/* nothing to shutdown */
|
||||
if (!con->sock) {
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(CF_SHUTDOWN, &con->flags);
|
||||
ret = kernel_sock_shutdown(con->sock, SHUT_WR);
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
if (ret) {
|
||||
log_print("Connection %p failed to shutdown: %d will force close",
|
||||
con, ret);
|
||||
goto force_close;
|
||||
} else {
|
||||
ret = wait_event_timeout(con->shutdown_wait,
|
||||
!test_bit(CF_SHUTDOWN, &con->flags),
|
||||
DLM_SHUTDOWN_WAIT_TIMEOUT);
|
||||
if (ret == 0) {
|
||||
log_print("Connection %p shutdown timed out, will force close",
|
||||
con);
|
||||
goto force_close;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
force_close:
|
||||
clear_bit(CF_SHUTDOWN, &con->flags);
|
||||
close_connection(con, false, true, true);
|
||||
}
|
||||
|
||||
static void dlm_tcp_shutdown(struct connection *con)
|
||||
{
|
||||
if (con->othercon)
|
||||
shutdown_connection(con->othercon);
|
||||
shutdown_connection(con);
|
||||
}
|
||||
|
||||
/* Data received from remote end */
|
||||
static int receive_from_sock(struct connection *con)
|
||||
{
|
||||
|
@ -713,13 +766,18 @@ static int receive_from_sock(struct connection *con)
|
|||
out_close:
|
||||
mutex_unlock(&con->sock_mutex);
|
||||
if (ret != -EAGAIN) {
|
||||
close_connection(con, false, true, false);
|
||||
/* Reconnect when there is something to send */
|
||||
close_connection(con, false, true, false);
|
||||
if (ret == 0) {
|
||||
log_print("connection %p got EOF from %d",
|
||||
con, con->nodeid);
|
||||
/* handling for tcp shutdown */
|
||||
clear_bit(CF_SHUTDOWN, &con->flags);
|
||||
wake_up(&con->shutdown_wait);
|
||||
/* signal to breaking receive worker */
|
||||
ret = -1;
|
||||
}
|
||||
}
|
||||
/* Don't return success if we really got EOF */
|
||||
if (ret == 0)
|
||||
ret = -EAGAIN;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -803,6 +861,7 @@ static int accept_from_sock(struct connection *con)
|
|||
spin_lock_init(&othercon->writequeue_lock);
|
||||
INIT_WORK(&othercon->swork, process_send_sockets);
|
||||
INIT_WORK(&othercon->rwork, process_recv_sockets);
|
||||
init_waitqueue_head(&othercon->shutdown_wait);
|
||||
set_bit(CF_IS_OTHERCON, &othercon->flags);
|
||||
} else {
|
||||
/* close other sock con if we have something new */
|
||||
|
@ -1047,6 +1106,7 @@ static void tcp_connect_to_sock(struct connection *con)
|
|||
|
||||
con->rx_action = receive_from_sock;
|
||||
con->connect_action = tcp_connect_to_sock;
|
||||
con->shutdown_action = dlm_tcp_shutdown;
|
||||
add_sock(sock, con);
|
||||
|
||||
/* Bind to our cluster-known address connecting to avoid
|
||||
|
@ -1542,6 +1602,12 @@ static void stop_conn(struct connection *con)
|
|||
_stop_conn(con, true);
|
||||
}
|
||||
|
||||
static void shutdown_conn(struct connection *con)
|
||||
{
|
||||
if (con->shutdown_action)
|
||||
con->shutdown_action(con);
|
||||
}
|
||||
|
||||
static void free_conn(struct connection *con)
|
||||
{
|
||||
close_connection(con, true, true, true);
|
||||
|
@ -1593,6 +1659,7 @@ void dlm_lowcomms_stop(void)
|
|||
mutex_lock(&connections_lock);
|
||||
dlm_allow_conn = 0;
|
||||
mutex_unlock(&connections_lock);
|
||||
foreach_conn(shutdown_conn);
|
||||
work_flush();
|
||||
clean_writequeues();
|
||||
foreach_conn(free_conn);
|
||||
|
|
Loading…
Reference in New Issue