mirror of https://gitee.com/openkylin/linux.git
Merge branch 'mptcp-simplify-mptcp_accept'
Paolo Abeni says:
====================
mptcp: simplify mptcp_accept()
Currently we allocate the MPTCP master socket at accept time.
The above makes mptcp_accept() quite complex and requires checks in several
places for a NULL MPTCP master socket.
This series simplifies the MPTCP accept implementation, moving the master socket
allocation to syn-ack time, so that the now unneeded checks can be dropped by the
follow-up patch.
v1 -> v2:
- rebased on top of 2398e3991b
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
commit a79c838fb0
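Before the diff, a quick orientation: every hunk below (the DSS option writer, the protocol core, the subflow request handling and the token table under net/mptcp/) follows from one structural change: the MPTCP master socket is now cloned in subflow_syn_recv_sock(), when the third ACK creates the child socket, via the new mptcp_sk_clone() helper, and mptcp_accept() merely picks it up from subflow->conn. The standalone sketch below only illustrates that control-flow difference; the struct layouts and helper names in it are toy stand-ins invented for the example, not the kernel types.

/* Toy, userspace-only model of the old and new flows; "struct msk" and
 * "struct subflow" stand in for the kernel objects and are not kernel code.
 */
#include <stdlib.h>

struct msk { int token; };
struct subflow { struct msk *conn; };

/* Old flow: nothing is attached to the subflow until userspace calls
 * accept(), so the master socket is allocated there and every earlier
 * path must tolerate subflow->conn == NULL.
 */
static struct msk *accept_old(struct subflow *sf)
{
        sf->conn = calloc(1, sizeof(*sf->conn));  /* mptcp_sk_clone_lock() ran here */
        return sf->conn;                          /* may be NULL: extra error handling */
}

/* New flow: the master socket is cloned when the third ACK creates the
 * child socket (subflow_syn_recv_sock() -> mptcp_sk_clone()), so accept()
 * only has to read what is already there.
 */
static struct msk *syn_recv_sock_new(struct subflow *sf)
{
        sf->conn = calloc(1, sizeof(*sf->conn));  /* mptcp_sk_clone() runs here */
        return sf->conn;
}

static struct msk *accept_new(const struct subflow *sf)
{
        return sf->conn;                          /* already allocated, no NULL juggling */
}

int main(void)
{
        struct subflow a = { 0 }, b = { 0 };

        if (!accept_old(&a))                      /* old: allocation at accept() time */
                return 1;
        free(a.conn);

        if (!syn_recv_sock_new(&b))               /* new: allocation at syn-ack time */
                return 1;
        (void)accept_new(&b);                     /* accept() just reads subflow->conn */
        free(b.conn);
        return 0;
}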
@@ -336,7 +336,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
         unsigned int ack_size;
         bool ret = false;
         bool can_ack;
-        u64 ack_seq;
         u8 tcp_fin;
 
         if (skb) {
@@ -368,16 +367,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
         can_ack = true;
         opts->ext_copy.use_ack = 0;
         msk = mptcp_sk(subflow->conn);
-        if (likely(msk && READ_ONCE(msk->can_ack))) {
-                ack_seq = msk->ack_seq;
-        } else if (subflow->can_ack) {
-                mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
-                ack_seq++;
-        } else {
-                can_ack = false;
-        }
-
-        if (unlikely(!can_ack)) {
+        if (!READ_ONCE(msk->can_ack)) {
                 *size = ALIGN(dss_size, 4);
                 return ret;
         }
@@ -390,7 +380,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 
         dss_size += ack_size;
 
-        opts->ext_copy.data_ack = ack_seq;
+        opts->ext_copy.data_ack = msk->ack_seq;
         opts->ext_copy.ack64 = 1;
         opts->ext_copy.use_ack = 1;
 
@@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
 }
 #endif
 
-static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
 {
+        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
         struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+        struct mptcp_sock *msk;
+        u64 ack_seq;
 
         if (!nsk)
                 return NULL;
@@ -832,6 +835,36 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
         inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
 #endif
 
+        __mptcp_init_sock(nsk);
+
+        msk = mptcp_sk(nsk);
+        msk->local_key = subflow_req->local_key;
+        msk->token = subflow_req->token;
+        msk->subflow = NULL;
+
+        if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
+                bh_unlock_sock(nsk);
+
+                /* we can't call into mptcp_close() here - possible BH context
+                 * free the sock directly
+                 */
+                nsk->sk_prot->destroy(nsk);
+                sk_free(nsk);
+                return NULL;
+        }
+
+        msk->write_seq = subflow_req->idsn + 1;
+        if (subflow_req->remote_key_valid) {
+                msk->can_ack = true;
+                msk->remote_key = subflow_req->remote_key;
+                mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
+                ack_seq++;
+                msk->ack_seq = ack_seq;
+        }
+        bh_unlock_sock(nsk);
+
+        /* keep a single reference */
+        __sock_put(nsk);
         return nsk;
 }
 
@@ -859,40 +892,26 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
                 struct mptcp_subflow_context *subflow;
                 struct sock *new_mptcp_sock;
                 struct sock *ssk = newsk;
-                u64 ack_seq;
 
                 subflow = mptcp_subflow_ctx(newsk);
-                lock_sock(sk);
+                new_mptcp_sock = subflow->conn;
 
+                /* is_mptcp should be false if subflow->conn is missing, see
+                 * subflow_syn_recv_sock()
+                 */
+                if (WARN_ON_ONCE(!new_mptcp_sock)) {
+                        tcp_sk(newsk)->is_mptcp = 0;
+                        return newsk;
+                }
+
+                /* acquire the 2nd reference for the owning socket */
+                sock_hold(new_mptcp_sock);
+
                 local_bh_disable();
-                new_mptcp_sock = mptcp_sk_clone_lock(sk);
-                if (!new_mptcp_sock) {
-                        *err = -ENOBUFS;
-                        local_bh_enable();
-                        release_sock(sk);
-                        mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0);
-                        tcp_close(newsk, 0);
-                        return NULL;
-                }
-
-                __mptcp_init_sock(new_mptcp_sock);
-
+                bh_lock_sock(new_mptcp_sock);
                 msk = mptcp_sk(new_mptcp_sock);
-                msk->local_key = subflow->local_key;
-                msk->token = subflow->token;
-                msk->subflow = NULL;
                 msk->first = newsk;
 
-                mptcp_token_update_accept(newsk, new_mptcp_sock);
-
-                msk->write_seq = subflow->idsn + 1;
-                if (subflow->can_ack) {
-                        msk->can_ack = true;
-                        msk->remote_key = subflow->remote_key;
-                        mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
-                        ack_seq++;
-                        msk->ack_seq = ack_seq;
-                }
                 newsk = new_mptcp_sock;
                 mptcp_copy_inaddrs(newsk, ssk);
                 list_add(&subflow->node, &msk->conn_list);
@@ -903,18 +922,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
                 inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
                 bh_unlock_sock(new_mptcp_sock);
                 local_bh_enable();
-                release_sock(sk);
-
-                /* the subflow can already receive packet, avoid racing with
-                 * the receive path and process the pending ones
-                 */
-                lock_sock(ssk);
-                subflow->rel_write_seq = 1;
-                subflow->tcp_sock = ssk;
-                subflow->conn = new_mptcp_sock;
-                if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
-                        mptcp_subflow_data_available(ssk);
-                release_sock(ssk);
         }
 
         return newsk;
@@ -193,6 +193,7 @@ void mptcp_proto_init(void);
 int mptcp_proto_v6_init(void);
 #endif
 
+struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
 void mptcp_get_options(const struct sk_buff *skb,
                        struct tcp_options_received *opt_rx);
 
@@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 int mptcp_token_new_request(struct request_sock *req);
 void mptcp_token_destroy_request(u32 token);
 int mptcp_token_new_connect(struct sock *sk);
-int mptcp_token_new_accept(u32 token);
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn);
+int mptcp_token_new_accept(u32 token, struct sock *conn);
 void mptcp_token_destroy(u32 token);
 
 void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
@@ -112,7 +112,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
         subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
 
-        if (subflow->conn && !subflow->conn_finished) {
+        if (!subflow->conn_finished) {
                 pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
                          subflow->remote_key);
                 mptcp_finish_connect(sk);
@@ -182,6 +182,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
         struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
         struct mptcp_subflow_request_sock *subflow_req;
         struct tcp_options_received opt_rx;
+        struct sock *new_msk = NULL;
         struct sock *child;
 
         pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -197,7 +198,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                          * out-of-order pkt, which will not carry the MP_CAPABLE
                          * opt even on mptcp enabled paths
                          */
-                        goto create_child;
+                        goto create_msk;
                 }
 
                 opt_rx.mptcp.mp_capable = 0;
@@ -207,7 +208,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                         subflow_req->remote_key_valid = 1;
                 } else {
                         subflow_req->mp_capable = 0;
+                        goto create_child;
                 }
 
+create_msk:
+                new_msk = mptcp_sk_clone(listener->conn, req);
+                if (!new_msk)
+                        subflow_req->mp_capable = 0;
         }
 
 create_child:
@@ -221,22 +228,22 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                  * handshake
                  */
                 if (!ctx)
-                        return child;
+                        goto out;
 
                 if (ctx->mp_capable) {
-                        if (mptcp_token_new_accept(ctx->token))
-                                goto close_child;
+                        /* new mpc subflow takes ownership of the newly
+                         * created mptcp socket
+                         */
+                        ctx->conn = new_msk;
+                        new_msk = NULL;
                 }
         }
 
+out:
+        /* dispose of the left over mptcp master, if any */
+        if (unlikely(new_msk))
+                sock_put(new_msk);
         return child;
-
-close_child:
-        pr_debug("closing child socket");
-        tcp_send_active_reset(child, GFP_ATOMIC);
-        inet_csk_prepare_forced_close(child);
-        tcp_done(child);
-        return NULL;
 }
@@ -432,9 +439,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
         if (subflow->data_avail)
                 return true;
 
-        if (!subflow->conn)
-                return false;
-
         msk = mptcp_sk(subflow->conn);
         for (;;) {
                 u32 map_remaining;
@@ -554,11 +558,10 @@ static void subflow_data_ready(struct sock *sk)
         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
         struct sock *parent = subflow->conn;
 
-        if (!parent || !subflow->mp_capable) {
+        if (!subflow->mp_capable) {
                 subflow->tcp_data_ready(sk);
 
-                if (parent)
-                        parent->sk_data_ready(parent);
+                parent->sk_data_ready(parent);
                 return;
         }
 
@@ -572,7 +575,7 @@ static void subflow_write_space(struct sock *sk)
         struct sock *parent = subflow->conn;
 
         sk_stream_write_space(sk);
-        if (parent && sk_stream_is_writeable(sk)) {
+        if (sk_stream_is_writeable(sk)) {
                 set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                 smp_mb__after_atomic();
                 /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -687,7 +690,7 @@ static bool subflow_is_done(const struct sock *sk)
 static void subflow_state_change(struct sock *sk)
 {
         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-        struct sock *parent = READ_ONCE(subflow->conn);
+        struct sock *parent = subflow->conn;
 
         __subflow_state_change(sk);
 
@@ -695,10 +698,10 @@ static void subflow_state_change(struct sock *sk)
          * a fin packet carrying a DSS can be unnoticed if we don't trigger
          * the data available machinery here.
          */
-        if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk))
+        if (subflow->mp_capable && mptcp_subflow_data_available(sk))
                 mptcp_data_ready(parent, sk);
 
-        if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
+        if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
             !subflow->rx_eof && subflow_is_done(sk)) {
                 subflow->rx_eof = 1;
                 parent->sk_shutdown |= RCV_SHUTDOWN;
@@ -793,6 +796,9 @@ static void subflow_ulp_clone(const struct request_sock *req,
         new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
         new_ctx->tcp_state_change = old_ctx->tcp_state_change;
         new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+        new_ctx->rel_write_seq = 1;
+        new_ctx->tcp_sock = newsk;
+
         new_ctx->mp_capable = 1;
         new_ctx->fourth_ack = subflow_req->remote_key_valid;
         new_ctx->can_ack = subflow_req->remote_key_valid;
@@ -128,45 +128,18 @@ int mptcp_token_new_connect(struct sock *sk)
  *
  * Called when a SYN packet creates a new logical connection, i.e.
  * is not a join request.
- *
- * We don't have an mptcp socket yet at that point.
- * This is paired with mptcp_token_update_accept, called on accept().
  */
-int mptcp_token_new_accept(u32 token)
+int mptcp_token_new_accept(u32 token, struct sock *conn)
 {
         int err;
 
         spin_lock_bh(&token_tree_lock);
-        err = radix_tree_insert(&token_tree, token, &token_used);
+        err = radix_tree_insert(&token_tree, token, conn);
         spin_unlock_bh(&token_tree_lock);
 
         return err;
 }
 
-/**
- * mptcp_token_update_accept - update token to map to mptcp socket
- * @conn: the new struct mptcp_sock
- * @sk: the initial subflow for this mptcp socket
- *
- * Called when the first mptcp socket is created on accept to
- * refresh the dummy mapping (done to reserve the token) with
- * the mptcp_socket structure that wasn't allocated before.
- */
-void mptcp_token_update_accept(struct sock *sk, struct sock *conn)
-{
-        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-        void __rcu **slot;
-
-        spin_lock_bh(&token_tree_lock);
-        slot = radix_tree_lookup_slot(&token_tree, subflow->token);
-        WARN_ON_ONCE(!slot);
-        if (slot) {
-                WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used);
-                radix_tree_replace_slot(&token_tree, slot, conn);
-        }
-        spin_unlock_bh(&token_tree_lock);
-}
-
 /**
  * mptcp_token_destroy_request - remove mptcp connection/token
  * @token - token of mptcp connection to remove
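One consequence worth spelling out for the last hunk above (the token table code): because the master socket now exists when the token is reserved, mptcp_token_new_accept() can store the real connection pointer directly, and the old reserve-with-a-dummy-then-fix-it-up step (mptcp_token_update_accept()) disappears. The fragment below is only a toy illustration of that difference; it uses a plain array and invented helper names where the kernel uses a radix tree, so none of it is kernel code.

/* Toy stand-in for the token table: a fixed array instead of a radix tree. */
#include <assert.h>
#include <stddef.h>

#define SLOTS 32

static void *table[SLOTS];
static int token_used;                          /* old placeholder value */

static int token_insert(unsigned int token, void *val)
{
        unsigned int idx = token % SLOTS;

        if (table[idx])
                return -1;                      /* slot busy, like radix_tree_insert() failing */
        table[idx] = val;
        return 0;
}

int main(void)
{
        int conn;                               /* stands in for the freshly cloned mptcp socket */
        unsigned int token = 7;

        /* old scheme: reserve the token with a dummy, patch the slot at accept() time */
        token_insert(token, &token_used);
        table[token % SLOTS] = &conn;           /* what mptcp_token_update_accept() did */
        table[token % SLOTS] = NULL;            /* reset for the second demo */

        /* new scheme: the socket already exists, so store it in a single step */
        token_insert(token, &conn);             /* mptcp_token_new_accept(token, conn) */
        assert(table[token % SLOTS] == &conn);
        return 0;
}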