diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 9c71f427e6e3..63c8ee49cef2 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,7 +336,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, unsigned int ack_size; bool ret = false; bool can_ack; - u64 ack_seq; u8 tcp_fin; if (skb) { @@ -368,16 +367,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, can_ack = true; opts->ext_copy.use_ack = 0; msk = mptcp_sk(subflow->conn); - if (likely(msk && READ_ONCE(msk->can_ack))) { - ack_seq = msk->ack_seq; - } else if (subflow->can_ack) { - mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); - ack_seq++; - } else { - can_ack = false; - } - - if (unlikely(!can_ack)) { + if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } @@ -390,7 +380,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size += ack_size; - opts->ext_copy.data_ack = ack_seq; + opts->ext_copy.data_ack = msk->ack_seq; opts->ext_copy.ack64 = 1; opts->ext_copy.use_ack = 1; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c0cef07f4382..04c3caed92df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -static struct sock *mptcp_sk_clone_lock(const struct sock *sk) +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) { + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_sock *msk; + u64 ack_seq; if (!nsk) return NULL; @@ -832,6 +835,36 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk) inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + __mptcp_init_sock(nsk); + + msk = mptcp_sk(nsk); + msk->local_key = subflow_req->local_key; + msk->token = subflow_req->token; + msk->subflow = NULL; + + if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + bh_unlock_sock(nsk); + + /* we can't call into mptcp_close() here - possible BH context + * free the sock directly + */ + nsk->sk_prot->destroy(nsk); + sk_free(nsk); + return NULL; + } + + msk->write_seq = subflow_req->idsn + 1; + if (subflow_req->remote_key_valid) { + msk->can_ack = true; + msk->remote_key = subflow_req->remote_key; + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + msk->ack_seq = ack_seq; + } + bh_unlock_sock(nsk); + + /* keep a single reference */ + __sock_put(nsk); return nsk; } @@ -859,40 +892,26 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; struct sock *ssk = newsk; - u64 ack_seq; subflow = mptcp_subflow_ctx(newsk); - lock_sock(sk); + new_mptcp_sock = subflow->conn; + + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + return newsk; + } + + /* acquire the 2nd reference for the owning socket */ + sock_hold(new_mptcp_sock); local_bh_disable(); - new_mptcp_sock = mptcp_sk_clone_lock(sk); - if (!new_mptcp_sock) { - *err = -ENOBUFS; - local_bh_enable(); - release_sock(sk); - mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0); - tcp_close(newsk, 0); - return NULL; - } - - __mptcp_init_sock(new_mptcp_sock); - + bh_lock_sock(new_mptcp_sock); msk = mptcp_sk(new_mptcp_sock); - msk->local_key = subflow->local_key; - msk->token = subflow->token; - msk->subflow = NULL; msk->first = newsk; - mptcp_token_update_accept(newsk, new_mptcp_sock); - - msk->write_seq = subflow->idsn + 1; - if (subflow->can_ack) { - msk->can_ack = true; - msk->remote_key = subflow->remote_key; - mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); - ack_seq++; - msk->ack_seq = ack_seq; - } newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); @@ -903,18 +922,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV); bh_unlock_sock(new_mptcp_sock); local_bh_enable(); - release_sock(sk); - - /* the subflow can already receive packet, avoid racing with - * the receive path and process the pending ones - */ - lock_sock(ssk); - subflow->rel_write_seq = 1; - subflow->tcp_sock = ssk; - subflow->conn = new_mptcp_sock; - if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue))) - mptcp_subflow_data_available(ssk); - release_sock(ssk); } return newsk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 313558fa8185..9baf6fcba914 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -193,6 +193,7 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); @@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); int mptcp_token_new_connect(struct sock *sk); -int mptcp_token_new_accept(u32 token); -void mptcp_token_update_accept(struct sock *sk, struct sock *conn); +int mptcp_token_new_accept(u32 token, struct sock *conn); void mptcp_token_destroy(u32 token); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0de2a44bdaa0..8434c7f5f712 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -112,7 +112,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (subflow->conn && !subflow->conn_finished) { + if (!subflow->conn_finished) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); @@ -182,6 +182,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct tcp_options_received opt_rx; + struct sock *new_msk = NULL; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -197,7 +198,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * out-of-order pkt, which will not carry the MP_CAPABLE * opt even on mptcp enabled paths */ - goto create_child; + goto create_msk; } opt_rx.mptcp.mp_capable = 0; @@ -207,7 +208,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_req->remote_key_valid = 1; } else { subflow_req->mp_capable = 0; + goto create_child; } + +create_msk: + new_msk = mptcp_sk_clone(listener->conn, req); + if (!new_msk) + subflow_req->mp_capable = 0; } create_child: @@ -221,22 +228,22 @@ create_child: * handshake */ if (!ctx) - return child; + goto out; if (ctx->mp_capable) { - if (mptcp_token_new_accept(ctx->token)) - goto close_child; + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + ctx->conn = new_msk; + new_msk = NULL; } } +out: + /* dispose of the left over mptcp master, if any */ + if (unlikely(new_msk)) + sock_put(new_msk); return child; - -close_child: - pr_debug("closing child socket"); - tcp_send_active_reset(child, GFP_ATOMIC); - inet_csk_prepare_forced_close(child); - tcp_done(child); - return NULL; } static struct inet_connection_sock_af_ops subflow_specific; @@ -432,9 +439,6 @@ static bool subflow_check_data_avail(struct sock *ssk) if (subflow->data_avail) return true; - if (!subflow->conn) - return false; - msk = mptcp_sk(subflow->conn); for (;;) { u32 map_remaining; @@ -554,11 +558,10 @@ static void subflow_data_ready(struct sock *sk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; - if (!parent || !subflow->mp_capable) { + if (!subflow->mp_capable) { subflow->tcp_data_ready(sk); - if (parent) - parent->sk_data_ready(parent); + parent->sk_data_ready(parent); return; } @@ -572,7 +575,7 @@ static void subflow_write_space(struct sock *sk) struct sock *parent = subflow->conn; sk_stream_write_space(sk); - if (parent && sk_stream_is_writeable(sk)) { + if (sk_stream_is_writeable(sk)) { set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags); smp_mb__after_atomic(); /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */ @@ -687,7 +690,7 @@ static bool subflow_is_done(const struct sock *sk) static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = READ_ONCE(subflow->conn); + struct sock *parent = subflow->conn; __subflow_state_change(sk); @@ -695,10 +698,10 @@ static void subflow_state_change(struct sock *sk) * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. */ - if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) + if (subflow->mp_capable && mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); - if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) && + if (!(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; parent->sk_shutdown |= RCV_SHUTDOWN; @@ -793,6 +796,9 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_write_space = old_ctx->tcp_write_space; + new_ctx->rel_write_seq = 1; + new_ctx->tcp_sock = newsk; + new_ctx->mp_capable = 1; new_ctx->fourth_ack = subflow_req->remote_key_valid; new_ctx->can_ack = subflow_req->remote_key_valid; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 84d887806090..b71b53c0ac8d 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -128,45 +128,18 @@ int mptcp_token_new_connect(struct sock *sk) * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. - * - * We don't have an mptcp socket yet at that point. - * This is paired with mptcp_token_update_accept, called on accept(). */ -int mptcp_token_new_accept(u32 token) +int mptcp_token_new_accept(u32 token, struct sock *conn) { int err; spin_lock_bh(&token_tree_lock); - err = radix_tree_insert(&token_tree, token, &token_used); + err = radix_tree_insert(&token_tree, token, conn); spin_unlock_bh(&token_tree_lock); return err; } -/** - * mptcp_token_update_accept - update token to map to mptcp socket - * @conn: the new struct mptcp_sock - * @sk: the initial subflow for this mptcp socket - * - * Called when the first mptcp socket is created on accept to - * refresh the dummy mapping (done to reserve the token) with - * the mptcp_socket structure that wasn't allocated before. - */ -void mptcp_token_update_accept(struct sock *sk, struct sock *conn) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - void __rcu **slot; - - spin_lock_bh(&token_tree_lock); - slot = radix_tree_lookup_slot(&token_tree, subflow->token); - WARN_ON_ONCE(!slot); - if (slot) { - WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used); - radix_tree_replace_slot(&token_tree, slot, conn); - } - spin_unlock_bh(&token_tree_lock); -} - /** * mptcp_token_destroy_request - remove mptcp connection/token * @token - token of mptcp connection to remove