2020-01-22 08:56:15 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/* Multipath TCP
|
|
|
|
*
|
|
|
|
* Copyright (c) 2017 - 2019, Intel Corporation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define pr_fmt(fmt) "MPTCP: " fmt
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/netdevice.h>
|
2020-01-22 08:56:26 +08:00
|
|
|
#include <linux/sched/signal.h>
|
|
|
|
#include <linux/atomic.h>
|
2020-01-22 08:56:15 +08:00
|
|
|
#include <net/sock.h>
|
|
|
|
#include <net/inet_common.h>
|
|
|
|
#include <net/inet_hashtables.h>
|
|
|
|
#include <net/protocol.h>
|
|
|
|
#include <net/tcp.h>
|
2020-01-22 08:56:19 +08:00
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
|
|
#include <net/transp_v6.h>
|
|
|
|
#endif
|
2020-01-22 08:56:15 +08:00
|
|
|
#include <net/mptcp.h>
|
|
|
|
#include "protocol.h"
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
#define MPTCP_SAME_STATE TCP_MAX_STATES
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
static void __mptcp_close(struct sock *sk, long timeout);
|
|
|
|
|
2020-01-25 08:04:03 +08:00
|
|
|
static const struct proto_ops *tcp_proto_ops(struct sock *sk)
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
{
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
if (sk->sk_family == AF_INET6)
|
|
|
|
return &inet6_stream_ops;
|
|
|
|
#endif
|
|
|
|
return &inet_stream_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* MP_CAPABLE handshake failed, convert msk to plain tcp, replacing
|
|
|
|
* socket->sk and stream ops and destroying msk
|
|
|
|
* return the msk socket, as we can't access msk anymore after this function
|
|
|
|
* completes
|
|
|
|
* Called with msk lock held, releases such lock before returning
|
|
|
|
*/
|
|
|
|
static struct socket *__mptcp_fallback_to_tcp(struct mptcp_sock *msk,
|
|
|
|
struct sock *ssk)
|
|
|
|
{
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
struct socket *sock;
|
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
sk = (struct sock *)msk;
|
|
|
|
sock = sk->sk_socket;
|
|
|
|
subflow = mptcp_subflow_ctx(ssk);
|
|
|
|
|
|
|
|
/* detach the msk socket */
|
|
|
|
list_del_init(&subflow->node);
|
|
|
|
sock_orphan(sk);
|
|
|
|
sock->sk = NULL;
|
|
|
|
|
|
|
|
/* socket is now TCP */
|
|
|
|
lock_sock(ssk);
|
|
|
|
sock_graft(ssk, sock);
|
|
|
|
if (subflow->conn) {
|
|
|
|
/* We can't release the ULP data on a live socket,
|
|
|
|
* restore the tcp callback
|
|
|
|
*/
|
|
|
|
mptcp_subflow_tcp_fallback(ssk, subflow);
|
|
|
|
sock_put(subflow->conn);
|
|
|
|
subflow->conn = NULL;
|
|
|
|
}
|
|
|
|
release_sock(ssk);
|
|
|
|
sock->ops = tcp_proto_ops(ssk);
|
|
|
|
|
|
|
|
/* destroy the left-over msk sock */
|
|
|
|
__mptcp_close(sk, 0);
|
|
|
|
return sock;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
|
|
|
|
* completed yet or has failed, return the subflow socket.
|
|
|
|
* Otherwise return NULL.
|
|
|
|
*/
|
|
|
|
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
|
|
|
|
{
|
2020-01-22 08:56:32 +08:00
|
|
|
if (!msk->subflow || READ_ONCE(msk->can_ack))
|
2020-01-22 08:56:17 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return msk->subflow;
|
|
|
|
}
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
|
|
|
|
{
|
|
|
|
return msk->first && !sk_is_mptcp(msk->first);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if the mp_capable handshake has failed, it fallbacks msk to plain TCP,
|
|
|
|
* releases the socket lock and returns a reference to the now TCP socket.
|
2020-01-22 08:56:18 +08:00
|
|
|
* Otherwise returns NULL
|
|
|
|
*/
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
|
2020-01-22 08:56:18 +08:00
|
|
|
{
|
|
|
|
sock_owned_by_me((const struct sock *)msk);
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (likely(!__mptcp_needs_tcp_fallback(msk)))
|
2020-01-22 08:56:18 +08:00
|
|
|
return NULL;
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (msk->subflow) {
|
|
|
|
/* the first subflow is an active connection, discart the
|
|
|
|
* paired socket
|
|
|
|
*/
|
|
|
|
msk->subflow->sk = NULL;
|
|
|
|
sock_release(msk->subflow);
|
|
|
|
msk->subflow = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return __mptcp_fallback_to_tcp(msk, msk->first);
|
2020-01-22 08:56:18 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
|
|
|
|
{
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
return !msk->first;
|
2020-01-22 08:56:17 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
|
|
|
|
{
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
struct sock *sk = (struct sock *)msk;
|
|
|
|
struct socket *ssock;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
ssock = __mptcp_nmpc_socket(msk);
|
|
|
|
if (ssock)
|
|
|
|
goto set_state;
|
|
|
|
|
|
|
|
if (!__mptcp_can_create_subflow(msk))
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
err = mptcp_subflow_create_socket(sk, &ssock);
|
|
|
|
if (err)
|
|
|
|
return ERR_PTR(err);
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
msk->first = ssock->sk;
|
2020-01-22 08:56:17 +08:00
|
|
|
msk->subflow = ssock;
|
|
|
|
subflow = mptcp_subflow_ctx(ssock->sk);
|
2020-01-22 08:56:18 +08:00
|
|
|
list_add(&subflow->node, &msk->conn_list);
|
2020-01-22 08:56:17 +08:00
|
|
|
subflow->request_mptcp = 1;
|
|
|
|
|
|
|
|
set_state:
|
|
|
|
if (state != MPTCP_SAME_STATE)
|
|
|
|
inet_sk_state_store(sk, state);
|
|
|
|
return ssock;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
|
|
|
|
{
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
|
|
|
|
sock_owned_by_me((const struct sock *)msk);
|
|
|
|
|
|
|
|
mptcp_for_each_subflow(msk, subflow) {
|
|
|
|
return mptcp_subflow_tcp_sock(subflow);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
|
|
|
|
{
|
|
|
|
if (!msk->cached_ext)
|
|
|
|
msk->cached_ext = __skb_ext_alloc();
|
|
|
|
|
|
|
|
return !!msk->cached_ext;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
|
|
|
|
{
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
struct sock *sk = (struct sock *)msk;
|
|
|
|
|
|
|
|
sock_owned_by_me(sk);
|
|
|
|
|
|
|
|
mptcp_for_each_subflow(msk, subflow) {
|
|
|
|
if (subflow->data_avail)
|
|
|
|
return mptcp_subflow_tcp_sock(subflow);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
|
|
|
|
const struct sk_buff *skb,
|
|
|
|
const struct mptcp_ext *mpext)
|
|
|
|
{
|
|
|
|
if (!tcp_skb_can_collapse_to(skb))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* can collapse only if MPTCP level sequence is in order */
|
|
|
|
return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
2020-01-22 08:56:27 +08:00
|
|
|
struct msghdr *msg, long *timeo, int *pmss_now,
|
|
|
|
int *ps_goal)
|
2020-01-22 08:56:23 +08:00
|
|
|
{
|
2020-01-22 08:56:27 +08:00
|
|
|
int mss_now, avail_size, size_goal, ret;
|
2020-01-22 08:56:23 +08:00
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
struct mptcp_ext *mpext = NULL;
|
2020-01-22 08:56:27 +08:00
|
|
|
struct sk_buff *skb, *tail;
|
|
|
|
bool can_collapse = false;
|
2020-01-22 08:56:23 +08:00
|
|
|
struct page_frag *pfrag;
|
|
|
|
size_t psize;
|
|
|
|
|
|
|
|
/* use the mptcp page cache so that we can easily move the data
|
|
|
|
* from one substream to another, but do per subflow memory accounting
|
|
|
|
*/
|
|
|
|
pfrag = sk_page_frag(sk);
|
|
|
|
while (!sk_page_frag_refill(ssk, pfrag) ||
|
|
|
|
!mptcp_ext_cache_refill(msk)) {
|
|
|
|
ret = sk_stream_wait_memory(ssk, timeo);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (unlikely(__mptcp_needs_tcp_fallback(msk)))
|
|
|
|
return 0;
|
2020-01-22 08:56:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* compute copy limit */
|
|
|
|
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
|
2020-01-22 08:56:27 +08:00
|
|
|
*pmss_now = mss_now;
|
|
|
|
*ps_goal = size_goal;
|
|
|
|
avail_size = size_goal;
|
|
|
|
skb = tcp_write_queue_tail(ssk);
|
|
|
|
if (skb) {
|
|
|
|
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
|
|
|
|
|
|
|
|
/* Limit the write to the size available in the
|
|
|
|
* current skb, if any, so that we create at most a new skb.
|
|
|
|
* Explicitly tells TCP internals to avoid collapsing on later
|
|
|
|
* queue management operation, to avoid breaking the ext <->
|
|
|
|
* SSN association set here
|
|
|
|
*/
|
|
|
|
can_collapse = (size_goal - skb->len > 0) &&
|
|
|
|
mptcp_skb_can_collapse_to(msk, skb, mpext);
|
|
|
|
if (!can_collapse)
|
|
|
|
TCP_SKB_CB(skb)->eor = 1;
|
|
|
|
else
|
|
|
|
avail_size = size_goal - skb->len;
|
|
|
|
}
|
|
|
|
psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
|
2020-01-22 08:56:23 +08:00
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
/* Copy to page */
|
2020-01-22 08:56:23 +08:00
|
|
|
pr_debug("left=%zu", msg_data_left(msg));
|
|
|
|
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
|
|
|
|
min_t(size_t, msg_data_left(msg), psize),
|
|
|
|
&msg->msg_iter);
|
|
|
|
pr_debug("left=%zu", msg_data_left(msg));
|
|
|
|
if (!psize)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
/* tell the TCP stack to delay the push so that we can safely
|
|
|
|
* access the skb after the sendpages call
|
2020-01-22 08:56:23 +08:00
|
|
|
*/
|
|
|
|
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
|
|
|
|
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
|
|
|
|
if (ret <= 0)
|
|
|
|
return ret;
|
|
|
|
if (unlikely(ret < psize))
|
|
|
|
iov_iter_revert(&msg->msg_iter, psize - ret);
|
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
/* if the tail skb extension is still the cached one, collapsing
|
|
|
|
* really happened. Note: we can't check for 'same skb' as the sk_buff
|
|
|
|
* hdr on tail can be transmitted, freed and re-allocated by the
|
|
|
|
* do_tcp_sendpages() call
|
|
|
|
*/
|
|
|
|
tail = tcp_write_queue_tail(ssk);
|
|
|
|
if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) {
|
|
|
|
WARN_ON_ONCE(!can_collapse);
|
|
|
|
mpext->data_len += ret;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
skb = tcp_write_queue_tail(ssk);
|
|
|
|
mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
|
|
|
|
msk->cached_ext = NULL;
|
|
|
|
|
|
|
|
memset(mpext, 0, sizeof(*mpext));
|
|
|
|
mpext->data_seq = msk->write_seq;
|
|
|
|
mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
|
|
|
|
mpext->data_len = ret;
|
|
|
|
mpext->use_map = 1;
|
|
|
|
mpext->dsn64 = 1;
|
|
|
|
|
|
|
|
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
|
|
|
|
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
|
|
|
|
mpext->dsn64);
|
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
out:
|
2020-01-22 08:56:23 +08:00
|
|
|
pfrag->offset += ret;
|
|
|
|
msk->write_seq += ret;
|
|
|
|
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:25 +08:00
|
|
|
static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
|
|
|
|
{
|
|
|
|
struct socket *sock;
|
|
|
|
|
|
|
|
if (likely(sk_stream_is_writeable(ssk)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
sock = READ_ONCE(ssk->sk_socket);
|
|
|
|
|
|
|
|
if (sock) {
|
|
|
|
clear_bit(MPTCP_SEND_SPACE, &msk->flags);
|
|
|
|
smp_mb__after_atomic();
|
|
|
|
/* set NOSPACE only after clearing SEND_SPACE flag */
|
|
|
|
set_bit(SOCK_NOSPACE, &sock->flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
|
|
|
{
|
2020-01-22 08:56:27 +08:00
|
|
|
int mss_now = 0, size_goal = 0, ret = 0;
|
2020-01-22 08:56:15 +08:00
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
2020-01-22 08:56:18 +08:00
|
|
|
struct socket *ssock;
|
2020-01-22 08:56:23 +08:00
|
|
|
size_t copied = 0;
|
2020-01-22 08:56:18 +08:00
|
|
|
struct sock *ssk;
|
2020-01-22 08:56:23 +08:00
|
|
|
long timeo;
|
2020-01-22 08:56:15 +08:00
|
|
|
|
|
|
|
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
lock_sock(sk);
|
|
|
|
ssock = __mptcp_tcp_fallback(msk);
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (unlikely(ssock)) {
|
|
|
|
fallback:
|
2020-01-22 08:56:18 +08:00
|
|
|
pr_debug("fallback passthrough");
|
|
|
|
ret = sock_sendmsg(ssock, msg);
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
return ret >= 0 ? ret + copied : (copied ? copied : ret);
|
2020-01-22 08:56:18 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
ssk = mptcp_subflow_get(msk);
|
|
|
|
if (!ssk) {
|
|
|
|
release_sock(sk);
|
|
|
|
return -ENOTCONN;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
pr_debug("conn_list->subflow=%p", ssk);
|
2020-01-22 08:56:18 +08:00
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
lock_sock(ssk);
|
|
|
|
while (msg_data_left(msg)) {
|
2020-01-22 08:56:27 +08:00
|
|
|
ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
|
|
|
|
&size_goal);
|
2020-01-22 08:56:23 +08:00
|
|
|
if (ret < 0)
|
|
|
|
break;
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
|
|
|
|
release_sock(ssk);
|
|
|
|
ssock = __mptcp_tcp_fallback(msk);
|
|
|
|
goto fallback;
|
|
|
|
}
|
2020-01-22 08:56:23 +08:00
|
|
|
|
|
|
|
copied += ret;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:27 +08:00
|
|
|
if (copied) {
|
2020-01-22 08:56:23 +08:00
|
|
|
ret = copied;
|
2020-01-22 08:56:27 +08:00
|
|
|
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
|
|
|
|
size_goal);
|
|
|
|
}
|
2020-01-22 08:56:23 +08:00
|
|
|
|
2020-01-22 08:56:25 +08:00
|
|
|
ssk_check_wmem(msk, ssk);
|
2020-01-22 08:56:23 +08:00
|
|
|
release_sock(ssk);
|
2020-01-22 08:56:18 +08:00
|
|
|
release_sock(sk);
|
|
|
|
return ret;
|
2020-01-22 08:56:15 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:24 +08:00
|
|
|
int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
|
|
|
|
unsigned int offset, size_t len)
|
|
|
|
{
|
|
|
|
struct mptcp_read_arg *arg = desc->arg.data;
|
|
|
|
size_t copy_len;
|
|
|
|
|
|
|
|
copy_len = min(desc->count, len);
|
|
|
|
|
|
|
|
if (likely(arg->msg)) {
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = skb_copy_datagram_msg(skb, offset, arg->msg, copy_len);
|
|
|
|
if (err) {
|
|
|
|
pr_debug("error path");
|
|
|
|
desc->error = err;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
pr_debug("Flushing skb payload");
|
|
|
|
}
|
|
|
|
|
|
|
|
desc->count -= copy_len;
|
|
|
|
|
|
|
|
pr_debug("consumed %zu bytes, %zu left", copy_len, desc->count);
|
|
|
|
return copy_len;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
static void mptcp_wait_data(struct sock *sk, long *timeo)
|
|
|
|
{
|
|
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
|
|
|
|
|
|
|
sk_wait_event(sk, timeo,
|
|
|
|
test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
|
|
|
|
|
|
|
|
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
|
|
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|
|
|
int nonblock, int flags, int *addr_len)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
2020-01-22 08:56:26 +08:00
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
bool more_data_avail = false;
|
|
|
|
struct mptcp_read_arg arg;
|
|
|
|
read_descriptor_t desc;
|
|
|
|
bool wait_data = false;
|
2020-01-22 08:56:18 +08:00
|
|
|
struct socket *ssock;
|
2020-01-22 08:56:26 +08:00
|
|
|
struct tcp_sock *tp;
|
|
|
|
bool done = false;
|
2020-01-22 08:56:18 +08:00
|
|
|
struct sock *ssk;
|
|
|
|
int copied = 0;
|
2020-01-22 08:56:26 +08:00
|
|
|
int target;
|
|
|
|
long timeo;
|
2020-01-22 08:56:15 +08:00
|
|
|
|
|
|
|
if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
lock_sock(sk);
|
|
|
|
ssock = __mptcp_tcp_fallback(msk);
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (unlikely(ssock)) {
|
|
|
|
fallback:
|
2020-01-22 08:56:18 +08:00
|
|
|
pr_debug("fallback-read subflow=%p",
|
|
|
|
mptcp_subflow_ctx(ssock->sk));
|
|
|
|
copied = sock_recvmsg(ssock, msg, flags);
|
|
|
|
return copied;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
arg.msg = msg;
|
|
|
|
desc.arg.data = &arg;
|
|
|
|
desc.error = 0;
|
|
|
|
|
|
|
|
timeo = sock_rcvtimeo(sk, nonblock);
|
|
|
|
|
|
|
|
len = min_t(size_t, len, INT_MAX);
|
|
|
|
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
|
|
|
|
|
|
|
|
while (!done) {
|
|
|
|
u32 map_remaining;
|
|
|
|
int bytes_read;
|
|
|
|
|
|
|
|
ssk = mptcp_subflow_recv_lookup(msk);
|
|
|
|
pr_debug("msk=%p ssk=%p", msk, ssk);
|
|
|
|
if (!ssk)
|
|
|
|
goto wait_for_data;
|
|
|
|
|
|
|
|
subflow = mptcp_subflow_ctx(ssk);
|
|
|
|
tp = tcp_sk(ssk);
|
|
|
|
|
|
|
|
lock_sock(ssk);
|
|
|
|
do {
|
|
|
|
/* try to read as much data as available */
|
|
|
|
map_remaining = subflow->map_data_len -
|
|
|
|
mptcp_subflow_get_map_offset(subflow);
|
|
|
|
desc.count = min_t(size_t, len - copied, map_remaining);
|
|
|
|
pr_debug("reading %zu bytes, copied %d", desc.count,
|
|
|
|
copied);
|
|
|
|
bytes_read = tcp_read_sock(ssk, &desc,
|
|
|
|
mptcp_read_actor);
|
|
|
|
if (bytes_read < 0) {
|
|
|
|
if (!copied)
|
|
|
|
copied = bytes_read;
|
|
|
|
done = true;
|
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_debug("msk ack_seq=%llx -> %llx", msk->ack_seq,
|
|
|
|
msk->ack_seq + bytes_read);
|
|
|
|
msk->ack_seq += bytes_read;
|
|
|
|
copied += bytes_read;
|
|
|
|
if (copied >= len) {
|
|
|
|
done = true;
|
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
if (tp->urg_data && tp->urg_seq == tp->copied_seq) {
|
|
|
|
pr_err("Urgent data present, cannot proceed");
|
|
|
|
done = true;
|
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
next:
|
|
|
|
more_data_avail = mptcp_subflow_data_available(ssk);
|
|
|
|
} while (more_data_avail && !done);
|
|
|
|
release_sock(ssk);
|
|
|
|
continue;
|
|
|
|
|
|
|
|
wait_for_data:
|
|
|
|
more_data_avail = false;
|
|
|
|
|
|
|
|
/* only the master socket status is relevant here. The exit
|
|
|
|
* conditions mirror closely tcp_recvmsg()
|
|
|
|
*/
|
|
|
|
if (copied >= target)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (copied) {
|
|
|
|
if (sk->sk_err ||
|
|
|
|
sk->sk_state == TCP_CLOSE ||
|
|
|
|
(sk->sk_shutdown & RCV_SHUTDOWN) ||
|
|
|
|
!timeo ||
|
|
|
|
signal_pending(current))
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
if (sk->sk_err) {
|
|
|
|
copied = sock_error(sk);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (sk->sk_state == TCP_CLOSE) {
|
|
|
|
copied = -ENOTCONN;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!timeo) {
|
|
|
|
copied = -EAGAIN;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (signal_pending(current)) {
|
|
|
|
copied = sock_intr_errno(timeo);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_debug("block timeout %ld", timeo);
|
|
|
|
wait_data = true;
|
|
|
|
mptcp_wait_data(sk, &timeo);
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
if (unlikely(__mptcp_tcp_fallback(msk)))
|
|
|
|
goto fallback;
|
2020-01-22 08:56:18 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
if (more_data_avail) {
|
|
|
|
if (!test_bit(MPTCP_DATA_READY, &msk->flags))
|
|
|
|
set_bit(MPTCP_DATA_READY, &msk->flags);
|
|
|
|
} else if (!wait_data) {
|
|
|
|
clear_bit(MPTCP_DATA_READY, &msk->flags);
|
2020-01-22 08:56:18 +08:00
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
/* .. race-breaker: ssk might get new data after last
|
|
|
|
* data_available() returns false.
|
|
|
|
*/
|
|
|
|
ssk = mptcp_subflow_recv_lookup(msk);
|
|
|
|
if (unlikely(ssk))
|
|
|
|
set_bit(MPTCP_DATA_READY, &msk->flags);
|
|
|
|
}
|
2020-01-22 08:56:18 +08:00
|
|
|
|
2020-01-22 08:56:26 +08:00
|
|
|
release_sock(sk);
|
2020-01-22 08:56:18 +08:00
|
|
|
return copied;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* subflow sockets can be either outgoing (connect) or incoming
|
|
|
|
* (accept).
|
|
|
|
*
|
|
|
|
* Outgoing subflows use in-kernel sockets.
|
|
|
|
* Incoming subflows do not have their own 'struct socket' allocated,
|
|
|
|
* so we need to use tcp_close() after detaching them from the mptcp
|
|
|
|
* parent socket.
|
|
|
|
*/
|
|
|
|
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
|
|
|
|
struct mptcp_subflow_context *subflow,
|
|
|
|
long timeout)
|
|
|
|
{
|
|
|
|
struct socket *sock = READ_ONCE(ssk->sk_socket);
|
|
|
|
|
|
|
|
list_del(&subflow->node);
|
|
|
|
|
|
|
|
if (sock && sock != sk->sk_socket) {
|
|
|
|
/* outgoing subflow */
|
|
|
|
sock_release(sock);
|
|
|
|
} else {
|
|
|
|
/* incoming subflow */
|
|
|
|
tcp_close(ssk, timeout);
|
|
|
|
}
|
2020-01-22 08:56:15 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:28 +08:00
|
|
|
static int __mptcp_init_sock(struct sock *sk)
|
2020-01-22 08:56:15 +08:00
|
|
|
{
|
2020-01-22 08:56:18 +08:00
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&msk->conn_list);
|
2020-01-22 08:56:25 +08:00
|
|
|
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
|
2020-01-22 08:56:18 +08:00
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
msk->first = NULL;
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:28 +08:00
|
|
|
static int mptcp_init_sock(struct sock *sk)
|
|
|
|
{
|
|
|
|
if (!mptcp_is_enabled(sock_net(sk)))
|
|
|
|
return -ENOPROTOOPT;
|
|
|
|
|
|
|
|
return __mptcp_init_sock(sk);
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:21 +08:00
|
|
|
static void mptcp_subflow_shutdown(struct sock *ssk, int how)
|
|
|
|
{
|
|
|
|
lock_sock(ssk);
|
|
|
|
|
|
|
|
switch (ssk->sk_state) {
|
|
|
|
case TCP_LISTEN:
|
|
|
|
if (!(how & RCV_SHUTDOWN))
|
|
|
|
break;
|
|
|
|
/* fall through */
|
|
|
|
case TCP_SYN_SENT:
|
|
|
|
tcp_disconnect(ssk, O_NONBLOCK);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ssk->sk_shutdown |= how;
|
|
|
|
tcp_shutdown(ssk, how);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wake up anyone sleeping in poll. */
|
|
|
|
ssk->sk_state_change(ssk);
|
|
|
|
release_sock(ssk);
|
|
|
|
}
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
/* Called with msk lock held, releases such lock before returning */
|
|
|
|
static void __mptcp_close(struct sock *sk, long timeout)
|
2020-01-22 08:56:15 +08:00
|
|
|
{
|
2020-01-22 08:56:18 +08:00
|
|
|
struct mptcp_subflow_context *subflow, *tmp;
|
2020-01-22 08:56:15 +08:00
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
|
2020-01-22 08:56:20 +08:00
|
|
|
mptcp_token_destroy(msk->token);
|
2020-01-22 08:56:15 +08:00
|
|
|
inet_sk_state_store(sk, TCP_CLOSE);
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
|
|
|
|
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
|
|
|
|
|
|
|
__mptcp_close_ssk(sk, ssk, subflow, timeout);
|
2020-01-22 08:56:15 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
if (msk->cached_ext)
|
|
|
|
__skb_ext_put(msk->cached_ext);
|
2020-01-22 08:56:18 +08:00
|
|
|
release_sock(sk);
|
|
|
|
sk_common_release(sk);
|
2020-01-22 08:56:15 +08:00
|
|
|
}
|
|
|
|
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
static void mptcp_close(struct sock *sk, long timeout)
|
|
|
|
{
|
|
|
|
lock_sock(sk);
|
|
|
|
__mptcp_close(sk, timeout);
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:19 +08:00
|
|
|
static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
|
|
|
|
{
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
|
|
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
|
|
|
|
struct ipv6_pinfo *msk6 = inet6_sk(msk);
|
|
|
|
|
|
|
|
msk->sk_v6_daddr = ssk->sk_v6_daddr;
|
|
|
|
msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;
|
|
|
|
|
|
|
|
if (msk6 && ssk6) {
|
|
|
|
msk6->saddr = ssk6->saddr;
|
|
|
|
msk6->flow_label = ssk6->flow_label;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
inet_sk(msk)->inet_num = inet_sk(ssk)->inet_num;
|
|
|
|
inet_sk(msk)->inet_dport = inet_sk(ssk)->inet_dport;
|
|
|
|
inet_sk(msk)->inet_sport = inet_sk(ssk)->inet_sport;
|
|
|
|
inet_sk(msk)->inet_daddr = inet_sk(ssk)->inet_daddr;
|
|
|
|
inet_sk(msk)->inet_saddr = inet_sk(ssk)->inet_saddr;
|
|
|
|
inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
|
|
|
|
bool kern)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
struct socket *listener;
|
|
|
|
struct sock *newsk;
|
|
|
|
|
|
|
|
listener = __mptcp_nmpc_socket(msk);
|
|
|
|
if (WARN_ON_ONCE(!listener)) {
|
|
|
|
*err = -EINVAL;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
|
|
|
|
newsk = inet_csk_accept(listener->sk, flags, err, kern);
|
|
|
|
if (!newsk)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
|
|
|
|
|
|
|
|
if (sk_is_mptcp(newsk)) {
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
struct sock *new_mptcp_sock;
|
|
|
|
struct sock *ssk = newsk;
|
2020-01-22 08:56:23 +08:00
|
|
|
u64 ack_seq;
|
2020-01-22 08:56:19 +08:00
|
|
|
|
|
|
|
subflow = mptcp_subflow_ctx(newsk);
|
|
|
|
lock_sock(sk);
|
|
|
|
|
|
|
|
local_bh_disable();
|
|
|
|
new_mptcp_sock = sk_clone_lock(sk, GFP_ATOMIC);
|
|
|
|
if (!new_mptcp_sock) {
|
|
|
|
*err = -ENOBUFS;
|
|
|
|
local_bh_enable();
|
|
|
|
release_sock(sk);
|
2020-01-22 08:56:21 +08:00
|
|
|
mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1);
|
2020-01-22 08:56:19 +08:00
|
|
|
tcp_close(newsk, 0);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:28 +08:00
|
|
|
__mptcp_init_sock(new_mptcp_sock);
|
2020-01-22 08:56:19 +08:00
|
|
|
|
|
|
|
msk = mptcp_sk(new_mptcp_sock);
|
|
|
|
msk->local_key = subflow->local_key;
|
2020-01-22 08:56:20 +08:00
|
|
|
msk->token = subflow->token;
|
2020-01-22 08:56:19 +08:00
|
|
|
msk->subflow = NULL;
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
msk->first = newsk;
|
2020-01-22 08:56:19 +08:00
|
|
|
|
2020-01-22 08:56:20 +08:00
|
|
|
mptcp_token_update_accept(newsk, new_mptcp_sock);
|
2020-01-22 08:56:23 +08:00
|
|
|
|
|
|
|
msk->write_seq = subflow->idsn + 1;
|
2020-01-22 08:56:32 +08:00
|
|
|
if (subflow->can_ack) {
|
|
|
|
msk->can_ack = true;
|
|
|
|
msk->remote_key = subflow->remote_key;
|
|
|
|
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
|
|
|
|
ack_seq++;
|
|
|
|
msk->ack_seq = ack_seq;
|
|
|
|
}
|
2020-01-22 08:56:19 +08:00
|
|
|
newsk = new_mptcp_sock;
|
|
|
|
mptcp_copy_inaddrs(newsk, ssk);
|
|
|
|
list_add(&subflow->node, &msk->conn_list);
|
|
|
|
|
|
|
|
/* will be fully established at mptcp_stream_accept()
|
|
|
|
* completion.
|
|
|
|
*/
|
|
|
|
inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
|
|
|
|
bh_unlock_sock(new_mptcp_sock);
|
|
|
|
local_bh_enable();
|
|
|
|
release_sock(sk);
|
2020-01-22 08:56:26 +08:00
|
|
|
|
|
|
|
/* the subflow can already receive packet, avoid racing with
|
|
|
|
* the receive path and process the pending ones
|
|
|
|
*/
|
|
|
|
lock_sock(ssk);
|
|
|
|
subflow->rel_write_seq = 1;
|
|
|
|
subflow->tcp_sock = ssk;
|
|
|
|
subflow->conn = new_mptcp_sock;
|
|
|
|
if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
|
|
|
|
mptcp_subflow_data_available(ssk);
|
|
|
|
release_sock(ssk);
|
2020-01-22 08:56:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return newsk;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:20 +08:00
|
|
|
static void mptcp_destroy(struct sock *sk)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:22 +08:00
|
|
|
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
|
|
|
|
char __user *uoptval, unsigned int optlen)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
char __kernel *optval;
|
|
|
|
int ret = -EOPNOTSUPP;
|
|
|
|
struct socket *ssock;
|
|
|
|
|
|
|
|
/* will be treated as __user in tcp_setsockopt */
|
|
|
|
optval = (char __kernel __force *)uoptval;
|
|
|
|
|
|
|
|
pr_debug("msk=%p", msk);
|
|
|
|
|
|
|
|
/* @@ the meaning of setsockopt() when the socket is connected and
|
|
|
|
* there are multiple subflows is not defined.
|
|
|
|
*/
|
|
|
|
lock_sock(sk);
|
|
|
|
ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
|
|
|
|
if (!IS_ERR(ssock)) {
|
|
|
|
pr_debug("subflow=%p", ssock->sk);
|
|
|
|
ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
|
|
|
|
}
|
|
|
|
release_sock(sk);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
|
|
|
|
char __user *uoptval, int __user *uoption)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
char __kernel *optval;
|
|
|
|
int ret = -EOPNOTSUPP;
|
|
|
|
int __kernel *option;
|
|
|
|
struct socket *ssock;
|
|
|
|
|
|
|
|
/* will be treated as __user in tcp_getsockopt */
|
|
|
|
optval = (char __kernel __force *)uoptval;
|
|
|
|
option = (int __kernel __force *)uoption;
|
|
|
|
|
|
|
|
pr_debug("msk=%p", msk);
|
|
|
|
|
|
|
|
/* @@ the meaning of getsockopt() when the socket is connected and
|
|
|
|
* there are multiple subflows is not defined.
|
|
|
|
*/
|
|
|
|
lock_sock(sk);
|
|
|
|
ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
|
|
|
|
if (!IS_ERR(ssock)) {
|
|
|
|
pr_debug("subflow=%p", ssock->sk);
|
|
|
|
ret = kernel_getsockopt(ssock, level, optname, optval, option);
|
|
|
|
}
|
|
|
|
release_sock(sk);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
static int mptcp_get_port(struct sock *sk, unsigned short snum)
|
2020-01-22 08:56:15 +08:00
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
2020-01-22 08:56:18 +08:00
|
|
|
struct socket *ssock;
|
2020-01-22 08:56:15 +08:00
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
ssock = __mptcp_nmpc_socket(msk);
|
|
|
|
pr_debug("msk=%p, subflow=%p", msk, ssock);
|
|
|
|
if (WARN_ON_ONCE(!ssock))
|
|
|
|
return -EINVAL;
|
2020-01-22 08:56:15 +08:00
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
return inet_csk_get_port(ssock->sk, snum);
|
|
|
|
}
|
2020-01-22 08:56:15 +08:00
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
void mptcp_finish_connect(struct sock *ssk)
|
|
|
|
{
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
struct mptcp_sock *msk;
|
|
|
|
struct sock *sk;
|
2020-01-22 08:56:23 +08:00
|
|
|
u64 ack_seq;
|
2020-01-22 08:56:15 +08:00
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
subflow = mptcp_subflow_ctx(ssk);
|
2020-01-22 08:56:15 +08:00
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
if (!subflow->mp_capable)
|
|
|
|
return;
|
|
|
|
|
|
|
|
sk = subflow->conn;
|
|
|
|
msk = mptcp_sk(sk);
|
|
|
|
|
2020-01-22 08:56:24 +08:00
|
|
|
pr_debug("msk=%p, token=%u", sk, subflow->token);
|
|
|
|
|
2020-01-22 08:56:23 +08:00
|
|
|
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
|
|
|
|
ack_seq++;
|
2020-01-22 08:56:24 +08:00
|
|
|
subflow->map_seq = ack_seq;
|
|
|
|
subflow->map_subflow_seq = 1;
|
2020-01-22 08:56:23 +08:00
|
|
|
subflow->rel_write_seq = 1;
|
|
|
|
|
2020-01-22 08:56:18 +08:00
|
|
|
/* the socket is not connected yet, no msk/subflow ops can access/race
|
|
|
|
* accessing the field below
|
|
|
|
*/
|
|
|
|
WRITE_ONCE(msk->remote_key, subflow->remote_key);
|
|
|
|
WRITE_ONCE(msk->local_key, subflow->local_key);
|
2020-01-22 08:56:20 +08:00
|
|
|
WRITE_ONCE(msk->token, subflow->token);
|
2020-01-22 08:56:23 +08:00
|
|
|
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
|
|
|
|
WRITE_ONCE(msk->ack_seq, ack_seq);
|
2020-01-22 08:56:32 +08:00
|
|
|
WRITE_ONCE(msk->can_ack, 1);
|
2020-01-22 08:56:15 +08:00
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:19 +08:00
|
|
|
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
|
|
|
|
{
|
|
|
|
write_lock_bh(&sk->sk_callback_lock);
|
|
|
|
rcu_assign_pointer(sk->sk_wq, &parent->wq);
|
|
|
|
sk_set_socket(sk, parent);
|
|
|
|
sk->sk_uid = SOCK_INODE(parent)->i_uid;
|
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:25 +08:00
|
|
|
static bool mptcp_memory_free(const struct sock *sk, int wake)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
|
|
|
|
|
|
|
return wake ? test_bit(MPTCP_SEND_SPACE, &msk->flags) : true;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static struct proto mptcp_prot = {
|
|
|
|
.name = "MPTCP",
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.init = mptcp_init_sock,
|
|
|
|
.close = mptcp_close,
|
2020-01-22 08:56:19 +08:00
|
|
|
.accept = mptcp_accept,
|
2020-01-22 08:56:22 +08:00
|
|
|
.setsockopt = mptcp_setsockopt,
|
|
|
|
.getsockopt = mptcp_getsockopt,
|
2020-01-22 08:56:15 +08:00
|
|
|
.shutdown = tcp_shutdown,
|
2020-01-22 08:56:20 +08:00
|
|
|
.destroy = mptcp_destroy,
|
2020-01-22 08:56:15 +08:00
|
|
|
.sendmsg = mptcp_sendmsg,
|
|
|
|
.recvmsg = mptcp_recvmsg,
|
|
|
|
.hash = inet_hash,
|
|
|
|
.unhash = inet_unhash,
|
2020-01-22 08:56:18 +08:00
|
|
|
.get_port = mptcp_get_port,
|
2020-01-22 08:56:25 +08:00
|
|
|
.stream_memory_free = mptcp_memory_free,
|
2020-01-22 08:56:15 +08:00
|
|
|
.obj_size = sizeof(struct mptcp_sock),
|
|
|
|
.no_autobind = true,
|
|
|
|
};
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sock->sk);
|
|
|
|
struct socket *ssock;
|
2020-01-22 08:56:19 +08:00
|
|
|
int err;
|
2020-01-22 08:56:17 +08:00
|
|
|
|
|
|
|
lock_sock(sock->sk);
|
|
|
|
ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
|
|
|
|
if (IS_ERR(ssock)) {
|
|
|
|
err = PTR_ERR(ssock);
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ssock->ops->bind(ssock, uaddr, addr_len);
|
2020-01-22 08:56:19 +08:00
|
|
|
if (!err)
|
|
|
|
mptcp_copy_inaddrs(sock->sk, ssock->sk);
|
2020-01-22 08:56:17 +08:00
|
|
|
|
|
|
|
unlock:
|
|
|
|
release_sock(sock->sk);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
|
|
|
|
int addr_len, int flags)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sock->sk);
|
|
|
|
struct socket *ssock;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
lock_sock(sock->sk);
|
|
|
|
ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
|
|
|
|
if (IS_ERR(ssock)) {
|
|
|
|
err = PTR_ERR(ssock);
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:19 +08:00
|
|
|
#ifdef CONFIG_TCP_MD5SIG
|
|
|
|
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
|
|
|
|
* TCP option space.
|
|
|
|
*/
|
|
|
|
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
|
|
|
|
mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
|
|
|
|
#endif
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
|
|
|
|
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
|
2020-01-22 08:56:19 +08:00
|
|
|
mptcp_copy_inaddrs(sock->sk, ssock->sk);
|
2020-01-22 08:56:17 +08:00
|
|
|
|
|
|
|
unlock:
|
|
|
|
release_sock(sock->sk);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:19 +08:00
|
|
|
static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
|
|
|
|
int peer)
|
|
|
|
{
|
|
|
|
if (sock->sk->sk_prot == &tcp_prot) {
|
|
|
|
/* we are being invoked from __sys_accept4, after
|
|
|
|
* mptcp_accept() has just accepted a non-mp-capable
|
|
|
|
* flow: sk is a tcp_sk, not an mptcp one.
|
|
|
|
*
|
|
|
|
* Hand the socket over to tcp so all further socket ops
|
|
|
|
* bypass mptcp.
|
|
|
|
*/
|
|
|
|
sock->ops = &inet_stream_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
return inet_getname(sock, uaddr, peer);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
|
|
static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
|
|
|
|
int peer)
|
|
|
|
{
|
|
|
|
if (sock->sk->sk_prot == &tcpv6_prot) {
|
|
|
|
/* we are being invoked from __sys_accept4 after
|
|
|
|
* mptcp_accept() has accepted a non-mp-capable
|
|
|
|
* subflow: sk is a tcp_sk, not mptcp.
|
|
|
|
*
|
|
|
|
* Hand the socket over to tcp so all further
|
|
|
|
* socket ops bypass mptcp.
|
|
|
|
*/
|
|
|
|
sock->ops = &inet6_stream_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
return inet6_getname(sock, uaddr, peer);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static int mptcp_listen(struct socket *sock, int backlog)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sock->sk);
|
|
|
|
struct socket *ssock;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
pr_debug("msk=%p", msk);
|
|
|
|
|
|
|
|
lock_sock(sock->sk);
|
|
|
|
ssock = __mptcp_socket_create(msk, TCP_LISTEN);
|
|
|
|
if (IS_ERR(ssock)) {
|
|
|
|
err = PTR_ERR(ssock);
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ssock->ops->listen(ssock, backlog);
|
|
|
|
inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
|
|
|
|
if (!err)
|
|
|
|
mptcp_copy_inaddrs(sock->sk, ssock->sk);
|
|
|
|
|
|
|
|
unlock:
|
|
|
|
release_sock(sock->sk);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool is_tcp_proto(const struct proto *p)
|
|
|
|
{
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
|
|
return p == &tcp_prot || p == &tcpv6_prot;
|
|
|
|
#else
|
|
|
|
return p == &tcp_prot;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
|
|
|
|
int flags, bool kern)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sock->sk);
|
|
|
|
struct socket *ssock;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
pr_debug("msk=%p", msk);
|
|
|
|
|
|
|
|
lock_sock(sock->sk);
|
|
|
|
if (sock->sk->sk_state != TCP_LISTEN)
|
|
|
|
goto unlock_fail;
|
|
|
|
|
|
|
|
ssock = __mptcp_nmpc_socket(msk);
|
|
|
|
if (!ssock)
|
|
|
|
goto unlock_fail;
|
|
|
|
|
|
|
|
sock_hold(ssock->sk);
|
|
|
|
release_sock(sock->sk);
|
|
|
|
|
|
|
|
err = ssock->ops->accept(sock, newsock, flags, kern);
|
|
|
|
if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
|
|
|
|
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
|
|
|
|
* This is needed so NOSPACE flag can be set from tcp stack.
|
|
|
|
*/
|
|
|
|
list_for_each_entry(subflow, &msk->conn_list, node) {
|
|
|
|
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
|
|
|
|
|
|
|
if (!ssk->sk_socket)
|
|
|
|
mptcp_sock_graft(ssk, newsock);
|
|
|
|
}
|
|
|
|
|
|
|
|
inet_sk_state_store(newsock->sk, TCP_ESTABLISHED);
|
|
|
|
}
|
|
|
|
|
|
|
|
sock_put(ssock->sk);
|
|
|
|
return err;
|
|
|
|
|
|
|
|
unlock_fail:
|
|
|
|
release_sock(sock->sk);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
|
|
|
|
struct poll_table_struct *wait)
|
|
|
|
{
|
2020-01-22 08:56:25 +08:00
|
|
|
struct sock *sk = sock->sk;
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
struct mptcp_sock *msk;
|
2020-01-22 08:56:25 +08:00
|
|
|
struct socket *ssock;
|
2020-01-22 08:56:17 +08:00
|
|
|
__poll_t mask = 0;
|
|
|
|
|
2020-01-22 08:56:25 +08:00
|
|
|
msk = mptcp_sk(sk);
|
|
|
|
lock_sock(sk);
|
|
|
|
ssock = __mptcp_nmpc_socket(msk);
|
|
|
|
if (ssock) {
|
|
|
|
mask = ssock->ops->poll(file, ssock, wait);
|
|
|
|
release_sock(sk);
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
release_sock(sk);
|
|
|
|
sock_poll_wait(file, sock, wait);
|
|
|
|
lock_sock(sk);
|
mptcp: cope with later TCP fallback
With MPTCP v1, passive connections can fallback to TCP after the
subflow becomes established:
syn + MP_CAPABLE ->
<- syn, ack + MP_CAPABLE
ack, seq = 3 ->
// OoO packet is accepted because in-sequence
// passive socket is created, is in ESTABLISHED
// status and tentatively as MP_CAPABLE
ack, seq = 2 ->
// no MP_CAPABLE opt, subflow should fallback to TCP
We can't use the 'subflow' socket fallback, as we don't have
it available for passive connection.
Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes a TCP fallback socket as efficient
as plain TCP ones.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-01-22 08:56:33 +08:00
|
|
|
ssock = __mptcp_tcp_fallback(msk);
|
|
|
|
if (unlikely(ssock))
|
|
|
|
return ssock->ops->poll(file, ssock, NULL);
|
2020-01-22 08:56:25 +08:00
|
|
|
|
|
|
|
if (test_bit(MPTCP_DATA_READY, &msk->flags))
|
|
|
|
mask = EPOLLIN | EPOLLRDNORM;
|
|
|
|
if (sk_stream_is_writeable(sk) &&
|
|
|
|
test_bit(MPTCP_SEND_SPACE, &msk->flags))
|
|
|
|
mask |= EPOLLOUT | EPOLLWRNORM;
|
|
|
|
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
|
|
|
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
|
|
|
|
|
|
|
|
release_sock(sk);
|
|
|
|
|
2020-01-22 08:56:17 +08:00
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:21 +08:00
|
|
|
static int mptcp_shutdown(struct socket *sock, int how)
|
|
|
|
{
|
|
|
|
struct mptcp_sock *msk = mptcp_sk(sock->sk);
|
|
|
|
struct mptcp_subflow_context *subflow;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
pr_debug("sk=%p, how=%d", msk, how);
|
|
|
|
|
|
|
|
lock_sock(sock->sk);
|
|
|
|
|
|
|
|
if (how == SHUT_WR || how == SHUT_RDWR)
|
|
|
|
inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
|
|
|
|
|
|
|
|
how++;
|
|
|
|
|
|
|
|
if ((how & ~SHUTDOWN_MASK) || !how) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sock->state == SS_CONNECTING) {
|
|
|
|
if ((1 << sock->sk->sk_state) &
|
|
|
|
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
|
|
|
|
sock->state = SS_DISCONNECTING;
|
|
|
|
else
|
|
|
|
sock->state = SS_CONNECTED;
|
|
|
|
}
|
|
|
|
|
|
|
|
mptcp_for_each_subflow(msk, subflow) {
|
|
|
|
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
|
|
|
|
|
|
|
|
mptcp_subflow_shutdown(tcp_sk, how);
|
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
release_sock(sock->sk);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-01-25 08:04:02 +08:00
|
|
|
static const struct proto_ops mptcp_stream_ops = {
|
|
|
|
.family = PF_INET,
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.release = inet_release,
|
|
|
|
.bind = mptcp_bind,
|
|
|
|
.connect = mptcp_stream_connect,
|
|
|
|
.socketpair = sock_no_socketpair,
|
|
|
|
.accept = mptcp_stream_accept,
|
|
|
|
.getname = mptcp_v4_getname,
|
|
|
|
.poll = mptcp_poll,
|
|
|
|
.ioctl = inet_ioctl,
|
|
|
|
.gettstamp = sock_gettstamp,
|
|
|
|
.listen = mptcp_listen,
|
|
|
|
.shutdown = mptcp_shutdown,
|
|
|
|
.setsockopt = sock_common_setsockopt,
|
|
|
|
.getsockopt = sock_common_getsockopt,
|
|
|
|
.sendmsg = inet_sendmsg,
|
|
|
|
.recvmsg = inet_recvmsg,
|
|
|
|
.mmap = sock_no_mmap,
|
|
|
|
.sendpage = inet_sendpage,
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
.compat_setsockopt = compat_sock_common_setsockopt,
|
|
|
|
.compat_getsockopt = compat_sock_common_getsockopt,
|
|
|
|
#endif
|
|
|
|
};
|
2020-01-22 08:56:17 +08:00
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static struct inet_protosw mptcp_protosw = {
|
|
|
|
.type = SOCK_STREAM,
|
|
|
|
.protocol = IPPROTO_MPTCP,
|
|
|
|
.prot = &mptcp_prot,
|
2020-01-22 08:56:17 +08:00
|
|
|
.ops = &mptcp_stream_ops,
|
|
|
|
.flags = INET_PROTOSW_ICSK,
|
2020-01-22 08:56:15 +08:00
|
|
|
};
|
|
|
|
|
2020-01-22 08:56:28 +08:00
|
|
|
void mptcp_proto_init(void)
|
2020-01-22 08:56:15 +08:00
|
|
|
{
|
2020-01-22 08:56:17 +08:00
|
|
|
mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
|
|
|
|
|
|
|
|
mptcp_subflow_init();
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
if (proto_register(&mptcp_prot, 1) != 0)
|
|
|
|
panic("Failed to register MPTCP proto.\n");
|
|
|
|
|
|
|
|
inet_register_protosw(&mptcp_protosw);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
2020-01-25 08:04:02 +08:00
|
|
|
static const struct proto_ops mptcp_v6_stream_ops = {
|
|
|
|
.family = PF_INET6,
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.release = inet6_release,
|
|
|
|
.bind = mptcp_bind,
|
|
|
|
.connect = mptcp_stream_connect,
|
|
|
|
.socketpair = sock_no_socketpair,
|
|
|
|
.accept = mptcp_stream_accept,
|
|
|
|
.getname = mptcp_v6_getname,
|
|
|
|
.poll = mptcp_poll,
|
|
|
|
.ioctl = inet6_ioctl,
|
|
|
|
.gettstamp = sock_gettstamp,
|
|
|
|
.listen = mptcp_listen,
|
|
|
|
.shutdown = mptcp_shutdown,
|
|
|
|
.setsockopt = sock_common_setsockopt,
|
|
|
|
.getsockopt = sock_common_getsockopt,
|
|
|
|
.sendmsg = inet6_sendmsg,
|
|
|
|
.recvmsg = inet6_recvmsg,
|
|
|
|
.mmap = sock_no_mmap,
|
|
|
|
.sendpage = inet_sendpage,
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
.compat_setsockopt = compat_sock_common_setsockopt,
|
|
|
|
.compat_getsockopt = compat_sock_common_getsockopt,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static struct proto mptcp_v6_prot;
|
|
|
|
|
2020-01-22 08:56:20 +08:00
|
|
|
static void mptcp_v6_destroy(struct sock *sk)
|
|
|
|
{
|
|
|
|
mptcp_destroy(sk);
|
|
|
|
inet6_destroy_sock(sk);
|
|
|
|
}
|
|
|
|
|
2020-01-22 08:56:15 +08:00
|
|
|
static struct inet_protosw mptcp_v6_protosw = {
|
|
|
|
.type = SOCK_STREAM,
|
|
|
|
.protocol = IPPROTO_MPTCP,
|
|
|
|
.prot = &mptcp_v6_prot,
|
2020-01-22 08:56:17 +08:00
|
|
|
.ops = &mptcp_v6_stream_ops,
|
2020-01-22 08:56:15 +08:00
|
|
|
.flags = INET_PROTOSW_ICSK,
|
|
|
|
};
|
|
|
|
|
2020-01-22 08:56:28 +08:00
|
|
|
int mptcp_proto_v6_init(void)
|
2020-01-22 08:56:15 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
mptcp_v6_prot = mptcp_prot;
|
|
|
|
strcpy(mptcp_v6_prot.name, "MPTCPv6");
|
|
|
|
mptcp_v6_prot.slab = NULL;
|
2020-01-22 08:56:20 +08:00
|
|
|
mptcp_v6_prot.destroy = mptcp_v6_destroy;
|
2020-01-22 08:56:15 +08:00
|
|
|
mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) +
|
|
|
|
sizeof(struct ipv6_pinfo);
|
|
|
|
|
|
|
|
err = proto_register(&mptcp_v6_prot, 1);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
err = inet6_register_protosw(&mptcp_v6_protosw);
|
|
|
|
if (err)
|
|
|
|
proto_unregister(&mptcp_v6_prot);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
#endif
|