Merge branch 'mptcp-fix-incoming-options-parsing'

Paolo Abeni says:

====================
mptcp: fix incoming options parsing

This series addresses a serious issue in MPTCP option parsing.

This is bigger than the usual -net change, but I was unable to find a
working, sane, smaller fix.

The core change is inside patch 2/5 which moved MPTCP options parsing from
the TCP code inside existing MPTCP hooks and clean MPTCP options status on
each processed packet.

The patch 1/5 is a needed pre-requisite, and patches 3,4,5 are smaller,
related fixes.

v1 -> v2:
 - cleaned-up patch 1/5
 - rebased on top of current -net
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-04-30 12:23:23 -07:00
commit 8c75595360
7 changed files with 138 additions and 149 deletions

View File

@ -78,47 +78,6 @@ struct tcp_sack_block {
#define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
#define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/
#if IS_ENABLED(CONFIG_MPTCP)
struct mptcp_options_received {
u64 sndr_key;
u64 rcvr_key;
u64 data_ack;
u64 data_seq;
u32 subflow_seq;
u16 data_len;
u16 mp_capable : 1,
mp_join : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
family : 4,
echo : 1,
backup : 1;
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[20];
u8 join_id;
u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
__unused:2;
u8 addr_id;
u8 rm_id;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct in6_addr addr6;
#endif
};
u64 ahmac;
u16 port;
};
#endif
struct tcp_options_received {
/* PAWS/RTTM data */
int ts_recent_stamp;/* Time we stored ts_recent (for aging) */
@ -136,9 +95,6 @@ struct tcp_options_received {
u8 num_sacks; /* Number of SACK blocks */
u16 user_mss; /* mss requested by user in ioctl */
u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
#if IS_ENABLED(CONFIG_MPTCP)
struct mptcp_options_received mptcp;
#endif
};
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
@ -148,13 +104,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#if IS_ENABLED(CONFIG_SMC)
rx_opt->smc_ok = 0;
#endif
#if IS_ENABLED(CONFIG_MPTCP)
rx_opt->mptcp.mp_capable = 0;
rx_opt->mptcp.mp_join = 0;
rx_opt->mptcp.add_addr = 0;
rx_opt->mptcp.rm_addr = 0;
rx_opt->mptcp.dss = 0;
#endif
}
/* This is the max number of SACKS that we'll generate and process. It's safe

View File

@ -68,11 +68,8 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
return tcp_rsk(req)->is_mptcp;
}
void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
int opsize, struct tcp_options_received *opt_rx);
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
unsigned int *size, struct mptcp_out_options *opts);
void mptcp_rcv_synsent(struct sock *sk);
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
struct mptcp_out_options *opts);
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,

View File

@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
case TCPOPT_MPTCP:
mptcp_parse_option(skb, ptr, opsize, opt_rx);
break;
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@ -5990,9 +5986,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
if (sk_is_mptcp(sk))
mptcp_rcv_synsent(sk);
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */

View File

@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags)
return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
}
void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
int opsize, struct tcp_options_received *opt_rx)
static void mptcp_parse_option(const struct sk_buff *skb,
const unsigned char *ptr, int opsize,
struct mptcp_options_received *mp_opt)
{
struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
u8 subtype = *ptr >> 4;
int expected_opsize;
u8 version;
@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
}
void mptcp_get_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx)
struct mptcp_options_received *mp_opt)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff * 4) - sizeof(struct tcphdr);
const unsigned char *ptr;
int length;
/* initialize option status */
mp_opt->mp_capable = 0;
mp_opt->mp_join = 0;
mp_opt->add_addr = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
while (length > 0) {
@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb,
if (opsize > length)
return; /* don't parse partial options */
if (opcode == TCPOPT_MPTCP)
mptcp_parse_option(skb, ptr, opsize, opt_rx);
mptcp_parse_option(skb, ptr, opsize, mp_opt);
ptr += opsize - 2;
length -= opsize;
}
@ -344,28 +352,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false;
}
void mptcp_rcv_synsent(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
} else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
subflow->mp_join = 1;
subflow->thmac = tp->rx_opt.mptcp.thmac;
subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
} else if (subflow->request_mptcp) {
tcp_sk(sk)->is_mptcp = 0;
}
}
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
* TCP can't schedule delack timer before the subflow is fully established.
* MPTCP uses the delack timer to do 3rd ack retransmissions
@ -709,7 +695,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
return subflow->mp_capable;
if (mp_opt->use_ack) {
if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
* additional ack.
*/
@ -717,8 +703,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
goto fully_established;
}
WARN_ON_ONCE(subflow->can_ack);
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP
*/
@ -728,6 +712,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
return false;
}
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
subflow->fully_established = 1;
subflow->remote_key = mp_opt->sndr_key;
subflow->can_ack = 1;
@ -819,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_options_received *mp_opt;
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
mp_opt = &opt_rx->mptcp;
if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
struct mptcp_addr_info addr;
addr.port = htons(mp_opt->port);
addr.id = mp_opt->addr_id;
if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
addr.port = htons(mp_opt.port);
addr.id = mp_opt.addr_id;
if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
addr.family = AF_INET;
addr.addr = mp_opt->addr;
addr.addr = mp_opt.addr;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
addr.family = AF_INET6;
addr.addr6 = mp_opt->addr6;
addr.addr6 = mp_opt.addr6;
}
#endif
if (!mp_opt->echo)
if (!mp_opt.echo)
mptcp_pm_add_addr_received(msk, &addr);
mp_opt->add_addr = 0;
mp_opt.add_addr = 0;
}
if (!mp_opt->dss)
if (!mp_opt.dss)
return;
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
*/
if (mp_opt->use_ack)
update_una(msk, mp_opt);
if (mp_opt.use_ack)
update_una(msk, &mp_opt);
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
@ -861,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
memset(mpext, 0, sizeof(*mpext));
if (mp_opt->use_map) {
if (mp_opt->mpc_map) {
if (mp_opt.use_map) {
if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
*/
@ -872,13 +858,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
mpext->subflow_seq = 1;
mpext->dsn64 = 1;
mpext->mpc_map = 1;
mpext->data_fin = 0;
} else {
mpext->data_seq = mp_opt->data_seq;
mpext->subflow_seq = mp_opt->subflow_seq;
mpext->dsn64 = mp_opt->dsn64;
mpext->data_fin = mp_opt->data_fin;
mpext->data_seq = mp_opt.data_seq;
mpext->subflow_seq = mp_opt.subflow_seq;
mpext->dsn64 = mp_opt.dsn64;
mpext->data_fin = mp_opt.data_fin;
}
mpext->data_len = mp_opt->data_len;
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
}
}

View File

@ -1334,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct tcp_options_received *opt_rx,
const struct mptcp_options_received *mp_opt,
struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@ -1373,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
if (opt_rx->mptcp.mp_capable) {
if (mp_opt->mp_capable) {
msk->can_ack = true;
msk->remote_key = opt_rx->mptcp.sndr_key;
msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
msk->ack_seq = ack_seq;

View File

@ -91,6 +91,45 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
struct mptcp_options_received {
u64 sndr_key;
u64 rcvr_key;
u64 data_ack;
u64 data_seq;
u32 subflow_seq;
u16 data_len;
u16 mp_capable : 1,
mp_join : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
family : 4,
echo : 1,
backup : 1;
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[20];
u8 join_id;
u8 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
__unused:2;
u8 addr_id;
u8 rm_id;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct in6_addr addr6;
#endif
};
u64 ahmac;
u16 port;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{
return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
@ -331,10 +370,10 @@ int mptcp_proto_v6_init(void);
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct tcp_options_received *opt_rx,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx);
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);

View File

@ -124,12 +124,11 @@ static void subflow_init_req(struct request_sock *req,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct tcp_options_received rx_opt;
struct mptcp_options_received mp_opt;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
mptcp_get_options(skb, &rx_opt);
mptcp_get_options(skb, &mp_opt);
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
@ -142,16 +141,16 @@ static void subflow_init_req(struct request_sock *req,
return;
#endif
if (rx_opt.mptcp.mp_capable) {
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
if (rx_opt.mptcp.mp_join)
if (mp_opt.mp_join)
return;
} else if (rx_opt.mptcp.mp_join) {
} else if (mp_opt.mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
if (mp_opt.mp_capable && listener->request_mptcp) {
int err;
err = mptcp_token_new_request(req);
@ -159,13 +158,13 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
} else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
} else if (mp_opt.mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
subflow_req->backup = rx_opt.mptcp.backup;
subflow_req->remote_id = rx_opt.mptcp.join_id;
subflow_req->token = rx_opt.mptcp.token;
subflow_req->remote_nonce = rx_opt.mptcp.nonce;
subflow_req->backup = mp_opt.backup;
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
subflow_req->remote_nonce);
if (!subflow_token_join_request(req, skb)) {
@ -221,7 +220,9 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@ -230,14 +231,36 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
parent->sk_state_change(parent);
}
if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp)
/* be sure no special action on any packet other than syn-ack */
if (subflow->conn_finished)
return;
subflow->conn_finished = 1;
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp && mp_opt.mp_capable) {
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
} else if (subflow->request_join && mp_opt.mp_join) {
subflow->mp_join = 1;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
} else if (subflow->request_mptcp) {
tp->is_mptcp = 0;
}
if (!tp->is_mptcp)
return;
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
subflow->conn_finished = 1;
if (skb) {
pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
@ -264,7 +287,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (!mptcp_finish_join(sk))
goto do_reset;
subflow->conn_finished = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
} else {
do_reset:
@ -322,7 +344,7 @@ drop:
/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
const struct tcp_options_received *rx_opt)
const struct mptcp_options_received *mp_opt)
{
const struct mptcp_subflow_request_sock *subflow_req;
u8 hmac[MPTCPOPT_HMAC_LEN];
@ -339,7 +361,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->local_nonce, hmac);
ret = true;
if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac)))
ret = false;
sock_put((struct sock *)msk);
@ -395,7 +417,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
struct tcp_options_received opt_rx;
struct mptcp_options_received mp_opt;
bool fallback_is_fatal = false;
struct sock *new_msk = NULL;
bool fallback = false;
@ -403,7 +425,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
opt_rx.mptcp.mp_capable = 0;
/* we need later a valid 'mp_capable' value even when options are not
* parsed
*/
mp_opt.mp_capable = 0;
if (tcp_rsk(req)->is_mptcp == 0)
goto create_child;
@ -418,22 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto create_msk;
}
mptcp_get_options(skb, &opt_rx);
if (!opt_rx.mptcp.mp_capable) {
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
}
create_msk:
new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
fallback_is_fatal = true;
opt_rx.mptcp.mp_join = 0;
mptcp_get_options(skb, &opt_rx);
if (!opt_rx.mptcp.mp_join ||
!subflow_hmac_valid(req, &opt_rx)) {
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
return NULL;
}
@ -473,9 +497,9 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
ctx->remote_key = opt_rx.mptcp.sndr_key;
ctx->fully_established = opt_rx.mptcp.mp_capable;
ctx->can_ack = opt_rx.mptcp.mp_capable;
ctx->remote_key = mp_opt.sndr_key;
ctx->fully_established = mp_opt.mp_capable;
ctx->can_ack = mp_opt.mp_capable;
} else if (ctx->mp_join) {
struct mptcp_sock *owner;