Merge branch 'mptcp-MP_FAIL-timeout'
Mat Martineau says: ==================== mptcp: Timeout for MP_FAIL response When one peer sends an infinite mapping to coordinate fallback from MPTCP to regular TCP, the other peer is expected to send a packet with the MPTCP MP_FAIL option to acknowledge the infinite mapping. Rather than leave the connection in some half-fallback state, this series adds a timeout after which the infinite mapping sender will reset the connection. Patch 1 adds a fallback self test. Patches 2-5 make use of the MPTCP socket's retransmit timer to reset the MPTCP connection if no MP_FAIL was received. Patches 6 and 7 extends the self test to check MP_FAIL-related MIBs. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
124de27101
|
@ -287,11 +287,27 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
|
|||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
|
||||
struct sock *s = (struct sock *)msk;
|
||||
|
||||
pr_debug("fail_seq=%llu", fail_seq);
|
||||
|
||||
if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
|
||||
if (mptcp_has_another_subflow(sk) || !READ_ONCE(msk->allow_infinite_fallback))
|
||||
return;
|
||||
|
||||
if (!READ_ONCE(subflow->mp_fail_response_expect)) {
|
||||
pr_debug("send MP_FAIL response and infinite map");
|
||||
|
||||
subflow->send_mp_fail = 1;
|
||||
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
|
||||
subflow->send_infinite_map = 1;
|
||||
} else if (s && inet_sk_state_load(s) != TCP_CLOSE) {
|
||||
pr_debug("MP_FAIL response received");
|
||||
|
||||
mptcp_data_lock(s);
|
||||
if (inet_sk_state_load(s) != TCP_CLOSE)
|
||||
sk_stop_timer(s, &s->sk_timer);
|
||||
mptcp_data_unlock(s);
|
||||
}
|
||||
}
|
||||
|
||||
/* path manager helpers */
|
||||
|
|
|
@ -1605,8 +1605,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
|
|||
|
||||
out:
|
||||
/* ensure the rtx timer is running */
|
||||
mptcp_data_lock(sk);
|
||||
if (!mptcp_timer_pending(sk))
|
||||
mptcp_reset_timer(sk);
|
||||
mptcp_data_unlock(sk);
|
||||
if (copied)
|
||||
__mptcp_check_send_data_fin(sk);
|
||||
}
|
||||
|
@ -2167,10 +2169,38 @@ static void mptcp_retransmit_timer(struct timer_list *t)
|
|||
sock_put(sk);
|
||||
}
|
||||
|
||||
static struct mptcp_subflow_context *
|
||||
mp_fail_response_expect_subflow(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow, *ret = NULL;
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
if (READ_ONCE(subflow->mp_fail_response_expect)) {
|
||||
ret = subflow;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mptcp_check_mp_fail_response(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
||||
bh_lock_sock(sk);
|
||||
subflow = mp_fail_response_expect_subflow(msk);
|
||||
if (subflow)
|
||||
__set_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags);
|
||||
bh_unlock_sock(sk);
|
||||
}
|
||||
|
||||
static void mptcp_timeout_timer(struct timer_list *t)
|
||||
{
|
||||
struct sock *sk = from_timer(sk, t, sk_timer);
|
||||
|
||||
mptcp_check_mp_fail_response(mptcp_sk(sk));
|
||||
mptcp_schedule_work(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
|
@ -2491,8 +2521,27 @@ static void __mptcp_retrans(struct sock *sk)
|
|||
reset_timer:
|
||||
mptcp_check_and_set_pending(sk);
|
||||
|
||||
mptcp_data_lock(sk);
|
||||
if (!mptcp_timer_pending(sk))
|
||||
mptcp_reset_timer(sk);
|
||||
mptcp_data_unlock(sk);
|
||||
}
|
||||
|
||||
static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *ssk;
|
||||
bool slow;
|
||||
|
||||
subflow = mp_fail_response_expect_subflow(msk);
|
||||
if (subflow) {
|
||||
pr_debug("MP_FAIL doesn't respond, reset the subflow");
|
||||
|
||||
ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
slow = lock_sock_fast(ssk);
|
||||
mptcp_subflow_reset(ssk);
|
||||
unlock_sock_fast(ssk, slow);
|
||||
}
|
||||
}
|
||||
|
||||
static void mptcp_worker(struct work_struct *work)
|
||||
|
@ -2535,6 +2584,9 @@ static void mptcp_worker(struct work_struct *work)
|
|||
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
|
||||
__mptcp_retrans(sk);
|
||||
|
||||
if (test_and_clear_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags))
|
||||
mptcp_mp_fail_no_response(msk);
|
||||
|
||||
unlock:
|
||||
release_sock(sk);
|
||||
sock_put(sk);
|
||||
|
@ -2651,8 +2703,10 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
|
|||
} else {
|
||||
pr_debug("Sending DATA_FIN on subflow %p", ssk);
|
||||
tcp_send_ack(ssk);
|
||||
mptcp_data_lock(sk);
|
||||
if (!mptcp_timer_pending(sk))
|
||||
mptcp_reset_timer(sk);
|
||||
mptcp_data_unlock(sk);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2753,8 +2807,10 @@ static void __mptcp_destroy_sock(struct sock *sk)
|
|||
/* join list will be eventually flushed (with rst) at sock lock release time*/
|
||||
list_splice_init(&msk->conn_list, &conn_list);
|
||||
|
||||
sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
|
||||
mptcp_data_lock(sk);
|
||||
mptcp_stop_timer(sk);
|
||||
sk_stop_timer(sk, &sk->sk_timer);
|
||||
mptcp_data_unlock(sk);
|
||||
msk->pm.status = 0;
|
||||
|
||||
/* clears msk->subflow, allowing the following loop to close
|
||||
|
@ -2816,7 +2872,9 @@ cleanup:
|
|||
__mptcp_destroy_sock(sk);
|
||||
do_cancel_work = true;
|
||||
} else {
|
||||
mptcp_data_lock(sk);
|
||||
sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
|
||||
mptcp_data_unlock(sk);
|
||||
}
|
||||
release_sock(sk);
|
||||
if (do_cancel_work)
|
||||
|
@ -2861,8 +2919,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
|
|||
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
|
||||
}
|
||||
|
||||
sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
|
||||
mptcp_data_lock(sk);
|
||||
mptcp_stop_timer(sk);
|
||||
sk_stop_timer(sk, &sk->sk_timer);
|
||||
mptcp_data_unlock(sk);
|
||||
|
||||
if (mptcp_sk(sk)->token)
|
||||
mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
|
||||
|
|
|
@ -116,6 +116,7 @@
|
|||
#define MPTCP_WORK_EOF 3
|
||||
#define MPTCP_FALLBACK_DONE 4
|
||||
#define MPTCP_WORK_CLOSE_SUBFLOW 5
|
||||
#define MPTCP_FAIL_NO_RESPONSE 6
|
||||
|
||||
/* MPTCP socket release cb flags */
|
||||
#define MPTCP_PUSH_PENDING 1
|
||||
|
@ -448,6 +449,7 @@ struct mptcp_subflow_context {
|
|||
stale : 1, /* unable to snd/rcv data, do not use for xmit */
|
||||
local_id_valid : 1; /* local_id is correctly initialized */
|
||||
enum mptcp_data_avail data_avail;
|
||||
bool mp_fail_response_expect;
|
||||
u32 remote_nonce;
|
||||
u64 thmac;
|
||||
u32 local_nonce;
|
||||
|
|
|
@ -968,6 +968,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
|
|||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
|
||||
bool csum_reqd = READ_ONCE(msk->csum_enabled);
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
struct mptcp_ext *mpext;
|
||||
struct sk_buff *skb;
|
||||
u16 data_len;
|
||||
|
@ -1009,6 +1010,12 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
|
|||
pr_debug("infinite mapping received");
|
||||
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
|
||||
subflow->map_data_len = 0;
|
||||
if (sk && inet_sk_state_load(sk) != TCP_CLOSE) {
|
||||
mptcp_data_lock(sk);
|
||||
if (inet_sk_state_load(sk) != TCP_CLOSE)
|
||||
sk_stop_timer(sk, &sk->sk_timer);
|
||||
mptcp_data_unlock(sk);
|
||||
}
|
||||
return MAPPING_INVALID;
|
||||
}
|
||||
|
||||
|
@ -1217,6 +1224,12 @@ fallback:
|
|||
tcp_send_active_reset(ssk, GFP_ATOMIC);
|
||||
while ((skb = skb_peek(&ssk->sk_receive_queue)))
|
||||
sk_eat_skb(ssk, skb);
|
||||
} else {
|
||||
WRITE_ONCE(subflow->mp_fail_response_expect, true);
|
||||
/* The data lock is acquired in __mptcp_move_skbs() */
|
||||
sk_reset_timer((struct sock *)msk,
|
||||
&((struct sock *)msk)->sk_timer,
|
||||
jiffies + TCP_RTO_MAX);
|
||||
}
|
||||
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
|
||||
return true;
|
||||
|
|
|
@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m
|
|||
CONFIG_NFT_COMPAT=m
|
||||
CONFIG_NETFILTER_XTABLES=m
|
||||
CONFIG_NETFILTER_XT_MATCH_BPF=m
|
||||
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
|
||||
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
|
||||
CONFIG_NETFILTER_XT_TARGET_MARK=m
|
||||
CONFIG_NF_TABLES_INET=y
|
||||
CONFIG_NFT_TPROXY=m
|
||||
CONFIG_NFT_SOCKET=m
|
||||
|
@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
|
|||
CONFIG_IP_MULTIPLE_TABLES=y
|
||||
CONFIG_IP_NF_TARGET_REJECT=m
|
||||
CONFIG_IPV6_MULTIPLE_TABLES=y
|
||||
CONFIG_NET_ACT_CSUM=m
|
||||
CONFIG_NET_ACT_PEDIT=m
|
||||
CONFIG_NET_CLS_ACT=y
|
||||
CONFIG_NET_CLS_FW=m
|
||||
CONFIG_NET_SCH_INGRESS=m
|
||||
|
|
|
@ -266,6 +266,58 @@ reset_with_allow_join_id0()
|
|||
ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
|
||||
}
|
||||
|
||||
# Modify TCP payload without corrupting the TCP packet
|
||||
#
|
||||
# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets
|
||||
# carrying enough data.
|
||||
# Once it is done, the TCP Checksum field is updated so the packet is still
|
||||
# considered as valid at the TCP level.
|
||||
# Because the MPTCP checksum, covering the TCP options and data, has not been
|
||||
# updated, the modification will be detected and an MP_FAIL will be emitted:
|
||||
# what we want to validate here without corrupting "random" MPTCP options.
|
||||
#
|
||||
# To avoid having tc producing this pr_info() message for each TCP ACK packets
|
||||
# not carrying enough data:
|
||||
#
|
||||
# tc action pedit offset 162 out of bounds
|
||||
#
|
||||
# Netfilter is used to mark packets with enough data.
|
||||
reset_with_fail()
|
||||
{
|
||||
reset "${1}" || return 1
|
||||
|
||||
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
|
||||
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
|
||||
|
||||
check_invert=1
|
||||
validate_checksum=1
|
||||
local i="$2"
|
||||
local ip="${3:-4}"
|
||||
local tables
|
||||
|
||||
tables="iptables"
|
||||
if [ $ip -eq 6 ]; then
|
||||
tables="ip6tables"
|
||||
fi
|
||||
|
||||
ip netns exec $ns2 $tables \
|
||||
-t mangle \
|
||||
-A OUTPUT \
|
||||
-o ns2eth$i \
|
||||
-p tcp \
|
||||
-m length --length 150:9999 \
|
||||
-m statistic --mode nth --packet 1 --every 99999 \
|
||||
-j MARK --set-mark 42 || exit 1
|
||||
|
||||
tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1
|
||||
tc -n $ns2 filter add dev ns2eth$i egress \
|
||||
protocol ip prio 1000 \
|
||||
handle 42 fw \
|
||||
action pedit munge offset 148 u8 invert \
|
||||
pipe csum tcp \
|
||||
index 100 || exit 1
|
||||
}
|
||||
|
||||
fail_test()
|
||||
{
|
||||
ret=1
|
||||
|
@ -961,6 +1013,7 @@ chk_csum_nr()
|
|||
local csum_ns2=${2:-0}
|
||||
local count
|
||||
local dump_stats
|
||||
local extra_msg=""
|
||||
local allow_multi_errors_ns1=0
|
||||
local allow_multi_errors_ns2=0
|
||||
|
||||
|
@ -976,6 +1029,9 @@ chk_csum_nr()
|
|||
printf "%-${nr_blank}s %s" " " "sum"
|
||||
count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$csum_ns1" ]; then
|
||||
extra_msg="$extra_msg ns1=$count"
|
||||
fi
|
||||
if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
|
||||
{ [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
|
||||
echo "[fail] got $count data checksum error[s] expected $csum_ns1"
|
||||
|
@ -987,28 +1043,58 @@ chk_csum_nr()
|
|||
echo -n " - csum "
|
||||
count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$csum_ns2" ]; then
|
||||
extra_msg="$extra_msg ns2=$count"
|
||||
fi
|
||||
if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
|
||||
{ [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
|
||||
echo "[fail] got $count data checksum error[s] expected $csum_ns2"
|
||||
fail_test
|
||||
dump_stats=1
|
||||
else
|
||||
echo "[ ok ]"
|
||||
echo -n "[ ok ]"
|
||||
fi
|
||||
[ "${dump_stats}" = 1 ] && dump_stats
|
||||
|
||||
echo "$extra_msg"
|
||||
}
|
||||
|
||||
chk_fail_nr()
|
||||
{
|
||||
local fail_tx=$1
|
||||
local fail_rx=$2
|
||||
local ns_invert=${3:-""}
|
||||
local count
|
||||
local dump_stats
|
||||
local ns_tx=$ns1
|
||||
local ns_rx=$ns2
|
||||
local extra_msg=""
|
||||
local allow_tx_lost=0
|
||||
local allow_rx_lost=0
|
||||
|
||||
if [[ $ns_invert = "invert" ]]; then
|
||||
ns_tx=$ns2
|
||||
ns_rx=$ns1
|
||||
extra_msg=" invert"
|
||||
fi
|
||||
|
||||
if [[ "${fail_tx}" = "-"* ]]; then
|
||||
allow_tx_lost=1
|
||||
fail_tx=${fail_tx:1}
|
||||
fi
|
||||
if [[ "${fail_rx}" = "-"* ]]; then
|
||||
allow_rx_lost=1
|
||||
fail_rx=${fail_rx:1}
|
||||
fi
|
||||
|
||||
printf "%-${nr_blank}s %s" " " "ftx"
|
||||
count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
|
||||
count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$fail_tx" ]; then
|
||||
extra_msg="$extra_msg,tx=$count"
|
||||
fi
|
||||
if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
|
||||
{ [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
|
||||
echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
|
||||
fail_test
|
||||
dump_stats=1
|
||||
|
@ -1017,17 +1103,23 @@ chk_fail_nr()
|
|||
fi
|
||||
|
||||
echo -n " - failrx"
|
||||
count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
|
||||
count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$fail_rx" ]; then
|
||||
extra_msg="$extra_msg,rx=$count"
|
||||
fi
|
||||
if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
|
||||
{ [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
|
||||
echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
|
||||
fail_test
|
||||
dump_stats=1
|
||||
else
|
||||
echo "[ ok ]"
|
||||
echo -n "[ ok ]"
|
||||
fi
|
||||
|
||||
[ "${dump_stats}" = 1 ] && dump_stats
|
||||
|
||||
echo "$extra_msg"
|
||||
}
|
||||
|
||||
chk_fclose_nr()
|
||||
|
@ -1199,7 +1291,7 @@ chk_join_nr()
|
|||
echo "[ ok ]"
|
||||
fi
|
||||
[ "${dump_stats}" = 1 ] && dump_stats
|
||||
if [ $checksum -eq 1 ]; then
|
||||
if [ $validate_checksum -eq 1 ]; then
|
||||
chk_csum_nr $csum_ns1 $csum_ns2
|
||||
chk_fail_nr $fail_nr $fail_nr
|
||||
chk_rst_nr $rst_nr $rst_nr
|
||||
|
@ -2590,6 +2682,22 @@ fastclose_tests()
|
|||
fi
|
||||
}
|
||||
|
||||
pedit_action_pkts()
|
||||
{
|
||||
tc -n $ns2 -j -s action show action pedit index 100 | \
|
||||
sed 's/.*"packets":\([0-9]\+\),.*/\1/'
|
||||
}
|
||||
|
||||
fail_tests()
|
||||
{
|
||||
# single subflow
|
||||
if reset_with_fail "Infinite map" 1; then
|
||||
run_tests $ns1 $ns2 10.0.1.1 128
|
||||
chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
|
||||
chk_fail_nr 1 -1 invert
|
||||
fi
|
||||
}
|
||||
|
||||
implicit_tests()
|
||||
{
|
||||
# userspace pm type prevents add_addr
|
||||
|
@ -2658,6 +2766,7 @@ all_tests_sorted=(
|
|||
d@deny_join_id0_tests
|
||||
m@fullmesh_tests
|
||||
z@fastclose_tests
|
||||
F@fail_tests
|
||||
I@implicit_tests
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue