tcp: remove early retransmit
This patch removes support for RFC5827 early retransmit (i.e., fast recovery on small inflight with <3 dupacks) because it is subsumed by the new RACK loss detection. More specifically, when RACK receives DUPACKs, it arms a reordering timer to start fast recovery after a quarter of (min)RTT; hence it covers early retransmit, except that RACK does not limit itself to specific inflight or dupack numbers. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
840a3cbe89
commit
bec41a11dd
|
@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN
|
|||
Allows TCP to send "duplicate" SACKs.
|
||||
|
||||
tcp_early_retrans - INTEGER
|
||||
Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
|
||||
for triggering fast retransmit when the amount of outstanding data is
|
||||
small and when no previously unsent data can be transmitted (such
|
||||
that limited transmit could be used). Also controls the use of
|
||||
Tail loss probe (TLP) that converts RTOs occurring due to tail
|
||||
losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
|
||||
Tail loss probe (TLP) converts RTOs occurring due to tail
|
||||
losses into fast recovery (draft-ietf-tcpm-rack). Note that
|
||||
TLP requires RACK to function properly (see tcp_recovery below).
|
||||
Possible values:
|
||||
0 disables ER
|
||||
1 enables ER
|
||||
2 enables ER but delays fast recovery and fast retransmit
|
||||
by a fourth of RTT. This mitigates false recovery of the
|
||||
connection when the network has a small degree of reordering
|
||||
(less than 3 packets).
|
||||
3 enables delayed ER and TLP.
|
||||
4 enables TLP only.
|
||||
0 disables TLP
|
||||
3 or 4 enables TLP
|
||||
Default: 3
|
||||
|
||||
tcp_ecn - INTEGER
|
||||
|
|
|
@ -224,8 +224,7 @@ struct tcp_sock {
|
|||
repair : 1,
|
||||
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
|
||||
u8 repair_queue;
|
||||
u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
|
||||
syn_data:1, /* SYN includes data */
|
||||
u8 syn_data:1, /* SYN includes data */
|
||||
syn_fastopen:1, /* SYN includes Fast Open option */
|
||||
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
|
||||
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
|
||||
|
|
|
@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
|
|||
const struct sk_buff *next_skb);
|
||||
|
||||
/* tcp_input.c */
|
||||
void tcp_resume_early_retransmit(struct sock *sk);
|
||||
void tcp_rearm_rto(struct sock *sk);
|
||||
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
|
||||
void tcp_reset(struct sock *sk);
|
||||
|
@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
|
|||
tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
|
||||
}
|
||||
|
||||
/* TCP early-retransmit (ER) is similar to but more conservative than
|
||||
* the thin-dupack feature. Enable ER only if thin-dupack is disabled.
|
||||
*/
|
||||
static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
|
||||
{
|
||||
struct net *net = sock_net((struct sock *)tp);
|
||||
|
||||
tp->do_early_retrans = sysctl_tcp_early_retrans &&
|
||||
sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
|
||||
!(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
|
||||
net->ipv4.sysctl_tcp_reordering == 3;
|
||||
}
|
||||
|
||||
static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
|
||||
{
|
||||
tp->do_early_retrans = 0;
|
||||
}
|
||||
|
||||
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
|
||||
{
|
||||
return tp->sacked_out + tp->lost_out;
|
||||
|
|
|
@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
|||
}
|
||||
|
||||
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
||||
r->idiag_timer = 1;
|
||||
|
|
|
@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk)
|
|||
tp->mss_cache = TCP_MSS_DEFAULT;
|
||||
|
||||
tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
|
||||
tcp_enable_early_retrans(tp);
|
||||
tcp_assign_congestion_control(sk);
|
||||
|
||||
tp->tsoffset = 0;
|
||||
|
@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
|||
err = -EINVAL;
|
||||
else {
|
||||
tp->thin_dupack = val;
|
||||
if (tp->thin_dupack)
|
||||
tcp_disable_early_retrans(tp);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
|
|||
tcp_disable_fack(tp);
|
||||
}
|
||||
|
||||
if (metric > 0)
|
||||
tcp_disable_early_retrans(tp);
|
||||
tp->rack.reord = 1;
|
||||
}
|
||||
|
||||
|
@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
|
|||
return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
|
||||
}
|
||||
|
||||
static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
unsigned long delay;
|
||||
|
||||
/* Delay early retransmit and entering fast recovery for
|
||||
* max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
|
||||
* available, or RTO is scheduled to fire first.
|
||||
*/
|
||||
if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
|
||||
(flag & FLAG_ECE) || !tp->srtt_us)
|
||||
return false;
|
||||
|
||||
delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
|
||||
msecs_to_jiffies(2));
|
||||
|
||||
if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
|
||||
return false;
|
||||
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
|
||||
TCP_RTO_MAX);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Linux NewReno/SACK/FACK/ECN state machine.
|
||||
* --------------------------------------
|
||||
*
|
||||
|
@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
|
|||
tcp_is_sack(tp) && !tcp_send_head(sk))
|
||||
return true;
|
||||
|
||||
/* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious
|
||||
* retransmissions due to small network reorderings, we implement
|
||||
* Mitigation A.3 in the RFC and delay the retransmission for a short
|
||||
* interval if appropriate.
|
||||
*/
|
||||
if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
|
||||
(tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
|
||||
!tcp_may_send_now(sk))
|
||||
return !tcp_pause_early_retransmit(sk, flag);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
|
|||
} else {
|
||||
u32 rto = inet_csk(sk)->icsk_rto;
|
||||
/* Offset the time elapsed after installing regular RTO */
|
||||
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
||||
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
||||
struct sk_buff *skb = tcp_write_queue_head(sk);
|
||||
const u32 rto_time_stamp =
|
||||
|
@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk)
|
|||
}
|
||||
}
|
||||
|
||||
/* This function is called when the delayed ER timer fires. TCP enters
|
||||
* fast recovery and performs fast-retransmit.
|
||||
*/
|
||||
void tcp_resume_early_retransmit(struct sock *sk)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
tcp_rearm_rto(sk);
|
||||
|
||||
/* Stop if ER is disabled after the delayed ER timer is scheduled */
|
||||
if (!tp->do_early_retrans)
|
||||
return;
|
||||
|
||||
tcp_enter_recovery(sk, false);
|
||||
tcp_update_scoreboard(sk, 1);
|
||||
tcp_xmit_retransmit_queue(sk);
|
||||
}
|
||||
|
||||
/* If we get here, the whole TSO packet has not been acked. */
|
||||
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
|
@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
|||
|
||||
skb_mstamp_get(&sack_state.ack_time);
|
||||
|
||||
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
|
||||
if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
|
||||
tcp_rearm_rto(sk);
|
||||
|
||||
if (after(ack, prior_snd_una)) {
|
||||
|
|
|
@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
|
|||
int state;
|
||||
|
||||
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
||||
timer_active = 1;
|
||||
|
|
|
@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk)
|
|||
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
|
||||
if (val && tp->reordering != val) {
|
||||
tcp_disable_fack(tp);
|
||||
tcp_disable_early_retrans(tp);
|
||||
tp->reordering = val;
|
||||
}
|
||||
|
||||
|
|
|
@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
|||
newtp->sacked_out = 0;
|
||||
newtp->fackets_out = 0;
|
||||
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
|
||||
tcp_enable_early_retrans(newtp);
|
||||
newtp->tlp_high_seq = 0;
|
||||
newtp->lsndtime = treq->snt_synack.stamp_jiffies;
|
||||
newsk->sk_txhash = treq->txhash;
|
||||
|
|
|
@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
|
|||
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
|
||||
|
||||
tp->packets_out += tcp_skb_pcount(skb);
|
||||
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
||||
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
|
||||
tcp_rearm_rto(sk);
|
||||
}
|
||||
|
||||
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
|
||||
tcp_skb_pcount(skb));
|
||||
|
@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
|
|||
u32 timeout, tlp_time_stamp, rto_time_stamp;
|
||||
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
|
||||
|
||||
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
|
||||
return false;
|
||||
/* No consecutive loss probes. */
|
||||
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
|
||||
tcp_rearm_rto(sk);
|
||||
|
@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
|
|||
/* Schedule a loss probe in 2*RTT for SACK capable connections
|
||||
* in Open state, that are either limited by cwnd or application.
|
||||
*/
|
||||
if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
|
||||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
|
||||
if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
|
||||
!tp->packets_out || !tcp_is_sack(tp) ||
|
||||
icsk->icsk_ca_state != TCP_CA_Open)
|
||||
return false;
|
||||
|
||||
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
|
||||
|
|
|
@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk)
|
|||
case ICSK_TIME_REO_TIMEOUT:
|
||||
tcp_rack_reo_timeout(sk);
|
||||
break;
|
||||
case ICSK_TIME_EARLY_RETRANS:
|
||||
tcp_resume_early_retransmit(sk);
|
||||
break;
|
||||
case ICSK_TIME_LOSS_PROBE:
|
||||
tcp_send_loss_probe(sk);
|
||||
break;
|
||||
|
|
|
@ -1745,7 +1745,6 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
|
|||
srcp = ntohs(inet->inet_sport);
|
||||
|
||||
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
|
||||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
|
||||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
|
||||
timer_active = 1;
|
||||
|
|
Loading…
Reference in New Issue