Merge branch 'tcp-sack-compression-changes'
Eric Dumazet says:

====================
tcp: sack compression changes

This patch series refines SACK compression: we had issues with missing
SACKs when TCP option space is tight, and the series now uses hrtimer
slack to reduce system overhead.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 1b2e788490
@@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER
 
         Default : 1,000,000 ns (1 ms)
 
+tcp_comp_sack_slack_ns - LONG INTEGER
+        This sysctl controls the slack used when arming the
+        timer used by SACK compression. This gives extra time
+        for small RTT flows, and reduces system overhead by allowing
+        opportunistic reduction of timer interrupts.
+
+        Default : 100,000 ns (100 us)
+
 tcp_comp_sack_nr - INTEGER
         Max number of SACKs that can be compressed.
         Using 0 disables SACK compression.
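To make the delay/slack interaction concrete, here is a minimal userspace sketch (not kernel code; the sample RTT is illustrative) of how the two sysctls combine with the 5%-of-RTT cap applied in the tcp_input.c hunk further below:

/* Minimal userspace sketch, not kernel code: how tcp_comp_sack_delay_ns
 * and tcp_comp_sack_slack_ns shape the compressed-ACK timer window.
 */
#include <stdio.h>

#define NSEC_PER_USEC 1000UL
#define NSEC_PER_MSEC 1000000UL

int main(void)
{
        unsigned long delay_ns = NSEC_PER_MSEC;       /* tcp_comp_sack_delay_ns default */
        unsigned long slack_ns = 100 * NSEC_PER_USEC; /* tcp_comp_sack_slack_ns default */
        unsigned long srtt_us = 400;                  /* sample smoothed RTT */
        unsigned long rtt = srtt_us << 3;             /* kernel stores srtt in usec << 3 */
        unsigned long delay = rtt * (NSEC_PER_USEC >> 3) / 20; /* 5% of RTT, in ns */

        if (delay > delay_ns)
                delay = delay_ns;
        /* hrtimer_start_range_ns() may fire anywhere in [delay, delay + slack],
         * so nearby expirations can be batched into fewer interrupts.
         */
        printf("timer window: [%lu, %lu] ns\n", delay, delay + slack_ns);
        return 0;
}

With the defaults above, a 400 us RTT flow arms a 20 us timer that may slip up to 120 us; small-RTT flows see most of the benefit, which is exactly the case the documentation text calls out.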
@@ -268,6 +268,7 @@ struct tcp_sock {
         } rack;
         u16     advmss;         /* Advertised MSS */
         u8      compressed_ack;
+        u8      dup_ack_counter;
         u32     chrono_start;   /* Start time in jiffies of a TCP chrono */
         u32     chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
         u8      chrono_type:2,  /* current chronograph type */
@@ -173,6 +173,7 @@ struct netns_ipv4 {
         int sysctl_tcp_rmem[3];
         int sysctl_tcp_comp_sack_nr;
         unsigned long sysctl_tcp_comp_sack_delay_ns;
+        unsigned long sysctl_tcp_comp_sack_slack_ns;
         struct inet_timewait_death_row tcp_death_row;
         int sysctl_max_syn_backlog;
         int sysctl_tcp_fastopen;
@@ -1329,6 +1329,13 @@ static struct ctl_table ipv4_net_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_doulongvec_minmax,
         },
+        {
+                .procname       = "tcp_comp_sack_slack_ns",
+                .data           = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
+                .maxlen         = sizeof(unsigned long),
+                .mode           = 0644,
+                .proc_handler   = proc_doulongvec_minmax,
+        },
         {
                 .procname       = "tcp_comp_sack_nr",
                 .data           = &init_net.ipv4.sysctl_tcp_comp_sack_nr,
@@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
         }
 }
 
+static void tcp_sack_compress_send_ack(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+
+        if (!tp->compressed_ack)
+                return;
+
+        if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+                __sock_put(sk);
+
+        /* Since we have to send one ack finally,
+         * subtract one from tp->compressed_ack to keep
+         * LINUX_MIB_TCPACKCOMPRESSED accurate.
+         */
+        NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+                      tp->compressed_ack - 1);
+
+        tp->compressed_ack = 0;
+        tcp_send_ack(sk);
+}
+
+/* Reasonable amount of sack blocks included in TCP SACK option
+ * The max is 4, but this becomes 3 if TCP timestamps are there.
+ * Given that SACK packets might be lost, be conservative and use 2.
+ */
+#define TCP_SACK_BLOCKS_EXPECTED 2
+
 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
         struct tcp_sock *tp = tcp_sk(sk);
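The TCP_SACK_BLOCKS_EXPECTED comment compresses a bit of arithmetic. The sketch below spells it out, using the standard option sizes (40 bytes of TCP option space, 8 bytes per SACK block, 10-byte timestamp option) and the usual 2-NOP alignment; it is an illustration, not kernel code:

/* Why at most 4 SACK blocks fit, and only 3 with timestamps. */
#include <stdio.h>

int main(void)
{
        int opt_space = 40;   /* max TCP option bytes */
        int sack_hdr = 2 + 2; /* kind + len, plus 2 NOPs for alignment */
        int per_block = 8;    /* two 32-bit sequence numbers per block */
        int tstamp = 10 + 2;  /* timestamp option + 2 NOPs of padding */

        printf("without timestamps: %d blocks\n", (opt_space - sack_hdr) / per_block);
        printf("with timestamps   : %d blocks\n", (opt_space - tstamp - sack_hdr) / per_block);
        return 0;
}

Once more than two ranges are pending, older SACK information risks being squeezed out of the option, which is the "missing SACK" problem from the cover letter; flushing the compressed ACK early avoids it.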
@@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 
         for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
                 if (tcp_sack_extend(sp, seq, end_seq)) {
+                        if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+                                tcp_sack_compress_send_ack(sk);
                         /* Rotate this_sack to the first one. */
                         for (; this_sack > 0; this_sack--, sp--)
                                 swap(*sp, *(sp - 1));
@@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
                 }
         }
 
+        if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+                tcp_sack_compress_send_ack(sk);
+
         /* Could not find an adjacent existing SACK, build a new one,
          * put it at the front, and shift everyone else down. We
          * always know there is at least one SACK present already here.
@@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
          * If the sack array is full, forget about the last one.
          */
         if (this_sack >= TCP_NUM_SACKS) {
-                if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-                        tcp_send_ack(sk);
                 this_sack--;
                 tp->rx_opt.num_sacks--;
                 sp--;
@@ -5275,15 +5305,13 @@ send_now:
 
         if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
                 tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
-                if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-                        NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-                                      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
-                tp->compressed_ack = 0;
+                tp->dup_ack_counter = 0;
         }
-
-        if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+        if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
+                tp->dup_ack_counter++;
                 goto send_now;
-
+        }
+        tp->compressed_ack++;
         if (hrtimer_is_queued(&tp->compressed_ack_timer))
                 return;
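As this hunk reads, the first TCP_FASTRETRANS_THRESH duplicate ACKs now bypass compression entirely, so the peer can still count dupacks and trigger fast retransmit; only later ACKs are deferred behind the hrtimer. A small userspace simulation of that gating (illustrative, not kernel code):

/* Sketch of the dup-ACK gating: send the first 3 immediately,
 * compress the rest.
 */
#include <stdio.h>

#define TCP_FASTRETRANS_THRESH 3

int main(void)
{
        unsigned int dup_ack_counter = 0, compressed_ack = 0;

        for (int pkt = 1; pkt <= 8; pkt++) {  /* 8 out-of-order arrivals */
                if (dup_ack_counter < TCP_FASTRETRANS_THRESH) {
                        dup_ack_counter++;
                        printf("pkt %d: send dup ACK now\n", pkt);
                        continue;             /* the kernel's "goto send_now" */
                }
                compressed_ack++;             /* deferred behind the hrtimer */
                printf("pkt %d: compress (pending=%u)\n", pkt, compressed_ack);
        }
        return 0;
}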
@@ -5296,8 +5324,9 @@ send_now:
         delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
                       rtt * (NSEC_PER_USEC >> 3)/20);
         sock_hold(sk);
-        hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
-                      HRTIMER_MODE_REL_PINNED_SOFT);
+        hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+                               sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+                               HRTIMER_MODE_REL_PINNED_SOFT);
 }
 
 static inline void tcp_ack_snd_check(struct sock *sk)
@@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
                        sizeof(init_net.ipv4.sysctl_tcp_wmem));
         }
         net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+        net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
         net->ipv4.sysctl_tcp_comp_sack_nr = 44;
         net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
         spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
@@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
-        if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
+        if (unlikely(tp->compressed_ack)) {
                 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-                              tp->compressed_ack - TCP_FASTRETRANS_THRESH);
-                tp->compressed_ack = TCP_FASTRETRANS_THRESH;
+                              tp->compressed_ack);
+                tp->compressed_ack = 0;
                 if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
                         __sock_put(sk);
         }
@@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 
         bh_lock_sock(sk);
         if (!sock_owned_by_user(sk)) {
-                if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+                if (tp->compressed_ack) {
+                        /* Since we have to send one ack finally,
+                         * subtract one from tp->compressed_ack to keep
+                         * LINUX_MIB_TCPACKCOMPRESSED accurate.
+                         */
+                        tp->compressed_ack--;
                         tcp_send_ack(sk);
+                }
         } else {
                 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
                                       &sk->sk_tsq_flags))
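The "subtract one" comment is worth unpacking: an ACK that must still go out on timer expiry was never actually suppressed, so it should not be counted. A toy userspace sketch of the accounting, with a hypothetical pending count:

/* If 5 ACKs were deferred and the timer forces one out, only 4 were
 * truly saved; tcp_event_ack_sent() then adds the remainder to
 * LINUX_MIB_TCPACKCOMPRESSED and resets the counter.
 */
#include <stdio.h>

int main(void)
{
        unsigned int compressed_ack = 5;  /* hypothetical deferred ACKs */
        unsigned long mib_tcpackcompressed = 0;

        if (compressed_ack) {
                compressed_ack--;         /* the ACK we are about to send */
                /* what the tcp_send_ack() -> tcp_event_ack_sent() path does: */
                mib_tcpackcompressed += compressed_ack;
                compressed_ack = 0;
        }
        printf("ACKs actually suppressed: %lu\n", mib_tcpackcompressed);
        return 0;
}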