tcp: tracking packets with CE marks in BW rate sample
In order to track CE marks per rate sample (one round trip), TCP needs a per-skb header field to record the tp->delivered_ce count at the time the skb was sent. To make space, we replace the "last_in_flight" field, which was used exclusively by NV congestion control. The stat needed by NV can alternatively be approximated from the existing tcp_sock stats delivered and mss_cache. This patch counts the number of delivered packets that carry CE marks in the rate sample, using an approach similar to the existing delivery accounting. Cc: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Luke Hsiao <lukehsiao@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
ae98f40d32
commit
40bc606379
|
@ -874,10 +874,11 @@ struct tcp_skb_cb {
|
|||
__u32 ack_seq; /* Sequence number ACK'd */
|
||||
union {
|
||||
struct {
|
||||
#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
|
||||
/* There is space for up to 24 bytes */
|
||||
__u32 in_flight:30,/* Bytes in flight at transmit */
|
||||
is_app_limited:1, /* cwnd not fully used? */
|
||||
unused:1;
|
||||
__u32 is_app_limited:1, /* cwnd not fully used? */
|
||||
delivered_ce:20,
|
||||
unused:11;
|
||||
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
|
||||
__u32 delivered;
|
||||
/* start of send pipeline phase */
|
||||
|
@ -1029,7 +1030,9 @@ struct ack_sample {
|
|||
struct rate_sample {
|
||||
u64 prior_mstamp; /* starting timestamp for interval */
|
||||
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
|
||||
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
|
||||
s32 delivered; /* number of packets delivered over interval */
|
||||
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
|
||||
long interval_us; /* time for tp->delivered to incr "delivered" */
|
||||
u32 snd_interval_us; /* snd interval for delivered packets */
|
||||
u32 rcv_interval_us; /* rcv interval for delivered packets */
|
||||
|
|
|
@ -3221,7 +3221,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||
long seq_rtt_us = -1L;
|
||||
long ca_rtt_us = -1L;
|
||||
u32 pkts_acked = 0;
|
||||
u32 last_in_flight = 0;
|
||||
bool rtt_update;
|
||||
int flag = 0;
|
||||
|
||||
|
@ -3257,7 +3256,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||
if (!first_ackt)
|
||||
first_ackt = last_ackt;
|
||||
|
||||
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
|
||||
if (before(start_seq, reord))
|
||||
reord = start_seq;
|
||||
if (!after(scb->end_seq, tp->high_seq))
|
||||
|
@ -3323,8 +3321,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
|
||||
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
|
||||
|
||||
if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
|
||||
last_in_flight && !prior_sacked && fully_acked &&
|
||||
if (pkts_acked == 1 && fully_acked && !prior_sacked &&
|
||||
(tp->snd_una - prior_snd_una) < tp->mss_cache &&
|
||||
sack->rate->prior_delivered + 1 == tp->delivered &&
|
||||
!(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
|
||||
/* Conservatively mark a delayed ACK. It's typically
|
||||
|
@ -3381,9 +3379,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||
|
||||
if (icsk->icsk_ca_ops->pkts_acked) {
|
||||
struct ack_sample sample = { .pkts_acked = pkts_acked,
|
||||
.rtt_us = sack->rate->rtt_us,
|
||||
.in_flight = last_in_flight };
|
||||
.rtt_us = sack->rate->rtt_us };
|
||||
|
||||
sample.in_flight = tp->mss_cache *
|
||||
(tp->delivered - sack->rate->prior_delivered);
|
||||
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
|
||||
}
|
||||
|
||||
|
|
|
@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
|||
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
|
||||
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
|
||||
if (clone_it) {
|
||||
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
|
||||
- tp->snd_una;
|
||||
oskb = skb;
|
||||
|
||||
tcp_skb_tsorted_save(oskb) {
|
||||
|
|
|
@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
|
|||
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
|
||||
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
|
||||
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
|
||||
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
|
||||
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
|
||||
}
|
||||
|
||||
|
@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
|
|||
|
||||
if (!rs->prior_delivered ||
|
||||
after(scb->tx.delivered, rs->prior_delivered)) {
|
||||
rs->prior_delivered_ce = scb->tx.delivered_ce;
|
||||
rs->prior_delivered = scb->tx.delivered;
|
||||
rs->prior_mstamp = scb->tx.delivered_mstamp;
|
||||
rs->is_app_limited = scb->tx.is_app_limited;
|
||||
|
@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
|
|||
}
|
||||
rs->delivered = tp->delivered - rs->prior_delivered;
|
||||
|
||||
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
|
||||
/* delivered_ce occupies less than 32 bits in the skb control block */
|
||||
rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK;
|
||||
|
||||
/* Model sending data and receiving ACKs as separate pipeline phases
|
||||
* for a window. Usually the ACK phase is longer, but with ACK
|
||||
* compression the send phase can be longer. To be safe we use the
|
||||
|
|
Loading…
Reference in New Issue