Merge branch 'tcp-add-4-new-stats'

Wei Wang says:

====================
tcp: add 4 new stats

This patch series adds 3 RFC4898 stats:
1. tcpEStatsPerfHCDataOctetsOut
2. tcpEStatsPerfOctetsRetrans
3. tcpEStatsStackDSACKDups
and an additional stat to record the number of data packet reordering
events seen:
4. tcp_reord_seen

Together with the existing stats, applications can use them to measure
the retransmission rate in bytes, exclude spurious retransmissions
reflected by DSACK, and keep track of the reordering events on live
connections.
In particular the networks with different MTUs make bytes-based loss stats
more useful. Google servers have been using these stats for many years to
instrument transport and network performance.

Note: The first patch is a refactor to add a helper to calculate
opt_stats size in order to make later changes cleaner.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-08-01 09:56:11 -07:00
commit fab9593df1
6 changed files with 69 additions and 8 deletions

View File

@ -181,10 +181,16 @@ struct tcp_sock {
u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
* total number of data segments sent.
*/
u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut
* total number of data bytes sent.
*/
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
* total number of DSACK blocks received
*/
u32 snd_una; /* First byte we want an ack for */
u32 snd_sml; /* Last byte of the most recently transmitted small packet */
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
@ -214,8 +220,7 @@ struct tcp_sock {
#define TCP_RACK_RECOVERY_THRESH 16
u8 reo_wnd_persist:5, /* No. of recovery since last adj */
dsack_seen:1, /* Whether DSACK seen after last adj */
advanced:1, /* mstamp advanced since last lost marking */
reord:1; /* reordering detected */
advanced:1; /* mstamp advanced since last lost marking */
} rack;
u16 advmss; /* Advertised MSS */
u8 compressed_ack;
@ -261,6 +266,7 @@ struct tcp_sock {
u8 ecn_flags; /* ECN status bits. */
u8 keepalive_probes; /* num of allowed keep alive probes */
u32 reordering; /* Packet reordering metric. */
u32 reord_seen; /* number of data packet reordering events */
u32 snd_up; /* Urgent pointer */
/*
@ -330,6 +336,9 @@ struct tcp_sock {
* the first SYN. */
u32 undo_marker; /* snd_una upon a new recovery episode. */
int undo_retrans; /* number of undoable retransmissions. */
u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans
* Total data bytes retransmitted
*/
u32 total_retrans; /* Total retransmits for entire connection */
u32 urg_seq; /* Seq of received urgent pointer */

View File

@ -235,6 +235,11 @@ struct tcp_info {
__u32 tcpi_delivered;
__u32 tcpi_delivered_ce;
__u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
__u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */
__u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */
__u32 tcpi_reord_seen; /* reordering events seen */
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@ -257,7 +262,10 @@ enum {
TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */
TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */
TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */
TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */
TCP_NLA_DSACK_DUPS, /* DSACK blocks received */
TCP_NLA_REORD_SEEN, /* reordering events seen */
};
/* for TCP_MD5SIG socket option */

View File

@ -2594,6 +2594,10 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
tp->bytes_sent = 0;
tp->bytes_retrans = 0;
tp->dsack_dups = 0;
tp->reord_seen = 0;
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
@ -3201,10 +3205,41 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_delivery_rate = rate64;
info->tcpi_delivered = tp->delivered;
info->tcpi_delivered_ce = tp->delivered_ce;
info->tcpi_bytes_sent = tp->bytes_sent;
info->tcpi_bytes_retrans = tp->bytes_retrans;
info->tcpi_dsack_dups = tp->dsack_dups;
info->tcpi_reord_seen = tp->reord_seen;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
/* Compute the buffer size used by tcp_get_timestamping_opt_stats() when it
 * allocates the skb that carries the TCP_NLA_* attributes.
 *
 * One term is added per attribute, sized by the payload type noted next to
 * it, so a new attribute only needs one extra line here.
 *
 * Returns the summed netlink attribute sizes in bytes.
 */
static size_t tcp_opt_stats_get_size(void)
{
	size_t len = 0;

	/* Attributes with a 64-bit payload. */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_BUSY */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_RWND_LIMITED */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_SNDBUF_LIMITED */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_DATA_SEGS_OUT */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_TOTAL_RETRANS */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_PACING_RATE */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_DELIVERY_RATE */

	/* Attributes with 32-bit and 8-bit payloads. */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_SND_CWND */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_REORDERING */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_MIN_RTT */
	len += nla_total_size(sizeof(u8));		/* TCP_NLA_RECUR_RETRANS */
	len += nla_total_size(sizeof(u8));		/* TCP_NLA_DELIVERY_RATE_APP_LMT */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_SNDQ_SIZE */
	len += nla_total_size(sizeof(u8));		/* TCP_NLA_CA_STATE */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_SND_SSTHRESH */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_DELIVERED */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_DELIVERED_CE */

	/* Byte counters, DSACK and reordering stats. */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_BYTES_SENT */
	len += nla_total_size_64bit(sizeof(u64));	/* TCP_NLA_BYTES_RETRANS */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_DSACK_DUPS */
	len += nla_total_size(sizeof(u32));		/* TCP_NLA_REORD_SEEN */

	return len;
}
struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@ -3213,9 +3248,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u64 rate64;
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
7 * nla_total_size(sizeof(u32)) +
3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
if (!stats)
return NULL;
@ -3251,6 +3284,13 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
TCP_NLA_PAD);
nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
TCP_NLA_PAD);
nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
return stats;
}

View File

@ -874,6 +874,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
{
/* Set the TCP_DSACK_SEEN flag in the SACK option state. */
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
/* RACK flag: a DSACK was seen since its last adjustment. */
tp->rack.dsack_seen = 1;
/* Bump the DSACK counter (RFC4898 tcpEStatsStackDSACKDups). */
tp->dsack_dups++;
}
/* It's reordering when higher sequence was delivered (i.e. sacked) before
@ -905,8 +906,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
/* This exciting event is worth to be remembered. 8) */
tp->reord_seen++;
NET_INC_STATS(sock_net(sk),
ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
@ -1870,6 +1871,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
tp->reordering = min_t(u32, tp->packets_out + addend,
sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}

View File

@ -1136,6 +1136,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
tp->bytes_sent += skb->len - tcp_header_size;
tcp_internal_pacing(sk, skb);
}
@ -2870,6 +2871,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
tp->total_retrans += segs;
tp->bytes_retrans += skb->len;
/* make sure skb->data is aligned on arches that require it
* and check if ack-trimming & collapsing extended the headroom

View File

@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->rack.reord) {
if (!tp->reord_seen) {
/* If reordering has not been observed, be aggressive during
* the recovery or starting the recovery by DUPACK threshold.
*/