tcp: provide earliest departure time in skb->tstamp
Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns, from usec units to nsec units.

Do not clear skb->tstamp before entering IP stacks in TX, so that qdisc or devices can implement pacing based on the earliest departure time instead of socket sk->sk_pacing_rate.

Packets are fed with tcp_wstamp_ns, and a following patch will update tcp_wstamp_ns when both TCP and sch_fq switch to the earliest departure time mechanism.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
9799ccb0e9
commit
d3edd06ea8
|
@ -689,7 +689,7 @@ struct sk_buff {
|
||||||
|
|
||||||
union {
|
union {
|
||||||
ktime_t tstamp;
|
ktime_t tstamp;
|
||||||
u64 skb_mstamp;
|
u64 skb_mstamp_ns; /* earliest departure time */
|
||||||
};
|
};
|
||||||
/*
|
/*
|
||||||
* This is the control buffer. It is free to use for every
|
* This is the control buffer. It is free to use for every
|
||||||
|
|
|
@ -761,13 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
|
||||||
|
|
||||||
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
|
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
|
return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* provide the departure time in us unit */
|
/* provide the departure time in us unit */
|
||||||
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
return skb->skb_mstamp;
|
return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -813,7 +813,7 @@ struct tcp_skb_cb {
|
||||||
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
|
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
|
||||||
#define TCPCB_LOST 0x04 /* SKB is lost */
|
#define TCPCB_LOST 0x04 /* SKB is lost */
|
||||||
#define TCPCB_TAGBITS 0x07 /* All tag bits */
|
#define TCPCB_TAGBITS 0x07 /* All tag bits */
|
||||||
#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */
|
#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */
|
||||||
#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
|
#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
|
||||||
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
|
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
|
||||||
TCPCB_REPAIRED)
|
TCPCB_REPAIRED)
|
||||||
|
|
|
@ -88,7 +88,7 @@ u64 cookie_init_timestamp(struct request_sock *req)
|
||||||
ts <<= TSBITS;
|
ts <<= TSBITS;
|
||||||
ts |= options;
|
ts |= options;
|
||||||
}
|
}
|
||||||
return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
|
return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1295,7 +1295,7 @@ new_segment:
|
||||||
copy = size_goal;
|
copy = size_goal;
|
||||||
|
|
||||||
/* All packets are restored as if they have
|
/* All packets are restored as if they have
|
||||||
* already been sent. skb_mstamp isn't set to
|
* already been sent. skb_mstamp_ns isn't set to
|
||||||
* avoid wrong rtt estimation.
|
* avoid wrong rtt estimation.
|
||||||
*/
|
*/
|
||||||
if (tp->repair)
|
if (tp->repair)
|
||||||
|
|
|
@ -1014,7 +1014,7 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
|
||||||
|
|
||||||
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
skb->skb_mstamp = tp->tcp_mstamp;
|
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
|
||||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1061,7 +1061,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
||||||
if (unlikely(!skb))
|
if (unlikely(!skb))
|
||||||
return -ENOBUFS;
|
return -ENOBUFS;
|
||||||
}
|
}
|
||||||
skb->skb_mstamp = tp->tcp_mstamp;
|
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
|
||||||
|
|
||||||
inet = inet_sk(sk);
|
inet = inet_sk(sk);
|
||||||
tcb = TCP_SKB_CB(skb);
|
tcb = TCP_SKB_CB(skb);
|
||||||
|
@ -1165,8 +1165,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
||||||
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
|
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
|
||||||
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
|
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
|
||||||
|
|
||||||
/* Our usage of tstamp should remain private */
|
/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
|
||||||
skb->tstamp = 0;
|
|
||||||
|
|
||||||
/* Cleanup our debris for IP stacks */
|
/* Cleanup our debris for IP stacks */
|
||||||
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
|
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
|
||||||
|
@ -3221,10 +3220,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||||
memset(&opts, 0, sizeof(opts));
|
memset(&opts, 0, sizeof(opts));
|
||||||
#ifdef CONFIG_SYN_COOKIES
|
#ifdef CONFIG_SYN_COOKIES
|
||||||
if (unlikely(req->cookie_ts))
|
if (unlikely(req->cookie_ts))
|
||||||
skb->skb_mstamp = cookie_init_timestamp(req);
|
skb->skb_mstamp_ns = cookie_init_timestamp(req);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
skb->skb_mstamp = tcp_clock_us();
|
skb->skb_mstamp_ns = tcp_clock_ns();
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -3440,7 +3439,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
||||||
|
|
||||||
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
|
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
|
||||||
|
|
||||||
syn->skb_mstamp = syn_data->skb_mstamp;
|
syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
|
||||||
|
|
||||||
/* Now full SYN+DATA was cloned and sent (or not),
|
/* Now full SYN+DATA was cloned and sent (or not),
|
||||||
* remove the SYN from the original skb (syn_data)
|
* remove the SYN from the original skb (syn_data)
|
||||||
|
|
|
@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
|
||||||
*/
|
*/
|
||||||
start_ts = tcp_skb_timestamp(skb);
|
start_ts = tcp_skb_timestamp(skb);
|
||||||
if (!start_ts)
|
if (!start_ts)
|
||||||
skb->skb_mstamp = tp->tcp_mstamp;
|
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
|
||||||
else if (icsk->icsk_user_timeout &&
|
else if (icsk->icsk_user_timeout &&
|
||||||
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
|
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
|
||||||
goto abort;
|
goto abort;
|
||||||
|
|
Loading…
Reference in New Issue