Merge branch 'tcp-second-round-for-EDT-conversion'
Eric Dumazet says:

====================
tcp: second round for EDT conversion

The first round of EDT patches left the TCP stack in a non-optimal state:

- High-speed flows suffered a loss of performance, addressed by the first
  patch of this series.

- The second patch brings pacing up to the current state of networking,
  since we now reach ~100 Gbit on a single TCP flow.

- The third patch implements a mitigation for scheduling delays, like the
  one we did in sch_fq in the past.

- The fourth patch removes one special case in sch_fq for ACK packets.

- The fifth patch removes a serious performance cost for TCP internal
  pacing: we should set up the high-resolution timer only if it is really
  needed.

- The sixth patch fixes a typo in BBR.

- The last patch is a minor change in cdg congestion control.

Neal Cardwell also has a patch series fixing BBR after EDT adoption.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit b13949678b
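Before the diff: a quick sanity check on why sk_pacing_rate had to grow past 32 bits for the ~100 Gbit single-flow target mentioned above. This is a standalone userspace sketch with illustrative numbers, not code from the series:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Largest pacing rate a u32 bytes/sec field can express. */
        uint32_t max_u32_rate = UINT32_MAX;             /* ~4.29e9 B/s */
        double max_gbit = max_u32_rate * 8.0 / 1e9;     /* ~34.4 Gbit/s */

        /* Rate needed to pace a single flow at 100 Gbit/s. */
        uint64_t rate_100g = 100ULL * 1000 * 1000 * 1000 / 8; /* 12.5e9 B/s */

        printf("u32 limit : %.1f Gbit/s\n", max_gbit);
        printf("100G needs: %llu B/s (overflows u32: %s)\n",
               (unsigned long long)rate_100g,
               rate_100g > max_u32_rate ? "yes" : "no");
        return 0;
    }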
@@ -249,6 +249,7 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
 
 	/* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
@@ -422,8 +422,8 @@ struct sock {
 	struct timer_list	sk_timer;
 	__u32			sk_priority;
 	__u32			sk_mark;
-	u32			sk_pacing_rate; /* bytes per second */
-	u32			sk_max_pacing_rate;
+	unsigned long		sk_pacing_rate; /* bytes per second */
+	unsigned long		sk_max_pacing_rate;
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
@@ -3927,8 +3927,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
 			break;
-		case SO_MAX_PACING_RATE:
-			sk->sk_max_pacing_rate = val;
+		case SO_MAX_PACING_RATE: /* 32bit version */
+			sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
 			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 						 sk->sk_max_pacing_rate);
 			break;
@@ -998,7 +998,7 @@ set_rcvbuf:
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
+		sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 					 sk->sk_max_pacing_rate);
 		break;
@@ -1336,7 +1336,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		/* 32bit version */
+		v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
 		break;
 
 	case SO_INCOMING_CPU:
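Taken together, the setsockopt hunks above and this getsockopt hunk keep the 32-bit SO_MAX_PACING_RATE ABI intact: the legacy "unlimited" value ~0U is widened to ~0UL on the way in and clamped back on the way out. A hedged standalone sketch of that round trip (the helper names here are invented for illustration, not kernel functions):

    #include <stdio.h>

    /* setsockopt direction: widen the 32bit "unlimited" marker. */
    static unsigned long pacing_rate_from_u32(unsigned int val)
    {
        return (val == ~0U) ? ~0UL : val;
    }

    /* getsockopt direction: clamp back into 32 bits for old callers. */
    static unsigned int pacing_rate_to_u32(unsigned long rate)
    {
        return (rate > ~0U) ? ~0U : (unsigned int)rate;
    }

    int main(void)
    {
        unsigned long unlimited = pacing_rate_from_u32(~0U);
        unsigned long capped    = pacing_rate_from_u32(1000000);

        printf("unlimited -> %lx -> %x\n", unlimited, pacing_rate_to_u32(unlimited));
        printf("1MB/s     -> %lx -> %x\n", capped, pacing_rate_to_u32(capped));
        return 0;
    }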
@@ -2810,8 +2811,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
 
-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
 
@@ -3111,10 +3111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	unsigned long rate;
 	u32 now;
 	u64 rate64;
 	bool slow;
-	u32 rate;
 
 	memset(info, 0, sizeof(*info));
 	if (sk->sk_type != SOCK_STREAM)
@@ -3124,11 +3124,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	/* Report meaningful fields for all TCP states, including listeners */
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_pacing_rate = rate64;
 
 	rate = READ_ONCE(sk->sk_max_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_max_pacing_rate = rate64;
 
 	info->tcpi_reordering = tp->reordering;
@@ -3254,8 +3254,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
 	struct tcp_info info;
+	unsigned long rate;
 	u64 rate64;
-	u32 rate;
 
 	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
 	if (!stats)
@@ -3274,7 +3274,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  tp->total_retrans, TCP_NLA_PAD);
 
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
 
 	rate64 = tcp_compute_delivery_rate(tp);
@@ -129,7 +129,7 @@ static const u32 bbr_probe_rtt_mode_ms = 200;
 static const int bbr_min_tso_rate = 1200000;
 
 /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
-static const int bbr_pacing_marging_percent = 1;
+static const int bbr_pacing_margin_percent = 1;
 
 /* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
  * that will allow a smoothly increasing pacing rate that will double each RTT
@@ -214,12 +214,12 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 	rate *= mss;
 	rate *= gain;
 	rate >>= BBR_SCALE;
-	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_marging_percent);
+	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
 	return rate >> BW_SCALE;
 }
 
 /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
-static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	u64 rate = bw;
 
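In bbr_rate_bytes_per_sec() above, one multiply performs both the per-usec to per-second conversion and the pacing margin shave: USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent). A minimal standalone sketch of just that step, with an illustrative input value (only USEC_PER_SEC = 1000000 is assumed to match the kernel constant):

    #include <stdio.h>

    #define USEC_PER_SEC 1000000ULL

    int main(void)
    {
        unsigned long long bytes_per_usec = 1250;   /* example: ~10 Gbit/s */
        int margin_percent = 1;                     /* bbr_pacing_margin_percent */

        /* One multiply does both the unit conversion and the ~1% shave. */
        unsigned long long rate = bytes_per_usec *
                                  (USEC_PER_SEC / 100 * (100 - margin_percent));

        printf("%llu bytes/usec -> %llu bytes/sec (1%% below %llu)\n",
               bytes_per_usec, rate, bytes_per_usec * USEC_PER_SEC);
        return 0;
    }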
@@ -258,7 +258,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+	unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain);
 
 	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
 		bbr_init_pacing_rate_from_rtt(sk);
@@ -280,7 +280,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
 	/* Sort of tcp_tso_autosize() but ignoring
 	 * driver provided sk_gso_max_size.
 	 */
-	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+	bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
@@ -146,7 +146,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 		return;
 
 	if (hystart_detect & HYSTART_ACK_TRAIN) {
-		u32 now_us = div_u64(local_clock(), NSEC_PER_USEC);
+		u32 now_us = tp->tcp_mstamp;
 
 		if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
 			ca->last_ack = now_us;
@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
 {
 	u64 val = tcp_clock_ns();
 
-	/* departure time for next data packet */
-	if (val > tp->tcp_wstamp_ns)
-		tp->tcp_wstamp_ns = val;
+	if (val > tp->tcp_clock_cache)
+		tp->tcp_clock_cache = val;
 
 	val = div_u64(val, NSEC_PER_USEC);
 	if (val > tp->tcp_mstamp)
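With the first patch, tcp_mstamp_refresh() no longer moves the per-packet departure time forward; it only refreshes tcp_clock_cache, and the transmit path (see the __tcp_transmit_skb() hunk further down) pulls tcp_wstamp_ns up to that cache just before stamping the skb. A rough userspace sketch of that interaction, with a toy struct standing in for tcp_sock:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for the two tcp_sock fields involved. */
    struct toy_tp {
        uint64_t tcp_clock_cache; /* latest cached tcp_clock_ns() */
        uint64_t tcp_wstamp_ns;   /* earliest departure time of next skb */
    };

    /* Mirrors the new tcp_mstamp_refresh(): only the clock cache moves. */
    static void refresh(struct toy_tp *tp, uint64_t now_ns)
    {
        if (now_ns > tp->tcp_clock_cache)
            tp->tcp_clock_cache = now_ns;
    }

    /* Mirrors the start of __tcp_transmit_skb(): never stamp in the past. */
    static uint64_t transmit_stamp(struct toy_tp *tp)
    {
        if (tp->tcp_wstamp_ns < tp->tcp_clock_cache)
            tp->tcp_wstamp_ns = tp->tcp_clock_cache;
        return tp->tcp_wstamp_ns;
    }

    int main(void)
    {
        struct toy_tp tp = { .tcp_clock_cache = 0, .tcp_wstamp_ns = 0 };

        refresh(&tp, 1000);
        printf("first skb stamped at %llu ns\n",
               (unsigned long long)transmit_stamp(&tp));

        tp.tcp_wstamp_ns += 500; /* pacing pushed next departure to 1500 */
        refresh(&tp, 1200);      /* clock advanced, but less than wstamp  */
        printf("second skb stamped at %llu ns (still paced)\n",
               (unsigned long long)transmit_stamp(&tp));
        return 0;
    }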
@@ -976,32 +975,26 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static void tcp_internal_pacing(struct sock *sk)
-{
-	if (!tcp_needs_internal_pacing(sk))
-		return;
-	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-		      ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
-		      HRTIMER_MODE_ABS_PINNED_SOFT);
-	sock_hold(sk);
-}
-
-static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
+				      u64 prior_wstamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (sk->sk_pacing_status != SK_PACING_NONE) {
-		u32 rate = sk->sk_pacing_rate;
+		unsigned long rate = sk->sk_pacing_rate;
 
 		/* Original sch_fq does not pace first 10 MSS
 		 * Note that tp->data_segs_out overflows after 2^32 packets,
 		 * this is a minor annoyance.
 		 */
-		if (rate != ~0U && rate && tp->data_segs_out >= 10) {
-			tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate);
-
-			tcp_internal_pacing(sk);
+		if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
+			u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
+			u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
+
+			/* take into account OS jitter */
+			len_ns -= min_t(u64, len_ns / 2, credit);
+			tp->tcp_wstamp_ns += len_ns;
 		}
 	}
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
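The rewritten tcp_update_skb_after_send() above grants back up to half of a packet's pacing interval when the stack fell behind schedule (tcp_wstamp_ns was pulled past prior_wstamp by the clock cache). A back-of-the-envelope sketch of the credit arithmetic, using made-up numbers:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

    int main(void)
    {
        uint64_t rate = 1250000000;      /* 1.25 GB/s == 10 Gbit/s */
        uint64_t skb_len = 64 * 1024;    /* one 64KB TSO packet    */

        /* Nominal gap this packet should occupy on the wire. */
        uint64_t len_ns = skb_len * 1000000000ULL / rate;   /* ~52428 ns */

        /* How late we already were: wstamp was pulled forward by the
         * clock cache in __tcp_transmit_skb(), past the planned departure.
         */
        uint64_t prior_wstamp = 1000000;
        uint64_t tcp_wstamp_ns = 1020000;  /* 20 us of scheduling delay */
        uint64_t credit = tcp_wstamp_ns - prior_wstamp;

        /* Give back the delay, but never more than half the packet gap. */
        len_ns -= min_u64(len_ns / 2, credit);

        printf("nominal gap %llu ns, credit %llu ns, charged %llu ns\n",
               (unsigned long long)(skb_len * 1000000000ULL / rate),
               (unsigned long long)credit,
               (unsigned long long)len_ns);
        return 0;
    }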
@@ -1030,6 +1023,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
+	u64 prior_wstamp;
 	int err;
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -1050,6 +1044,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+
+	prior_wstamp = tp->tcp_wstamp_ns;
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 
 	inet = inet_sk(sk);
@@ -1166,7 +1164,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		err = net_xmit_eval(err);
 	}
 	if (!err && oskb) {
-		tcp_update_skb_after_send(sk, oskb);
+		tcp_update_skb_after_send(sk, oskb, prior_wstamp);
 		tcp_rate_skb_sent(sk, oskb);
 	}
 	return err;
@@ -1701,7 +1699,8 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 {
 	u32 bytes, segs;
 
-	bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
+	bytes = min_t(unsigned long,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
 
 	/* Goal is to send at least one packet per ms,
@@ -2175,10 +2174,23 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_pacing_check(struct sock *sk)
 {
-	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tcp_needs_internal_pacing(sk))
+		return false;
+
+	if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
+		return false;
+
+	if (!hrtimer_is_queued(&tp->pacing_timer)) {
+		hrtimer_start(&tp->pacing_timer,
+			      ns_to_ktime(tp->tcp_wstamp_ns),
+			      HRTIMER_MODE_ABS_PINNED_SOFT);
+		sock_hold(sk);
+	}
+	return true;
 }
 
 /* TCP Small Queues :
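The new tcp_pacing_check() above only arms the pacing hrtimer when the next departure time is actually ahead of the cached clock, instead of programming a timer for every paced packet. A simplified userspace sketch of that decision (the tcp_needs_internal_pacing() gate is omitted and kernel types are replaced with stdbool/stdint):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Returns true when transmission must wait for the pacing timer. */
    static bool pacing_check(uint64_t tcp_wstamp_ns, uint64_t tcp_clock_cache,
                             bool *timer_armed)
    {
        if (tcp_wstamp_ns <= tcp_clock_cache)
            return false;           /* next packet is already due: send now */

        if (!*timer_armed) {
            *timer_armed = true;    /* the kernel would hrtimer_start() here */
            printf("arming pacing timer for t=%llu ns\n",
                   (unsigned long long)tcp_wstamp_ns);
        }
        return true;                /* throttle until the timer fires */
    }

    int main(void)
    {
        bool armed = false;

        /* Packet due right now: no timer, no throttling. */
        printf("check #1 -> %d\n", pacing_check(1000, 1500, &armed));

        /* Packet scheduled 40 us ahead: arm the timer once and back off. */
        printf("check #2 -> %d\n", pacing_check(41500, 1500, &armed));
        printf("check #3 -> %d\n", pacing_check(41500, 1500, &armed));
        return 0;
    }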
@@ -2195,10 +2207,12 @@ static bool tcp_pacing_check(const struct sock *sk)
 static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 				  unsigned int factor)
 {
-	unsigned int limit;
+	unsigned long limit;
 
-	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
-	limit = min_t(u32, limit,
+	limit = max_t(unsigned long,
+		      2 * skb->truesize,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift);
+	limit = min_t(unsigned long, limit,
 		      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
 
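For the tcp_small_queue_check() change just above: the limit scales with the pacing rate, allowing roughly rate >> sk_pacing_shift bytes (about 1 ms of data with the default shift of 10) to sit in the qdisc and device queues, clamped by the tcp_limit_output_bytes sysctl. A worked example with illustrative numbers (the sysctl value and skb truesize below are assumptions, not kernel defaults being asserted):

    #include <stdio.h>

    static unsigned long long max_ull(unsigned long long a, unsigned long long b)
    {
        return a > b ? a : b;
    }

    static unsigned long long min_ull(unsigned long long a, unsigned long long b)
    {
        return a < b ? a : b;
    }

    int main(void)
    {
        unsigned long long pacing_rate = 12500000000ULL; /* 100 Gbit/s in bytes/sec */
        unsigned int pacing_shift = 10;                  /* default sk_pacing_shift  */
        unsigned long long truesize = 66816;             /* example 64KB TSO skb     */
        unsigned long long sysctl_limit = 262144;        /* illustrative tcp_limit_output_bytes */
        unsigned int factor = 0;

        unsigned long long limit = max_ull(2 * truesize,
                                           pacing_rate >> pacing_shift);

        limit = min_ull(limit, sysctl_limit);
        limit <<= factor;

        printf("rate >> shift = %llu bytes (~%llu us of data)\n",
               pacing_rate >> pacing_shift,
               (pacing_rate >> pacing_shift) * 1000000 / pacing_rate);
        printf("TSQ limit     = %llu bytes\n", limit);
        return 0;
    }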
@@ -2315,7 +2329,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
 			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			tcp_update_skb_after_send(sk, skb);
+			tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
 			goto repair; /* Skip network transmission */
 		}
 
@@ -2890,7 +2904,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		} tcp_skb_tsorted_restore(skb);
 
 		if (!err) {
-			tcp_update_skb_after_send(sk, skb);
+			tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
 			tcp_rate_skb_sent(sk, skb);
 		}
 	} else {
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
 	 */
 	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+		skb->skb_mstamp_ns = tp->tcp_clock_cache;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
 		goto abort;
@@ -92,8 +92,8 @@ struct fq_sched_data {
 	u32		quantum;
 	u32		initial_quantum;
 	u32		flow_refill_delay;
-	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
+	unsigned long	flow_max_rate;	/* optional max rate per flow */
 	u32		orphan_mask;	/* mask for orphaned skb */
 	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
@@ -416,7 +416,8 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
-	u32 rate, plen;
+	unsigned long rate;
+	u32 plen;
 
 	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
@@ -443,7 +444,7 @@ begin:
 	}
 
 	skb = f->head;
-	if (skb && !skb_is_tcp_pure_ack(skb)) {
+	if (skb) {
 		u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp),
 					     f->time_next_packet);
 
@@ -485,11 +486,11 @@ begin:
 		if (f->credit > 0)
 			goto out;
 	}
-	if (rate != ~0U) {
+	if (rate != ~0UL) {
 		u64 len = (u64)plen * NSEC_PER_SEC;
 
 		if (likely(rate))
-			do_div(len, rate);
+			len = div64_ul(len, rate);
 		/* Since socket rate can change later,
 		 * clamp the delay to 1 second.
 		 * Really, providers of too big packets should be fixed !
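The dequeue path above converts packet length and flow rate into a nanosecond horizon, len * NSEC_PER_SEC / rate, clamped to one second as the comment in the hunk says. A standalone sketch of that computation (the helper name fq_packet_delay_ns is invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Delay charged to a flow for sending plen bytes at "rate" bytes/sec. */
    static uint64_t fq_packet_delay_ns(uint32_t plen, unsigned long rate)
    {
        uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

        if (rate)
            len /= rate;            /* div64_ul() in the kernel */
        if (len > NSEC_PER_SEC)
            len = NSEC_PER_SEC;     /* clamp, as the comment says */
        return len;
    }

    int main(void)
    {
        /* 64KB GSO packet at 10 Gbit/s: ~52 us between departures. */
        printf("%llu ns\n",
               (unsigned long long)fq_packet_delay_ns(65536, 1250000000UL));

        /* Same packet at a pathological 10 bytes/sec rate: clamped to 1 s. */
        printf("%llu ns\n",
               (unsigned long long)fq_packet_delay_ns(65536, 10));
        return 0;
    }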
@@ -701,9 +702,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
 				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
 
-	if (tb[TCA_FQ_FLOW_MAX_RATE])
-		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
+		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+	}
 	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
 		q->low_rate_threshold =
 			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
@@ -766,7 +769,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
-	q->flow_max_rate	= ~0U;
+	q->flow_max_rate	= ~0UL;
 	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
@@ -802,7 +805,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
+			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||