Merge branch 'tcp-fix-DCTCP-ECE-Ack-series'

Yuchung Cheng says:

====================
fix DCTCP ECE Ack series

This patch set address that the existing DCTCP implementation does not
fully implement the ACK policy specified in the RFC. This improves
the responsiveness of CE status change particularly on flows with
small inflight.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-07-20 14:32:24 -07:00
commit f7a6eb1e15
4 changed files with 44 additions and 45 deletions

View File

@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
@ -539,6 +540,7 @@ void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);

View File

@ -129,24 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* State has changed from CE=0 to CE=1 and delayed
* ACK has not sent yet.
*/
if (!ca->ce_state &&
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
u32 tmp_rcv_nxt;
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=0. */
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
if (!ca->ce_state) {
/* State has changed from CE=0 to CE=1, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
__tcp_send_ack(sk, ca->prior_rcv_nxt);
tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;
@ -160,24 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* State has changed from CE=1 to CE=0 and delayed
* ACK has not sent yet.
*/
if (ca->ce_state &&
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
u32 tmp_rcv_nxt;
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=1. */
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
if (ca->ce_state) {
/* State has changed from CE=1 to CE=0, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
__tcp_send_ack(sk, ca->prior_rcv_nxt);
tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;

View File

@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.

View File

@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@ -1023,8 +1027,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask)
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
@ -1100,7 +1104,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->source = inet->inet_sport;
th->dest = inet->inet_dport;
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
th->ack_seq = htonl(rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->tcp_flags);
@ -1141,7 +1145,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@ -1178,6 +1182,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
return err;
}
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask)
{
return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
tcp_sk(sk)->rcv_nxt);
}
/* This routine just queues the buffer for sending.
*
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@ -3571,7 +3582,7 @@ void tcp_send_delayed_ack(struct sock *sk)
}
/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
{
struct sk_buff *buff;
@ -3604,9 +3615,14 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
}
EXPORT_SYMBOL_GPL(__tcp_send_ack);
void tcp_send_ack(struct sock *sk)
{
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.