tcp: refactor DCTCP ECN ACK handling
DCTCP has two parts - a new ECN signalling mechanism and the response function to it. The first part can be used by other congestion control modules in DCTCP-ECN deployed networks. This patch moves that part into a separate tcp_dctcp.h so that it can be used by other congestion control modules (like how YeAH uses the Vegas algorithms). For example, BBR is currently experimenting with such an ECN signal: https://tinyurl.com/ietf-102-iccrg-bbr2 Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Yousuk Seung <ysseung@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
ed792e28c4
commit
ffd177dea5
|
@ -44,6 +44,7 @@
|
|||
#include <linux/mm.h>
|
||||
#include <net/tcp.h>
|
||||
#include <linux/inet_diag.h>
|
||||
#include "tcp_dctcp.h"
|
||||
|
||||
#define DCTCP_MAX_ALPHA 1024U
|
||||
|
||||
|
@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
|
|||
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
|
||||
}
|
||||
|
||||
/* Minimal DCTCP CE state machine:
|
||||
*
|
||||
* S: 0 <- last pkt was non-CE
|
||||
* 1 <- last pkt was CE
|
||||
*/
|
||||
|
||||
static void dctcp_ce_state_0_to_1(struct sock *sk)
|
||||
{
|
||||
struct dctcp *ca = inet_csk_ca(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (!ca->ce_state) {
|
||||
/* State has changed from CE=0 to CE=1, force an immediate
|
||||
* ACK to reflect the new CE state. If an ACK was delayed,
|
||||
* send that first to reflect the prior CE state.
|
||||
*/
|
||||
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
|
||||
__tcp_send_ack(sk, ca->prior_rcv_nxt);
|
||||
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
||||
}
|
||||
|
||||
ca->prior_rcv_nxt = tp->rcv_nxt;
|
||||
ca->ce_state = 1;
|
||||
|
||||
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
|
||||
}
|
||||
|
||||
static void dctcp_ce_state_1_to_0(struct sock *sk)
|
||||
{
|
||||
struct dctcp *ca = inet_csk_ca(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (ca->ce_state) {
|
||||
/* State has changed from CE=1 to CE=0, force an immediate
|
||||
* ACK to reflect the new CE state. If an ACK was delayed,
|
||||
* send that first to reflect the prior CE state.
|
||||
*/
|
||||
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
|
||||
__tcp_send_ack(sk, ca->prior_rcv_nxt);
|
||||
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
||||
}
|
||||
|
||||
ca->prior_rcv_nxt = tp->rcv_nxt;
|
||||
ca->ce_state = 0;
|
||||
|
||||
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
|
||||
}
|
||||
|
||||
static void dctcp_update_alpha(struct sock *sk, u32 flags)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)
|
|||
|
||||
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
|
||||
{
|
||||
struct dctcp *ca = inet_csk_ca(sk);
|
||||
|
||||
switch (ev) {
|
||||
case CA_EVENT_ECN_IS_CE:
|
||||
dctcp_ce_state_0_to_1(sk);
|
||||
break;
|
||||
case CA_EVENT_ECN_NO_CE:
|
||||
dctcp_ce_state_1_to_0(sk);
|
||||
dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
|
||||
break;
|
||||
default:
|
||||
/* Don't care for the rest. */
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef _TCP_DCTCP_H
|
||||
#define _TCP_DCTCP_H
|
||||
|
||||
/* Make the socket's TCP_ECN_DEMAND_CWR flag mirror the given CE state.
 *
 * @sk:       socket whose ecn_flags are updated
 * @ce_state: 1 sets TCP_ECN_DEMAND_CWR; any other value clears it
 */
static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (ce_state != 1) {
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
		return;
	}

	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
|
||||
|
||||
/* Minimal DCTCP CE state machine:
|
||||
*
|
||||
* S: 0 <- last pkt was non-CE
|
||||
* 1 <- last pkt was CE
|
||||
*/
|
||||
/* Advance the caller-owned CE state for one received-packet ECN event.
 *
 * @sk:            socket the event was raised on
 * @evt:           CA_EVENT_ECN_IS_CE selects state 1; any other event
 *                 selects state 0
 * @prior_rcv_nxt: in/out, sequence to acknowledge in a flushed delayed
 *                 ACK; overwritten with the socket's current rcv_nxt
 * @ce_state:      in/out, previous CE state; overwritten with the new one
 *
 * On return TCP_ECN_DEMAND_CWR on the socket matches the new state.
 */
static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
					u32 *prior_rcv_nxt, u32 *ce_state)
{
	u32 new_ce_state = 0;

	if (evt == CA_EVENT_ECN_IS_CE)
		new_ce_state = 1;

	if (new_ce_state != *ce_state) {
		/* The CE state flipped. A delayed ACK that is still armed
		 * must go out first, with the flag set for the *old* state
		 * and covering only data received before the flip; after
		 * that an immediate ACK is requested for the new state.
		 */
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
			dctcp_ece_ack_cwr(sk, *ce_state);
			__tcp_send_ack(sk, *prior_rcv_nxt);
		}
		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}

	*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
	*ce_state = new_ce_state;
	dctcp_ece_ack_cwr(sk, new_ce_state);
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue