tcp: refactor DCTCP ECN ACK handling
DCTCP has two parts - a new ECN signalling mechanism and the response function to it. The first part can be used by other congestion control modules in networks where DCTCP-ECN is deployed. This patch moves that part into a separate tcp_dctcp.h to be used by other congestion control modules (like how YeAH uses the Vegas algorithms). For example, BBR is currently experimenting with such an ECN signal: https://tinyurl.com/ietf-102-iccrg-bbr2 Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Yousuk Seung <ysseung@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
ed792e28c4
commit
ffd177dea5
|
@ -44,6 +44,7 @@
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
#include <linux/inet_diag.h>
|
#include <linux/inet_diag.h>
|
||||||
|
#include "tcp_dctcp.h"
|
||||||
|
|
||||||
#define DCTCP_MAX_ALPHA 1024U
|
#define DCTCP_MAX_ALPHA 1024U
|
||||||
|
|
||||||
|
@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
|
||||||
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
|
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Minimal DCTCP CE state machine:
|
|
||||||
*
|
|
||||||
* S: 0 <- last pkt was non-CE
|
|
||||||
* 1 <- last pkt was CE
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void dctcp_ce_state_0_to_1(struct sock *sk)
|
|
||||||
{
|
|
||||||
struct dctcp *ca = inet_csk_ca(sk);
|
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
|
||||||
|
|
||||||
if (!ca->ce_state) {
|
|
||||||
/* State has changed from CE=0 to CE=1, force an immediate
|
|
||||||
* ACK to reflect the new CE state. If an ACK was delayed,
|
|
||||||
* send that first to reflect the prior CE state.
|
|
||||||
*/
|
|
||||||
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
|
|
||||||
__tcp_send_ack(sk, ca->prior_rcv_nxt);
|
|
||||||
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
|
||||||
}
|
|
||||||
|
|
||||||
ca->prior_rcv_nxt = tp->rcv_nxt;
|
|
||||||
ca->ce_state = 1;
|
|
||||||
|
|
||||||
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dctcp_ce_state_1_to_0(struct sock *sk)
|
|
||||||
{
|
|
||||||
struct dctcp *ca = inet_csk_ca(sk);
|
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
|
||||||
|
|
||||||
if (ca->ce_state) {
|
|
||||||
/* State has changed from CE=1 to CE=0, force an immediate
|
|
||||||
* ACK to reflect the new CE state. If an ACK was delayed,
|
|
||||||
* send that first to reflect the prior CE state.
|
|
||||||
*/
|
|
||||||
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
|
|
||||||
__tcp_send_ack(sk, ca->prior_rcv_nxt);
|
|
||||||
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
|
||||||
}
|
|
||||||
|
|
||||||
ca->prior_rcv_nxt = tp->rcv_nxt;
|
|
||||||
ca->ce_state = 0;
|
|
||||||
|
|
||||||
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dctcp_update_alpha(struct sock *sk, u32 flags)
|
static void dctcp_update_alpha(struct sock *sk, u32 flags)
|
||||||
{
|
{
|
||||||
const struct tcp_sock *tp = tcp_sk(sk);
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)
|
||||||
|
|
||||||
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
|
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
|
||||||
{
|
{
|
||||||
|
struct dctcp *ca = inet_csk_ca(sk);
|
||||||
|
|
||||||
switch (ev) {
|
switch (ev) {
|
||||||
case CA_EVENT_ECN_IS_CE:
|
case CA_EVENT_ECN_IS_CE:
|
||||||
dctcp_ce_state_0_to_1(sk);
|
|
||||||
break;
|
|
||||||
case CA_EVENT_ECN_NO_CE:
|
case CA_EVENT_ECN_NO_CE:
|
||||||
dctcp_ce_state_1_to_0(sk);
|
dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* Don't care for the rest. */
|
/* Don't care for the rest. */
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
#ifndef _TCP_DCTCP_H
|
||||||
|
#define _TCP_DCTCP_H
|
||||||
|
|
||||||
|
static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
|
||||||
|
{
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
if (ce_state == 1)
|
||||||
|
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
|
||||||
|
else
|
||||||
|
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Minimal DCTCP CE state machine:
|
||||||
|
*
|
||||||
|
* S: 0 <- last pkt was non-CE
|
||||||
|
* 1 <- last pkt was CE
|
||||||
|
*/
|
||||||
|
static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
|
||||||
|
u32 *prior_rcv_nxt, u32 *ce_state)
|
||||||
|
{
|
||||||
|
u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
|
||||||
|
|
||||||
|
if (*ce_state != new_ce_state) {
|
||||||
|
/* CE state has changed, force an immediate ACK to
|
||||||
|
* reflect the new CE state. If an ACK was delayed,
|
||||||
|
* send that first to reflect the prior CE state.
|
||||||
|
*/
|
||||||
|
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
|
||||||
|
dctcp_ece_ack_cwr(sk, *ce_state);
|
||||||
|
__tcp_send_ack(sk, *prior_rcv_nxt);
|
||||||
|
}
|
||||||
|
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
||||||
|
}
|
||||||
|
*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
|
||||||
|
*ce_state = new_ce_state;
|
||||||
|
dctcp_ece_ack_cwr(sk, new_ce_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue