tcp: refactor DCTCP ECN ACK handling

DCTCP has two parts - a new ECN signalling mechanism and the response
function to it. The first part can be used by other congestion
control modules in DCTCP-ECN deployed networks. This patch moves that
part into a separate tcp_dctcp.h to be used by other congestion control
modules (like how Yeah uses Vegas algorithms). For example, BBR is
currently experimenting with such an ECN signal:
https://tinyurl.com/ietf-102-iccrg-bbr2

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Yuchung Cheng 2018-10-08 15:32:20 -07:00 committed by David S. Miller
parent ed792e28c4
commit ffd177dea5
2 changed files with 44 additions and 51 deletions

View File

@ -44,6 +44,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <linux/inet_diag.h> #include <linux/inet_diag.h>
#include "tcp_dctcp.h"
#define DCTCP_MAX_ALPHA 1024U #define DCTCP_MAX_ALPHA 1024U
@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
} }
/* Minimal DCTCP CE state machine:
*
* S: 0 <- last pkt was non-CE
* 1 <- last pkt was CE
*/
/* Enter CE state 1; invoked from dctcp_cwnd_event() for
 * CA_EVENT_ECN_IS_CE.
 *
 * When the stored state was 0, a pending delayed ACK is sent first
 * using the previously recorded rcv_nxt, and an immediate ACK is then
 * requested. In every case the current rcv_nxt is recorded, ce_state
 * becomes 1 and TCP_ECN_DEMAND_CWR is set in tp->ecn_flags.
 */
static void dctcp_ce_state_0_to_1(struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Only an actual 0 -> 1 transition triggers the ACK handling. */
if (!ca->ce_state) {
/* State has changed from CE=0 to CE=1, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
__tcp_send_ack(sk, ca->prior_rcv_nxt);
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
/* Remember rcv_nxt so a later flush can pass it to __tcp_send_ack(). */
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 1;
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
/* Enter CE state 0; invoked from dctcp_cwnd_event() for
 * CA_EVENT_ECN_NO_CE.
 *
 * When the stored state was 1, a pending delayed ACK is sent first
 * using the previously recorded rcv_nxt, and an immediate ACK is then
 * requested. In every case the current rcv_nxt is recorded, ce_state
 * becomes 0 and TCP_ECN_DEMAND_CWR is cleared in tp->ecn_flags.
 */
static void dctcp_ce_state_1_to_0(struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Only an actual 1 -> 0 transition triggers the ACK handling. */
if (ca->ce_state) {
/* State has changed from CE=1 to CE=0, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
__tcp_send_ack(sk, ca->prior_rcv_nxt);
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
/* Remember rcv_nxt so a later flush can pass it to __tcp_send_ack(). */
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 0;
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
static void dctcp_update_alpha(struct sock *sk, u32 flags) static void dctcp_update_alpha(struct sock *sk, u32 flags)
{ {
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{ {
struct dctcp *ca = inet_csk_ca(sk);
switch (ev) { switch (ev) {
case CA_EVENT_ECN_IS_CE: case CA_EVENT_ECN_IS_CE:
dctcp_ce_state_0_to_1(sk);
break;
case CA_EVENT_ECN_NO_CE: case CA_EVENT_ECN_NO_CE:
dctcp_ce_state_1_to_0(sk); dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
break; break;
default: default:
/* Don't care for the rest. */ /* Don't care for the rest. */

40
net/ipv4/tcp_dctcp.h Normal file
View File

@ -0,0 +1,40 @@
#ifndef _TCP_DCTCP_H
#define _TCP_DCTCP_H
/* Make the TCP_ECN_DEMAND_CWR bit of tp->ecn_flags follow @ce_state:
 * the bit is set when @ce_state is exactly 1 and cleared for any
 * other value.
 */
static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (ce_state != 1) {
		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
		return;
	}

	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
/* Minimal DCTCP CE state machine:
 *
 * S:	0 <- last pkt was non-CE
 *	1 <- last pkt was CE
 *
 * @sk:            socket being updated
 * @evt:           CA_EVENT_ECN_IS_CE selects state 1; any other event
 *                 selects state 0
 * @prior_rcv_nxt: caller-owned storage; its old value is handed to
 *                 __tcp_send_ack() when a delayed ACK is flushed, then
 *                 it is overwritten with the current rcv_nxt
 * @ce_state:      caller-owned storage for the state; overwritten with
 *                 the newly selected state
 */
static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
					u32 *prior_rcv_nxt, u32 *ce_state)
{
	u32 next_state = 0;

	if (evt == CA_EVENT_ECN_IS_CE)
		next_state = 1;

	if (next_state != *ce_state) {
		/* The CE state flipped. A delayed ACK that is still
		 * pending must go out first, carrying the flag for the
		 * previous state; after that an immediate ACK is
		 * requested so the new state gets reported.
		 */
		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
			dctcp_ece_ack_cwr(sk, *ce_state);
			__tcp_send_ack(sk, *prior_rcv_nxt);
		}
		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}

	/* Record the current position and state, then sync the flag. */
	*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
	*ce_state = next_state;
	dctcp_ece_ack_cwr(sk, next_state);
}
#endif