tipc: rate limit broadcast retransmissions

As cluster sizes grow, so does the number of identical or overlapping
broadcast NACKs generated by the packet receivers. This often leads to
'NACK crunches' resulting in huge numbers of redundant retransmissions
of the same packet ranges.

In this commit, we introduce rate control of broadcast retransmissions,
so that a retransmitted range cannot be retransmitted again until after
at least 10 ms. This reduces the frequency of duplicate, redundant
retransmissions by an order of magnitude, while having a significant
positive impact on overall throughput and scalability.

Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Jon Paul Maloy 2016-09-01 13:52:50 -04:00 committed by David S. Miller
parent 02d11ca200
commit 7c4a54b963
1 changed files with 47 additions and 5 deletions

View File

@ -181,7 +181,10 @@ struct tipc_link {
u16 acked;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
int nack_state;
unsigned long prev_retr;
u16 prev_from;
u16 prev_to;
u8 nack_state;
bool bc_peer_is_up;
/* Statistics */
@ -202,6 +205,8 @@ enum {
BC_NACK_SND_SUPPRESS,
};
/* Minimum time, in milliseconds, that must elapse before a broadcast
 * packet range that overlaps the previous retransmission is treated as
 * a fresh retransmission request again
 */
#define TIPC_BC_RETR_LIMIT 10 /* [ms] */
/*
* Interval between NACKs when packets arrive out of order
*/
@ -1590,11 +1595,48 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
l->rcv_nxt = peers_snd_nxt;
}
/* link_bc_retr_eval() - decide whether a requested range may be retransmitted
 * @l: link whose prev_from/prev_to/prev_retr record the last retransmission
 * @from: in/out, first packet of the requested range
 * @to: in/out, last packet of the requested range
 *
 * A request that is empty, or that lies entirely inside the range that was
 * retransmitted during the last TIPC_BC_RETR_LIMIT ms, is rejected.
 * A request that only partly overlaps that range is trimmed so the
 * overlapping part is not sent again, and the recorded range is widened.
 * Any other request is accepted unchanged and becomes the new record.
 *
 * Returns true if [*from, *to] should be retransmitted now, otherwise false.
 */
static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
{
	unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);

	/* Nothing requested */
	if (less(*to, *from))
		return false;

	if (elapsed <= TIPC_BC_RETR_LIMIT &&
	    !less(*to, l->prev_from) && !more(*from, l->prev_to)) {
		/* Still rate limited and touching the previous range */

		/* Completely covered by the previous retransmit */
		if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
			return false;

		/* Keep only what sticks out below the previous range.
		 * NOTE(review): when the request also extends above the
		 * previous range, the upper part appears to be dropped here,
		 * since *to is lowered before the next test - confirm intended
		 */
		if (less(*from, l->prev_from)) {
			*to = l->prev_from - 1;
			l->prev_from = *from;
		}

		/* Keep only what sticks out above the previous range */
		if (more(*to, l->prev_to)) {
			*from = l->prev_to + 1;
			l->prev_to = *to;
		}
	} else {
		/* Limit expired or no overlap: treat as a new request */
		l->prev_from = *from;
		l->prev_to = *to;
	}

	l->prev_retr = jiffies;
	return true;
}
/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
*/
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq)
{
struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
u16 from = msg_bcast_ack(hdr) + 1;
u16 to = from + msg_bc_gap(hdr) - 1;
@ -1613,14 +1655,14 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
if (!l->bc_peer_is_up)
return rc;
l->stats.recv_nacks++;
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
if (!less(to, from)) {
rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
l->stats.recv_nacks++;
}
if (link_bc_retr_eval(snd_l, &from, &to))
rc = tipc_link_retrans(snd_l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))