net: netem: use a list in addition to rbtree
When testing high-bandwidth TCP streams with large windows, high latency, and low jitter, netem consumes a lot of CPU cycles doing rbtree rebalancing. This patch uses a linear list/queue in addition to the rbtree: if an incoming packet is past the tail of the linear queue, it is added there, otherwise it is inserted into the rbtree. Without this patch, perf shows netem_enqueue, netem_dequeue, and rb_* functions among the top offenders. With this patch, only netem_enqueue is noticeable if jitter is low/absent. Suggested-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Peter Oskolkov <posk@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
932c441707
commit
d66280b12b
|
@ -77,6 +77,10 @@ struct netem_sched_data {
|
||||||
/* internal t(ime)fifo qdisc uses t_root and sch->limit */
|
/* internal t(ime)fifo qdisc uses t_root and sch->limit */
|
||||||
struct rb_root t_root;
|
struct rb_root t_root;
|
||||||
|
|
||||||
|
/* a linear queue; reduces rbtree rebalancing when jitter is low */
|
||||||
|
struct sk_buff *t_head;
|
||||||
|
struct sk_buff *t_tail;
|
||||||
|
|
||||||
/* optional qdisc for classful handling (NULL at netem init) */
|
/* optional qdisc for classful handling (NULL at netem init) */
|
||||||
struct Qdisc *qdisc;
|
struct Qdisc *qdisc;
|
||||||
|
|
||||||
|
@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
|
||||||
rb_erase(&skb->rbnode, &q->t_root);
|
rb_erase(&skb->rbnode, &q->t_root);
|
||||||
rtnl_kfree_skbs(skb, skb);
|
rtnl_kfree_skbs(skb, skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rtnl_kfree_skbs(q->t_head, q->t_tail);
|
||||||
|
q->t_head = NULL;
|
||||||
|
q->t_tail = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
|
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
|
||||||
{
|
{
|
||||||
struct netem_sched_data *q = qdisc_priv(sch);
|
struct netem_sched_data *q = qdisc_priv(sch);
|
||||||
u64 tnext = netem_skb_cb(nskb)->time_to_send;
|
u64 tnext = netem_skb_cb(nskb)->time_to_send;
|
||||||
struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
|
|
||||||
|
|
||||||
while (*p) {
|
if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
|
||||||
struct sk_buff *skb;
|
if (q->t_tail)
|
||||||
|
q->t_tail->next = nskb;
|
||||||
parent = *p;
|
|
||||||
skb = rb_to_skb(parent);
|
|
||||||
if (tnext >= netem_skb_cb(skb)->time_to_send)
|
|
||||||
p = &parent->rb_right;
|
|
||||||
else
|
else
|
||||||
p = &parent->rb_left;
|
q->t_head = nskb;
|
||||||
|
q->t_tail = nskb;
|
||||||
|
} else {
|
||||||
|
struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
|
||||||
|
|
||||||
|
while (*p) {
|
||||||
|
struct sk_buff *skb;
|
||||||
|
|
||||||
|
parent = *p;
|
||||||
|
skb = rb_to_skb(parent);
|
||||||
|
if (tnext >= netem_skb_cb(skb)->time_to_send)
|
||||||
|
p = &parent->rb_right;
|
||||||
|
else
|
||||||
|
p = &parent->rb_left;
|
||||||
|
}
|
||||||
|
rb_link_node(&nskb->rbnode, parent, p);
|
||||||
|
rb_insert_color(&nskb->rbnode, &q->t_root);
|
||||||
}
|
}
|
||||||
rb_link_node(&nskb->rbnode, parent, p);
|
|
||||||
rb_insert_color(&nskb->rbnode, &q->t_root);
|
|
||||||
sch->q.qlen++;
|
sch->q.qlen++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -530,9 +547,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
|
||||||
t_skb = skb_rb_last(&q->t_root);
|
t_skb = skb_rb_last(&q->t_root);
|
||||||
t_last = netem_skb_cb(t_skb);
|
t_last = netem_skb_cb(t_skb);
|
||||||
if (!last ||
|
if (!last ||
|
||||||
t_last->time_to_send > last->time_to_send) {
|
t_last->time_to_send > last->time_to_send)
|
||||||
|
last = t_last;
|
||||||
|
}
|
||||||
|
if (q->t_tail) {
|
||||||
|
struct netem_skb_cb *t_last =
|
||||||
|
netem_skb_cb(q->t_tail);
|
||||||
|
|
||||||
|
if (!last ||
|
||||||
|
t_last->time_to_send > last->time_to_send)
|
||||||
last = t_last;
|
last = t_last;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (last) {
|
if (last) {
|
||||||
|
@ -611,11 +635,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
|
||||||
q->slot.bytes_left = q->slot_config.max_bytes;
|
q->slot.bytes_left = q->slot_config.max_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct sk_buff *netem_peek(struct netem_sched_data *q)
|
||||||
|
{
|
||||||
|
struct sk_buff *skb = skb_rb_first(&q->t_root);
|
||||||
|
u64 t1, t2;
|
||||||
|
|
||||||
|
if (!skb)
|
||||||
|
return q->t_head;
|
||||||
|
if (!q->t_head)
|
||||||
|
return skb;
|
||||||
|
|
||||||
|
t1 = netem_skb_cb(skb)->time_to_send;
|
||||||
|
t2 = netem_skb_cb(q->t_head)->time_to_send;
|
||||||
|
if (t1 < t2)
|
||||||
|
return skb;
|
||||||
|
return q->t_head;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
if (skb == q->t_head) {
|
||||||
|
q->t_head = skb->next;
|
||||||
|
if (!q->t_head)
|
||||||
|
q->t_tail = NULL;
|
||||||
|
} else {
|
||||||
|
rb_erase(&skb->rbnode, &q->t_root);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
|
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
|
||||||
{
|
{
|
||||||
struct netem_sched_data *q = qdisc_priv(sch);
|
struct netem_sched_data *q = qdisc_priv(sch);
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
struct rb_node *p;
|
|
||||||
|
|
||||||
tfifo_dequeue:
|
tfifo_dequeue:
|
||||||
skb = __qdisc_dequeue_head(&sch->q);
|
skb = __qdisc_dequeue_head(&sch->q);
|
||||||
|
@ -625,20 +676,18 @@ deliver:
|
||||||
qdisc_bstats_update(sch, skb);
|
qdisc_bstats_update(sch, skb);
|
||||||
return skb;
|
return skb;
|
||||||
}
|
}
|
||||||
p = rb_first(&q->t_root);
|
skb = netem_peek(q);
|
||||||
if (p) {
|
if (skb) {
|
||||||
u64 time_to_send;
|
u64 time_to_send;
|
||||||
u64 now = ktime_get_ns();
|
u64 now = ktime_get_ns();
|
||||||
|
|
||||||
skb = rb_to_skb(p);
|
|
||||||
|
|
||||||
/* if more time remaining? */
|
/* if more time remaining? */
|
||||||
time_to_send = netem_skb_cb(skb)->time_to_send;
|
time_to_send = netem_skb_cb(skb)->time_to_send;
|
||||||
if (q->slot.slot_next && q->slot.slot_next < time_to_send)
|
if (q->slot.slot_next && q->slot.slot_next < time_to_send)
|
||||||
get_slot_next(q, now);
|
get_slot_next(q, now);
|
||||||
|
|
||||||
if (time_to_send <= now && q->slot.slot_next <= now) {
|
if (time_to_send <= now && q->slot.slot_next <= now) {
|
||||||
rb_erase(p, &q->t_root);
|
netem_erase_head(q, skb);
|
||||||
sch->q.qlen--;
|
sch->q.qlen--;
|
||||||
qdisc_qstats_backlog_dec(sch, skb);
|
qdisc_qstats_backlog_dec(sch, skb);
|
||||||
skb->next = NULL;
|
skb->next = NULL;
|
||||||
|
|
Loading…
Reference in New Issue