Merge branch 'net_sched-reduce-the-number-of-qdisc-resets'

Cong Wang says:

====================
net_sched: reduce the number of qdisc resets

This patchset aims to reduce the number of qdisc resets during
qdisc tear down. Patch 1~3 are preparation for their following
patches, especially patch 2 and patch 3 add a few tracepoints
so that we can observe the whole lifetime of qdisc's. Patch 4
and patch 5 are the ones do the actual work. Please find more
details in each patch description.

Vaclav Zindulka tested this patchset and his large ruleset with
over 13k qdiscs defined got from 22s to 520ms.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-05-27 15:05:50 -07:00
commit bdad7f9441
3 changed files with 110 additions and 43 deletions

View File

@ -8,6 +8,8 @@
#include <linux/netdevice.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>
TRACE_EVENT(qdisc_dequeue,
@ -44,6 +46,79 @@ TRACE_EVENT(qdisc_dequeue,
__entry->txq_state, __entry->packets, __entry->skbaddr )
);
TRACE_EVENT(qdisc_reset,
TP_PROTO(struct Qdisc *q),
TP_ARGS(q),
TP_STRUCT__entry(
__string( dev, qdisc_dev(q) )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
),
TP_fast_assign(
__assign_str(dev, qdisc_dev(q));
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
),
TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
__get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);
TRACE_EVENT(qdisc_destroy,
TP_PROTO(struct Qdisc *q),
TP_ARGS(q),
TP_STRUCT__entry(
__string( dev, qdisc_dev(q) )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
),
TP_fast_assign(
__assign_str(dev, qdisc_dev(q));
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
),
TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
__get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);
TRACE_EVENT(qdisc_create,
TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent),
TP_ARGS(ops, dev, parent),
TP_STRUCT__entry(
__string( dev, dev->name )
__string( kind, ops->id )
__field( u32, parent )
),
TP_fast_assign(
__assign_str(dev, dev->name);
__assign_str(kind, ops->id);
__entry->parent = parent;
),
TP_printk("dev=%s kind=%s parent=%x:%x",
__get_str(dev), __get_str(kind),
TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent))
);
#endif /* _TRACE_QDISC_H */
/* This part must be outside protection */

View File

@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <trace/events/qdisc.h>
/*
Short review.
@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);
return sch;

View File

@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL, extack) == 0)
if (!ops->init || ops->init(sch, NULL, extack) == 0) {
trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
}
qdisc_put(sch);
return NULL;
@ -911,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
trace_qdisc_reset(qdisc);
if (ops->reset)
ops->reset(qdisc);
@ -949,7 +953,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@ -957,23 +960,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
qdisc_reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
__skb_unlink(skb, &qdisc->gso_skb);
kfree_skb_list(skb);
}
skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
__skb_unlink(skb, &qdisc->skb_bad_txq);
kfree_skb_list(skb);
}
trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@ -1132,6 +1128,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
static void qdisc_deactivate(struct Qdisc *qdisc)
{
bool nolock = qdisc->flags & TCQ_F_NOLOCK;
if (qdisc->flags & TCQ_F_BUILTIN)
return;
if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
return;
if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@ -1141,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
bool nolock = qdisc->flags & TCQ_F_NOLOCK;
if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
if (!(qdisc->flags & TCQ_F_BUILTIN))
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
}
@ -1186,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
static void dev_qdisc_reset(struct net_device *dev,
struct netdev_queue *dev_queue,
void *none)
{
struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
if (qdisc)
qdisc_reset(qdisc);
}
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@ -1232,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
if (dev_ingress_queue(dev))
dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}