2017-08-03 19:28:11 +08:00
|
|
|
#include <linux/rtnetlink.h>
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/rcupdate.h>
|
|
|
|
#include <linux/kernel.h>
|
2017-09-01 17:15:17 +08:00
|
|
|
#include <linux/module.h>
|
2017-08-03 19:28:11 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <net/net_namespace.h>
|
|
|
|
#include <net/fib_notifier.h>
|
|
|
|
|
|
|
|
/*
 * Notifier chain of FIB event listeners: blocks are added to it by
 * fib_dump_is_consistent() and removed by unregister_fib_notifier(), and
 * call_fib_notifiers() delivers events through it.
 */
static ATOMIC_NOTIFIER_HEAD(fib_chain);
|
|
|
|
|
|
|
|
int call_fib_notifier(struct notifier_block *nb, struct net *net,
|
|
|
|
enum fib_event_type event_type,
|
|
|
|
struct fib_notifier_info *info)
|
|
|
|
{
|
2018-03-28 09:21:55 +08:00
|
|
|
int err;
|
|
|
|
|
2017-08-03 19:28:11 +08:00
|
|
|
info->net = net;
|
2018-03-28 09:21:55 +08:00
|
|
|
err = nb->notifier_call(nb, event_type, info);
|
|
|
|
return notifier_to_errno(err);
|
2017-08-03 19:28:11 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(call_fib_notifier);
|
|
|
|
|
|
|
|
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
|
|
|
|
struct fib_notifier_info *info)
|
|
|
|
{
|
2018-03-28 09:21:55 +08:00
|
|
|
int err;
|
|
|
|
|
2017-08-03 19:28:11 +08:00
|
|
|
info->net = net;
|
2018-03-28 09:21:55 +08:00
|
|
|
err = atomic_notifier_call_chain(&fib_chain, event_type, info);
|
|
|
|
return notifier_to_errno(err);
|
2017-08-03 19:28:11 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(call_fib_notifiers);
|
|
|
|
|
|
|
|
static unsigned int fib_seq_sum(void)
|
|
|
|
{
|
|
|
|
struct fib_notifier_ops *ops;
|
|
|
|
unsigned int fib_seq = 0;
|
|
|
|
struct net *net;
|
|
|
|
|
|
|
|
rtnl_lock();
|
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over alive net namespaces,
he/she has no a possibility to do that without exclusive lock.
But the exclusive rtnl_lock() in such places is overkill,
and it just increases the contention. Yes, there is already
for_each_net_rcu() in kernel, but it requires rcu_read_lock(),
and this can't be sleepable. Also, sometimes it may be need
really prevent net_namespace_list growth, so for_each_net_rcu()
is not fit there.
This patch introduces new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows not-exclusive iterations over net
namespaces list. It allows to stop using rtnl_lock()
in several places (what is made in next patches) and makes
less the time, we keep rtnl_mutex. Here we just add new lock,
while the explanation of we can remove rtnl_lock() there are
in next patches.
Fine grained locks generally are better, then one big lock,
so let's do that with net_namespace_list, while the situation
allows that.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-30 00:20:32 +08:00
|
|
|
down_read(&net_rwsem);
|
2017-08-03 19:28:11 +08:00
|
|
|
for_each_net(net) {
|
2017-11-14 21:51:56 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
|
2017-09-01 17:15:17 +08:00
|
|
|
if (!try_module_get(ops->owner))
|
|
|
|
continue;
|
2017-08-03 19:28:11 +08:00
|
|
|
fib_seq += ops->fib_seq_read(net);
|
2017-09-01 17:15:17 +08:00
|
|
|
module_put(ops->owner);
|
|
|
|
}
|
2017-11-14 21:51:56 +08:00
|
|
|
rcu_read_unlock();
|
2017-08-03 19:28:11 +08:00
|
|
|
}
|
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over alive net namespaces,
he/she has no a possibility to do that without exclusive lock.
But the exclusive rtnl_lock() in such places is overkill,
and it just increases the contention. Yes, there is already
for_each_net_rcu() in kernel, but it requires rcu_read_lock(),
and this can't be sleepable. Also, sometimes it may be need
really prevent net_namespace_list growth, so for_each_net_rcu()
is not fit there.
This patch introduces new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows not-exclusive iterations over net
namespaces list. It allows to stop using rtnl_lock()
in several places (what is made in next patches) and makes
less the time, we keep rtnl_mutex. Here we just add new lock,
while the explanation of we can remove rtnl_lock() there are
in next patches.
Fine grained locks generally are better, then one big lock,
so let's do that with net_namespace_list, while the situation
allows that.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-30 00:20:32 +08:00
|
|
|
up_read(&net_rwsem);
|
2017-08-03 19:28:11 +08:00
|
|
|
rtnl_unlock();
|
|
|
|
|
|
|
|
return fib_seq;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fib_net_dump(struct net *net, struct notifier_block *nb)
|
|
|
|
{
|
|
|
|
struct fib_notifier_ops *ops;
|
|
|
|
|
|
|
|
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
|
2017-09-01 17:15:17 +08:00
|
|
|
int err;
|
2017-08-03 19:28:11 +08:00
|
|
|
|
2017-09-01 17:15:17 +08:00
|
|
|
if (!try_module_get(ops->owner))
|
|
|
|
continue;
|
|
|
|
err = ops->fib_dump(net, nb);
|
|
|
|
module_put(ops->owner);
|
2017-08-03 19:28:11 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool fib_dump_is_consistent(struct notifier_block *nb,
|
|
|
|
void (*cb)(struct notifier_block *nb),
|
|
|
|
unsigned int fib_seq)
|
|
|
|
{
|
|
|
|
atomic_notifier_chain_register(&fib_chain, nb);
|
|
|
|
if (fib_seq == fib_seq_sum())
|
|
|
|
return true;
|
|
|
|
atomic_notifier_chain_unregister(&fib_chain, nb);
|
|
|
|
if (cb)
|
|
|
|
cb(nb);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Upper bound on the number of dump attempts register_fib_notifier() makes
 * before giving up with -EBUSY.
 */
#define FIB_DUMP_MAX_RETRIES 5
|
|
|
|
int register_fib_notifier(struct notifier_block *nb,
|
|
|
|
void (*cb)(struct notifier_block *nb))
|
|
|
|
{
|
|
|
|
int retries = 0;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
do {
|
|
|
|
unsigned int fib_seq = fib_seq_sum();
|
|
|
|
struct net *net;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
for_each_net_rcu(net) {
|
|
|
|
err = fib_net_dump(net, nb);
|
|
|
|
if (err)
|
|
|
|
goto err_fib_net_dump;
|
|
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
if (fib_dump_is_consistent(nb, cb, fib_seq))
|
|
|
|
return 0;
|
|
|
|
} while (++retries < FIB_DUMP_MAX_RETRIES);
|
|
|
|
|
|
|
|
return -EBUSY;
|
|
|
|
|
|
|
|
err_fib_net_dump:
|
|
|
|
rcu_read_unlock();
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(register_fib_notifier);
|
|
|
|
|
|
|
|
int unregister_fib_notifier(struct notifier_block *nb)
|
|
|
|
{
|
|
|
|
return atomic_notifier_chain_unregister(&fib_chain, nb);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(unregister_fib_notifier);
|
|
|
|
|
|
|
|
static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
|
|
|
|
struct net *net)
|
|
|
|
{
|
|
|
|
struct fib_notifier_ops *o;
|
|
|
|
|
|
|
|
list_for_each_entry(o, &net->fib_notifier_ops, list)
|
|
|
|
if (ops->family == o->family)
|
|
|
|
return -EEXIST;
|
|
|
|
list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct fib_notifier_ops *
|
|
|
|
fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
|
|
|
|
{
|
|
|
|
struct fib_notifier_ops *ops;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
|
|
|
|
if (!ops)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
err = __fib_notifier_ops_register(ops, net);
|
|
|
|
if (err)
|
|
|
|
goto err_register;
|
|
|
|
|
|
|
|
return ops;
|
|
|
|
|
|
|
|
err_register:
|
|
|
|
kfree(ops);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fib_notifier_ops_register);
|
|
|
|
|
|
|
|
void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
|
|
|
|
{
|
|
|
|
list_del_rcu(&ops->list);
|
|
|
|
kfree_rcu(ops, rcu);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fib_notifier_ops_unregister);
|
|
|
|
|
|
|
|
/*
 * Per-namespace init hook: start @net with an empty fib_notifier_ops list.
 * Always returns 0.
 */
static int __net_init fib_notifier_net_init(struct net *net)
{
	struct list_head *ops_list = &net->fib_notifier_ops;

	INIT_LIST_HEAD(ops_list);

	return 0;
}
|
|
|
|
|
2017-11-13 03:29:33 +08:00
|
|
|
/*
 * Per-namespace exit hook: warn (once) if @net still has entries on its
 * fib_notifier_ops list. Nothing is freed here.
 */
static void __net_exit fib_notifier_net_exit(struct net *net)
{
	struct list_head *ops_list = &net->fib_notifier_ops;

	WARN_ON_ONCE(!list_empty(ops_list));
}
|
|
|
|
|
2017-08-03 19:28:11 +08:00
|
|
|
static struct pernet_operations fib_notifier_net_ops = {
|
|
|
|
.init = fib_notifier_net_init,
|
2017-11-13 03:29:33 +08:00
|
|
|
.exit = fib_notifier_net_exit,
|
2017-08-03 19:28:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Boot-time initialisation, run as a subsys_initcall: register the
 * per-namespace operations and return the result of that registration.
 */
static int __init fib_notifier_init(void)
{
	int err = register_pernet_subsys(&fib_notifier_net_ops);

	return err;
}

subsys_initcall(fib_notifier_init);
|