IB/core: Avoid deadlock during netlink message handling
When rdmacm module is not loaded, and when netlink message is received to
get char device info, it results into a deadlock due to recursive locking
of rdma_nl_mutex with the below call sequence.
[..]
rdma_nl_rcv()
mutex_lock()
[..]
rdma_nl_rcv_msg()
ib_get_client_nl_info()
request_module()
iw_cm_init()
rdma_nl_register()
mutex_lock(); <- Deadlock, acquiring mutex again
Due to above call sequence, following call trace and deadlock is observed.
kernel: __mutex_lock+0x35e/0x860
kernel: ? __mutex_lock+0x129/0x860
kernel: ? rdma_nl_register+0x1a/0x90 [ib_core]
kernel: rdma_nl_register+0x1a/0x90 [ib_core]
kernel: ? 0xffffffffc029b000
kernel: iw_cm_init+0x34/0x1000 [iw_cm]
kernel: do_one_initcall+0x67/0x2d4
kernel: ? kmem_cache_alloc_trace+0x1ec/0x2a0
kernel: do_init_module+0x5a/0x223
kernel: load_module+0x1998/0x1e10
kernel: ? __symbol_put+0x60/0x60
kernel: __do_sys_finit_module+0x94/0xe0
kernel: do_syscall_64+0x5a/0x270
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
process stack trace:
[<0>] __request_module+0x1c9/0x460
[<0>] ib_get_client_nl_info+0x5e/0xb0 [ib_core]
[<0>] nldev_get_chardev+0x1ac/0x320 [ib_core]
[<0>] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core]
[<0>] rdma_nl_rcv+0xcd/0x120 [ib_core]
[<0>] netlink_unicast+0x179/0x220
[<0>] netlink_sendmsg+0x2f6/0x3f0
[<0>] sock_sendmsg+0x30/0x40
[<0>] ___sys_sendmsg+0x27a/0x290
[<0>] __sys_sendmsg+0x58/0xa0
[<0>] do_syscall_64+0x5a/0x270
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
To overcome this deadlock and to allow multiple netlink messages to
progress in parallel, following scheme is implemented.
1. Split the lock protecting the cb_table into a per-index lock, and make
it a rwlock. This lock is used to ensure no callbacks are running after
unregistration returns. Since a module will not be registered once it
is already running callbacks, this avoids the deadlock.
2. Use smp_store_release() to update the cb_table during registration so
that no lock is required. This avoids lockdep problems with thinking
all the rwsems are the same lock class.
Fixes: 0e2d00eb6f
("RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload")
Link: https://lore.kernel.org/r/20191015080733.18625-1-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
parent
a15542bb72
commit
549af00833
|
@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
|
||||||
int ib_sa_init(void);
|
int ib_sa_init(void);
|
||||||
void ib_sa_cleanup(void);
|
void ib_sa_cleanup(void);
|
||||||
|
|
||||||
|
void rdma_nl_init(void);
|
||||||
void rdma_nl_exit(void);
|
void rdma_nl_exit(void);
|
||||||
|
|
||||||
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
|
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
|
||||||
|
|
|
@ -2716,6 +2716,8 @@ static int __init ib_core_init(void)
|
||||||
goto err_comp_unbound;
|
goto err_comp_unbound;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rdma_nl_init();
|
||||||
|
|
||||||
ret = addr_init();
|
ret = addr_init();
|
||||||
if (ret) {
|
if (ret) {
|
||||||
pr_warn("Could't init IB address resolution\n");
|
pr_warn("Could't init IB address resolution\n");
|
||||||
|
|
|
@ -42,9 +42,12 @@
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include "core_priv.h"
|
#include "core_priv.h"
|
||||||
|
|
||||||
static DEFINE_MUTEX(rdma_nl_mutex);
|
|
||||||
static struct {
|
static struct {
|
||||||
const struct rdma_nl_cbs *cb_table;
|
const struct rdma_nl_cbs *cb_table;
|
||||||
|
/* Synchronizes between ongoing netlink commands and netlink client
|
||||||
|
* unregistration.
|
||||||
|
*/
|
||||||
|
struct rw_semaphore sem;
|
||||||
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
|
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
|
||||||
|
|
||||||
bool rdma_nl_chk_listeners(unsigned int group)
|
bool rdma_nl_chk_listeners(unsigned int group)
|
||||||
|
@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
|
||||||
return (op < max_num_ops[type]) ? true : false;
|
return (op < max_num_ops[type]) ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static const struct rdma_nl_cbs *
|
||||||
is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
|
get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
|
||||||
{
|
{
|
||||||
const struct rdma_nl_cbs *cb_table;
|
const struct rdma_nl_cbs *cb_table;
|
||||||
|
|
||||||
if (!is_nl_msg_valid(type, op))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Currently only NLDEV client is supporting netlink commands in
|
* Currently only NLDEV client is supporting netlink commands in
|
||||||
* non init_net net namespace.
|
* non init_net net namespace.
|
||||||
*/
|
*/
|
||||||
if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
|
if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
|
||||||
return false;
|
return NULL;
|
||||||
|
|
||||||
|
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
|
||||||
|
if (!cb_table) {
|
||||||
|
/*
|
||||||
|
* Didn't get valid reference of the table, attempt module
|
||||||
|
* load once.
|
||||||
|
*/
|
||||||
|
up_read(&rdma_nl_types[type].sem);
|
||||||
|
|
||||||
if (!rdma_nl_types[type].cb_table) {
|
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
|
||||||
request_module("rdma-netlink-subsys-%d", type);
|
request_module("rdma-netlink-subsys-%d", type);
|
||||||
mutex_lock(&rdma_nl_mutex);
|
|
||||||
|
down_read(&rdma_nl_types[type].sem);
|
||||||
|
cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
|
||||||
}
|
}
|
||||||
|
|
||||||
cb_table = rdma_nl_types[type].cb_table;
|
|
||||||
|
|
||||||
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
|
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
|
||||||
return false;
|
return NULL;
|
||||||
return true;
|
return cb_table;
|
||||||
}
|
}
|
||||||
|
|
||||||
void rdma_nl_register(unsigned int index,
|
void rdma_nl_register(unsigned int index,
|
||||||
const struct rdma_nl_cbs cb_table[])
|
const struct rdma_nl_cbs cb_table[])
|
||||||
{
|
{
|
||||||
mutex_lock(&rdma_nl_mutex);
|
if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
|
||||||
if (!is_nl_msg_valid(index, 0)) {
|
WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
|
||||||
/*
|
|
||||||
* All clients are not interesting in success/failure of
|
|
||||||
* this call. They want to see the print to error log and
|
|
||||||
* continue their initialization. Print warning for them,
|
|
||||||
* because it is programmer's error to be here.
|
|
||||||
*/
|
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
|
||||||
WARN(true,
|
|
||||||
"The not-valid %u index was supplied to RDMA netlink\n",
|
|
||||||
index);
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
if (rdma_nl_types[index].cb_table) {
|
/* Pairs with the READ_ONCE in is_nl_valid() */
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
|
||||||
WARN(true,
|
|
||||||
"The %u index is already registered in RDMA netlink\n",
|
|
||||||
index);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
rdma_nl_types[index].cb_table = cb_table;
|
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(rdma_nl_register);
|
EXPORT_SYMBOL(rdma_nl_register);
|
||||||
|
|
||||||
void rdma_nl_unregister(unsigned int index)
|
void rdma_nl_unregister(unsigned int index)
|
||||||
{
|
{
|
||||||
mutex_lock(&rdma_nl_mutex);
|
down_write(&rdma_nl_types[index].sem);
|
||||||
rdma_nl_types[index].cb_table = NULL;
|
rdma_nl_types[index].cb_table = NULL;
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
up_write(&rdma_nl_types[index].sem);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(rdma_nl_unregister);
|
EXPORT_SYMBOL(rdma_nl_unregister);
|
||||||
|
|
||||||
|
@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||||
unsigned int index = RDMA_NL_GET_CLIENT(type);
|
unsigned int index = RDMA_NL_GET_CLIENT(type);
|
||||||
unsigned int op = RDMA_NL_GET_OP(type);
|
unsigned int op = RDMA_NL_GET_OP(type);
|
||||||
const struct rdma_nl_cbs *cb_table;
|
const struct rdma_nl_cbs *cb_table;
|
||||||
|
int err = -EINVAL;
|
||||||
|
|
||||||
if (!is_nl_valid(skb, index, op))
|
if (!is_nl_msg_valid(index, op))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
cb_table = rdma_nl_types[index].cb_table;
|
down_read(&rdma_nl_types[index].sem);
|
||||||
|
cb_table = get_cb_table(skb, index, op);
|
||||||
|
if (!cb_table)
|
||||||
|
goto done;
|
||||||
|
|
||||||
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
|
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
|
||||||
!netlink_capable(skb, CAP_NET_ADMIN))
|
!netlink_capable(skb, CAP_NET_ADMIN)) {
|
||||||
return -EPERM;
|
err = -EPERM;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
|
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
|
||||||
|
@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||||
*/
|
*/
|
||||||
if (index == RDMA_NL_LS) {
|
if (index == RDMA_NL_LS) {
|
||||||
if (cb_table[op].doit)
|
if (cb_table[op].doit)
|
||||||
return cb_table[op].doit(skb, nlh, extack);
|
err = cb_table[op].doit(skb, nlh, extack);
|
||||||
return -EINVAL;
|
goto done;
|
||||||
}
|
}
|
||||||
/* FIXME: Convert IWCM to properly handle doit callbacks */
|
/* FIXME: Convert IWCM to properly handle doit callbacks */
|
||||||
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
|
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
|
||||||
|
@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||||
.dump = cb_table[op].dump,
|
.dump = cb_table[op].dump,
|
||||||
};
|
};
|
||||||
if (c.dump)
|
if (c.dump)
|
||||||
return netlink_dump_start(skb->sk, skb, nlh, &c);
|
err = netlink_dump_start(skb->sk, skb, nlh, &c);
|
||||||
return -EINVAL;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cb_table[op].doit)
|
if (cb_table[op].doit)
|
||||||
return cb_table[op].doit(skb, nlh, extack);
|
err = cb_table[op].doit(skb, nlh, extack);
|
||||||
|
done:
|
||||||
return 0;
|
up_read(&rdma_nl_types[index].sem);
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -263,9 +256,7 @@ skip:
|
||||||
|
|
||||||
static void rdma_nl_rcv(struct sk_buff *skb)
|
static void rdma_nl_rcv(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
mutex_lock(&rdma_nl_mutex);
|
|
||||||
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
|
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
|
||||||
mutex_unlock(&rdma_nl_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
|
int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
|
||||||
|
@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(rdma_nl_multicast);
|
EXPORT_SYMBOL(rdma_nl_multicast);
|
||||||
|
|
||||||
|
void rdma_nl_init(void)
|
||||||
|
{
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
|
||||||
|
init_rwsem(&rdma_nl_types[idx].sem);
|
||||||
|
}
|
||||||
|
|
||||||
void rdma_nl_exit(void)
|
void rdma_nl_exit(void)
|
||||||
{
|
{
|
||||||
int idx;
|
int idx;
|
||||||
|
|
Loading…
Reference in New Issue