RDMA/device: Fix a race between mad_client and cm_client init
[ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ]
The mad_client will be initialized in enable_device_and_get(), while the
devices_rwsem will be downgraded to a read semaphore. There is a window
that leads to the failed initialization for cm_client, since it can not
get matched mad port from ib_mad_port_list, and the matched mad port will
be added to the list after that.
mad_client | cm_client
------------------|--------------------------------------------------------
ib_register_device|
enable_device_and_get
down_write(&devices_rwsem)
xa_set_mark(&devices, DEVICE_REGISTERED)
downgrade_write(&devices_rwsem)
|
|ib_cm_init
|ib_register_client(&cm_client)
|down_read(&devices_rwsem)
|xa_for_each_marked (&devices, DEVICE_REGISTERED)
|add_client_context
|cm_add_one
|ib_register_mad_agent
|ib_get_mad_port
|__ib_get_mad_port
|list_for_each_entry(entry, &ib_mad_port_list, port_list)
|return NULL
|up_read(&devices_rwsem)
|
add_client_context|
ib_mad_init_device|
ib_mad_port_open |
list_add_tail(&port_priv->port_list, &ib_mad_port_list)
up_read(&devices_rwsem)
|
Fix it by using down_write(&devices_rwsem) in ib_register_client().
Fixes: d0899892ed
("RDMA/device: Provide APIs from the core code to help unregistration")
Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Shifeng Li <lishifeng@sangfor.com.cn>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
522e5e4770
commit
034977c33b
|
@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client)
|
|||
{
|
||||
int ret;
|
||||
|
||||
down_write(&clients_rwsem);
|
||||
lockdep_assert_held(&clients_rwsem);
|
||||
/*
|
||||
* The add/remove callbacks must be called in FIFO/LIFO order. To
|
||||
* achieve this we assign client_ids so they are sorted in
|
||||
|
@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client)
|
|||
client->client_id = highest_client_id;
|
||||
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
highest_client_id++;
|
||||
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
|
||||
|
||||
out:
|
||||
up_write(&clients_rwsem);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void remove_client_id(struct ib_client *client)
|
||||
|
@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client)
|
|||
{
|
||||
struct ib_device *device;
|
||||
unsigned long index;
|
||||
bool need_unreg = false;
|
||||
int ret;
|
||||
|
||||
refcount_set(&client->uses, 1);
|
||||
init_completion(&client->uses_zero);
|
||||
|
||||
/*
|
||||
* The devices_rwsem is held in write mode to ensure that a racing
|
||||
* ib_register_device() sees a consisent view of clients and devices.
|
||||
*/
|
||||
down_write(&devices_rwsem);
|
||||
down_write(&clients_rwsem);
|
||||
ret = assign_client_id(client);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
down_read(&devices_rwsem);
|
||||
need_unreg = true;
|
||||
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
|
||||
ret = add_client_context(device, client);
|
||||
if (ret) {
|
||||
up_read(&devices_rwsem);
|
||||
ib_unregister_client(client);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
up_read(&devices_rwsem);
|
||||
return 0;
|
||||
ret = 0;
|
||||
out:
|
||||
up_write(&clients_rwsem);
|
||||
up_write(&devices_rwsem);
|
||||
if (need_unreg && ret)
|
||||
ib_unregister_client(client);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_register_client);
|
||||
|
||||
|
|
Loading…
Reference in New Issue