RDMA/device: Fix a race between mad_client and cm_client init
[ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ]

The mad_client will be initialized in enable_device_and_get(), while the
devices_rwsem will be downgraded to a read semaphore. There is a window
that leads to the failed initialization for cm_client, since it can not
get matched mad port from ib_mad_port_list, and the matched mad port will
be added to the list after that.

    mad_client    |                       cm_client
------------------|--------------------------------------------------------
ib_register_device|
enable_device_and_get
down_write(&devices_rwsem)
xa_set_mark(&devices, DEVICE_REGISTERED)
downgrade_write(&devices_rwsem)
                  |
                  |ib_cm_init
                  |ib_register_client(&cm_client)
                  |down_read(&devices_rwsem)
                  |xa_for_each_marked (&devices, DEVICE_REGISTERED)
                  |add_client_context
                  |cm_add_one
                  |ib_register_mad_agent
                  |ib_get_mad_port
                  |__ib_get_mad_port
                  |list_for_each_entry(entry, &ib_mad_port_list, port_list)
                  |return NULL
                  |up_read(&devices_rwsem)
                  |
add_client_context|
ib_mad_init_device|
ib_mad_port_open  |
list_add_tail(&port_priv->port_list, &ib_mad_port_list)
up_read(&devices_rwsem)
                  |

Fix it by using down_write(&devices_rwsem) in ib_register_client().

Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration")
Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Shifeng Li <lishifeng@sangfor.com.cn>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
51596dfaaa
commit
90f1fbe86a
1 changed files with 22 additions and 15 deletions
|
@ -1686,7 +1686,7 @@ static int assign_client_id(struct ib_client *client)
|
|||
{
|
||||
int ret;
|
||||
|
||||
down_write(&clients_rwsem);
|
||||
lockdep_assert_held(&clients_rwsem);
|
||||
/*
|
||||
* The add/remove callbacks must be called in FIFO/LIFO order. To
|
||||
* achieve this we assign client_ids so they are sorted in
|
||||
|
@ -1695,14 +1695,11 @@ static int assign_client_id(struct ib_client *client)
|
|||
client->client_id = highest_client_id;
|
||||
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
highest_client_id++;
|
||||
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
|
||||
|
||||
out:
|
||||
up_write(&clients_rwsem);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void remove_client_id(struct ib_client *client)
|
||||
|
@ -1732,25 +1729,35 @@ int ib_register_client(struct ib_client *client)
|
|||
{
|
||||
struct ib_device *device;
|
||||
unsigned long index;
|
||||
bool need_unreg = false;
|
||||
int ret;
|
||||
|
||||
refcount_set(&client->uses, 1);
|
||||
init_completion(&client->uses_zero);
|
||||
|
||||
/*
|
||||
* The devices_rwsem is held in write mode to ensure that a racing
|
||||
* ib_register_device() sees a consistent view of clients and devices.
|
||||
*/
|
||||
down_write(&devices_rwsem);
|
||||
down_write(&clients_rwsem);
|
||||
ret = assign_client_id(client);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
down_read(&devices_rwsem);
|
||||
need_unreg = true;
|
||||
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
|
||||
ret = add_client_context(device, client);
|
||||
if (ret) {
|
||||
up_read(&devices_rwsem);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
up_write(&clients_rwsem);
|
||||
up_write(&devices_rwsem);
|
||||
if (need_unreg && ret)
|
||||
ib_unregister_client(client);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
up_read(&devices_rwsem);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_register_client);
|
||||
|
||||
|
|
Loading…
Reference in a new issue