kernel_samsung_a53x/drivers/net/ethernet/hisilicon/hns3/hnae3.c
Peiyang Wang 4152c38410 net: hns3: fix kernel crash when uninstalling driver
[ Upstream commit df3dff8ab6d79edc942464999d06fbaedf8cdd18 ]

When the driver is uninstalled and the VF is disabled concurrently, a
kernel crash occurs. The reason is that both actions call
pci_disable_sriov(), which checks num_VFs to determine whether to release
the corresponding resources. During the second call, num_VFs is still not
0, so the resource release function is called again even though the
corresponding resources were already released by the first call.
Therefore, the problem occurs:

[15277.839633][T50670] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020
...
[15278.131557][T50670] Call trace:
[15278.134686][T50670]  klist_put+0x28/0x12c
[15278.138682][T50670]  klist_del+0x14/0x20
[15278.142592][T50670]  device_del+0xbc/0x3c0
[15278.146676][T50670]  pci_remove_bus_device+0x84/0x120
[15278.151714][T50670]  pci_stop_and_remove_bus_device+0x6c/0x80
[15278.157447][T50670]  pci_iov_remove_virtfn+0xb4/0x12c
[15278.162485][T50670]  sriov_disable+0x50/0x11c
[15278.166829][T50670]  pci_disable_sriov+0x24/0x30
[15278.171433][T50670]  hnae3_unregister_ae_algo_prepare+0x60/0x90 [hnae3]
[15278.178039][T50670]  hclge_exit+0x28/0xd0 [hclge]
[15278.182730][T50670]  __se_sys_delete_module.isra.0+0x164/0x230
[15278.188550][T50670]  __arm64_sys_delete_module+0x1c/0x30
[15278.193848][T50670]  invoke_syscall+0x50/0x11c
[15278.198278][T50670]  el0_svc_common.constprop.0+0x158/0x164
[15278.203837][T50670]  do_el0_svc+0x34/0xcc
[15278.207834][T50670]  el0_svc+0x20/0x30

For details, see the following figure.

     rmmod hclge              disable VFs
----------------------------------------------------
hclge_exit()            sriov_numvfs_store()
  ...                     device_lock()
  pci_disable_sriov()     hns3_pci_sriov_configure()
                            pci_disable_sriov()
                              sriov_disable()
    sriov_disable()             if !num_VFs :
      if !num_VFs :               return;
        return;                 sriov_del_vfs()
      sriov_del_vfs()             ...
        ...                       klist_put()
        klist_put()               ...
        ...                     num_VFs = 0;
      num_VFs = 0;        device_unlock();

With this patch, the driver takes device_lock() while it is being removed
in order to protect num_VFs, just as sriov_numvfs_store() does.

Fixes: 0dd8a25f355b ("net: hns3: disable sriov before unload hclge layer")
Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20241101091507.3644584-1-shaojijie@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2024-11-30 02:33:22 +01:00

395 lines
9.5 KiB
C
Executable file

// SPDX-License-Identifier: GPL-2.0+
// Copyright (c) 2016-2017 Hisilicon Limited.
#include <linux/list.h>
#include <linux/spinlock.h>
#include "hnae3.h"
/* Framework-wide lists of the registered AE algorithms, clients and AE
 * devices. Entries are linked in through their "node" member.
 */
static LIST_HEAD(hnae3_ae_algo_list);
static LIST_HEAD(hnae3_client_list);
static LIST_HEAD(hnae3_ae_dev_list);
/* hnae3_unregister_ae_algo_prepare - disable SR-IOV ahead of algo removal
 * @ae_algo: the AE algorithm that is about to be unregistered
 *
 * For every initialized AE device whose PCI id matches @ae_algo, SR-IOV is
 * turned off when CONFIG_PCI_IOV is enabled. The device lock is held around
 * pci_disable_sriov() so that this path cannot run concurrently with a VF
 * disable request issued through sriov_numvfs_store(), which takes the same
 * lock.
 */
void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo)
{
	struct hnae3_ae_dev *dev;

	if (!ae_algo)
		return;

	list_for_each_entry(dev, &hnae3_ae_dev_list, node) {
		/* only initialized devices matching this algo are relevant */
		if (!hnae3_get_bit(dev->flag, HNAE3_DEV_INITED_B) ||
		    !pci_match_id(ae_algo->pdev_id_table, dev->pdev))
			continue;

		if (!IS_ENABLED(CONFIG_PCI_IOV))
			continue;

		device_lock(&dev->pdev->dev);
		pci_disable_sriov(dev->pdev);
		device_unlock(&dev->pdev->dev);
	}
}
EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare);
/* To keep things simple, a single lock is used for all of the lists.
 * This is not performance-critical code, so other updates that happen
 * in parallel can simply wait.
 */
static DEFINE_MUTEX(hnae3_common_lock);
/* Report whether @client_type is one of the client types handled here
 * (KNIC or RoCE).
 */
static bool hnae3_client_match(enum hnae3_client_type client_type)
{
	switch (client_type) {
	case HNAE3_CLIENT_KNIC:
	case HNAE3_CLIENT_ROCE:
		return true;
	default:
		return false;
	}
}
/* hnae3_set_client_init_flag - record whether a client is inited on a device
 * @client: the client whose type selects the flag bit
 * @ae_dev: the AE device whose flag word is updated
 * @inited: value to store in the selected bit
 *
 * Nothing happens when either pointer is NULL or when the client type is
 * neither KNIC nor RoCE.
 */
void hnae3_set_client_init_flag(struct hnae3_client *client,
				struct hnae3_ae_dev *ae_dev,
				unsigned int inited)
{
	if (!client || !ae_dev)
		return;

	if (client->type == HNAE3_CLIENT_KNIC) {
		hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited);
	} else if (client->type == HNAE3_CLIENT_ROCE) {
		hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited);
	}
}
EXPORT_SYMBOL(hnae3_set_client_init_flag);
/* Read back the per-client "inited" bit of @ae_dev that corresponds to the
 * type of @client. Client types other than KNIC and RoCE yield 0.
 */
static int hnae3_get_client_init_flag(struct hnae3_client *client,
				      struct hnae3_ae_dev *ae_dev)
{
	if (client->type == HNAE3_CLIENT_KNIC)
		return hnae3_get_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B);

	if (client->type == HNAE3_CLIENT_ROCE)
		return hnae3_get_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B);

	return 0;
}
/* Instantiate @client on @ae_dev through the device's init_client_instance
 * op.
 *
 * The call is skipped, and 0 is returned, when the client type is not a
 * supported one or when the device has not been initialized yet. Otherwise
 * the op's return value is passed back, and a failure is logged.
 */
static int hnae3_init_client_instance(struct hnae3_client *client,
				      struct hnae3_ae_dev *ae_dev)
{
	int err;

	/* nothing to do unless the client type is known and the dev is up */
	if (!hnae3_client_match(client->type) ||
	    !hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
		return 0;

	err = ae_dev->ops->init_client_instance(client, ae_dev);
	if (!err)
		return 0;

	dev_err(&ae_dev->pdev->dev,
		"fail to instantiate client, ret = %d\n", err);
	return err;
}
/* Tear down the instance of @client on @ae_dev, if there is one.
 *
 * The uninit op is only invoked when the client type is supported, the
 * device is initialized and the client's inited flag is set on the device;
 * the flag is cleared afterwards.
 */
static void hnae3_uninit_client_instance(struct hnae3_client *client,
					 struct hnae3_ae_dev *ae_dev)
{
	if (!hnae3_client_match(client->type))
		return;

	if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
		return;

	if (!hnae3_get_client_init_flag(client, ae_dev))
		return;

	ae_dev->ops->uninit_client_instance(client, ae_dev);
	hnae3_set_client_init_flag(client, ae_dev, 0);
}
int hnae3_register_client(struct hnae3_client *client)
{
struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
if (!client)
return -ENODEV;
mutex_lock(&hnae3_common_lock);
/* one system should only have one client for every type */
list_for_each_entry(client_tmp, &hnae3_client_list, node) {
if (client_tmp->type == client->type)
goto exit;
}
list_add_tail(&client->node, &hnae3_client_list);
/* initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
/* if the client could not be initialized on current port, for
* any error reasons, move on to next available port
*/
int ret = hnae3_init_client_instance(client, ae_dev);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed for port, ret = %d\n",
ret);
}
exit:
mutex_unlock(&hnae3_common_lock);
return 0;
}
EXPORT_SYMBOL(hnae3_register_client);
/* hnae3_unregister_client - unregister a client from the hnae3 framework
 * @client: the client to unregister
 *
 * The client is un-initialized on every AE device on the device list and
 * then removed from the client list. If @client itself is not linked into
 * the client list, an error is logged and nothing else is done.
 */
void hnae3_unregister_client(struct hnae3_client *client)
{
	struct hnae3_client *client_tmp;
	struct hnae3_ae_dev *ae_dev;
	bool existed = false;

	if (!client)
		return;

	mutex_lock(&hnae3_common_lock);

	/* Look for this very client rather than for any client of the same
	 * type: hnae3_register_client() does not link a client whose type is
	 * already taken, and such a client must neither tear down the
	 * instances of the registered one nor be passed to list_del().
	 */
	list_for_each_entry(client_tmp, &hnae3_client_list, node) {
		if (client_tmp == client) {
			existed = true;
			break;
		}
	}

	if (!existed) {
		mutex_unlock(&hnae3_common_lock);
		pr_err("client %s does not exist!\n", client->name);
		return;
	}

	/* un-initialize the client on every matched port */
	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
		hnae3_uninit_client_instance(client, ae_dev);
	}

	list_del(&client->node);
	mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_client);
/* hnae3_register_ae_algo - add an AE algorithm to the hnae3 framework
 * @ae_algo: the AE algorithm to add
 *
 * The algorithm is linked into the algo list, then every AE device whose PCI
 * id matches its id table is initialized with the algorithm's ops and every
 * listed client is instantiated on that device. Failures are logged per
 * device and do not abort the registration.
 *
 * NOTE: duplicated names are not checked for.
 */
void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
{
	struct hnae3_client *cl;
	struct hnae3_ae_dev *dev;
	int err;

	if (!ae_algo)
		return;

	mutex_lock(&hnae3_common_lock);

	list_add_tail(&ae_algo->node, &hnae3_ae_algo_list);

	/* look for AE devices this algo can drive */
	list_for_each_entry(dev, &hnae3_ae_dev_list, node) {
		if (!pci_match_id(ae_algo->pdev_id_table, dev->pdev))
			continue;

		if (!ae_algo->ops) {
			dev_err(&dev->pdev->dev, "ae_algo ops are null\n");
			continue;
		}

		dev->ops = ae_algo->ops;
		err = dev->ops->init_ae_dev(dev);
		if (err) {
			dev_err(&dev->pdev->dev,
				"init ae_dev error, ret = %d\n", err);
			continue;
		}

		/* mark the device as initialized */
		hnae3_set_bit(dev->flag, HNAE3_DEV_INITED_B, 1);

		/* bring up every listed client on the freshly inited device */
		list_for_each_entry(cl, &hnae3_client_list, node) {
			err = hnae3_init_client_instance(cl, dev);
			if (err)
				dev_err(&dev->pdev->dev,
					"match and instantiation failed, ret = %d\n",
					err);
		}
	}

	mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_register_ae_algo);
/* hnae3_unregister_ae_algo - remove an AE algorithm from the framework
 * @ae_algo: the AE algorithm to remove
 *
 * Every initialized AE device whose PCI id matches the algorithm has its
 * client instances torn down, is un-initialized through the algorithm's ops
 * and gets its inited flag and ops pointer cleared. The algorithm is then
 * unlinked from the algo list.
 */
void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
{
	struct hnae3_client *cl;
	struct hnae3_ae_dev *dev;

	if (!ae_algo)
		return;

	mutex_lock(&hnae3_common_lock);

	list_for_each_entry(dev, &hnae3_ae_dev_list, node) {
		/* skip devices that are not up or do not match this algo */
		if (!hnae3_get_bit(dev->flag, HNAE3_DEV_INITED_B) ||
		    !pci_match_id(ae_algo->pdev_id_table, dev->pdev))
			continue;

		/* clients go first, then the device itself */
		list_for_each_entry(cl, &hnae3_client_list, node) {
			hnae3_uninit_client_instance(cl, dev);
		}

		ae_algo->ops->uninit_ae_dev(dev);
		hnae3_set_bit(dev->flag, HNAE3_DEV_INITED_B, 0);
		dev->ops = NULL;
	}

	list_del(&ae_algo->node);

	mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_ae_algo);
/* hnae3_register_ae_dev - add an AE device to the hnae3 framework
 * @ae_dev: the AE device to add
 *
 * The device is linked into the device list and initialized with the first
 * listed algorithm whose PCI id table matches it. Afterwards every listed
 * client is instantiated on the device; client failures are only logged.
 * If the matching algorithm has no ops or its init_ae_dev op fails, the
 * device is unlinked again and the error is returned.
 *
 * NOTE: duplicated names are not checked for.
 *
 * Return: 0 on success, -ENODEV if @ae_dev is NULL, -EOPNOTSUPP if the
 * matching algorithm has no ops, or the error from init_ae_dev.
 */
int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
{
	struct hnae3_ae_algo *algo;
	struct hnae3_client *cl;
	int ret;

	if (!ae_dev)
		return -ENODEV;

	mutex_lock(&hnae3_common_lock);

	list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);

	/* find the algo that is able to drive this device */
	list_for_each_entry(algo, &hnae3_ae_algo_list, node) {
		if (!pci_match_id(algo->pdev_id_table, ae_dev->pdev))
			continue;

		if (!algo->ops) {
			dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n");
			ret = -EOPNOTSUPP;
			goto out_err;
		}

		ae_dev->ops = algo->ops;
		ret = algo->ops->init_ae_dev(ae_dev);
		if (ret) {
			dev_err(&ae_dev->pdev->dev,
				"init ae_dev error, ret = %d\n", ret);
			goto out_err;
		}

		/* mark the device as initialized; first match wins */
		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
		break;
	}

	/* bring up every listed client; failures are logged, not returned */
	list_for_each_entry(cl, &hnae3_client_list, node) {
		int err = hnae3_init_client_instance(cl, ae_dev);

		if (err)
			dev_err(&ae_dev->pdev->dev,
				"match and instantiation failed, ret = %d\n",
				err);
	}

	mutex_unlock(&hnae3_common_lock);
	return 0;

out_err:
	list_del(&ae_dev->node);
	mutex_unlock(&hnae3_common_lock);
	return ret;
}
EXPORT_SYMBOL(hnae3_register_ae_dev);
/* hnae3_unregister_ae_dev - remove an AE device from the framework
 * @ae_dev: the AE device to remove
 *
 * While the device is flagged as initialized, each listed algorithm whose
 * PCI id table matches it is used to tear the device down: client instances
 * first, then the device itself, after which the inited flag and the ops
 * pointer are cleared. The device is finally unlinked from the device list.
 */
void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
{
	struct hnae3_ae_algo *algo;
	struct hnae3_client *cl;

	if (!ae_dev)
		return;

	mutex_lock(&hnae3_common_lock);

	list_for_each_entry(algo, &hnae3_ae_algo_list, node) {
		/* nothing to undo unless the dev is up and the algo matches */
		if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B) ||
		    !pci_match_id(algo->pdev_id_table, ae_dev->pdev))
			continue;

		list_for_each_entry(cl, &hnae3_client_list, node) {
			hnae3_uninit_client_instance(cl, ae_dev);
		}

		algo->ops->uninit_ae_dev(ae_dev);
		hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
		ae_dev->ops = NULL;
	}

	list_del(&ae_dev->node);

	mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_ae_dev);
/* Module metadata: author, license, description and version. */
MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("HNAE3(Hisilicon Network Acceleration Engine) Framework");
MODULE_VERSION(HNAE3_MOD_VERSION);