kernel_samsung_a53x/drivers/net/ethernet/hisilicon/hns3/hnae3.c

396 lines
9.5 KiB
C
Raw Normal View History

2024-06-15 16:02:09 -03:00
// SPDX-License-Identifier: GPL-2.0+
// Copyright (c) 2016-2017 Hisilicon Limited.
#include <linux/list.h>
#include <linux/spinlock.h>
#include "hnae3.h"
static LIST_HEAD(hnae3_ae_algo_list);
static LIST_HEAD(hnae3_client_list);
static LIST_HEAD(hnae3_ae_dev_list);
void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo)
{
const struct pci_device_id *pci_id;
struct hnae3_ae_dev *ae_dev;
if (!ae_algo)
return;
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
continue;
pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
if (!pci_id)
continue;
net: hns3: fix kernel crash when uninstalling driver [ Upstream commit df3dff8ab6d79edc942464999d06fbaedf8cdd18 ] When the driver is uninstalled and the VF is disabled concurrently, a kernel crash occurs. The reason is that the two actions call function pci_disable_sriov(). The num_VFs is checked to determine whether to release the corresponding resources. During the second calling, num_VFs is not 0 and the resource release function is called. However, the corresponding resource has been released during the first invoking. Therefore, the problem occurs: [15277.839633][T50670] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [15278.131557][T50670] Call trace: [15278.134686][T50670] klist_put+0x28/0x12c [15278.138682][T50670] klist_del+0x14/0x20 [15278.142592][T50670] device_del+0xbc/0x3c0 [15278.146676][T50670] pci_remove_bus_device+0x84/0x120 [15278.151714][T50670] pci_stop_and_remove_bus_device+0x6c/0x80 [15278.157447][T50670] pci_iov_remove_virtfn+0xb4/0x12c [15278.162485][T50670] sriov_disable+0x50/0x11c [15278.166829][T50670] pci_disable_sriov+0x24/0x30 [15278.171433][T50670] hnae3_unregister_ae_algo_prepare+0x60/0x90 [hnae3] [15278.178039][T50670] hclge_exit+0x28/0xd0 [hclge] [15278.182730][T50670] __se_sys_delete_module.isra.0+0x164/0x230 [15278.188550][T50670] __arm64_sys_delete_module+0x1c/0x30 [15278.193848][T50670] invoke_syscall+0x50/0x11c [15278.198278][T50670] el0_svc_common.constprop.0+0x158/0x164 [15278.203837][T50670] do_el0_svc+0x34/0xcc [15278.207834][T50670] el0_svc+0x20/0x30 For details, see the following figure. rmmod hclge disable VFs ---------------------------------------------------- hclge_exit() sriov_numvfs_store() ... device_lock() pci_disable_sriov() hns3_pci_sriov_configure() pci_disable_sriov() sriov_disable() sriov_disable() if !num_VFs : if !num_VFs : return; return; sriov_del_vfs() sriov_del_vfs() ... ... klist_put() klist_put() ... ... num_VFs = 0; num_VFs = 0; device_unlock(); In this patch, when driver is removing, we get the device_lock() to protect num_VFs, just like sriov_numvfs_store(). Fixes: 0dd8a25f355b ("net: hns3: disable sriov before unload hclge layer") Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com> Signed-off-by: Jijie Shao <shaojijie@huawei.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20241101091507.3644584-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2024-11-01 17:15:07 +08:00
if (IS_ENABLED(CONFIG_PCI_IOV)) {
device_lock(&ae_dev->pdev->dev);
2024-06-15 16:02:09 -03:00
pci_disable_sriov(ae_dev->pdev);
net: hns3: fix kernel crash when uninstalling driver [ Upstream commit df3dff8ab6d79edc942464999d06fbaedf8cdd18 ] When the driver is uninstalled and the VF is disabled concurrently, a kernel crash occurs. The reason is that the two actions call function pci_disable_sriov(). The num_VFs is checked to determine whether to release the corresponding resources. During the second calling, num_VFs is not 0 and the resource release function is called. However, the corresponding resource has been released during the first invoking. Therefore, the problem occurs: [15277.839633][T50670] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [15278.131557][T50670] Call trace: [15278.134686][T50670] klist_put+0x28/0x12c [15278.138682][T50670] klist_del+0x14/0x20 [15278.142592][T50670] device_del+0xbc/0x3c0 [15278.146676][T50670] pci_remove_bus_device+0x84/0x120 [15278.151714][T50670] pci_stop_and_remove_bus_device+0x6c/0x80 [15278.157447][T50670] pci_iov_remove_virtfn+0xb4/0x12c [15278.162485][T50670] sriov_disable+0x50/0x11c [15278.166829][T50670] pci_disable_sriov+0x24/0x30 [15278.171433][T50670] hnae3_unregister_ae_algo_prepare+0x60/0x90 [hnae3] [15278.178039][T50670] hclge_exit+0x28/0xd0 [hclge] [15278.182730][T50670] __se_sys_delete_module.isra.0+0x164/0x230 [15278.188550][T50670] __arm64_sys_delete_module+0x1c/0x30 [15278.193848][T50670] invoke_syscall+0x50/0x11c [15278.198278][T50670] el0_svc_common.constprop.0+0x158/0x164 [15278.203837][T50670] do_el0_svc+0x34/0xcc [15278.207834][T50670] el0_svc+0x20/0x30 For details, see the following figure. rmmod hclge disable VFs ---------------------------------------------------- hclge_exit() sriov_numvfs_store() ... device_lock() pci_disable_sriov() hns3_pci_sriov_configure() pci_disable_sriov() sriov_disable() sriov_disable() if !num_VFs : if !num_VFs : return; return; sriov_del_vfs() sriov_del_vfs() ... ... klist_put() klist_put() ... ... num_VFs = 0; num_VFs = 0; device_unlock(); In this patch, when driver is removing, we get the device_lock() to protect num_VFs, just like sriov_numvfs_store(). Fixes: 0dd8a25f355b ("net: hns3: disable sriov before unload hclge layer") Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com> Signed-off-by: Jijie Shao <shaojijie@huawei.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20241101091507.3644584-1-shaojijie@huawei.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
2024-11-01 17:15:07 +08:00
device_unlock(&ae_dev->pdev->dev);
}
2024-06-15 16:02:09 -03:00
}
}
EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare);
/* we are keeping things simple and using single lock for all the
* list. This is a non-critical code so other updations, if happen
* in parallel, can wait.
*/
static DEFINE_MUTEX(hnae3_common_lock);
static bool hnae3_client_match(enum hnae3_client_type client_type)
{
if (client_type == HNAE3_CLIENT_KNIC ||
client_type == HNAE3_CLIENT_ROCE)
return true;
return false;
}
void hnae3_set_client_init_flag(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev,
unsigned int inited)
{
if (!client || !ae_dev)
return;
switch (client->type) {
case HNAE3_CLIENT_KNIC:
hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited);
break;
case HNAE3_CLIENT_ROCE:
hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited);
break;
default:
break;
}
}
EXPORT_SYMBOL(hnae3_set_client_init_flag);
static int hnae3_get_client_init_flag(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
int inited = 0;
switch (client->type) {
case HNAE3_CLIENT_KNIC:
inited = hnae3_get_bit(ae_dev->flag,
HNAE3_KNIC_CLIENT_INITED_B);
break;
case HNAE3_CLIENT_ROCE:
inited = hnae3_get_bit(ae_dev->flag,
HNAE3_ROCE_CLIENT_INITED_B);
break;
default:
break;
}
return inited;
}
static int hnae3_init_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
int ret;
/* check if this client matches the type of ae_dev */
if (!(hnae3_client_match(client->type) &&
hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
return 0;
}
ret = ae_dev->ops->init_client_instance(client, ae_dev);
if (ret)
dev_err(&ae_dev->pdev->dev,
"fail to instantiate client, ret = %d\n", ret);
return ret;
}
static void hnae3_uninit_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
/* check if this client matches the type of ae_dev */
if (!(hnae3_client_match(client->type) &&
hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B)))
return;
if (hnae3_get_client_init_flag(client, ae_dev)) {
ae_dev->ops->uninit_client_instance(client, ae_dev);
hnae3_set_client_init_flag(client, ae_dev, 0);
}
}
int hnae3_register_client(struct hnae3_client *client)
{
struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
if (!client)
return -ENODEV;
mutex_lock(&hnae3_common_lock);
/* one system should only have one client for every type */
list_for_each_entry(client_tmp, &hnae3_client_list, node) {
if (client_tmp->type == client->type)
goto exit;
}
list_add_tail(&client->node, &hnae3_client_list);
/* initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
/* if the client could not be initialized on current port, for
* any error reasons, move on to next available port
*/
int ret = hnae3_init_client_instance(client, ae_dev);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed for port, ret = %d\n",
ret);
}
exit:
mutex_unlock(&hnae3_common_lock);
return 0;
}
EXPORT_SYMBOL(hnae3_register_client);
void hnae3_unregister_client(struct hnae3_client *client)
{
struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
bool existed = false;
if (!client)
return;
mutex_lock(&hnae3_common_lock);
/* one system should only have one client for every type */
list_for_each_entry(client_tmp, &hnae3_client_list, node) {
if (client_tmp->type == client->type) {
existed = true;
break;
}
}
if (!existed) {
mutex_unlock(&hnae3_common_lock);
pr_err("client %s does not exist!\n", client->name);
return;
}
/* un-initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
hnae3_uninit_client_instance(client, ae_dev);
}
list_del(&client->node);
mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_client);
/* hnae3_register_ae_algo - register a AE algorithm to hnae3 framework
* @ae_algo: AE algorithm
* NOTE: the duplicated name will not be checked
*/
void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
{
const struct pci_device_id *id;
struct hnae3_ae_dev *ae_dev;
struct hnae3_client *client;
int ret;
if (!ae_algo)
return;
mutex_lock(&hnae3_common_lock);
list_add_tail(&ae_algo->node, &hnae3_ae_algo_list);
/* Check if this algo/ops matches the list of ae_devs */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
if (!id)
continue;
if (!ae_algo->ops) {
dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n");
continue;
}
ae_dev->ops = ae_algo->ops;
ret = ae_algo->ops->init_ae_dev(ae_dev);
if (ret) {
dev_err(&ae_dev->pdev->dev,
"init ae_dev error, ret = %d\n", ret);
continue;
}
/* ae_dev init should set flag */
hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
/* check the client list for the match with this ae_dev type and
* initialize the figure out client instance
*/
list_for_each_entry(client, &hnae3_client_list, node) {
ret = hnae3_init_client_instance(client, ae_dev);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed, ret = %d\n",
ret);
}
}
mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_register_ae_algo);
/* hnae3_unregister_ae_algo - unregisters a AE algorithm
* @ae_algo: the AE algorithm to unregister
*/
void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
{
const struct pci_device_id *id;
struct hnae3_ae_dev *ae_dev;
struct hnae3_client *client;
if (!ae_algo)
return;
mutex_lock(&hnae3_common_lock);
/* Check if there are matched ae_dev */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
continue;
id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
if (!id)
continue;
/* check the client list for the match with this ae_dev type and
* un-initialize the figure out client instance
*/
list_for_each_entry(client, &hnae3_client_list, node)
hnae3_uninit_client_instance(client, ae_dev);
ae_algo->ops->uninit_ae_dev(ae_dev);
hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
ae_dev->ops = NULL;
}
list_del(&ae_algo->node);
mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_ae_algo);
/* hnae3_register_ae_dev - registers a AE device to hnae3 framework
* @ae_dev: the AE device
* NOTE: the duplicated name will not be checked
*/
int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
{
const struct pci_device_id *id;
struct hnae3_ae_algo *ae_algo;
struct hnae3_client *client;
int ret;
if (!ae_dev)
return -ENODEV;
mutex_lock(&hnae3_common_lock);
list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);
/* Check if there are matched ae_algo */
list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
if (!id)
continue;
if (!ae_algo->ops) {
dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n");
ret = -EOPNOTSUPP;
goto out_err;
}
ae_dev->ops = ae_algo->ops;
ret = ae_dev->ops->init_ae_dev(ae_dev);
if (ret) {
dev_err(&ae_dev->pdev->dev,
"init ae_dev error, ret = %d\n", ret);
goto out_err;
}
/* ae_dev init should set flag */
hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
break;
}
/* check the client list for the match with this ae_dev type and
* initialize the figure out client instance
*/
list_for_each_entry(client, &hnae3_client_list, node) {
ret = hnae3_init_client_instance(client, ae_dev);
if (ret)
dev_err(&ae_dev->pdev->dev,
"match and instantiation failed, ret = %d\n",
ret);
}
mutex_unlock(&hnae3_common_lock);
return 0;
out_err:
list_del(&ae_dev->node);
mutex_unlock(&hnae3_common_lock);
return ret;
}
EXPORT_SYMBOL(hnae3_register_ae_dev);
/* hnae3_unregister_ae_dev - unregisters a AE device
* @ae_dev: the AE device to unregister
*/
void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
{
const struct pci_device_id *id;
struct hnae3_ae_algo *ae_algo;
struct hnae3_client *client;
if (!ae_dev)
return;
mutex_lock(&hnae3_common_lock);
/* Check if there are matched ae_algo */
list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
continue;
id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
if (!id)
continue;
list_for_each_entry(client, &hnae3_client_list, node)
hnae3_uninit_client_instance(client, ae_dev);
ae_algo->ops->uninit_ae_dev(ae_dev);
hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
ae_dev->ops = NULL;
}
list_del(&ae_dev->node);
mutex_unlock(&hnae3_common_lock);
}
EXPORT_SYMBOL(hnae3_unregister_ae_dev);
MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("HNAE3(Hisilicon Network Acceleration Engine) Framework");
MODULE_VERSION(HNAE3_MOD_VERSION);