OpenCloudOS-Kernel/drivers/crypto/qat/qat_dh895xcc/adf_drv.c

283 lines
7.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/workqueue.h>
#include <linux/io.h>
#include <adf_accel_devices.h>
#include <adf_common_drv.h>
#include <adf_cfg.h>
#include "adf_dh895xcc_hw_data.h"
/* PCI IDs claimed by this driver; the empty entry terminates the table. */
static const struct pci_device_id adf_pci_tbl[] = {
	{
		PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_QAT_DH895XCC),
	},
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
/* Defined further down; declared here so adf_driver can reference them. */
static int adf_probe(struct pci_dev *dev, const struct pci_device_id *ent);
static void adf_remove(struct pci_dev *dev);

/* PCI driver descriptor registered with the PCI core in adfdrv_init(). */
static struct pci_driver adf_driver = {
	.name			= ADF_DH895XCC_DEVICE_NAME,
	.id_table		= adf_pci_tbl,
	.probe			= adf_probe,
	.remove			= adf_remove,
	.sriov_configure	= adf_sriov_configure,
};
/*
 * Release the PCI regions claimed for the accelerator and then disable the
 * underlying PCI device.
 */
static void adf_cleanup_pci_dev(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev;

	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
/*
 * Tear down the per-accelerator state: unmap every BAR that was mapped,
 * clean and free the hardware description, remove the configuration table
 * and the debugfs directory, and drop the device from the device manager.
 *
 * The accel_dev structure itself is not freed here; callers do that.
 */
static void adf_cleanup_accel(struct adf_accel_dev *accel_dev)
{
	struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
	int idx;

	for (idx = 0; idx < ADF_PCI_MAX_BARS; idx++) {
		struct adf_bar *entry = &pci_info->pci_bars[idx];

		/* Slots that were never mapped have a NULL virt_addr. */
		if (!entry->virt_addr)
			continue;

		pci_iounmap(pci_info->pci_dev, entry->virt_addr);
	}

	if (accel_dev->hw_device) {
		/* Only the DH895xCC device ID has a hw_data cleanup hook here. */
		if (pci_info->pci_dev->device ==
		    PCI_DEVICE_ID_INTEL_QAT_DH895XCC)
			adf_clean_hw_data_dh895xcc(accel_dev->hw_device);

		kfree(accel_dev->hw_device);
		accel_dev->hw_device = NULL;
	}

	adf_cfg_dev_remove(accel_dev);
	debugfs_remove(accel_dev->debugfs_dir);
	adf_devmgr_rm_dev(accel_dev, NULL);
}
static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct adf_accel_dev *accel_dev;
struct adf_accel_pci *accel_pci_dev;
struct adf_hw_device_data *hw_data;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
crypto: qat - Fix KASAN stack-out-of-bounds bug in adf_probe() The following KASAN warning was printed when booting a 64-bit kernel on some systems with Intel CPUs: [ 44.512826] ================================================================== [ 44.520165] BUG: KASAN: stack-out-of-bounds in find_first_bit+0xb0/0xc0 [ 44.526786] Read of size 8 at addr ffff88041e02fc50 by task kworker/0:2/124 [ 44.535253] CPU: 0 PID: 124 Comm: kworker/0:2 Tainted: G X --------- --- 4.18.0-12.el8.x86_64+debug #1 [ 44.545858] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS BKVDTRL1.86B.0005.D08.1712070559 12/07/2017 [ 44.555682] Workqueue: events work_for_cpu_fn [ 44.560043] Call Trace: [ 44.562502] dump_stack+0x9a/0xe9 [ 44.565832] print_address_description+0x65/0x22e [ 44.570683] ? find_first_bit+0xb0/0xc0 [ 44.570689] kasan_report.cold.6+0x92/0x19f [ 44.578726] find_first_bit+0xb0/0xc0 [ 44.578737] adf_probe+0x9eb/0x19a0 [qat_c62x] [ 44.578751] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.591490] ? mark_held_locks+0xc8/0x140 [ 44.591498] ? _raw_spin_unlock+0x30/0x30 [ 44.591505] ? trace_hardirqs_on_caller+0x381/0x570 [ 44.604418] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.604427] local_pci_probe+0xd4/0x180 [ 44.604432] ? pci_device_shutdown+0x110/0x110 [ 44.617386] work_for_cpu_fn+0x51/0xa0 [ 44.621145] process_one_work+0x8fe/0x16e0 [ 44.625263] ? pwq_dec_nr_in_flight+0x2d0/0x2d0 [ 44.629799] ? lock_acquire+0x14c/0x400 [ 44.633645] ? move_linked_works+0x12e/0x2a0 [ 44.637928] worker_thread+0x536/0xb50 [ 44.641690] ? __kthread_parkme+0xb6/0x180 [ 44.645796] ? process_one_work+0x16e0/0x16e0 [ 44.650160] kthread+0x30c/0x3d0 [ 44.653400] ? 
kthread_create_worker_on_cpu+0xc0/0xc0 [ 44.658457] ret_from_fork+0x3a/0x50 [ 44.663557] The buggy address belongs to the page: [ 44.668350] page:ffffea0010780bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 44.676356] flags: 0x17ffffc0000000() [ 44.680023] raw: 0017ffffc0000000 ffffea0010780bc8 ffffea0010780bc8 0000000000000000 [ 44.687769] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 44.695510] page dumped because: kasan: bad access detected [ 44.702578] Memory state around the buggy address: [ 44.707372] ffff88041e02fb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.714593] ffff88041e02fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.721810] >ffff88041e02fc00: 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 f2 f2 [ 44.729028] ^ [ 44.734864] ffff88041e02fc80: f2 f2 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00 [ 44.742082] ffff88041e02fd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.749299] ================================================================== Looking into the code: int ret, bar_mask; : for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, It is casting a 32-bit integer pointer to a 64-bit unsigned long pointer. There are two problems here. First, the 32-bit pointer address may not be 64-bit aligned. Secondly, it is accessing an extra 4 bytes. This is fixed by changing the bar_mask type to unsigned long. Cc: <stable@vger.kernel.org> Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-09-23 08:41:55 +08:00
unsigned long bar_mask;
int ret;
switch (ent->device) {
case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
break;
default:
dev_err(&pdev->dev, "Invalid device 0x%x.\n", ent->device);
return -ENODEV;
}
if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) {
/* If the accelerator is connected to a node with no memory
* there is no point in using the accelerator since the remote
* memory transaction will be very slow. */
dev_err(&pdev->dev, "Invalid NUMA configuration.\n");
return -EINVAL;
}
accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL,
dev_to_node(&pdev->dev));
if (!accel_dev)
return -ENOMEM;
INIT_LIST_HEAD(&accel_dev->crypto_list);
accel_pci_dev = &accel_dev->accel_pci_dev;
accel_pci_dev->pci_dev = pdev;
/* Add accel device to accel table.
* This should be called before adf_cleanup_accel is called */
if (adf_devmgr_add_dev(accel_dev, NULL)) {
dev_err(&pdev->dev, "Failed to add new accelerator device.\n");
kfree(accel_dev);
return -EFAULT;
}
accel_dev->owner = THIS_MODULE;
/* Allocate and configure device configuration structure */
hw_data = kzalloc_node(sizeof(*hw_data), GFP_KERNEL,
dev_to_node(&pdev->dev));
if (!hw_data) {
ret = -ENOMEM;
goto out_err;
}
accel_dev->hw_device = hw_data;
adf_init_hw_data_dh895xcc(accel_dev->hw_device);
pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid);
pci_read_config_dword(pdev, ADF_DEVICE_FUSECTL_OFFSET,
&hw_data->fuses);
/* Get Accelerators and Accelerators Engines masks */
hw_data->accel_mask = hw_data->get_accel_mask(hw_data);
hw_data->ae_mask = hw_data->get_ae_mask(hw_data);
accel_pci_dev->sku = hw_data->get_sku(hw_data);
/* If the device has no acceleration engines then ignore it. */
if (!hw_data->accel_mask || !hw_data->ae_mask ||
((~hw_data->ae_mask) & 0x01)) {
dev_err(&pdev->dev, "No acceleration units found");
ret = -EFAULT;
goto out_err;
}
/* Create dev top level debugfs entry */
snprintf(name, sizeof(name), "%s%s_%s", ADF_DEVICE_NAME_PREFIX,
hw_data->dev_class->name, pci_name(pdev));
accel_dev->debugfs_dir = debugfs_create_dir(name, NULL);
/* Create device configuration table */
ret = adf_cfg_dev_add(accel_dev);
if (ret)
goto out_err;
pcie_set_readrq(pdev, 1024);
/* enable PCI device */
if (pci_enable_device(pdev)) {
ret = -EFAULT;
goto out_err;
}
/* set dma identifier */
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
dev_err(&pdev->dev, "No usable DMA configuration\n");
ret = -EFAULT;
goto out_err_disable;
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
}
} else {
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
}
if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
ret = -EFAULT;
goto out_err_disable;
}
/* Get accelerator capabilities mask */
hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev);
/* Find and map all the device's BARS */
i = 0;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
crypto: qat - Fix KASAN stack-out-of-bounds bug in adf_probe() The following KASAN warning was printed when booting a 64-bit kernel on some systems with Intel CPUs: [ 44.512826] ================================================================== [ 44.520165] BUG: KASAN: stack-out-of-bounds in find_first_bit+0xb0/0xc0 [ 44.526786] Read of size 8 at addr ffff88041e02fc50 by task kworker/0:2/124 [ 44.535253] CPU: 0 PID: 124 Comm: kworker/0:2 Tainted: G X --------- --- 4.18.0-12.el8.x86_64+debug #1 [ 44.545858] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS BKVDTRL1.86B.0005.D08.1712070559 12/07/2017 [ 44.555682] Workqueue: events work_for_cpu_fn [ 44.560043] Call Trace: [ 44.562502] dump_stack+0x9a/0xe9 [ 44.565832] print_address_description+0x65/0x22e [ 44.570683] ? find_first_bit+0xb0/0xc0 [ 44.570689] kasan_report.cold.6+0x92/0x19f [ 44.578726] find_first_bit+0xb0/0xc0 [ 44.578737] adf_probe+0x9eb/0x19a0 [qat_c62x] [ 44.578751] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.591490] ? mark_held_locks+0xc8/0x140 [ 44.591498] ? _raw_spin_unlock+0x30/0x30 [ 44.591505] ? trace_hardirqs_on_caller+0x381/0x570 [ 44.604418] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.604427] local_pci_probe+0xd4/0x180 [ 44.604432] ? pci_device_shutdown+0x110/0x110 [ 44.617386] work_for_cpu_fn+0x51/0xa0 [ 44.621145] process_one_work+0x8fe/0x16e0 [ 44.625263] ? pwq_dec_nr_in_flight+0x2d0/0x2d0 [ 44.629799] ? lock_acquire+0x14c/0x400 [ 44.633645] ? move_linked_works+0x12e/0x2a0 [ 44.637928] worker_thread+0x536/0xb50 [ 44.641690] ? __kthread_parkme+0xb6/0x180 [ 44.645796] ? process_one_work+0x16e0/0x16e0 [ 44.650160] kthread+0x30c/0x3d0 [ 44.653400] ? 
kthread_create_worker_on_cpu+0xc0/0xc0 [ 44.658457] ret_from_fork+0x3a/0x50 [ 44.663557] The buggy address belongs to the page: [ 44.668350] page:ffffea0010780bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 44.676356] flags: 0x17ffffc0000000() [ 44.680023] raw: 0017ffffc0000000 ffffea0010780bc8 ffffea0010780bc8 0000000000000000 [ 44.687769] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 44.695510] page dumped because: kasan: bad access detected [ 44.702578] Memory state around the buggy address: [ 44.707372] ffff88041e02fb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.714593] ffff88041e02fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.721810] >ffff88041e02fc00: 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 f2 f2 [ 44.729028] ^ [ 44.734864] ffff88041e02fc80: f2 f2 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00 [ 44.742082] ffff88041e02fd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.749299] ================================================================== Looking into the code: int ret, bar_mask; : for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, It is casting a 32-bit integer pointer to a 64-bit unsigned long pointer. There are two problems here. First, the 32-bit pointer address may not be 64-bit aligned. Secondly, it is accessing an extra 4 bytes. This is fixed by changing the bar_mask type to unsigned long. Cc: <stable@vger.kernel.org> Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-09-23 08:41:55 +08:00
for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) {
struct adf_bar *bar = &accel_pci_dev->pci_bars[i++];
bar->base_addr = pci_resource_start(pdev, bar_nr);
if (!bar->base_addr)
break;
bar->size = pci_resource_len(pdev, bar_nr);
bar->virt_addr = pci_iomap(accel_pci_dev->pci_dev, bar_nr, 0);
if (!bar->virt_addr) {
dev_err(&pdev->dev, "Failed to map BAR %d\n", bar_nr);
ret = -EFAULT;
goto out_err_free_reg;
}
}
pci_set_master(pdev);
if (adf_enable_aer(accel_dev)) {
dev_err(&pdev->dev, "Failed to enable aer\n");
ret = -EFAULT;
goto out_err_free_reg;
}
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
goto out_err_free_reg;
}
ret = qat_crypto_dev_config(accel_dev);
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-10 03:54:58 +08:00
if (ret)
goto out_err_free_reg;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-10 03:54:58 +08:00
ret = adf_dev_init(accel_dev);
if (ret)
goto out_err_dev_shutdown;
crypto: qat - fix device reset flow When the device needs a reset, e.g. when an uncorrectable PCIe AER event occurs, various services/data structures need to be cleaned up, the hardware reset and the services/data structures initialized and started. The code to perform the cleanup and initialization was not performed when a device reset was done. This patch moves some of the initialization code out of the .probe entry- point into a separate function that is now called during probe as well as after the hardware has been reset. Similarly, a new function is added for first cleaning up these services/data structures prior to resetting. The new functions are adf_dev_init() and adf_dev_shutdown(), respectively, for which there are already prototypes but no actual functions just yet and are now called when the device is reset and during probe/cleanup of the driver. The down and up flows via ioctl calls has similarly been updated. In addition, there are two other bugs in the reset flow - one in the logic for determining whether to schedule a device reset upon receiving an uncorrectable AER event which prevents the reset flow from being initiated, and another with clearing the status bit indicating a device is configured (when resetting the device the configuration remains across the reset so the bit should not be cleared, otherwise, the necessary services will not be re-started in adf_dev_start() after the reset - clear the bit only when actually deleting the configuration). Signed-off-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-01-10 03:54:58 +08:00
ret = adf_dev_start(accel_dev);
if (ret)
goto out_err_dev_stop;
return ret;
out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
pci_disable_device(accel_pci_dev->pci_dev);
out_err:
adf_cleanup_accel(accel_dev);
kfree(accel_dev);
return ret;
}
/*
 * PCI .remove callback: look up the accelerator bound to @pdev, stop and
 * shut it down, disable AER, release accelerator and PCI resources, and free
 * the accelerator structure.
 */
static void adf_remove(struct pci_dev *pdev)
{
	struct adf_accel_dev *accel_dev;

	accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
	if (!accel_dev) {
		/* No accelerator is registered for this PCI device. */
		pr_err("QAT: Driver removal failed\n");
		return;
	}

	/* Quiesce the device before tearing anything down. */
	adf_dev_stop(accel_dev);
	adf_dev_shutdown(accel_dev);
	adf_disable_aer(accel_dev);

	/* Unmap BARs and drop bookkeeping first, then release the PCI device. */
	adf_cleanup_accel(accel_dev);
	adf_cleanup_pci_dev(accel_dev);

	kfree(accel_dev);
}
/*
 * Module entry point: request the "intel_qat" module (its result is not
 * checked) and register the PCI driver.
 *
 * Returns 0 on success, -EFAULT if PCI driver registration fails.
 */
static int __init adfdrv_init(void)
{
	int err;

	request_module("intel_qat");

	err = pci_register_driver(&adf_driver);
	if (!err)
		return 0;

	pr_err("QAT: Driver initialization failed\n");
	return -EFAULT;
}
/* Module exit point: unregister the PCI driver registered in adfdrv_init(). */
static void __exit adfdrv_release(void)
{
	pci_unregister_driver(&adf_driver);
}
/* Hook the module load/unload entry points defined above. */
module_init(adfdrv_init);
module_exit(adfdrv_release);
/* Module metadata: licence, author, firmware names, description, version. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel");
MODULE_FIRMWARE(ADF_DH895XCC_FW);
MODULE_FIRMWARE(ADF_DH895XCC_MMP);
MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
MODULE_VERSION(ADF_DRV_VERSION);