crypto: qat - add error counters
commit 532d7f6bc458 ("crypto: qat - add error counters") upstream Introduce ras counters interface for counting QAT specific device errors and expose them through the newly created qat_ras sysfs group attribute. This adds the following attributes: - errors_correctable: number of correctable errors - errors_nonfatal: number of uncorrectable non fatal errors - errors_fatal: number of uncorrectable fatal errors - reset_error_counters: resets all counters These counters are initialized during device bring up and cleared during device shutdown and are applicable only to QAT GEN4 devices. Intel-SIG: commit 532d7f6bc458 ("crypto: qat - add error counters)" Backport to support Intel QAT in-tree driver Signed-off-by: Shashank Gupta <shashank.gupta@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Reviewed-by: Tero Kristo <tero.kristo@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> [ Aichun Shi: amend commit log ] Signed-off-by: Aichun Shi <aichun.shi@intel.com>
This commit is contained in:
parent
f3deabe7f7
commit
72522472a4
|
@ -0,0 +1,41 @@
|
||||||
|
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
|
||||||
|
Date: January 2024
|
||||||
|
KernelVersion: 6.7
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (RO) Reports the number of correctable errors detected by the device.
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
|
||||||
|
Date: January 2024
|
||||||
|
KernelVersion: 6.7
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (RO) Reports the number of non fatal errors detected by the device.
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
|
||||||
|
Date: January 2024
|
||||||
|
KernelVersion: 6.7
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (RO) Reports the number of fatal errors detected by the device.
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
|
||||||
|
Date: January 2024
|
||||||
|
KernelVersion: 6.7
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (WO) Write to resets all error counters of a device.
|
||||||
|
|
||||||
|
The following example reports how to reset the counters::
|
||||||
|
|
||||||
|
# echo 1 > /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
|
||||||
|
0
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
|
||||||
|
0
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
|
||||||
|
0
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
|
@ -418,6 +418,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
accel_dev->ras_errors.enabled = true;
|
||||||
adf_dbgfs_init(accel_dev);
|
adf_dbgfs_init(accel_dev);
|
||||||
|
|
||||||
ret = adf_dev_up(accel_dev, true);
|
ret = adf_dev_up(accel_dev, true);
|
||||||
|
|
|
@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
|
||||||
adf_admin.o \
|
adf_admin.o \
|
||||||
adf_hw_arbiter.o \
|
adf_hw_arbiter.o \
|
||||||
adf_sysfs.o \
|
adf_sysfs.o \
|
||||||
|
adf_sysfs_ras_counters.o \
|
||||||
adf_gen2_hw_data.o \
|
adf_gen2_hw_data.o \
|
||||||
adf_gen2_config.o \
|
adf_gen2_config.o \
|
||||||
adf_gen4_hw_data.o \
|
adf_gen4_hw_data.o \
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <linux/list.h>
|
#include <linux/list.h>
|
||||||
#include <linux/io.h>
|
#include <linux/io.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
|
#include <linux/types.h>
|
||||||
#include "adf_cfg_common.h"
|
#include "adf_cfg_common.h"
|
||||||
#include "adf_pfvf_msg.h"
|
#include "adf_pfvf_msg.h"
|
||||||
|
|
||||||
|
@ -81,6 +82,18 @@ enum dev_sku_info {
|
||||||
DEV_SKU_UNKNOWN,
|
DEV_SKU_UNKNOWN,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum ras_errors {
|
||||||
|
ADF_RAS_CORR,
|
||||||
|
ADF_RAS_UNCORR,
|
||||||
|
ADF_RAS_FATAL,
|
||||||
|
ADF_RAS_ERRORS,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct adf_error_counters {
|
||||||
|
atomic_t counter[ADF_RAS_ERRORS];
|
||||||
|
bool enabled;
|
||||||
|
};
|
||||||
|
|
||||||
static inline const char *get_sku_info(enum dev_sku_info info)
|
static inline const char *get_sku_info(enum dev_sku_info info)
|
||||||
{
|
{
|
||||||
switch (info) {
|
switch (info) {
|
||||||
|
@ -361,6 +374,7 @@ struct adf_accel_dev {
|
||||||
u8 pf_compat_ver;
|
u8 pf_compat_ver;
|
||||||
} vf;
|
} vf;
|
||||||
};
|
};
|
||||||
|
struct adf_error_counters ras_errors;
|
||||||
struct mutex state_lock; /* protect state of the device */
|
struct mutex state_lock; /* protect state of the device */
|
||||||
bool is_vf;
|
bool is_vf;
|
||||||
u32 accel_id;
|
u32 accel_id;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "adf_common_drv.h"
|
#include "adf_common_drv.h"
|
||||||
#include "adf_dbgfs.h"
|
#include "adf_dbgfs.h"
|
||||||
#include "adf_heartbeat.h"
|
#include "adf_heartbeat.h"
|
||||||
|
#include "adf_sysfs_ras_counters.h"
|
||||||
|
|
||||||
static LIST_HEAD(service_table);
|
static LIST_HEAD(service_table);
|
||||||
static DEFINE_MUTEX(service_lock);
|
static DEFINE_MUTEX(service_lock);
|
||||||
|
@ -245,6 +246,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
|
||||||
set_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);
|
set_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);
|
||||||
|
|
||||||
adf_dbgfs_add(accel_dev);
|
adf_dbgfs_add(accel_dev);
|
||||||
|
adf_sysfs_start_ras(accel_dev);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -271,6 +273,7 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
adf_dbgfs_rm(accel_dev);
|
adf_dbgfs_rm(accel_dev);
|
||||||
|
adf_sysfs_stop_ras(accel_dev);
|
||||||
|
|
||||||
clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
|
clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
|
||||||
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
|
clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/* Copyright(c) 2023 Intel Corporation */
|
||||||
|
|
||||||
|
#include <linux/sysfs.h>
|
||||||
|
#include <linux/pci.h>
|
||||||
|
#include <linux/string.h>
|
||||||
|
|
||||||
|
#include "adf_common_drv.h"
|
||||||
|
#include "adf_sysfs_ras_counters.h"
|
||||||
|
|
||||||
|
static ssize_t errors_correctable_show(struct device *dev,
|
||||||
|
struct device_attribute *dev_attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct adf_accel_dev *accel_dev;
|
||||||
|
unsigned long counter;
|
||||||
|
|
||||||
|
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
|
||||||
|
if (!accel_dev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_CORR);
|
||||||
|
return scnprintf(buf, PAGE_SIZE, "%ld\n", counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t errors_nonfatal_show(struct device *dev,
|
||||||
|
struct device_attribute *dev_attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct adf_accel_dev *accel_dev;
|
||||||
|
unsigned long counter;
|
||||||
|
|
||||||
|
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
|
||||||
|
if (!accel_dev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_UNCORR);
|
||||||
|
return scnprintf(buf, PAGE_SIZE, "%ld\n", counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t errors_fatal_show(struct device *dev,
|
||||||
|
struct device_attribute *dev_attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct adf_accel_dev *accel_dev;
|
||||||
|
unsigned long counter;
|
||||||
|
|
||||||
|
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
|
||||||
|
if (!accel_dev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
counter = ADF_RAS_ERR_CTR_READ(accel_dev->ras_errors, ADF_RAS_FATAL);
|
||||||
|
return scnprintf(buf, PAGE_SIZE, "%ld\n", counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t reset_error_counters_store(struct device *dev,
|
||||||
|
struct device_attribute *dev_attr,
|
||||||
|
const char *buf, size_t count)
|
||||||
|
{
|
||||||
|
struct adf_accel_dev *accel_dev;
|
||||||
|
|
||||||
|
if (buf[0] != '1' || count != 2)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
|
||||||
|
if (!accel_dev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors);
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEVICE_ATTR_RO(errors_correctable);
|
||||||
|
static DEVICE_ATTR_RO(errors_nonfatal);
|
||||||
|
static DEVICE_ATTR_RO(errors_fatal);
|
||||||
|
static DEVICE_ATTR_WO(reset_error_counters);
|
||||||
|
|
||||||
|
static struct attribute *qat_ras_attrs[] = {
|
||||||
|
&dev_attr_errors_correctable.attr,
|
||||||
|
&dev_attr_errors_nonfatal.attr,
|
||||||
|
&dev_attr_errors_fatal.attr,
|
||||||
|
&dev_attr_reset_error_counters.attr,
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct attribute_group qat_ras_group = {
|
||||||
|
.attrs = qat_ras_attrs,
|
||||||
|
.name = "qat_ras",
|
||||||
|
};
|
||||||
|
|
||||||
|
void adf_sysfs_start_ras(struct adf_accel_dev *accel_dev)
|
||||||
|
{
|
||||||
|
if (!accel_dev->ras_errors.enabled)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors);
|
||||||
|
|
||||||
|
if (device_add_group(&GET_DEV(accel_dev), &qat_ras_group))
|
||||||
|
dev_err(&GET_DEV(accel_dev),
|
||||||
|
"Failed to create qat_ras attribute group.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void adf_sysfs_stop_ras(struct adf_accel_dev *accel_dev)
|
||||||
|
{
|
||||||
|
if (!accel_dev->ras_errors.enabled)
|
||||||
|
return;
|
||||||
|
|
||||||
|
device_remove_group(&GET_DEV(accel_dev), &qat_ras_group);
|
||||||
|
|
||||||
|
ADF_RAS_ERR_CTR_CLEAR(accel_dev->ras_errors);
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||||
|
/* Copyright(c) 2023 Intel Corporation */
|
||||||
|
|
||||||
|
#ifndef ADF_RAS_H
|
||||||
|
#define ADF_RAS_H
|
||||||
|
|
||||||
|
#include <linux/bitops.h>
|
||||||
|
#include <linux/atomic.h>
|
||||||
|
|
||||||
|
struct adf_accel_dev;
|
||||||
|
|
||||||
|
void adf_sysfs_start_ras(struct adf_accel_dev *accel_dev);
|
||||||
|
void adf_sysfs_stop_ras(struct adf_accel_dev *accel_dev);
|
||||||
|
|
||||||
|
#define ADF_RAS_ERR_CTR_READ(ras_errors, ERR) \
|
||||||
|
atomic_read(&(ras_errors).counter[ERR])
|
||||||
|
|
||||||
|
#define ADF_RAS_ERR_CTR_CLEAR(ras_errors) \
|
||||||
|
do { \
|
||||||
|
for (int err = 0; err < ADF_RAS_ERRORS; ++err) \
|
||||||
|
atomic_set(&(ras_errors).counter[err], 0); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define ADF_RAS_ERR_CTR_INC(ras_errors, ERR) \
|
||||||
|
atomic_inc(&(ras_errors).counter[ERR])
|
||||||
|
|
||||||
|
#endif /* ADF_RAS_H */
|
Loading…
Reference in New Issue