habanalabs: add device status option to INFO IOCTL
This patch adds a new opcode to the INFO IOCTL that returns the device status. This allows users to query the device status in order to avoid sending command submissions while the device is in reset. Signed-off-by: Dalit Ben Zoor <dbenzoor@habana.ai> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
9354c29ed5
commit
aa957088b4
|
@ -10,6 +10,7 @@
|
||||||
#include <linux/pci.h>
|
#include <linux/pci.h>
|
||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/hwmon.h>
|
#include <linux/hwmon.h>
|
||||||
|
#include <uapi/misc/habanalabs.h>
|
||||||
|
|
||||||
#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
|
#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
|
||||||
|
|
||||||
|
@ -21,6 +22,20 @@ bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum hl_device_status hl_device_status(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
enum hl_device_status status;
|
||||||
|
|
||||||
|
if (hdev->disabled)
|
||||||
|
status = HL_DEVICE_STATUS_MALFUNCTION;
|
||||||
|
else if (atomic_read(&hdev->in_reset))
|
||||||
|
status = HL_DEVICE_STATUS_IN_RESET;
|
||||||
|
else
|
||||||
|
status = HL_DEVICE_STATUS_OPERATIONAL;
|
||||||
|
|
||||||
|
return status;
|
||||||
|
};
|
||||||
|
|
||||||
static void hpriv_release(struct kref *ref)
|
static void hpriv_release(struct kref *ref)
|
||||||
{
|
{
|
||||||
struct hl_fpriv *hpriv;
|
struct hl_fpriv *hpriv;
|
||||||
|
|
|
@ -1272,6 +1272,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
|
||||||
|
|
||||||
int hl_device_open(struct inode *inode, struct file *filp);
|
int hl_device_open(struct inode *inode, struct file *filp);
|
||||||
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
|
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
|
||||||
|
enum hl_device_status hl_device_status(struct hl_device *hdev);
|
||||||
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
||||||
enum hl_asic_type asic_type, int minor);
|
enum hl_asic_type asic_type, int minor);
|
||||||
void destroy_hdev(struct hl_device *hdev);
|
void destroy_hdev(struct hl_device *hdev);
|
||||||
|
|
|
@ -12,6 +12,21 @@
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
|
static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||||
|
{
|
||||||
|
struct hl_info_device_status dev_stat = {0};
|
||||||
|
u32 size = args->return_size;
|
||||||
|
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||||
|
|
||||||
|
if ((!size) || (!out))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
dev_stat.status = hl_device_status(hdev);
|
||||||
|
|
||||||
|
return copy_to_user(out, &dev_stat,
|
||||||
|
min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
|
static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
|
||||||
{
|
{
|
||||||
struct hl_info_hw_ip_info hw_ip = {0};
|
struct hl_info_hw_ip_info hw_ip = {0};
|
||||||
|
@ -105,6 +120,10 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||||
struct hl_device *hdev = hpriv->hdev;
|
struct hl_device *hdev = hpriv->hdev;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
/* We want to return the device status even if it is disabled or in reset */
|
||||||
|
if (args->op == HL_INFO_DEVICE_STATUS)
|
||||||
|
return device_status_info(hdev, args);
|
||||||
|
|
||||||
if (hl_device_disabled_or_in_reset(hdev)) {
|
if (hl_device_disabled_or_in_reset(hdev)) {
|
||||||
dev_warn_ratelimited(hdev->dev,
|
dev_warn_ratelimited(hdev->dev,
|
||||||
"Device is disabled or in reset. Can't execute INFO IOCTL\n");
|
"Device is disabled or in reset. Can't execute INFO IOCTL\n");
|
||||||
|
|
|
@ -45,11 +45,18 @@ enum goya_queue_id {
|
||||||
GOYA_QUEUE_ID_SIZE
|
GOYA_QUEUE_ID_SIZE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Device status as reported to userspace by the HL_INFO_DEVICE_STATUS opcode.
 *
 * The numeric values are part of the userspace ABI, so they are spelled out
 * explicitly: adding or reordering members must never renumber existing ones.
 *
 * HL_DEVICE_STATUS_OPERATIONAL - device is neither disabled nor in reset
 * HL_DEVICE_STATUS_IN_RESET    - device is currently in reset
 * HL_DEVICE_STATUS_MALFUNCTION - device is disabled
 */
enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL = 0,
	HL_DEVICE_STATUS_IN_RESET = 1,
	HL_DEVICE_STATUS_MALFUNCTION = 2
};
|
||||||
|
|
||||||
/* Opcode for management ioctl */
|
/* Opcode for management ioctl */
|
||||||
#define HL_INFO_HW_IP_INFO 0
|
#define HL_INFO_HW_IP_INFO 0
|
||||||
#define HL_INFO_HW_EVENTS 1
|
#define HL_INFO_HW_EVENTS 1
|
||||||
#define HL_INFO_DRAM_USAGE 2
|
#define HL_INFO_DRAM_USAGE 2
|
||||||
#define HL_INFO_HW_IDLE 3
|
#define HL_INFO_HW_IDLE 3
|
||||||
|
#define HL_INFO_DEVICE_STATUS 4
|
||||||
|
|
||||||
#define HL_INFO_VERSION_MAX_LEN 128
|
#define HL_INFO_VERSION_MAX_LEN 128
|
||||||
|
|
||||||
|
@ -82,6 +89,11 @@ struct hl_info_hw_idle {
|
||||||
__u32 pad;
|
__u32 pad;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Payload copied to userspace for the HL_INFO_DEVICE_STATUS opcode.
 *
 * status - device state; the driver fills it with the value returned by
 *          hl_device_status(), i.e. one of enum hl_device_status
 * pad    - not written by the driver beyond the zero-initialization of the
 *          whole structure
 */
struct hl_info_device_status {
|
||||||
|
__u32 status;
|
||||||
|
__u32 pad;
|
||||||
|
};
|
||||||
|
|
||||||
struct hl_info_args {
|
struct hl_info_args {
|
||||||
/* Location of relevant struct in userspace */
|
/* Location of relevant struct in userspace */
|
||||||
__u64 return_pointer;
|
__u64 return_pointer;
|
||||||
|
|
Loading…
Reference in New Issue