habanalabs: enable stop-on-error debugfs setting per ASIC
On Goya and Gaudi, the stop-on-error configuration can be set via debugfs. However, in future devices, this configuration will always be enabled. Modify the debugfs node to be allowed only for ASICs that support this dynamic configuration. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
4a0b01fa63
commit
d01e6cc97b
|
@ -222,6 +222,7 @@ KernelVersion: 5.6
|
|||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the stop-on-error option for the device engines. Value of
|
||||
"0" is for disable, otherwise enable.
|
||||
Relevant only for GOYA and GAUDI.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
||||
Date: Sep 2021
|
||||
|
|
|
@ -1071,6 +1071,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
|
|||
char tmp_buf[200];
|
||||
ssize_t rc;
|
||||
|
||||
if (!hdev->asic_prop.configurable_stop_on_err)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (*ppos)
|
||||
return 0;
|
||||
|
||||
|
@ -1089,6 +1092,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
|
|||
u32 value;
|
||||
ssize_t rc;
|
||||
|
||||
if (!hdev->asic_prop.configurable_stop_on_err)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (hdev->reset_info.in_reset) {
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"Can't change stop on error during reset\n");
|
||||
|
|
|
@ -561,6 +561,7 @@ struct hl_hints_range {
|
|||
* use-case of doing soft-reset in training (due
|
||||
* to the fact that training runs on multiple
|
||||
* devices)
|
||||
* @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
|
||||
*/
|
||||
struct asic_fixed_properties {
|
||||
struct hw_queue_properties *hw_queues_props;
|
||||
|
@ -644,6 +645,7 @@ struct asic_fixed_properties {
|
|||
u8 use_get_power_for_reset_history;
|
||||
u8 supports_soft_reset;
|
||||
u8 allow_inference_soft_reset;
|
||||
u8 configurable_stop_on_err;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -669,6 +669,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
|
|||
|
||||
prop->use_get_power_for_reset_history = true;
|
||||
|
||||
prop->configurable_stop_on_err = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -483,6 +483,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
|
|||
|
||||
prop->use_get_power_for_reset_history = true;
|
||||
|
||||
prop->configurable_stop_on_err = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue