habanalabs: move HW dirty check to a proper location
Driver must verify if HW is dirty before trying to fetch preboot information. Hence, we move this validation to a prior stage of the boot sequence. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
28e052c952
commit
d1ddd90551
|
@ -1278,13 +1278,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
|||
|
||||
hl_debugfs_add_device(hdev);
|
||||
|
||||
if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
|
||||
dev_info(hdev->dev,
|
||||
"H/W state is dirty, must reset before initializing\n");
|
||||
hdev->asic_funcs->halt_engines(hdev, true);
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* From this point, in case of an error, add char devices and create
|
||||
* sysfs nodes as part of the error flow, to allow debugging.
|
||||
|
|
|
@ -917,7 +917,6 @@ struct hl_asic_funcs {
|
|||
size_t max_size);
|
||||
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
|
||||
u16 len, u32 timeout, long *result);
|
||||
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
|
||||
int (*pci_bars_map)(struct hl_device *hdev);
|
||||
int (*init_iatu)(struct hl_device *hdev);
|
||||
u32 (*rreg)(struct hl_device *hdev, u32 reg);
|
||||
|
@ -1901,6 +1900,7 @@ struct hl_device {
|
|||
u8 hard_reset_on_fw_events;
|
||||
u8 bmc_enable;
|
||||
u8 rl_enable;
|
||||
u8 reset_on_preboot_fail;
|
||||
};
|
||||
|
||||
|
||||
|
@ -2148,9 +2148,7 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
|
|||
struct hl_inbound_pci_region *pci_region);
|
||||
int hl_pci_set_outbound_region(struct hl_device *hdev,
|
||||
struct hl_outbound_pci_region *pci_region);
|
||||
int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
|
||||
u32 preboot_ver_timeout);
|
||||
int hl_pci_init(struct hl_device *hdev);
|
||||
void hl_pci_fini(struct hl_device *hdev);
|
||||
|
||||
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
|
||||
|
|
|
@ -234,20 +234,20 @@ out_err:
|
|||
|
||||
static void set_driver_behavior_per_device(struct hl_device *hdev)
|
||||
{
|
||||
hdev->mmu_enable = 1;
|
||||
hdev->cpu_enable = 1;
|
||||
hdev->fw_loading = 1;
|
||||
hdev->fw_loading = FW_TYPE_ALL_TYPES;
|
||||
hdev->cpu_queues_enable = 1;
|
||||
hdev->heartbeat = 1;
|
||||
hdev->mmu_enable = 1;
|
||||
hdev->clock_gating_mask = ULONG_MAX;
|
||||
|
||||
hdev->reset_pcilink = 0;
|
||||
hdev->axi_drain = 0;
|
||||
hdev->sram_scrambler_enable = 1;
|
||||
hdev->dram_scrambler_enable = 1;
|
||||
hdev->bmc_enable = 1;
|
||||
hdev->hard_reset_on_fw_events = 1;
|
||||
hdev->fw_loading = FW_TYPE_ALL_TYPES;
|
||||
hdev->reset_on_preboot_fail = 1;
|
||||
|
||||
hdev->reset_pcilink = 0;
|
||||
hdev->axi_drain = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -338,20 +338,12 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
|
|||
/**
|
||||
* hl_pci_init() - PCI initialization code.
|
||||
* @hdev: Pointer to hl_device structure.
|
||||
* @cpu_boot_status_reg: status register of the device's CPU
|
||||
* @cpu_security_boot_status_reg: status register of device's CPU security
|
||||
* configuration
|
||||
* @boot_err0_reg: boot error register of the device's CPU
|
||||
* @preboot_ver_timeout: how much to wait before bailing out on reading
|
||||
* the preboot version
|
||||
*
|
||||
* Set DMA masks, initialize the PCI controller and map the PCI BARs.
|
||||
*
|
||||
* Return: 0 on success, non-zero for failure.
|
||||
*/
|
||||
int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
||||
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
|
||||
u32 preboot_ver_timeout)
|
||||
int hl_pci_init(struct hl_device *hdev)
|
||||
{
|
||||
struct pci_dev *pdev = hdev->pdev;
|
||||
int rc;
|
||||
|
@ -383,19 +375,6 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
|
|||
if (rc)
|
||||
goto unmap_pci_bars;
|
||||
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
* The check will be done in each ASIC's specific code
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, cpu_boot_status_reg,
|
||||
cpu_security_boot_status_reg, boot_err0_reg,
|
||||
preboot_ver_timeout);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to read preboot version\n");
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
goto unmap_pci_bars;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_pci_bars:
|
||||
|
|
|
@ -604,6 +604,11 @@ done:
|
|||
return rc;
|
||||
}
|
||||
|
||||
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
|
||||
{
|
||||
return RREG32(mmHW_STATE);
|
||||
}
|
||||
|
||||
static int gaudi_early_init(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
|
@ -641,14 +646,32 @@ static int gaudi_early_init(struct hl_device *hdev)
|
|||
|
||||
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
|
||||
|
||||
rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
|
||||
GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
rc = hl_pci_init(hdev);
|
||||
if (rc)
|
||||
goto free_queue_props;
|
||||
|
||||
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
|
||||
dev_info(hdev->dev,
|
||||
"H/W state is dirty, must reset before initializing\n");
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
}
|
||||
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
|
||||
GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
goto pci_fini;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
pci_fini:
|
||||
hl_pci_fini(hdev);
|
||||
free_queue_props:
|
||||
kfree(hdev->asic_prop.hw_queues_props);
|
||||
return rc;
|
||||
|
@ -7691,11 +7714,6 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
|
||||
{
|
||||
return RREG32(mmHW_STATE);
|
||||
}
|
||||
|
||||
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
|
||||
struct hl_ctx *ctx)
|
||||
{
|
||||
|
@ -8252,7 +8270,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
|
|||
.get_pci_id = gaudi_get_pci_id,
|
||||
.get_eeprom_data = gaudi_get_eeprom_data,
|
||||
.send_cpu_message = gaudi_send_cpu_message,
|
||||
.get_hw_state = gaudi_get_hw_state,
|
||||
.pci_bars_map = gaudi_pci_bars_map,
|
||||
.init_iatu = gaudi_init_iatu,
|
||||
.rreg = hl_rreg,
|
||||
|
|
|
@ -553,6 +553,11 @@ done:
|
|||
return rc;
|
||||
}
|
||||
|
||||
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
|
||||
{
|
||||
return RREG32(mmHW_STATE);
|
||||
}
|
||||
|
||||
/*
|
||||
* goya_early_init - GOYA early initialization code
|
||||
*
|
||||
|
@ -602,12 +607,28 @@ static int goya_early_init(struct hl_device *hdev)
|
|||
|
||||
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
|
||||
|
||||
rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
|
||||
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
rc = hl_pci_init(hdev);
|
||||
if (rc)
|
||||
goto free_queue_props;
|
||||
|
||||
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
|
||||
dev_info(hdev->dev,
|
||||
"H/W state is dirty, must reset before initializing\n");
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
}
|
||||
|
||||
/* Before continuing in the initialization, we need to read the preboot
|
||||
* version to determine whether we run with a security-enabled firmware
|
||||
*/
|
||||
rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
|
||||
mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
|
||||
GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
|
||||
if (rc) {
|
||||
if (hdev->reset_on_preboot_fail)
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
goto pci_fini;
|
||||
}
|
||||
|
||||
if (!hdev->pldm) {
|
||||
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
|
||||
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
|
||||
|
@ -617,6 +638,8 @@ static int goya_early_init(struct hl_device *hdev)
|
|||
|
||||
return 0;
|
||||
|
||||
pci_fini:
|
||||
hl_pci_fini(hdev);
|
||||
free_queue_props:
|
||||
kfree(hdev->asic_prop.hw_queues_props);
|
||||
return rc;
|
||||
|
@ -2630,7 +2653,7 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
|
|||
"Timeout while waiting for device to reset 0x%x\n",
|
||||
status);
|
||||
|
||||
if (!hard_reset) {
|
||||
if (!hard_reset && goya) {
|
||||
goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
|
||||
HW_CAP_GOLDEN | HW_CAP_TPC);
|
||||
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
|
||||
|
@ -2651,6 +2674,7 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
|
|||
HW_CAP_DMA | HW_CAP_MME |
|
||||
HW_CAP_MMU | HW_CAP_TPC_MBIST |
|
||||
HW_CAP_GOLDEN | HW_CAP_TPC);
|
||||
|
||||
memset(goya->events_stat, 0, sizeof(goya->events_stat));
|
||||
}
|
||||
}
|
||||
|
@ -5274,11 +5298,6 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
|
|||
return hl_fw_get_eeprom_data(hdev, data, max_size);
|
||||
}
|
||||
|
||||
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
|
||||
{
|
||||
return RREG32(mmHW_STATE);
|
||||
}
|
||||
|
||||
static int goya_ctx_init(struct hl_ctx *ctx)
|
||||
{
|
||||
return 0;
|
||||
|
@ -5414,7 +5433,6 @@ static const struct hl_asic_funcs goya_funcs = {
|
|||
.get_pci_id = goya_get_pci_id,
|
||||
.get_eeprom_data = goya_get_eeprom_data,
|
||||
.send_cpu_message = goya_send_cpu_message,
|
||||
.get_hw_state = goya_get_hw_state,
|
||||
.pci_bars_map = goya_pci_bars_map,
|
||||
.init_iatu = goya_init_iatu,
|
||||
.rreg = hl_rreg,
|
||||
|
|
Loading…
Reference in New Issue