habanalabs: add indication of security-enabled F/W

Future F/W versions will have enhanced security measures. With those
enabled, the driver won't be able to do certain configurations that it
always did; those configurations will be done by the firmware instead.

We use the firmware's preboot version to determine whether security
measures are enabled or not. Because we need this very early in our code,
the read of the preboot version is moved to the earliest possible place,
right after the device's PCI initialization.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
Oded Gabbay 2020-09-13 15:51:28 +03:00
parent d1f3633599
commit 57799ce9f8
5 changed files with 94 additions and 20 deletions

View File

@ -474,8 +474,11 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
"Device boot error - NIC F/W initialization failed\n"); "Device boot error - NIC F/W initialization failed\n");
} }
static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status) static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{ {
/* Some of the status codes below are deprecated in newer f/w
* versions but we keep them here for backward compatibility
*/
switch (status) { switch (status) {
case CPU_BOOT_STATUS_NA: case CPU_BOOT_STATUS_NA:
dev_err(hdev->dev, dev_err(hdev->dev,
@ -521,6 +524,48 @@ static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
} }
} }
/**
 * hl_fw_read_preboot_ver() - wait for the device CPU boot status and read
 *                            the preboot firmware version.
 * @hdev: pointer to hl_device structure.
 * @cpu_boot_status_reg: register polled for the device CPU boot status.
 * @boot_err0_reg: boot error register, handed to fw_read_errors() on failure.
 * @timeout: timeout value forwarded to hl_poll_timeout().
 *
 * When hdev->cpu_enable is not set the function does nothing and reports
 * success.
 *
 * Return: 0 on success (or when the CPU is not enabled), -EIO if the poll on
 *         the boot status register fails.
 */
int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
				u32 boot_err0_reg, u32 timeout)
{
	u32 cpu_status;
	int poll_rc;

	/* Nothing to read if the device CPU is not in use */
	if (!hdev->cpu_enable)
		return 0;

	/*
	 * Two kinds of firmware have to be handled here:
	 *
	 * - newer firmwares, where the preboot stops and waits for the boot
	 *   fit (CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT)
	 *
	 * - older firmwares, where the uboot was loaded from the FLASH (all
	 *   the other status values accepted below)
	 */
	poll_rc = hl_poll_timeout(hdev, cpu_boot_status_reg, cpu_status,
			(cpu_status == CPU_BOOT_STATUS_IN_UBOOT) ||
			(cpu_status == CPU_BOOT_STATUS_DRAM_RDY) ||
			(cpu_status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
			(cpu_status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
			(cpu_status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
			(cpu_status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
			10000,
			timeout);

	if (!poll_rc) {
		/* Boot status is one of the accepted values - fetch the version */
		hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
		return 0;
	}

	/* Poll failed: report the last status read and any boot errors */
	dev_err(hdev->dev, "Failed to read preboot version\n");
	detect_cpu_boot_status(hdev, cpu_status);
	fw_read_errors(hdev, boot_err0_reg);

	return -EIO;
}
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 boot_err0_reg, bool skip_bmc, u32 boot_err0_reg, bool skip_bmc,
@ -586,15 +631,11 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
10000, 10000,
cpu_timeout); cpu_timeout);
/* Read U-Boot, preboot versions now in case we will later fail */ /* Read U-Boot version now in case we will later fail */
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
/* Some of the status codes below are deprecated in newer f/w
* versions but we keep them here for backward compatibility
*/
if (rc) { if (rc) {
hl_detect_cpu_boot_status(hdev, status); detect_cpu_boot_status(hdev, status);
rc = -EIO; rc = -EIO;
goto out; goto out;
} }

View File

@ -320,6 +320,8 @@ struct hl_mmu_properties {
* @first_available_user_mon: first monitor available for the user * @first_available_user_mon: first monitor available for the user
* @tpc_enabled_mask: which TPCs are enabled. * @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues. * @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
* false otherwise
*/ */
struct asic_fixed_properties { struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props; struct hw_queue_properties *hw_queues_props;
@ -370,6 +372,7 @@ struct asic_fixed_properties {
u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_mon[HL_MAX_DCORES];
u8 tpc_enabled_mask; u8 tpc_enabled_mask;
u8 completion_queues_count; u8 completion_queues_count;
u8 fw_security_disabled;
}; };
/** /**
@ -1933,6 +1936,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 boot_err0_reg, bool skip_bmc, u32 boot_err0_reg, bool skip_bmc,
u32 cpu_timeout, u32 boot_fit_timeout); u32 cpu_timeout, u32 boot_fit_timeout);
int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 boot_err0_reg, u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]); bool is_wc[3]);
@ -1941,7 +1946,8 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region); struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_set_outbound_region(struct hl_device *hdev,
struct hl_outbound_pci_region *pci_region); struct hl_outbound_pci_region *pci_region);
int hl_pci_init(struct hl_device *hdev); int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 boot_err0_reg, u32 preboot_ver_timeout);
void hl_pci_fini(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);

View File

@ -338,12 +338,17 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
/** /**
* hl_pci_init() - PCI initialization code. * hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure. * @hdev: Pointer to hl_device structure.
* @cpu_boot_status_reg: status register of the device's CPU
* @boot_err0_reg: boot error register of the device's CPU
* @preboot_ver_timeout: how much to wait before bailing out on reading
* the preboot version
* *
* Set DMA masks, initialize the PCI controller and map the PCI BARs. * Set DMA masks, initialize the PCI controller and map the PCI BARs.
* *
* Return: 0 on success, non-zero for failure. * Return: 0 on success, non-zero for failure.
*/ */
int hl_pci_init(struct hl_device *hdev) int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 boot_err0_reg, u32 preboot_ver_timeout)
{ {
struct pci_dev *pdev = hdev->pdev; struct pci_dev *pdev = hdev->pdev;
int rc; int rc;
@ -375,6 +380,15 @@ int hl_pci_init(struct hl_device *hdev)
if (rc) if (rc)
goto unmap_pci_bars; goto unmap_pci_bars;
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware.
* The check will be done in each ASIC's specific code
*/
rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
preboot_ver_timeout);
if (rc)
goto unmap_pci_bars;
return 0; return 0;
unmap_pci_bars: unmap_pci_bars:

View File

@ -599,10 +599,15 @@ static int gaudi_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
rc = hl_pci_init(hdev); rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) if (rc)
goto free_queue_props; goto free_queue_props;
/* GAUDI Firmware does not yet support security */
prop->fw_security_disabled = true;
dev_info(hdev->dev, "firmware-level security is disabled\n");
return 0; return 0;
free_queue_props: free_queue_props:
@ -2871,6 +2876,18 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
/* Perform read from the device to make sure device is up */ /* Perform read from the device to make sure device is up */
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
/* Set the access through PCI bars (Linux driver only) as
* secured
*/
WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
/* Perform read to flush the waiting writes to ensure
* configuration was set in the device
*/
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
/* /*
* Let's mark in the H/W that we have reached this point. We check * Let's mark in the H/W that we have reached this point. We check
* this value in the reset_before_init function to understand whether * this value in the reset_before_init function to understand whether
@ -2879,15 +2896,6 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
*/ */
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
/* Set the access through PCI bars (Linux driver only) as secured */
WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
/* Perform read to flush the waiting writes to ensure configuration
* was set in the device
*/
RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
/* Configure the reset registers. Must be done as early as possible /* Configure the reset registers. Must be done as early as possible
* in case we fail during H/W initialization * in case we fail during H/W initialization
*/ */

View File

@ -600,10 +600,15 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
rc = hl_pci_init(hdev); rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc) if (rc)
goto free_queue_props; goto free_queue_props;
/* Goya Firmware does not support security */
prop->fw_security_disabled = true;
dev_info(hdev->dev, "firmware-level security is disabled\n");
if (!hdev->pldm) { if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)