diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 0910f7fa34ec..6f1ead69bd77 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -5,3 +5,6 @@ obj-m := habanalabs.o habanalabs-y := habanalabs_drv.o device.o + +include $(src)/goya/Makefile +habanalabs-y += $(HL_GOYA_FILES) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 6189fd0e2ccd..a4feaa784db3 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -120,8 +120,11 @@ err_cdev_add: */ static int device_early_init(struct hl_device *hdev) { + int rc; + switch (hdev->asic_type) { case ASIC_GOYA: + goya_set_asic_funcs(hdev); strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); break; default: @@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev) return -EINVAL; } + rc = hdev->asic_funcs->early_init(hdev); + if (rc) + return rc; + return 0; } @@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev) */ static void device_early_fini(struct hl_device *hdev) { + + if (hdev->asic_funcs->early_fini) + hdev->asic_funcs->early_fini(hdev); + } /* @@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev) */ int hl_device_suspend(struct hl_device *hdev) { + int rc; + pci_save_state(hdev->pdev); + rc = hdev->asic_funcs->suspend(hdev); + if (rc) + dev_err(hdev->dev, + "Failed to disable PCI access of device CPU\n"); + /* Shut down the device */ pci_disable_device(hdev->pdev); pci_set_power_state(hdev->pdev, PCI_D3hot); @@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev) return rc; } + rc = hdev->asic_funcs->resume(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to enable PCI access from device CPU\n"); + return rc; + } + return 0; } @@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto release_device; + /* + * Start calling ASIC initialization. First S/W then H/W and finally + * late init + */ + rc = hdev->asic_funcs->sw_init(hdev); + if (rc) + goto early_fini; + dev_notice(hdev->dev, "Successfully added device to habanalabs driver\n"); return 0; +early_fini: + device_early_fini(hdev); release_device: device_destroy(hclass, hdev->dev->devt); cdev_del(&hdev->cdev); @@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev) /* Mark device as disabled */ hdev->disabled = true; + /* Call ASIC S/W finalize function */ + hdev->asic_funcs->sw_fini(hdev); + device_early_fini(hdev); /* Hide device from user */ @@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr, return *val ? 0 : -ETIMEDOUT; } + +/* + * MMIO register access helper functions. + */ + +/* + * hl_rreg - Read an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * + * Returns the value of the MMIO register we are asked to read + * + */ +inline u32 hl_rreg(struct hl_device *hdev, u32 reg) +{ + return readl(hdev->rmmio + reg); +} + +/* + * hl_wreg - Write to an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * @val: 32-bit value + * + * Writes the 32-bit value into the MMIO register + * + */ +inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) +{ + writel(val, hdev->rmmio + reg); +} diff --git a/drivers/misc/habanalabs/goya/Makefile b/drivers/misc/habanalabs/goya/Makefile new file mode 100644 index 000000000000..38d43006386d --- /dev/null +++ b/drivers/misc/habanalabs/goya/Makefile @@ -0,0 +1,3 @@ +subdir-ccflags-y += -I$(src) + +HL_GOYA_FILES := goya/goya.o diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c new file mode 100644 index 000000000000..ceaffa9afd83 --- /dev/null +++ b/drivers/misc/habanalabs/goya/goya.c @@ -0,0 +1,632 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "goyaP.h" +#include "include/goya/asic_reg/goya_masks.h" + +#include +#include +#include + +/* + * GOYA security scheme: + * + * 1. Host is protected by: + * - Range registers (When MMU is enabled, DMA RR does NOT protect host) + * - MMU + * + * 2. DRAM is protected by: + * - Range registers (protect the first 512MB) + * - MMU (isolation between users) + * + * 3. Configuration is protected by: + * - Range registers + * - Protection bits + * + * When MMU is disabled: + * + * QMAN DMA: PQ, CQ, CP, DMA are secured. + * PQ, CB and the data are on the host. + * + * QMAN TPC/MME: + * PQ, CQ and CP are not secured. + * PQ, CB and the data are on the SRAM/DRAM. + * + * Since QMAN DMA is secured, KMD is parsing the DMA CB: + * - KMD checks DMA pointer + * - WREG, MSG_PROT are not allowed. + * - MSG_LONG/SHORT are allowed. + * + * A read/write transaction by the QMAN to a protected area will succeed if + * and only if the QMAN's CP is secured and MSG_PROT is used + * + * + * When MMU is enabled: + * + * QMAN DMA: PQ, CQ and CP are secured. + * MMU is set to bypass on the Secure props register of the QMAN. + * The reasons we don't enable MMU for PQ, CQ and CP are: + * - PQ entry is in kernel address space and KMD doesn't map it. + * - CP writes to MSIX register and to kernel address space (completion + * queue). + * + * DMA is not secured but because CP is secured, KMD still needs to parse the + * CB, but doesn't need to check the DMA addresses. + * + * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD + * doesn't map memory in MMU. + * + * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) + * + * DMA RR does NOT protect host because DMA is not secured + * + */ + +#define GOYA_MMU_REGS_NUM 61 + +#define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ + +#define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */ +#define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */ +#define GOYA_RESET_WAIT_MSEC 1 /* 1ms */ +#define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */ +#define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ +#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */ +#define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ + +#define GOYA_QMAN0_FENCE_VAL 0xD169B243 + +#define GOYA_MAX_INITIATORS 20 + +static void goya_get_fixed_properties(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; + + prop->dram_base_address = DRAM_PHYS_BASE; + prop->dram_size = DRAM_PHYS_DEFAULT_SIZE; + prop->dram_end_address = prop->dram_base_address + prop->dram_size; + prop->dram_user_base_address = DRAM_BASE_ADDR_USER; + + prop->sram_base_address = SRAM_BASE_ADDR; + prop->sram_size = SRAM_SIZE; + prop->sram_end_address = prop->sram_base_address + prop->sram_size; + prop->sram_user_base_address = prop->sram_base_address + + SRAM_USER_BASE_OFFSET; + + prop->host_phys_base_address = HOST_PHYS_BASE; + prop->va_space_host_start_address = VA_HOST_SPACE_START; + prop->va_space_host_end_address = VA_HOST_SPACE_END; + prop->va_space_dram_start_address = VA_DDR_SPACE_START; + prop->va_space_dram_end_address = VA_DDR_SPACE_END; + prop->cfg_size = CFG_SIZE; + prop->max_asid = MAX_ASID; + prop->tpc_enabled_mask = TPC_ENABLED_MASK; + + prop->high_pll = PLL_HIGH_DEFAULT; +} + +/* + * goya_pci_bars_map - Map PCI BARS of Goya device + * + * @hdev: pointer to hl_device structure + * + * Request PCI regions and map them to kernel virtual addresses. + * Returns 0 on success + * + */ +int goya_pci_bars_map(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + rc = pci_request_regions(pdev, HL_NAME); + if (rc) { + dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + return rc; + } + + hdev->pcie_bar[SRAM_CFG_BAR_ID] = + pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID); + if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n"); + rc = -ENODEV; + goto err_release_regions; + } + + hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID); + if (!hdev->pcie_bar[MSIX_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n"); + rc = -ENODEV; + goto err_unmap_sram_cfg; + } + + hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID); + if (!hdev->pcie_bar[DDR_BAR_ID]) { + dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n"); + rc = -ENODEV; + goto err_unmap_msix; + } + + hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (CFG_BASE - SRAM_BASE_ADDR); + + return 0; + +err_unmap_msix: + iounmap(hdev->pcie_bar[MSIX_BAR_ID]); +err_unmap_sram_cfg: + iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); +err_release_regions: + pci_release_regions(pdev); + + return rc; +} + +/* + * goya_pci_bars_unmap - Unmap PCI BARS of Goya device + * + * @hdev: pointer to hl_device structure + * + * Release all PCI BARS and unmap their virtual addresses + * + */ +static void goya_pci_bars_unmap(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + + iounmap(hdev->pcie_bar[DDR_BAR_ID]); + iounmap(hdev->pcie_bar[MSIX_BAR_ID]); + iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]); + pci_release_regions(pdev); +} + +/* + * goya_elbi_write - Write through the ELBI interface + * + * @hdev: pointer to hl_device structure + * + * return 0 on success, -1 on failure + * + */ +static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u32 val; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, + PCI_CONFIG_ELBI_CTRL_WRITE); + + timeout = ktime_add_ms(ktime_get(), 10); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) + return 0; + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error writing to ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI write didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); + return -EIO; +} + +/* + * goya_iatu_write - iatu write routine + * + * @hdev: pointer to hl_device structure + * + */ +static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data) +{ + u32 dbi_offset; + int rc; + + dbi_offset = addr & 0xFFF; + + rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000); + rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data); + + if (rc) + return -EIO; + + return 0; +} + +void goya_reset_link_through_bridge(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct pci_dev *parent_port; + u16 val; + + parent_port = pdev->bus->self; + pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); + val |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(1); + + val &= ~(PCI_BRIDGE_CTL_BUS_RESET); + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(3); +} + +/* + * goya_set_ddr_bar_base - set DDR bar to map specific device address + * + * @hdev: pointer to hl_device structure + * @addr: address in DDR. Must be aligned to DDR bar size + * + * This function configures the iATU so that the DDR bar will start at the + * specified addr. + * + */ +static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) +{ + struct goya_device *goya = hdev->asic_specific; + int rc; + + if ((goya) && (goya->ddr_bar_cur_addr == addr)) + return 0; + + /* Inbound Region 1 - Bar 4 - Point to DDR */ + rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr)); + rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr)); + rc |= goya_iatu_write(hdev, 0x300, 0); + /* Enable + Bar match + match enable + Bar 4 */ + rc |= goya_iatu_write(hdev, 0x304, 0xC0080400); + + /* Return the DBI window to the default location */ + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); + + if (rc) { + dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr); + return -EIO; + } + + if (goya) + goya->ddr_bar_cur_addr = addr; + + return 0; +} + +/* + * goya_init_iatu - Initialize the iATU unit inside the PCI controller + * + * @hdev: pointer to hl_device structure + * + * This is needed in case the firmware doesn't initialize the iATU + * + */ +static int goya_init_iatu(struct hl_device *hdev) +{ + int rc; + + /* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */ + rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR)); + rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR)); + rc |= goya_iatu_write(hdev, 0x100, 0); + /* Enable + Bar match + match enable */ + rc |= goya_iatu_write(hdev, 0x104, 0xC0080000); + + /* Inbound Region 1 - Bar 4 - Point to DDR */ + rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE); + + /* Outbound Region 0 - Point to Host */ + rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE)); + rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE)); + rc |= goya_iatu_write(hdev, 0x010, + lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); + rc |= goya_iatu_write(hdev, 0x014, 0); + rc |= goya_iatu_write(hdev, 0x018, 0); + rc |= goya_iatu_write(hdev, 0x020, + upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1)); + /* Increase region size */ + rc |= goya_iatu_write(hdev, 0x000, 0x00002000); + /* Enable */ + rc |= goya_iatu_write(hdev, 0x004, 0x80000000); + + /* Return the DBI window to the default location */ + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0); + rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0); + + if (rc) + return -EIO; + + return 0; +} + +/* + * goya_early_init - GOYA early initialization code + * + * @hdev: pointer to hl_device structure + * + * Verify PCI bars + * Set DMA masks + * PCI controller initialization + * Map PCI bars + * + */ +static int goya_early_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pci_dev *pdev = hdev->pdev; + u32 val; + int rc; + + goya_get_fixed_properties(hdev); + + /* Check BAR sizes */ + if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + SRAM_CFG_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + SRAM_CFG_BAR_ID), + CFG_BAR_SIZE); + return -ENODEV; + } + + if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + MSIX_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + MSIX_BAR_ID), + MSIX_BAR_SIZE); + return -ENODEV; + } + + prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); + + /* set DMA mask for GOYA */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n"); + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci dma mask to 32 bits\n"); + return rc; + } + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (rc) { + dev_warn(hdev->dev, + "Unable to set pci consistent dma mask to 39 bits\n"); + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(hdev->dev, + "Unable to set pci consistent dma mask to 32 bits\n"); + return rc; + } + } + + if (hdev->reset_pcilink) + goya_reset_link_through_bridge(hdev); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(hdev->dev, "can't enable PCI device\n"); + return rc; + } + + pci_set_master(pdev); + + rc = goya_init_iatu(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize iATU\n"); + goto disable_device; + } + + rc = goya_pci_bars_map(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize PCI BARS\n"); + goto disable_device; + } + + val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); + if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) + dev_warn(hdev->dev, + "PCI strap is not configured correctly, PCI bus errors may occur\n"); + + return 0; + +disable_device: + pci_clear_master(pdev); + pci_disable_device(pdev); + + return rc; +} + +/* + * goya_early_fini - GOYA early finalization code + * + * @hdev: pointer to hl_device structure + * + * Unmap PCI bars + * + */ +int goya_early_fini(struct hl_device *hdev) +{ + goya_pci_bars_unmap(hdev); + + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); + + return 0; +} + +/* + * goya_sw_init - Goya software initialization code + * + * @hdev: pointer to hl_device structure + * + */ +static int goya_sw_init(struct hl_device *hdev) +{ + struct goya_device *goya; + int rc; + + /* Allocate device structure */ + goya = kzalloc(sizeof(*goya), GFP_KERNEL); + if (!goya) + return -ENOMEM; + + /* according to goya_init_iatu */ + goya->ddr_bar_cur_addr = DRAM_PHYS_BASE; + hdev->asic_specific = goya; + + /* Create DMA pool for small allocations */ + hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), + &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); + if (!hdev->dma_pool) { + dev_err(hdev->dev, "failed to create DMA pool\n"); + rc = -ENOMEM; + goto free_goya_device; + } + + hdev->cpu_accessible_dma_mem = + hdev->asic_funcs->dma_alloc_coherent(hdev, + CPU_ACCESSIBLE_MEM_SIZE, + &hdev->cpu_accessible_dma_address, + GFP_KERNEL | __GFP_ZERO); + + if (!hdev->cpu_accessible_dma_mem) { + dev_err(hdev->dev, + "failed to allocate %d of dma memory for CPU accessible memory space\n", + CPU_ACCESSIBLE_MEM_SIZE); + rc = -ENOMEM; + goto free_dma_pool; + } + + hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1); + if (!hdev->cpu_accessible_dma_pool) { + dev_err(hdev->dev, + "Failed to create CPU accessible DMA pool\n"); + rc = -ENOMEM; + goto free_cpu_pq_dma_mem; + } + + rc = gen_pool_add(hdev->cpu_accessible_dma_pool, + (uintptr_t) hdev->cpu_accessible_dma_mem, + CPU_ACCESSIBLE_MEM_SIZE, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to CPU accessible DMA pool\n"); + rc = -EFAULT; + goto free_cpu_pq_pool; + } + + spin_lock_init(&goya->hw_queues_lock); + + return 0; + +free_cpu_pq_pool: + gen_pool_destroy(hdev->cpu_accessible_dma_pool); +free_cpu_pq_dma_mem: + hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); +free_dma_pool: + dma_pool_destroy(hdev->dma_pool); +free_goya_device: + kfree(goya); + + return rc; +} + +/* + * goya_sw_fini - Goya software tear-down code + * + * @hdev: pointer to hl_device structure + * + */ +int goya_sw_fini(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + + gen_pool_destroy(hdev->cpu_accessible_dma_pool); + + hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); + + dma_pool_destroy(hdev->dma_pool); + + kfree(goya); + + return 0; +} + +int goya_suspend(struct hl_device *hdev) +{ + return 0; +} + +int goya_resume(struct hl_device *hdev) +{ + return 0; +} + +void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); +} + +void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); +} + +static const struct hl_asic_funcs goya_funcs = { + .early_init = goya_early_init, + .early_fini = goya_early_fini, + .sw_init = goya_sw_init, + .sw_fini = goya_sw_fini, + .suspend = goya_suspend, + .resume = goya_resume, + .dma_alloc_coherent = goya_dma_alloc_coherent, + .dma_free_coherent = goya_dma_free_coherent, +}; + +/* + * goya_set_asic_funcs - set Goya function pointers + * + * @*hdev: pointer to hl_device structure + * + */ +void goya_set_asic_funcs(struct hl_device *hdev) +{ + hdev->asic_funcs = &goya_funcs; +} diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h new file mode 100644 index 000000000000..6a78976e2098 --- /dev/null +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef GOYAP_H_ +#define GOYAP_H_ + +#include +#include "habanalabs.h" +#include "include/goya/goya.h" + +#define NUMBER_OF_CMPLT_QUEUES 5 +#define NUMBER_OF_EXT_HW_QUEUES 5 +#define NUMBER_OF_CPU_HW_QUEUES 1 +#define NUMBER_OF_INT_HW_QUEUES 9 +#define NUMBER_OF_HW_QUEUES (NUMBER_OF_EXT_HW_QUEUES + \ + NUMBER_OF_CPU_HW_QUEUES + \ + NUMBER_OF_INT_HW_QUEUES) + +/* + * Number of MSIX interrupts IDS: + * Each completion queue has 1 ID + * The event queue has 1 ID + */ +#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + 1) + +#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES) +#error "Number of H/W queues must be smaller than HL_MAX_QUEUES" +#endif + +#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES) +#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES" +#endif + +#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */ + +#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */ + +#define TPC_ENABLED_MASK 0xFF + +#define PLL_HIGH_DEFAULT 1575000000 /* 1.575 GHz */ + +#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */ + +#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ + +/* DRAM Memory Map */ + +#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ +#define MMU_PAGE_TABLES_SIZE 0x0E000000 /* 224MB */ +#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */ +#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */ +#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */ + +#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE +#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE) +#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE) +#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE) +#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE) +#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE) + +#if (DRAM_BASE_ADDR_USER != 0x20000000) +#error "KMD must reserve 512MB" +#endif + +/* + * SRAM Memory Map for KMD + * + * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for + * MME/TPC QMANs + * + */ + +#define MME_QMAN_BASE_OFFSET 0x000000 /* Must be 0 */ +#define MME_QMAN_LENGTH 64 +#define TPC_QMAN_LENGTH 64 + +#define TPC0_QMAN_BASE_OFFSET (MME_QMAN_BASE_OFFSET + \ + (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC1_QMAN_BASE_OFFSET (TPC0_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC2_QMAN_BASE_OFFSET (TPC1_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC3_QMAN_BASE_OFFSET (TPC2_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC4_QMAN_BASE_OFFSET (TPC3_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC5_QMAN_BASE_OFFSET (TPC4_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC6_QMAN_BASE_OFFSET (TPC5_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) +#define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) + +#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \ + (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) + +#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START) +#error "MME/TPC QMANs SRAM space exceeds limit" +#endif + +#define SRAM_USER_BASE_OFFSET GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START + +/* Virtual address space */ +#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */ +#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 1TB */ +#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \ + VA_HOST_SPACE_START) /* 767TB */ + +#define VA_DDR_SPACE_START 0x800000000ull /* 32GB */ +#define VA_DDR_SPACE_END 0x2000000000ull /* 128GB */ +#define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \ + VA_DDR_SPACE_START) /* 128GB */ + +#define DMA_MAX_TRANSFER_SIZE 0xFFFFFFFF + +#define HW_CAP_PLL 0x00000001 +#define HW_CAP_DDR_0 0x00000002 +#define HW_CAP_DDR_1 0x00000004 +#define HW_CAP_MME 0x00000008 +#define HW_CAP_CPU 0x00000010 +#define HW_CAP_DMA 0x00000020 +#define HW_CAP_MSIX 0x00000040 +#define HW_CAP_CPU_Q 0x00000080 +#define HW_CAP_MMU 0x00000100 +#define HW_CAP_TPC_MBIST 0x00000200 +#define HW_CAP_GOLDEN 0x00000400 +#define HW_CAP_TPC 0x00000800 + +#define CPU_PKT_SHIFT 5 +#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT) +#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1)) +#define CPU_MAX_PKTS_IN_CB 32 +#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB) +#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE) + +enum goya_fw_component { + FW_COMP_UBOOT, + FW_COMP_PREBOOT +}; + +struct goya_device { + /* TODO: remove hw_queues_lock after moving to scheduler code */ + spinlock_t hw_queues_lock; + u64 ddr_bar_cur_addr; + u32 hw_cap_initialized; +}; + +#endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index fa628b05db13..5076e680a73c 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -14,9 +14,62 @@ #define HL_NAME "habanalabs" +#define HL_MAX_QUEUES 128 + struct hl_device; +/** + * struct asic_fixed_properties - ASIC specific immutable properties. + * @sram_base_address: SRAM physical start address. + * @sram_end_address: SRAM physical end address. + * @sram_user_base_address - SRAM physical start address for user access. + * @dram_base_address: DRAM physical start address. + * @dram_end_address: DRAM physical end address. + * @dram_user_base_address: DRAM physical start address for user access. + * @dram_size: DRAM total size. + * @dram_pci_bar_size: size of PCI bar towards DRAM. + * @host_phys_base_address: base physical address of host memory for + * transactions that the device generates. + * @va_space_host_start_address: base address of virtual memory range for + * mapping host memory. + * @va_space_host_end_address: end address of virtual memory range for + * mapping host memory. + * @va_space_dram_start_address: base address of virtual memory range for + * mapping DRAM memory. + * @va_space_dram_end_address: end address of virtual memory range for + * mapping DRAM memory. + * @cfg_size: configuration space size on SRAM. + * @sram_size: total size of SRAM. + * @max_asid: maximum number of open contexts (ASIDs). + * @completion_queues_count: number of completion queues. + * @high_pll: high PLL frequency used by the device. + * @tpc_enabled_mask: which TPCs are enabled. + */ +struct asic_fixed_properties { + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 host_phys_base_address; + u64 va_space_host_start_address; + u64 va_space_host_end_address; + u64 va_space_dram_start_address; + u64 va_space_dram_end_address; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 high_pll; + u8 completion_queues_count; + u8 tpc_enabled_mask; +}; + + +#define HL_QUEUE_LENGTH 256 /* * ASICs */ @@ -33,6 +86,36 @@ enum hl_asic_type { ASIC_INVALID }; +/** + * struct hl_asic_funcs - ASIC specific functions that are can be called from + * common code. + * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. + * @early_fini: tears down what was done in early_init. + * @sw_init: sets up driver state, does not configure H/W. + * @sw_fini: tears down driver state, does not configure H/W. + * @suspend: handles IP specific H/W or SW changes for suspend. + * @resume: handles IP specific H/W or SW changes for resume. + * @dma_alloc_coherent: Allocate coherent DMA memory by calling + * dma_alloc_coherent(). This is ASIC function because its + * implementation is not trivial when the driver is loaded + * in simulation mode (not upstreamed). + * @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent(). + * This is ASIC function because its implementation is not + * trivial when the driver is loaded in simulation mode + * (not upstreamed). + */ +struct hl_asic_funcs { + int (*early_init)(struct hl_device *hdev); + int (*early_fini)(struct hl_device *hdev); + int (*sw_init)(struct hl_device *hdev); + int (*sw_fini)(struct hl_device *hdev); + int (*suspend)(struct hl_device *hdev); + int (*resume)(struct hl_device *hdev); + void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + void (*dma_free_coherent)(struct hl_device *hdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle); +}; /* * FILE PRIVATE STRUCTURE @@ -62,26 +145,78 @@ struct hl_fpriv { */ #define HL_MAX_MINORS 256 +/* + * Registers read & write functions. + */ + +u32 hl_rreg(struct hl_device *hdev, u32 reg); +void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); + +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ + readl_poll_timeout(hdev->rmmio + addr, val, cond, sleep_us, timeout_us) + +#define RREG32(reg) hl_rreg(hdev, (reg)) +#define WREG32(reg, v) hl_wreg(hdev, (reg), (v)) +#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \ + hl_rreg(hdev, (reg))) + +#define WREG32_P(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= (mask); \ + tmp_ |= ((val) & ~(mask)); \ + WREG32(reg, tmp_); \ + } while (0) +#define WREG32_AND(reg, and) WREG32_P(reg, 0, and) +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) + +#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT +#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK +#define WREG32_FIELD(reg, field, val) \ + WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) + /** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. + * @pcie_bar: array of available PCIe bars. + * @rmmio: configuration area address on SRAM. * @cdev: related char device. * @dev: realted kernel basic device structure. * @asic_name: ASIC specific nmae. * @asic_type: ASIC specific type. + * @dma_pool: DMA pool for small allocations. + * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address. + * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address. + * @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool. + * @asic_prop: ASIC specific immutable properties. + * @asic_funcs: ASIC specific functions. + * @asic_specific: ASIC specific information to use only from ASIC files. * @major: habanalabs KMD major. * @id: device minor. * @disabled: is device disabled. */ struct hl_device { struct pci_dev *pdev; + void __iomem *pcie_bar[6]; + void __iomem *rmmio; struct cdev cdev; struct device *dev; char asic_name[16]; enum hl_asic_type asic_type; + struct dma_pool *dma_pool; + void *cpu_accessible_dma_mem; + dma_addr_t cpu_accessible_dma_address; + struct gen_pool *cpu_accessible_dma_pool; + struct asic_fixed_properties asic_prop; + const struct hl_asic_funcs *asic_funcs; + void *asic_specific; u32 major; u16 id; u8 disabled; + + /* Parameters for bring-up */ + u8 reset_pcilink; }; @@ -128,4 +263,6 @@ void hl_device_fini(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev); +void goya_set_asic_funcs(struct hl_device *hdev); + #endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index a5251ed277d1..edf626b2b7b1 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -122,6 +122,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->major = hl_major; + /* Parameters for bring-up - set them to defaults */ + hdev->reset_pcilink = 0; + hdev->disabled = true; hdev->pdev = pdev; /* can be NULL in case of simulator device */ diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h new file mode 100644 index 000000000000..614149efa412 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/goya.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef GOYA_H +#define GOYA_H + +#include "asic_reg/goya_regs.h" + +#include + +#define SRAM_CFG_BAR_ID 0 +#define MSIX_BAR_ID 2 +#define DDR_BAR_ID 4 + +#define CFG_BAR_SIZE 0x10000000ull /* 256MB */ +#define MSIX_BAR_SIZE 0x1000ull /* 4KB */ + +#define CFG_BASE 0x7FFC000000ull +#define CFG_SIZE 0x4000000 /* 32MB CFG + 32MB DBG*/ + +#define SRAM_BASE_ADDR 0x7FF0000000ull +#define SRAM_SIZE 0x32A0000 /* 50.625MB */ + +#define DRAM_PHYS_BASE 0x0ull + +#define HOST_PHYS_BASE 0x8000000000ull /* 0.5TB */ +#define HOST_PHYS_SIZE 0x1000000000000ull /* 0.25PB (48 bits) */ + +#define GOYA_MSIX_ENTRIES 8 + +#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ + +#define MAX_ASID 1024 + +#define PROT_BITS_OFFS 0xF80 + +#define DMA_MAX_NUM 5 + +#define TPC_MAX_NUM 8 + +#endif /* GOYA_H */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h new file mode 100644 index 000000000000..a0ec23adf8f5 --- /dev/null +++ b/include/uapi/misc/habanalabs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HABANALABS_H_ +#define HABANALABS_H_ + +#include +#include + +/* + * Defines that are asic-specific but constitutes as ABI between kernel driver + * and userspace + */ +#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ + +#endif /* HABANALABS_H_ */