habanalabs: add basic Goya support
This patch adds basic support for the Goya device. The code initializes the device's PCI controller and PCI BARs. It also initializes various S/W structures and adds some basic helper functions. Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
1ea2a20e91
commit
99b9d7b497
|
@ -5,3 +5,6 @@
|
||||||
obj-m := habanalabs.o
|
obj-m := habanalabs.o
|
||||||
|
|
||||||
habanalabs-y := habanalabs_drv.o device.o
|
habanalabs-y := habanalabs_drv.o device.o
|
||||||
|
|
||||||
|
include $(src)/goya/Makefile
|
||||||
|
habanalabs-y += $(HL_GOYA_FILES)
|
||||||
|
|
|
@ -120,8 +120,11 @@ err_cdev_add:
|
||||||
*/
|
*/
|
||||||
static int device_early_init(struct hl_device *hdev)
|
static int device_early_init(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
switch (hdev->asic_type) {
|
switch (hdev->asic_type) {
|
||||||
case ASIC_GOYA:
|
case ASIC_GOYA:
|
||||||
|
goya_set_asic_funcs(hdev);
|
||||||
strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
|
strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rc = hdev->asic_funcs->early_init(hdev);
|
||||||
|
if (rc)
|
||||||
|
return rc;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev)
|
||||||
*/
|
*/
|
||||||
static void device_early_fini(struct hl_device *hdev)
|
static void device_early_fini(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (hdev->asic_funcs->early_fini)
|
||||||
|
hdev->asic_funcs->early_fini(hdev);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev)
|
||||||
*/
|
*/
|
||||||
int hl_device_suspend(struct hl_device *hdev)
|
int hl_device_suspend(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
pci_save_state(hdev->pdev);
|
pci_save_state(hdev->pdev);
|
||||||
|
|
||||||
|
rc = hdev->asic_funcs->suspend(hdev);
|
||||||
|
if (rc)
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Failed to disable PCI access of device CPU\n");
|
||||||
|
|
||||||
/* Shut down the device */
|
/* Shut down the device */
|
||||||
pci_disable_device(hdev->pdev);
|
pci_disable_device(hdev->pdev);
|
||||||
pci_set_power_state(hdev->pdev, PCI_D3hot);
|
pci_set_power_state(hdev->pdev, PCI_D3hot);
|
||||||
|
@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev)
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rc = hdev->asic_funcs->resume(hdev);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Failed to enable PCI access from device CPU\n");
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
||||||
if (rc)
|
if (rc)
|
||||||
goto release_device;
|
goto release_device;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start calling ASIC initialization. First S/W then H/W and finally
|
||||||
|
* late init
|
||||||
|
*/
|
||||||
|
rc = hdev->asic_funcs->sw_init(hdev);
|
||||||
|
if (rc)
|
||||||
|
goto early_fini;
|
||||||
|
|
||||||
dev_notice(hdev->dev,
|
dev_notice(hdev->dev,
|
||||||
"Successfully added device to habanalabs driver\n");
|
"Successfully added device to habanalabs driver\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
early_fini:
|
||||||
|
device_early_fini(hdev);
|
||||||
release_device:
|
release_device:
|
||||||
device_destroy(hclass, hdev->dev->devt);
|
device_destroy(hclass, hdev->dev->devt);
|
||||||
cdev_del(&hdev->cdev);
|
cdev_del(&hdev->cdev);
|
||||||
|
@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev)
|
||||||
/* Mark device as disabled */
|
/* Mark device as disabled */
|
||||||
hdev->disabled = true;
|
hdev->disabled = true;
|
||||||
|
|
||||||
|
/* Call ASIC S/W finalize function */
|
||||||
|
hdev->asic_funcs->sw_fini(hdev);
|
||||||
|
|
||||||
device_early_fini(hdev);
|
device_early_fini(hdev);
|
||||||
|
|
||||||
/* Hide device from user */
|
/* Hide device from user */
|
||||||
|
@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
|
||||||
|
|
||||||
return *val ? 0 : -ETIMEDOUT;
|
return *val ? 0 : -ETIMEDOUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMIO register access helper functions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* hl_rreg - Read an MMIO register
|
||||||
|
*
|
||||||
|
* @hdev: pointer to habanalabs device structure
|
||||||
|
* @reg: MMIO register offset (in bytes)
|
||||||
|
*
|
||||||
|
* Returns the value of the MMIO register we are asked to read
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
|
||||||
|
{
|
||||||
|
return readl(hdev->rmmio + reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* hl_wreg - Write to an MMIO register
|
||||||
|
*
|
||||||
|
* @hdev: pointer to habanalabs device structure
|
||||||
|
* @reg: MMIO register offset (in bytes)
|
||||||
|
* @val: 32-bit value
|
||||||
|
*
|
||||||
|
* Writes the 32-bit value into the MMIO register
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
|
||||||
|
{
|
||||||
|
writel(val, hdev->rmmio + reg);
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
subdir-ccflags-y += -I$(src)
|
||||||
|
|
||||||
|
HL_GOYA_FILES := goya/goya.o
|
|
@ -0,0 +1,632 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "goyaP.h"
|
||||||
|
#include "include/goya/asic_reg/goya_masks.h"
|
||||||
|
|
||||||
|
#include <linux/pci.h>
|
||||||
|
#include <linux/delay.h>
|
||||||
|
#include <linux/genalloc.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GOYA security scheme:
|
||||||
|
*
|
||||||
|
* 1. Host is protected by:
|
||||||
|
* - Range registers (When MMU is enabled, DMA RR does NOT protect host)
|
||||||
|
* - MMU
|
||||||
|
*
|
||||||
|
* 2. DRAM is protected by:
|
||||||
|
* - Range registers (protect the first 512MB)
|
||||||
|
* - MMU (isolation between users)
|
||||||
|
*
|
||||||
|
* 3. Configuration is protected by:
|
||||||
|
* - Range registers
|
||||||
|
* - Protection bits
|
||||||
|
*
|
||||||
|
* When MMU is disabled:
|
||||||
|
*
|
||||||
|
* QMAN DMA: PQ, CQ, CP, DMA are secured.
|
||||||
|
* PQ, CB and the data are on the host.
|
||||||
|
*
|
||||||
|
* QMAN TPC/MME:
|
||||||
|
* PQ, CQ and CP are not secured.
|
||||||
|
* PQ, CB and the data are on the SRAM/DRAM.
|
||||||
|
*
|
||||||
|
* Since QMAN DMA is secured, KMD is parsing the DMA CB:
|
||||||
|
* - KMD checks DMA pointer
|
||||||
|
* - WREG, MSG_PROT are not allowed.
|
||||||
|
* - MSG_LONG/SHORT are allowed.
|
||||||
|
*
|
||||||
|
* A read/write transaction by the QMAN to a protected area will succeed if
|
||||||
|
* and only if the QMAN's CP is secured and MSG_PROT is used
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* When MMU is enabled:
|
||||||
|
*
|
||||||
|
* QMAN DMA: PQ, CQ and CP are secured.
|
||||||
|
* MMU is set to bypass on the Secure props register of the QMAN.
|
||||||
|
* The reasons we don't enable MMU for PQ, CQ and CP are:
|
||||||
|
* - PQ entry is in kernel address space and KMD doesn't map it.
|
||||||
|
* - CP writes to MSIX register and to kernel address space (completion
|
||||||
|
* queue).
|
||||||
|
*
|
||||||
|
* DMA is not secured but because CP is secured, KMD still needs to parse the
|
||||||
|
* CB, but doesn't need to check the DMA addresses.
|
||||||
|
*
|
||||||
|
* For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
|
||||||
|
* doesn't map memory in MMU.
|
||||||
|
*
|
||||||
|
* QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
|
||||||
|
*
|
||||||
|
* DMA RR does NOT protect host because DMA is not secured
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define GOYA_MMU_REGS_NUM 61
|
||||||
|
|
||||||
|
#define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
|
||||||
|
|
||||||
|
#define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */
|
||||||
|
#define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */
|
||||||
|
#define GOYA_RESET_WAIT_MSEC 1 /* 1ms */
|
||||||
|
#define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */
|
||||||
|
#define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
|
||||||
|
#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */
|
||||||
|
#define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
|
||||||
|
|
||||||
|
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
|
||||||
|
|
||||||
|
#define GOYA_MAX_INITIATORS 20
|
||||||
|
|
||||||
|
static void goya_get_fixed_properties(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||||
|
|
||||||
|
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
|
||||||
|
|
||||||
|
prop->dram_base_address = DRAM_PHYS_BASE;
|
||||||
|
prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
|
||||||
|
prop->dram_end_address = prop->dram_base_address + prop->dram_size;
|
||||||
|
prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
|
||||||
|
|
||||||
|
prop->sram_base_address = SRAM_BASE_ADDR;
|
||||||
|
prop->sram_size = SRAM_SIZE;
|
||||||
|
prop->sram_end_address = prop->sram_base_address + prop->sram_size;
|
||||||
|
prop->sram_user_base_address = prop->sram_base_address +
|
||||||
|
SRAM_USER_BASE_OFFSET;
|
||||||
|
|
||||||
|
prop->host_phys_base_address = HOST_PHYS_BASE;
|
||||||
|
prop->va_space_host_start_address = VA_HOST_SPACE_START;
|
||||||
|
prop->va_space_host_end_address = VA_HOST_SPACE_END;
|
||||||
|
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
|
||||||
|
prop->va_space_dram_end_address = VA_DDR_SPACE_END;
|
||||||
|
prop->cfg_size = CFG_SIZE;
|
||||||
|
prop->max_asid = MAX_ASID;
|
||||||
|
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
|
||||||
|
|
||||||
|
prop->high_pll = PLL_HIGH_DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_pci_bars_map - Map PCI BARS of Goya device
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* Request PCI regions and map them to kernel virtual addresses.
|
||||||
|
* Returns 0 on success
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int goya_pci_bars_map(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev = hdev->pdev;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
rc = pci_request_regions(pdev, HL_NAME);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev, "Cannot obtain PCI resources\n");
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->pcie_bar[SRAM_CFG_BAR_ID] =
|
||||||
|
pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
|
||||||
|
if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
|
||||||
|
dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
|
||||||
|
rc = -ENODEV;
|
||||||
|
goto err_release_regions;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
|
||||||
|
if (!hdev->pcie_bar[MSIX_BAR_ID]) {
|
||||||
|
dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
|
||||||
|
rc = -ENODEV;
|
||||||
|
goto err_unmap_sram_cfg;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
|
||||||
|
if (!hdev->pcie_bar[DDR_BAR_ID]) {
|
||||||
|
dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
|
||||||
|
rc = -ENODEV;
|
||||||
|
goto err_unmap_msix;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
|
||||||
|
(CFG_BASE - SRAM_BASE_ADDR);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err_unmap_msix:
|
||||||
|
iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
|
||||||
|
err_unmap_sram_cfg:
|
||||||
|
iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
|
||||||
|
err_release_regions:
|
||||||
|
pci_release_regions(pdev);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_pci_bars_unmap - Unmap PCI BARS of Goya device
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* Release all PCI BARS and unmap their virtual addresses
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static void goya_pci_bars_unmap(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev = hdev->pdev;
|
||||||
|
|
||||||
|
iounmap(hdev->pcie_bar[DDR_BAR_ID]);
|
||||||
|
iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
|
||||||
|
iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
|
||||||
|
pci_release_regions(pdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_elbi_write - Write through the ELBI interface
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* return 0 on success, -1 on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev = hdev->pdev;
|
||||||
|
ktime_t timeout;
|
||||||
|
u32 val;
|
||||||
|
|
||||||
|
/* Clear previous status */
|
||||||
|
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
|
||||||
|
|
||||||
|
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
|
||||||
|
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
|
||||||
|
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
|
||||||
|
PCI_CONFIG_ELBI_CTRL_WRITE);
|
||||||
|
|
||||||
|
timeout = ktime_add_ms(ktime_get(), 10);
|
||||||
|
for (;;) {
|
||||||
|
pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
|
||||||
|
if (val & PCI_CONFIG_ELBI_STS_MASK)
|
||||||
|
break;
|
||||||
|
if (ktime_compare(ktime_get(), timeout) > 0) {
|
||||||
|
pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
|
||||||
|
&val);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
usleep_range(300, 500);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (val & PCI_CONFIG_ELBI_STS_ERR) {
|
||||||
|
dev_err(hdev->dev, "Error writing to ELBI\n");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
|
||||||
|
dev_err(hdev->dev, "ELBI write didn't finish in time\n");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_iatu_write - iatu write routine
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
|
||||||
|
{
|
||||||
|
u32 dbi_offset;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
dbi_offset = addr & 0xFFF;
|
||||||
|
|
||||||
|
rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
|
||||||
|
rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);
|
||||||
|
|
||||||
|
if (rc)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void goya_reset_link_through_bridge(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev = hdev->pdev;
|
||||||
|
struct pci_dev *parent_port;
|
||||||
|
u16 val;
|
||||||
|
|
||||||
|
parent_port = pdev->bus->self;
|
||||||
|
pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
|
||||||
|
val |= PCI_BRIDGE_CTL_BUS_RESET;
|
||||||
|
pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
|
||||||
|
ssleep(1);
|
||||||
|
|
||||||
|
val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
|
||||||
|
pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
|
||||||
|
ssleep(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_set_ddr_bar_base - set DDR bar to map specific device address
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
* @addr: address in DDR. Must be aligned to DDR bar size
|
||||||
|
*
|
||||||
|
* This function configures the iATU so that the DDR bar will start at the
|
||||||
|
* specified addr.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
|
||||||
|
{
|
||||||
|
struct goya_device *goya = hdev->asic_specific;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if ((goya) && (goya->ddr_bar_cur_addr == addr))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Inbound Region 1 - Bar 4 - Point to DDR */
|
||||||
|
rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x300, 0);
|
||||||
|
/* Enable + Bar match + match enable + Bar 4 */
|
||||||
|
rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);
|
||||||
|
|
||||||
|
/* Return the DBI window to the default location */
|
||||||
|
rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
|
||||||
|
rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
|
||||||
|
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (goya)
|
||||||
|
goya->ddr_bar_cur_addr = addr;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_init_iatu - Initialize the iATU unit inside the PCI controller
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* This is needed in case the firmware doesn't initialize the iATU
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_init_iatu(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
|
||||||
|
rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x100, 0);
|
||||||
|
/* Enable + Bar match + match enable */
|
||||||
|
rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);
|
||||||
|
|
||||||
|
/* Inbound Region 1 - Bar 4 - Point to DDR */
|
||||||
|
rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
|
||||||
|
|
||||||
|
/* Outbound Region 0 - Point to Host */
|
||||||
|
rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x010,
|
||||||
|
lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
|
||||||
|
rc |= goya_iatu_write(hdev, 0x014, 0);
|
||||||
|
rc |= goya_iatu_write(hdev, 0x018, 0);
|
||||||
|
rc |= goya_iatu_write(hdev, 0x020,
|
||||||
|
upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
|
||||||
|
/* Increase region size */
|
||||||
|
rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
|
||||||
|
/* Enable */
|
||||||
|
rc |= goya_iatu_write(hdev, 0x004, 0x80000000);
|
||||||
|
|
||||||
|
/* Return the DBI window to the default location */
|
||||||
|
rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
|
||||||
|
rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
|
||||||
|
|
||||||
|
if (rc)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_early_init - GOYA early initialization code
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* Verify PCI bars
|
||||||
|
* Set DMA masks
|
||||||
|
* PCI controller initialization
|
||||||
|
* Map PCI bars
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_early_init(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||||
|
struct pci_dev *pdev = hdev->pdev;
|
||||||
|
u32 val;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
goya_get_fixed_properties(hdev);
|
||||||
|
|
||||||
|
/* Check BAR sizes */
|
||||||
|
if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
|
||||||
|
SRAM_CFG_BAR_ID,
|
||||||
|
(unsigned long long) pci_resource_len(pdev,
|
||||||
|
SRAM_CFG_BAR_ID),
|
||||||
|
CFG_BAR_SIZE);
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
|
||||||
|
MSIX_BAR_ID,
|
||||||
|
(unsigned long long) pci_resource_len(pdev,
|
||||||
|
MSIX_BAR_ID),
|
||||||
|
MSIX_BAR_SIZE);
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
|
||||||
|
|
||||||
|
/* set DMA mask for GOYA */
|
||||||
|
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
|
||||||
|
if (rc) {
|
||||||
|
dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
|
||||||
|
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Unable to set pci dma mask to 32 bits\n");
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
|
||||||
|
if (rc) {
|
||||||
|
dev_warn(hdev->dev,
|
||||||
|
"Unable to set pci consistent dma mask to 39 bits\n");
|
||||||
|
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Unable to set pci consistent dma mask to 32 bits\n");
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hdev->reset_pcilink)
|
||||||
|
goya_reset_link_through_bridge(hdev);
|
||||||
|
|
||||||
|
rc = pci_enable_device_mem(pdev);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev, "can't enable PCI device\n");
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
pci_set_master(pdev);
|
||||||
|
|
||||||
|
rc = goya_init_iatu(hdev);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev, "Failed to initialize iATU\n");
|
||||||
|
goto disable_device;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = goya_pci_bars_map(hdev);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
|
||||||
|
goto disable_device;
|
||||||
|
}
|
||||||
|
|
||||||
|
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
|
||||||
|
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
|
||||||
|
dev_warn(hdev->dev,
|
||||||
|
"PCI strap is not configured correctly, PCI bus errors may occur\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
disable_device:
|
||||||
|
pci_clear_master(pdev);
|
||||||
|
pci_disable_device(pdev);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_early_fini - GOYA early finalization code
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
* Unmap PCI bars
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int goya_early_fini(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
goya_pci_bars_unmap(hdev);
|
||||||
|
|
||||||
|
pci_clear_master(hdev->pdev);
|
||||||
|
pci_disable_device(hdev->pdev);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_sw_init - Goya software initialization code
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static int goya_sw_init(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct goya_device *goya;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* Allocate device structure */
|
||||||
|
goya = kzalloc(sizeof(*goya), GFP_KERNEL);
|
||||||
|
if (!goya)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
/* according to goya_init_iatu */
|
||||||
|
goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
|
||||||
|
hdev->asic_specific = goya;
|
||||||
|
|
||||||
|
/* Create DMA pool for small allocations */
|
||||||
|
hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
|
||||||
|
&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
|
||||||
|
if (!hdev->dma_pool) {
|
||||||
|
dev_err(hdev->dev, "failed to create DMA pool\n");
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto free_goya_device;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->cpu_accessible_dma_mem =
|
||||||
|
hdev->asic_funcs->dma_alloc_coherent(hdev,
|
||||||
|
CPU_ACCESSIBLE_MEM_SIZE,
|
||||||
|
&hdev->cpu_accessible_dma_address,
|
||||||
|
GFP_KERNEL | __GFP_ZERO);
|
||||||
|
|
||||||
|
if (!hdev->cpu_accessible_dma_mem) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"failed to allocate %d of dma memory for CPU accessible memory space\n",
|
||||||
|
CPU_ACCESSIBLE_MEM_SIZE);
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto free_dma_pool;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
|
||||||
|
if (!hdev->cpu_accessible_dma_pool) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Failed to create CPU accessible DMA pool\n");
|
||||||
|
rc = -ENOMEM;
|
||||||
|
goto free_cpu_pq_dma_mem;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
|
||||||
|
(uintptr_t) hdev->cpu_accessible_dma_mem,
|
||||||
|
CPU_ACCESSIBLE_MEM_SIZE, -1);
|
||||||
|
if (rc) {
|
||||||
|
dev_err(hdev->dev,
|
||||||
|
"Failed to add memory to CPU accessible DMA pool\n");
|
||||||
|
rc = -EFAULT;
|
||||||
|
goto free_cpu_pq_pool;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_lock_init(&goya->hw_queues_lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
free_cpu_pq_pool:
|
||||||
|
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
|
||||||
|
free_cpu_pq_dma_mem:
|
||||||
|
hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
|
||||||
|
hdev->cpu_accessible_dma_mem,
|
||||||
|
hdev->cpu_accessible_dma_address);
|
||||||
|
free_dma_pool:
|
||||||
|
dma_pool_destroy(hdev->dma_pool);
|
||||||
|
free_goya_device:
|
||||||
|
kfree(goya);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_sw_fini - Goya software tear-down code
|
||||||
|
*
|
||||||
|
* @hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int goya_sw_fini(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct goya_device *goya = hdev->asic_specific;
|
||||||
|
|
||||||
|
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
|
||||||
|
|
||||||
|
hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
|
||||||
|
hdev->cpu_accessible_dma_mem,
|
||||||
|
hdev->cpu_accessible_dma_address);
|
||||||
|
|
||||||
|
dma_pool_destroy(hdev->dma_pool);
|
||||||
|
|
||||||
|
kfree(goya);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * goya_suspend - Goya suspend callback
 *
 * @hdev: pointer to hl_device structure
 *
 * Currently performs no work and always returns 0
 *
 */
int goya_suspend(struct hl_device *hdev)
{
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * goya_resume - Goya resume callback
 *
 * @hdev: pointer to hl_device structure
 *
 * Currently performs no work and always returns 0
 *
 */
int goya_resume(struct hl_device *hdev)
{
	return 0;
}
|
||||||
|
|
||||||
|
void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
|
||||||
|
dma_addr_t *dma_handle, gfp_t flags)
|
||||||
|
{
|
||||||
|
return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
|
||||||
|
dma_addr_t dma_handle)
|
||||||
|
{
|
||||||
|
dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct hl_asic_funcs goya_funcs = {
|
||||||
|
.early_init = goya_early_init,
|
||||||
|
.early_fini = goya_early_fini,
|
||||||
|
.sw_init = goya_sw_init,
|
||||||
|
.sw_fini = goya_sw_fini,
|
||||||
|
.suspend = goya_suspend,
|
||||||
|
.resume = goya_resume,
|
||||||
|
.dma_alloc_coherent = goya_dma_alloc_coherent,
|
||||||
|
.dma_free_coherent = goya_dma_free_coherent,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goya_set_asic_funcs - set Goya function pointers
|
||||||
|
*
|
||||||
|
* @*hdev: pointer to hl_device structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void goya_set_asic_funcs(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
hdev->asic_funcs = &goya_funcs;
|
||||||
|
}
|
|
@ -0,0 +1,152 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0
|
||||||
|
*
|
||||||
|
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GOYAP_H_
|
||||||
|
#define GOYAP_H_
|
||||||
|
|
||||||
|
#include <uapi/misc/habanalabs.h>
|
||||||
|
#include "habanalabs.h"
|
||||||
|
#include "include/goya/goya.h"
|
||||||
|
|
||||||
|
#define NUMBER_OF_CMPLT_QUEUES 5
|
||||||
|
#define NUMBER_OF_EXT_HW_QUEUES 5
|
||||||
|
#define NUMBER_OF_CPU_HW_QUEUES 1
|
||||||
|
#define NUMBER_OF_INT_HW_QUEUES 9
|
||||||
|
#define NUMBER_OF_HW_QUEUES (NUMBER_OF_EXT_HW_QUEUES + \
|
||||||
|
NUMBER_OF_CPU_HW_QUEUES + \
|
||||||
|
NUMBER_OF_INT_HW_QUEUES)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of MSIX interrupts IDS:
|
||||||
|
* Each completion queue has 1 ID
|
||||||
|
* The event queue has 1 ID
|
||||||
|
*/
|
||||||
|
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + 1)
|
||||||
|
|
||||||
|
#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
|
||||||
|
#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
|
||||||
|
#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
|
||||||
|
|
||||||
|
#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */
|
||||||
|
|
||||||
|
#define TPC_ENABLED_MASK 0xFF
|
||||||
|
|
||||||
|
#define PLL_HIGH_DEFAULT 1575000000 /* 1.575 GHz */
|
||||||
|
|
||||||
|
#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */
|
||||||
|
|
||||||
|
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
|
||||||
|
|
||||||
|
/* DRAM Memory Map */
|
||||||
|
|
||||||
|
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
|
||||||
|
#define MMU_PAGE_TABLES_SIZE 0x0E000000 /* 224MB */
|
||||||
|
#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */
|
||||||
|
#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */
|
||||||
|
#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */
|
||||||
|
|
||||||
|
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
|
||||||
|
#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
|
||||||
|
#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
|
||||||
|
#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
|
||||||
|
#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
|
||||||
|
#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
|
||||||
|
|
||||||
|
#if (DRAM_BASE_ADDR_USER != 0x20000000)
|
||||||
|
#error "KMD must reserve 512MB"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SRAM Memory Map for KMD
|
||||||
|
*
|
||||||
|
* KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
|
||||||
|
* MME/TPC QMANs
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define MME_QMAN_BASE_OFFSET 0x000000 /* Must be 0 */
|
||||||
|
#define MME_QMAN_LENGTH 64
|
||||||
|
#define TPC_QMAN_LENGTH 64
|
||||||
|
|
||||||
|
#define TPC0_QMAN_BASE_OFFSET (MME_QMAN_BASE_OFFSET + \
|
||||||
|
(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC1_QMAN_BASE_OFFSET (TPC0_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC2_QMAN_BASE_OFFSET (TPC1_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC3_QMAN_BASE_OFFSET (TPC2_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC4_QMAN_BASE_OFFSET (TPC3_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC5_QMAN_BASE_OFFSET (TPC4_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC6_QMAN_BASE_OFFSET (TPC5_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
#define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
|
||||||
|
#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
|
||||||
|
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
|
||||||
|
|
||||||
|
#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
|
||||||
|
#error "MME/TPC QMANs SRAM space exceeds limit"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SRAM_USER_BASE_OFFSET GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START
|
||||||
|
|
||||||
|
/* Virtual address space */
|
||||||
|
#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */
|
||||||
|
#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 512GB */
|
||||||
|
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \
|
||||||
|
VA_HOST_SPACE_START) /* 768TB - 512GB */
|
||||||
|
|
||||||
|
#define VA_DDR_SPACE_START 0x800000000ull /* 32GB */
|
||||||
|
#define VA_DDR_SPACE_END 0x2000000000ull /* 128GB */
|
||||||
|
#define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \
|
||||||
|
VA_DDR_SPACE_START) /* 96GB */
|
||||||
|
|
||||||
|
#define DMA_MAX_TRANSFER_SIZE 0xFFFFFFFF
|
||||||
|
|
||||||
|
#define HW_CAP_PLL 0x00000001
|
||||||
|
#define HW_CAP_DDR_0 0x00000002
|
||||||
|
#define HW_CAP_DDR_1 0x00000004
|
||||||
|
#define HW_CAP_MME 0x00000008
|
||||||
|
#define HW_CAP_CPU 0x00000010
|
||||||
|
#define HW_CAP_DMA 0x00000020
|
||||||
|
#define HW_CAP_MSIX 0x00000040
|
||||||
|
#define HW_CAP_CPU_Q 0x00000080
|
||||||
|
#define HW_CAP_MMU 0x00000100
|
||||||
|
#define HW_CAP_TPC_MBIST 0x00000200
|
||||||
|
#define HW_CAP_GOLDEN 0x00000400
|
||||||
|
#define HW_CAP_TPC 0x00000800
|
||||||
|
|
||||||
|
#define CPU_PKT_SHIFT 5
|
||||||
|
#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT)
|
||||||
|
#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1))
|
||||||
|
#define CPU_MAX_PKTS_IN_CB 32
|
||||||
|
#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB)
|
||||||
|
#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE)
|
||||||
|
|
||||||
|
enum goya_fw_component {
|
||||||
|
FW_COMP_UBOOT,
|
||||||
|
FW_COMP_PREBOOT
|
||||||
|
};
|
||||||
|
|
||||||
|
struct goya_device {
|
||||||
|
/* TODO: remove hw_queues_lock after moving to scheduler code */
|
||||||
|
spinlock_t hw_queues_lock;
|
||||||
|
u64 ddr_bar_cur_addr;
|
||||||
|
u32 hw_cap_initialized;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* GOYAP_H_ */
|
|
@ -14,9 +14,62 @@
|
||||||
|
|
||||||
#define HL_NAME "habanalabs"
|
#define HL_NAME "habanalabs"
|
||||||
|
|
||||||
|
#define HL_MAX_QUEUES 128
|
||||||
|
|
||||||
struct hl_device;
|
struct hl_device;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct asic_fixed_properties - ASIC specific immutable properties.
|
||||||
|
* @sram_base_address: SRAM physical start address.
|
||||||
|
* @sram_end_address: SRAM physical end address.
|
||||||
|
* @sram_user_base_address: SRAM physical start address for user access.
|
||||||
|
* @dram_base_address: DRAM physical start address.
|
||||||
|
* @dram_end_address: DRAM physical end address.
|
||||||
|
* @dram_user_base_address: DRAM physical start address for user access.
|
||||||
|
* @dram_size: DRAM total size.
|
||||||
|
* @dram_pci_bar_size: size of PCI bar towards DRAM.
|
||||||
|
* @host_phys_base_address: base physical address of host memory for
|
||||||
|
* transactions that the device generates.
|
||||||
|
* @va_space_host_start_address: base address of virtual memory range for
|
||||||
|
* mapping host memory.
|
||||||
|
* @va_space_host_end_address: end address of virtual memory range for
|
||||||
|
* mapping host memory.
|
||||||
|
* @va_space_dram_start_address: base address of virtual memory range for
|
||||||
|
* mapping DRAM memory.
|
||||||
|
* @va_space_dram_end_address: end address of virtual memory range for
|
||||||
|
* mapping DRAM memory.
|
||||||
|
* @cfg_size: configuration space size on SRAM.
|
||||||
|
* @sram_size: total size of SRAM.
|
||||||
|
* @max_asid: maximum number of open contexts (ASIDs).
|
||||||
|
* @completion_queues_count: number of completion queues.
|
||||||
|
* @high_pll: high PLL frequency used by the device.
|
||||||
|
* @tpc_enabled_mask: which TPCs are enabled.
|
||||||
|
*/
|
||||||
|
struct asic_fixed_properties {
|
||||||
|
u64 sram_base_address;
|
||||||
|
u64 sram_end_address;
|
||||||
|
u64 sram_user_base_address;
|
||||||
|
u64 dram_base_address;
|
||||||
|
u64 dram_end_address;
|
||||||
|
u64 dram_user_base_address;
|
||||||
|
u64 dram_size;
|
||||||
|
u64 dram_pci_bar_size;
|
||||||
|
u64 host_phys_base_address;
|
||||||
|
u64 va_space_host_start_address;
|
||||||
|
u64 va_space_host_end_address;
|
||||||
|
u64 va_space_dram_start_address;
|
||||||
|
u64 va_space_dram_end_address;
|
||||||
|
u32 cfg_size;
|
||||||
|
u32 sram_size;
|
||||||
|
u32 max_asid;
|
||||||
|
u32 high_pll;
|
||||||
|
u8 completion_queues_count;
|
||||||
|
u8 tpc_enabled_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define HL_QUEUE_LENGTH 256
|
||||||
/*
|
/*
|
||||||
* ASICs
|
* ASICs
|
||||||
*/
|
*/
|
||||||
|
@ -33,6 +86,36 @@ enum hl_asic_type {
|
||||||
ASIC_INVALID
|
ASIC_INVALID
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct hl_asic_funcs - ASIC specific functions that can be called from
|
||||||
|
* common code.
|
||||||
|
* @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
|
||||||
|
* @early_fini: tears down what was done in early_init.
|
||||||
|
* @sw_init: sets up driver state, does not configure H/W.
|
||||||
|
* @sw_fini: tears down driver state, does not configure H/W.
|
||||||
|
* @suspend: handles IP specific H/W or SW changes for suspend.
|
||||||
|
* @resume: handles IP specific H/W or SW changes for resume.
|
||||||
|
* @dma_alloc_coherent: Allocate coherent DMA memory by calling
|
||||||
|
* dma_alloc_coherent(). This is ASIC function because its
|
||||||
|
* implementation is not trivial when the driver is loaded
|
||||||
|
* in simulation mode (not upstreamed).
|
||||||
|
* @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
|
||||||
|
* This is ASIC function because its implementation is not
|
||||||
|
* trivial when the driver is loaded in simulation mode
|
||||||
|
* (not upstreamed).
|
||||||
|
*/
|
||||||
|
struct hl_asic_funcs {
|
||||||
|
int (*early_init)(struct hl_device *hdev);
|
||||||
|
int (*early_fini)(struct hl_device *hdev);
|
||||||
|
int (*sw_init)(struct hl_device *hdev);
|
||||||
|
int (*sw_fini)(struct hl_device *hdev);
|
||||||
|
int (*suspend)(struct hl_device *hdev);
|
||||||
|
int (*resume)(struct hl_device *hdev);
|
||||||
|
void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
|
||||||
|
dma_addr_t *dma_handle, gfp_t flag);
|
||||||
|
void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
|
||||||
|
void *cpu_addr, dma_addr_t dma_handle);
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FILE PRIVATE STRUCTURE
|
* FILE PRIVATE STRUCTURE
|
||||||
|
@ -62,26 +145,78 @@ struct hl_fpriv {
|
||||||
*/
|
*/
|
||||||
#define HL_MAX_MINORS 256
|
#define HL_MAX_MINORS 256
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Registers read & write functions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
u32 hl_rreg(struct hl_device *hdev, u32 reg);
|
||||||
|
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
||||||
|
|
||||||
|
/*
 * Poll a device register through readl_poll_timeout().
 *
 * The polled address is (hdev)->rmmio + (addr); val, cond, sleep_us and
 * timeout_us are forwarded unchanged to readl_poll_timeout(), and the macro
 * evaluates to whatever readl_poll_timeout() evaluates to.
 *
 * hdev and addr are parenthesized so that expression arguments (e.g. a
 * conditional or a sum of offsets) keep their intended precedence.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
	readl_poll_timeout((hdev)->rmmio + (addr), val, cond, sleep_us, timeout_us)
|
||||||
|
|
||||||
|
#define RREG32(reg) hl_rreg(hdev, (reg))
|
||||||
|
#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
|
||||||
|
#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
|
||||||
|
hl_rreg(hdev, (reg)))
|
||||||
|
|
||||||
|
#define WREG32_P(reg, val, mask) \
|
||||||
|
do { \
|
||||||
|
u32 tmp_ = RREG32(reg); \
|
||||||
|
tmp_ &= (mask); \
|
||||||
|
tmp_ |= ((val) & ~(mask)); \
|
||||||
|
WREG32(reg, tmp_); \
|
||||||
|
} while (0)
|
||||||
|
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
|
||||||
|
#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
|
||||||
|
|
||||||
|
#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
|
||||||
|
#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
|
||||||
|
#define WREG32_FIELD(reg, field, val) \
|
||||||
|
WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
|
||||||
|
(val) << REG_FIELD_SHIFT(reg, field))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct hl_device - habanalabs device structure.
|
* struct hl_device - habanalabs device structure.
|
||||||
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
||||||
|
* @pcie_bar: array of available PCIe bars.
|
||||||
|
* @rmmio: configuration area address on SRAM.
|
||||||
* @cdev: related char device.
|
* @cdev: related char device.
|
||||||
* @dev: related kernel basic device structure.
|
* @dev: related kernel basic device structure.
|
||||||
* @asic_name: ASIC specific name.
|
* @asic_name: ASIC specific name.
|
||||||
* @asic_type: ASIC specific type.
|
* @asic_type: ASIC specific type.
|
||||||
|
* @dma_pool: DMA pool for small allocations.
|
||||||
|
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
|
||||||
|
* @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
|
||||||
|
* @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
|
||||||
|
* @asic_prop: ASIC specific immutable properties.
|
||||||
|
* @asic_funcs: ASIC specific functions.
|
||||||
|
* @asic_specific: ASIC specific information to use only from ASIC files.
|
||||||
* @major: habanalabs KMD major.
|
* @major: habanalabs KMD major.
|
||||||
* @id: device minor.
|
* @id: device minor.
|
||||||
* @disabled: is device disabled.
|
* @disabled: is device disabled.
|
||||||
*/
|
*/
|
||||||
struct hl_device {
|
struct hl_device {
|
||||||
struct pci_dev *pdev;
|
struct pci_dev *pdev;
|
||||||
|
void __iomem *pcie_bar[6];
|
||||||
|
void __iomem *rmmio;
|
||||||
struct cdev cdev;
|
struct cdev cdev;
|
||||||
struct device *dev;
|
struct device *dev;
|
||||||
char asic_name[16];
|
char asic_name[16];
|
||||||
enum hl_asic_type asic_type;
|
enum hl_asic_type asic_type;
|
||||||
|
struct dma_pool *dma_pool;
|
||||||
|
void *cpu_accessible_dma_mem;
|
||||||
|
dma_addr_t cpu_accessible_dma_address;
|
||||||
|
struct gen_pool *cpu_accessible_dma_pool;
|
||||||
|
struct asic_fixed_properties asic_prop;
|
||||||
|
const struct hl_asic_funcs *asic_funcs;
|
||||||
|
void *asic_specific;
|
||||||
u32 major;
|
u32 major;
|
||||||
u16 id;
|
u16 id;
|
||||||
u8 disabled;
|
u8 disabled;
|
||||||
|
|
||||||
|
/* Parameters for bring-up */
|
||||||
|
u8 reset_pcilink;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -128,4 +263,6 @@ void hl_device_fini(struct hl_device *hdev);
|
||||||
int hl_device_suspend(struct hl_device *hdev);
|
int hl_device_suspend(struct hl_device *hdev);
|
||||||
int hl_device_resume(struct hl_device *hdev);
|
int hl_device_resume(struct hl_device *hdev);
|
||||||
|
|
||||||
|
void goya_set_asic_funcs(struct hl_device *hdev);
|
||||||
|
|
||||||
#endif /* HABANALABSP_H_ */
|
#endif /* HABANALABSP_H_ */
|
||||||
|
|
|
@ -122,6 +122,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
||||||
|
|
||||||
hdev->major = hl_major;
|
hdev->major = hl_major;
|
||||||
|
|
||||||
|
/* Parameters for bring-up - set them to defaults */
|
||||||
|
hdev->reset_pcilink = 0;
|
||||||
|
|
||||||
hdev->disabled = true;
|
hdev->disabled = true;
|
||||||
hdev->pdev = pdev; /* can be NULL in case of simulator device */
|
hdev->pdev = pdev; /* can be NULL in case of simulator device */
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0
|
||||||
|
*
|
||||||
|
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GOYA_H
|
||||||
|
#define GOYA_H
|
||||||
|
|
||||||
|
#include "asic_reg/goya_regs.h"
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#define SRAM_CFG_BAR_ID 0
|
||||||
|
#define MSIX_BAR_ID 2
|
||||||
|
#define DDR_BAR_ID 4
|
||||||
|
|
||||||
|
#define CFG_BAR_SIZE 0x10000000ull /* 256MB */
|
||||||
|
#define MSIX_BAR_SIZE 0x1000ull /* 4KB */
|
||||||
|
|
||||||
|
#define CFG_BASE 0x7FFC000000ull
|
||||||
|
#define CFG_SIZE 0x4000000 /* 32MB CFG + 32MB DBG*/
|
||||||
|
|
||||||
|
#define SRAM_BASE_ADDR 0x7FF0000000ull
|
||||||
|
#define SRAM_SIZE 0x32A0000 /* 50.625MB */
|
||||||
|
|
||||||
|
#define DRAM_PHYS_BASE 0x0ull
|
||||||
|
|
||||||
|
#define HOST_PHYS_BASE 0x8000000000ull /* 0.5TB */
|
||||||
|
#define HOST_PHYS_SIZE 0x1000000000000ull /* 0.25PB (48 bits) */
|
||||||
|
|
||||||
|
#define GOYA_MSIX_ENTRIES 8
|
||||||
|
|
||||||
|
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
|
||||||
|
|
||||||
|
#define MAX_ASID 1024
|
||||||
|
|
||||||
|
#define PROT_BITS_OFFS 0xF80
|
||||||
|
|
||||||
|
#define DMA_MAX_NUM 5
|
||||||
|
|
||||||
|
#define TPC_MAX_NUM 8
|
||||||
|
|
||||||
|
#endif /* GOYA_H */
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||||
|
*
|
||||||
|
* Copyright 2016-2018 HabanaLabs, Ltd.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HABANALABS_H_
|
||||||
|
#define HABANALABS_H_
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/ioctl.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Defines that are asic-specific but constitute an ABI between kernel driver
|
||||||
|
* and userspace
|
||||||
|
*/
|
||||||
|
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */
|
||||||
|
|
||||||
|
#endif /* HABANALABS_H_ */
|
Loading…
Reference in New Issue