habanalabs: PCIe iATU refactoring
Divide iATU initialization into separate inbound and outbound methods. The split is required in order to enable a different match mode per PCIe region. In addition, add support for PCI address match mode. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
fcc6a4e606
commit
f4cbfd2445
|
@ -465,6 +465,7 @@ static int gaudi_pci_bars_map(struct hl_device *hdev)
|
|||
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
struct hl_inbound_pci_region pci_region;
|
||||
u64 old_addr = addr;
|
||||
int rc;
|
||||
|
||||
|
@ -472,7 +473,10 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
|
|||
return old_addr;
|
||||
|
||||
/* Inbound Region 2 - Bar 4 - Point to HBM */
|
||||
rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
|
||||
pci_region.mode = PCI_BAR_MATCH_MODE;
|
||||
pci_region.bar = HBM_BAR_ID;
|
||||
pci_region.addr = addr;
|
||||
rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
|
||||
if (rc)
|
||||
return U64_MAX;
|
||||
|
||||
|
@ -486,22 +490,43 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
|
|||
|
||||
static int gaudi_init_iatu(struct hl_device *hdev)
|
||||
{
|
||||
int rc = 0;
|
||||
struct hl_inbound_pci_region inbound_region;
|
||||
struct hl_outbound_pci_region outbound_region;
|
||||
int rc;
|
||||
|
||||
/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
|
||||
inbound_region.mode = PCI_BAR_MATCH_MODE;
|
||||
inbound_region.bar = SRAM_BAR_ID;
|
||||
inbound_region.addr = SRAM_BASE_ADDR;
|
||||
rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
|
||||
if (rc)
|
||||
goto done;
|
||||
|
||||
/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
|
||||
rc = hl_pci_iatu_write(hdev, 0x314,
|
||||
lower_32_bits(SPI_FLASH_BASE_ADDR));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x318,
|
||||
upper_32_bits(SPI_FLASH_BASE_ADDR));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x300, 0);
|
||||
/* Enable + Bar match + match enable */
|
||||
rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
|
||||
|
||||
inbound_region.mode = PCI_BAR_MATCH_MODE;
|
||||
inbound_region.bar = CFG_BAR_ID;
|
||||
inbound_region.addr = SPI_FLASH_BASE_ADDR;
|
||||
rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
|
||||
if (rc)
|
||||
return -EIO;
|
||||
goto done;
|
||||
|
||||
return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
|
||||
HOST_PHYS_BASE, HOST_PHYS_SIZE);
|
||||
/* Inbound Region 2 - Bar 4 - Point to HBM */
|
||||
inbound_region.mode = PCI_BAR_MATCH_MODE;
|
||||
inbound_region.bar = HBM_BAR_ID;
|
||||
inbound_region.addr = DRAM_PHYS_BASE;
|
||||
rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
|
||||
if (rc)
|
||||
goto done;
|
||||
|
||||
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
|
||||
|
||||
/* Outbound Region 0 - Point to Host */
|
||||
outbound_region.addr = HOST_PHYS_BASE;
|
||||
outbound_region.size = HOST_PHYS_SIZE;
|
||||
rc = hl_pci_set_outbound_region(hdev, &outbound_region);
|
||||
|
||||
done:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int gaudi_early_init(struct hl_device *hdev)
|
||||
|
@ -2884,16 +2909,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
|
|||
|
||||
gaudi_init_hbm_dma_qmans(hdev);
|
||||
|
||||
/*
|
||||
* Before pushing u-boot/linux to device, need to set the hbm bar to
|
||||
* base address of dram
|
||||
*/
|
||||
if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to map HBM bar to DRAM base address\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
rc = gaudi_init_cpu(hdev);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to initialize CPU\n");
|
||||
|
|
|
@ -458,6 +458,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
|
|||
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
|
||||
{
|
||||
struct goya_device *goya = hdev->asic_specific;
|
||||
struct hl_inbound_pci_region pci_region;
|
||||
u64 old_addr = addr;
|
||||
int rc;
|
||||
|
||||
|
@ -465,7 +466,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
|
|||
return old_addr;
|
||||
|
||||
/* Inbound Region 1 - Bar 4 - Point to DDR */
|
||||
rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
|
||||
pci_region.mode = PCI_BAR_MATCH_MODE;
|
||||
pci_region.bar = DDR_BAR_ID;
|
||||
pci_region.addr = addr;
|
||||
rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
|
||||
if (rc)
|
||||
return U64_MAX;
|
||||
|
||||
|
@ -487,8 +491,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
|
|||
*/
|
||||
static int goya_init_iatu(struct hl_device *hdev)
|
||||
{
|
||||
return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
|
||||
HOST_PHYS_BASE, HOST_PHYS_SIZE);
|
||||
struct hl_inbound_pci_region inbound_region;
|
||||
struct hl_outbound_pci_region outbound_region;
|
||||
int rc;
|
||||
|
||||
/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
|
||||
inbound_region.mode = PCI_BAR_MATCH_MODE;
|
||||
inbound_region.bar = SRAM_CFG_BAR_ID;
|
||||
inbound_region.addr = SRAM_BASE_ADDR;
|
||||
rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
|
||||
if (rc)
|
||||
goto done;
|
||||
|
||||
/* Inbound Region 1 - Bar 4 - Point to DDR */
|
||||
inbound_region.mode = PCI_BAR_MATCH_MODE;
|
||||
inbound_region.bar = DDR_BAR_ID;
|
||||
inbound_region.addr = DRAM_PHYS_BASE;
|
||||
rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
|
||||
if (rc)
|
||||
goto done;
|
||||
|
||||
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
|
||||
|
||||
/* Outbound Region 0 - Point to Host */
|
||||
outbound_region.addr = HOST_PHYS_BASE;
|
||||
outbound_region.size = HOST_PHYS_SIZE;
|
||||
rc = hl_pci_set_outbound_region(hdev, &outbound_region);
|
||||
|
||||
done:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -66,6 +66,8 @@
|
|||
/*
 * The argument is parenthesized at every use so that an expression argument
 * (e.g. "a | b") is evaluated as a unit. Note that n is still expanded more
 * than once, so it must not have side effects.
 */
#define IS_POWER_OF_2(n)		((n) != 0 && (((n) & ((n) - 1)) == 0))
#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && ((n) > 1))

/* Number of entries in the per-device PCI BAR arrays */
#define HL_PCI_NUM_BARS			6
|
||||
|
||||
/**
|
||||
* struct pgt_info - MMU hop page info.
|
||||
* @node: hash linked-list node for the pgts shadow hash of pgts.
|
||||
|
@ -90,6 +92,16 @@ struct pgt_info {
|
|||
struct hl_device;
|
||||
struct hl_fpriv;
|
||||
|
||||
/**
 * enum hl_pci_match_mode - pci match mode per region
 * @PCI_ADDRESS_MATCH_MODE: address match mode
 * @PCI_BAR_MATCH_MODE: bar match mode
 *
 * The numeric value is written as-is into the match mode field of the iATU
 * region control register by hl_pci_set_inbound_region(), so the values are
 * pinned explicitly and must not be reordered.
 */
enum hl_pci_match_mode {
	PCI_ADDRESS_MATCH_MODE = 0,
	PCI_BAR_MATCH_MODE = 1
};
|
||||
|
||||
/**
|
||||
* enum hl_fw_component - F/W components to read version through registers.
|
||||
* @FW_COMP_UBOOT: u-boot.
|
||||
|
@ -125,6 +137,32 @@ enum hl_cs_type {
|
|||
CS_TYPE_WAIT
|
||||
};
|
||||
|
||||
/**
 * struct hl_inbound_pci_region - inbound region descriptor
 * @mode: pci match mode for this region
 * @addr: region target address
 * @size: region size in bytes (read only in address match mode)
 * @offset_in_bar: offset within bar (read only in address match mode)
 * @bar: bar id. In address match mode it selects the bar physical base the
 *       region starts from; in bar match mode it is written to the bar
 *       number field of the region control register.
 */
|
||||
struct hl_inbound_pci_region {
|
||||
enum hl_pci_match_mode mode;
|
||||
u64 addr;
|
||||
u64 size;
|
||||
u64 offset_in_bar;
|
||||
u8 bar;
|
||||
};
|
||||
|
||||
/**
 * struct hl_outbound_pci_region - outbound region descriptor
 * @addr: region target address (first address covered by the region)
 * @size: region size in bytes; the last covered address is
 *        @addr + @size - 1
 */
|
||||
struct hl_outbound_pci_region {
|
||||
u64 addr;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
/*
|
||||
* struct hl_hw_sob - H/W SOB info.
|
||||
* @hdev: habanalabs device structure.
|
||||
|
@ -1347,7 +1385,9 @@ struct hl_device_idle_busy_ts {
|
|||
/**
|
||||
* struct hl_device - habanalabs device structure.
|
||||
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
||||
* @pcie_bar: array of available PCIe bars.
|
||||
* @pcie_bar_phys: array of available PCIe bars physical addresses.
|
||||
* (required only for PCI address match mode)
|
||||
* @pcie_bar: array of available PCIe bars virtual addresses.
|
||||
* @rmmio: configuration area address on SRAM.
|
||||
* @cdev: related char device.
|
||||
* @cdev_ctrl: char device for control operations only (INFO IOCTL)
|
||||
|
@ -1442,7 +1482,8 @@ struct hl_device_idle_busy_ts {
|
|||
*/
|
||||
struct hl_device {
|
||||
struct pci_dev *pdev;
|
||||
void __iomem *pcie_bar[6];
|
||||
u64 pcie_bar_phys[HL_PCI_NUM_BARS];
|
||||
void __iomem *pcie_bar[HL_PCI_NUM_BARS];
|
||||
void __iomem *rmmio;
|
||||
struct cdev cdev;
|
||||
struct cdev cdev_ctrl;
|
||||
|
@ -1767,9 +1808,10 @@ int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
|
|||
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
|
||||
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
|
||||
u64 addr);
|
||||
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
|
||||
u64 dram_base_address, u64 host_phys_base_address,
|
||||
u64 host_phys_size);
|
||||
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
|
||||
struct hl_inbound_pci_region *pci_region);
|
||||
int hl_pci_set_outbound_region(struct hl_device *hdev,
|
||||
struct hl_outbound_pci_region *pci_region);
|
||||
int hl_pci_init(struct hl_device *hdev);
|
||||
void hl_pci_fini(struct hl_device *hdev);
|
||||
|
||||
|
|
|
@ -9,9 +9,15 @@
|
|||
#include "include/hw_ip/pci/pci_general.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
|
||||
|
||||
/*
 * Fields of the iATU inbound region control register, which
 * hl_pci_set_inbound_region() writes at region offset + 0x4:
 * region enable, match mode (value of enum hl_pci_match_mode),
 * match enable and the BAR number used in BAR match mode.
 */
#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31)
|
||||
#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30)
|
||||
#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19)
|
||||
#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8)
|
||||
|
||||
/**
|
||||
* hl_pci_bars_map() - Map PCI BARs.
|
||||
* @hdev: Pointer to hl_device structure.
|
||||
|
@ -187,110 +193,94 @@ static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
|
|||
}
|
||||
|
||||
/**
|
||||
* hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
|
||||
* hl_pci_set_inbound_region() - Configure inbound region
|
||||
* @hdev: Pointer to hl_device structure.
|
||||
* @inbound_region: Inbound region number.
|
||||
* @bar: PCI BAR number.
|
||||
* @addr: Address in DRAM. Must be aligned to DRAM bar size.
|
||||
* @region: Inbound region number.
|
||||
* @pci_region: Inbound region parameters.
|
||||
*
|
||||
* Configure the iATU so that the DRAM bar will start at the specified address.
|
||||
* Configure the iATU inbound region.
|
||||
*
|
||||
* Return: 0 on success, negative value for failure.
|
||||
*/
|
||||
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
|
||||
u64 addr)
|
||||
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
|
||||
struct hl_inbound_pci_region *pci_region)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 offset;
|
||||
int rc;
|
||||
u64 bar_phys_base, region_base, region_end_address;
|
||||
u32 offset, ctrl_reg_val;
|
||||
int rc = 0;
|
||||
|
||||
switch (inbound_region) {
|
||||
case 0:
|
||||
offset = 0x100;
|
||||
break;
|
||||
case 1:
|
||||
offset = 0x300;
|
||||
break;
|
||||
case 2:
|
||||
offset = 0x500;
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Invalid inbound region %d\n",
|
||||
inbound_region);
|
||||
return -EINVAL;
|
||||
}
|
||||
/* region offset */
|
||||
offset = (0x200 * region) + 0x100;
|
||||
|
||||
if (bar != 0 && bar != 2 && bar != 4) {
|
||||
dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
|
||||
return -EINVAL;
|
||||
if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
|
||||
bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
|
||||
region_base = bar_phys_base + pci_region->offset_in_bar;
|
||||
region_end_address = region_base + pci_region->size - 1;
|
||||
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x8,
|
||||
lower_32_bits(region_base));
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0xC,
|
||||
upper_32_bits(region_base));
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x10,
|
||||
lower_32_bits(region_end_address));
|
||||
}
|
||||
|
||||
/* Point to the specified address */
|
||||
rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
|
||||
rc = hl_pci_iatu_write(hdev, offset + 0x14,
|
||||
lower_32_bits(pci_region->addr));
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x18,
|
||||
upper_32_bits(pci_region->addr));
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
|
||||
/* Enable + BAR match + match enable + BAR number */
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));
|
||||
|
||||
/* Enable + bar/address match + match enable + bar number */
|
||||
ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
|
||||
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
|
||||
pci_region->mode);
|
||||
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
|
||||
|
||||
if (pci_region->mode == PCI_BAR_MATCH_MODE)
|
||||
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
|
||||
pci_region->bar);
|
||||
|
||||
rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
|
||||
|
||||
/* Return the DBI window to the default location */
|
||||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
|
||||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
|
||||
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
|
||||
addr);
|
||||
dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
|
||||
pci_region->bar, pci_region->addr);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
|
||||
* hl_pci_set_outbound_region() - Configure outbound region 0
|
||||
* @hdev: Pointer to hl_device structure.
|
||||
* @sram_base_address: SRAM base address.
|
||||
* @dram_base_address: DRAM base address.
|
||||
* @host_phys_base_address: Base physical address of host memory for device
|
||||
* transactions.
|
||||
* @host_phys_size: Size of host memory for device transactions.
|
||||
* @pci_region: Outbound region parameters.
|
||||
*
|
||||
* This is needed in case the firmware doesn't initialize the iATU.
|
||||
* Configure the iATU outbound region 0.
|
||||
*
|
||||
* Return: 0 on success, negative value for failure.
|
||||
*/
|
||||
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
|
||||
u64 dram_base_address, u64 host_phys_base_address,
|
||||
u64 host_phys_size)
|
||||
int hl_pci_set_outbound_region(struct hl_device *hdev,
|
||||
struct hl_outbound_pci_region *pci_region)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 host_phys_end_addr;
|
||||
u64 outbound_region_end_address;
|
||||
int rc = 0;
|
||||
|
||||
/* Inbound Region 0 - Bar 0 - Point to SRAM base address */
|
||||
rc = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x100, 0);
|
||||
/* Enable + Bar match + match enable */
|
||||
rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
|
||||
|
||||
/* Return the DBI window to the default location */
|
||||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
|
||||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
|
||||
|
||||
hdev->asic_funcs->set_dma_mask_from_fw(hdev);
|
||||
|
||||
/* Point to DRAM */
|
||||
if (!hdev->asic_funcs->set_dram_bar_base)
|
||||
return -EINVAL;
|
||||
if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
|
||||
U64_MAX)
|
||||
return -EIO;
|
||||
|
||||
/* Outbound Region 0 - Point to Host */
|
||||
host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
|
||||
/* Outbound Region 0 */
|
||||
outbound_region_end_address =
|
||||
pci_region->addr + pci_region->size - 1;
|
||||
rc |= hl_pci_iatu_write(hdev, 0x008,
|
||||
lower_32_bits(host_phys_base_address));
|
||||
lower_32_bits(pci_region->addr));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x00C,
|
||||
upper_32_bits(host_phys_base_address));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
|
||||
upper_32_bits(pci_region->addr));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x010,
|
||||
lower_32_bits(outbound_region_end_address));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
|
||||
|
||||
if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
|
||||
|
@ -298,7 +288,8 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
|
|||
else
|
||||
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
|
||||
|
||||
rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
|
||||
rc |= hl_pci_iatu_write(hdev, 0x020,
|
||||
upper_32_bits(outbound_region_end_address));
|
||||
/* Increase region size */
|
||||
rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
|
||||
/* Enable */
|
||||
|
@ -308,10 +299,7 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
|
|||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
|
||||
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
|
||||
|
||||
if (rc)
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue