habanalabs: split the host MMU properties
Host memory may be allocated with huge pages, and a different virtual address range may be used for mapping it in that case. Add Huge PCI MMU (HPMMU) properties to support this. This patch is a prerequisite for supporting future ASICs and has no effect on the Goya ASIC, as it currently uses a single virtual host range for all page sizes. Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
240c92fd04
commit
64a7e2955d
|
@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
|
|||
}
|
||||
|
||||
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
|
||||
prop->va_space_dram_start_address,
|
||||
prop->va_space_dram_end_address);
|
||||
prop->dmmu.start_addr,
|
||||
prop->dmmu.end_addr);
|
||||
|
||||
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
|
|||
goto out;
|
||||
|
||||
if (hdev->dram_supports_virtual_memory &&
|
||||
addr >= prop->va_space_dram_start_address &&
|
||||
addr < prop->va_space_dram_end_address)
|
||||
(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
|
||||
return true;
|
||||
|
||||
if (addr >= prop->va_space_host_start_address &&
|
||||
addr < prop->va_space_host_end_address)
|
||||
if (addr >= prop->pmmu.start_addr &&
|
||||
addr < prop->pmmu.end_addr)
|
||||
return true;
|
||||
|
||||
if (addr >= prop->pmmu_huge.start_addr &&
|
||||
addr < prop->pmmu_huge.end_addr)
|
||||
return true;
|
||||
out:
|
||||
return false;
|
||||
|
@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
|
|||
}
|
||||
|
||||
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
|
||||
prop->va_space_dram_start_address,
|
||||
prop->va_space_dram_end_address);
|
||||
prop->dmmu.start_addr,
|
||||
prop->dmmu.end_addr);
|
||||
|
||||
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
|
|
@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
|
|||
prop->dmmu.hop2_mask = HOP2_MASK;
|
||||
prop->dmmu.hop3_mask = HOP3_MASK;
|
||||
prop->dmmu.hop4_mask = HOP4_MASK;
|
||||
prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
|
||||
|
||||
/* No difference between PMMU and DMMU except of page size */
|
||||
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
|
||||
prop->dmmu.start_addr = VA_DDR_SPACE_START;
|
||||
prop->dmmu.end_addr = VA_DDR_SPACE_END;
|
||||
prop->dmmu.page_size = PAGE_SIZE_2MB;
|
||||
|
||||
/* shifts and masks are the same in PMMU and DMMU */
|
||||
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
|
||||
prop->pmmu.start_addr = VA_HOST_SPACE_START;
|
||||
prop->pmmu.end_addr = VA_HOST_SPACE_END;
|
||||
prop->pmmu.page_size = PAGE_SIZE_4KB;
|
||||
|
||||
prop->va_space_host_start_address = VA_HOST_SPACE_START;
|
||||
prop->va_space_host_end_address = VA_HOST_SPACE_END;
|
||||
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
|
||||
prop->va_space_dram_end_address = VA_DDR_SPACE_END;
|
||||
prop->dram_size_for_default_page_mapping =
|
||||
prop->va_space_dram_end_address;
|
||||
/* PMMU and HPMMU are the same except for the page size */
|
||||
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
|
||||
prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
|
||||
|
||||
prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
|
||||
prop->cfg_size = CFG_SIZE;
|
||||
prop->max_asid = MAX_ASID;
|
||||
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
|
||||
|
@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
|
|||
/*
|
||||
* WA for HW-23.
|
||||
* We can't allow user to read from Host using QMANs other than 1.
|
||||
* PMMU and HPMMU addresses are equal, check only one of them.
|
||||
*/
|
||||
if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
|
||||
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
|
||||
le32_to_cpu(user_dma_pkt->tsize),
|
||||
hdev->asic_prop.va_space_host_start_address,
|
||||
hdev->asic_prop.va_space_host_end_address)) {
|
||||
hdev->asic_prop.pmmu.start_addr,
|
||||
hdev->asic_prop.pmmu.end_addr)) {
|
||||
dev_err(hdev->dev,
|
||||
"Can't DMA from host on queue other then 1\n");
|
||||
return -EFAULT;
|
||||
|
|
|
@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
|
|||
u64 range_start, range_end;
|
||||
|
||||
if (hdev->mmu_enable) {
|
||||
range_start = prop->va_space_dram_start_address;
|
||||
range_end = prop->va_space_dram_end_address;
|
||||
range_start = prop->dmmu.start_addr;
|
||||
range_end = prop->dmmu.end_addr;
|
||||
} else {
|
||||
range_start = prop->dram_user_base_address;
|
||||
range_end = prop->dram_end_address;
|
||||
|
|
|
@ -132,6 +132,8 @@ enum hl_device_hw_state {
|
|||
|
||||
/**
|
||||
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
|
||||
* @start_addr: virtual start address of the memory region.
|
||||
* @end_addr: virtual end address of the memory region.
|
||||
* @hop0_shift: shift of hop 0 mask.
|
||||
* @hop1_shift: shift of hop 1 mask.
|
||||
* @hop2_shift: shift of hop 2 mask.
|
||||
|
@ -143,9 +145,10 @@ enum hl_device_hw_state {
|
|||
* @hop3_mask: mask to get the PTE address in hop 3.
|
||||
* @hop4_mask: mask to get the PTE address in hop 4.
|
||||
* @page_size: default page size used to allocate memory.
|
||||
* @huge_page_size: page size used to allocate memory with huge pages.
|
||||
*/
|
||||
struct hl_mmu_properties {
|
||||
u64 start_addr;
|
||||
u64 end_addr;
|
||||
u64 hop0_shift;
|
||||
u64 hop1_shift;
|
||||
u64 hop2_shift;
|
||||
|
@ -157,7 +160,6 @@ struct hl_mmu_properties {
|
|||
u64 hop3_mask;
|
||||
u64 hop4_mask;
|
||||
u32 page_size;
|
||||
u32 huge_page_size;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -169,6 +171,8 @@ struct hl_mmu_properties {
|
|||
* @preboot_ver: F/W Preboot version.
|
||||
* @dmmu: DRAM MMU address translation properties.
|
||||
* @pmmu: PCI (host) MMU address translation properties.
|
||||
* @pmmu_huge: PCI (host) MMU address translation properties for memory
|
||||
* allocated with huge pages.
|
||||
* @sram_base_address: SRAM physical start address.
|
||||
* @sram_end_address: SRAM physical end address.
|
||||
* @sram_user_base_address: SRAM physical start address for user access.
|
||||
|
@ -178,14 +182,6 @@ struct hl_mmu_properties {
|
|||
* @dram_size: DRAM total size.
|
||||
* @dram_pci_bar_size: size of PCI bar towards DRAM.
|
||||
* @max_power_default: max power of the device after reset
|
||||
* @va_space_host_start_address: base address of virtual memory range for
|
||||
* mapping host memory.
|
||||
* @va_space_host_end_address: end address of virtual memory range for
|
||||
* mapping host memory.
|
||||
* @va_space_dram_start_address: base address of virtual memory range for
|
||||
* mapping DRAM memory.
|
||||
* @va_space_dram_end_address: end address of virtual memory range for
|
||||
* mapping DRAM memory.
|
||||
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
|
||||
* fault.
|
||||
* @pcie_dbi_base_address: Base address of the PCIE_DBI block.
|
||||
|
@ -218,6 +214,7 @@ struct asic_fixed_properties {
|
|||
char preboot_ver[VERSION_MAX_LEN];
|
||||
struct hl_mmu_properties dmmu;
|
||||
struct hl_mmu_properties pmmu;
|
||||
struct hl_mmu_properties pmmu_huge;
|
||||
u64 sram_base_address;
|
||||
u64 sram_end_address;
|
||||
u64 sram_user_base_address;
|
||||
|
@ -227,10 +224,6 @@ struct asic_fixed_properties {
|
|||
u64 dram_size;
|
||||
u64 dram_pci_bar_size;
|
||||
u64 max_power_default;
|
||||
u64 va_space_host_start_address;
|
||||
u64 va_space_host_end_address;
|
||||
u64 va_space_dram_start_address;
|
||||
u64 va_space_dram_end_address;
|
||||
u64 dram_size_for_default_page_mapping;
|
||||
u64 pcie_dbi_base_address;
|
||||
u64 pcie_aux_dbi_reg_addr;
|
||||
|
@ -658,6 +651,8 @@ struct hl_va_range {
|
|||
* this hits 0l. It is incremented on CS and CS_WAIT.
|
||||
* @cs_pending: array of DMA fence objects representing pending CS.
|
||||
* @host_va_range: holds available virtual addresses for host mappings.
|
||||
* @host_huge_va_range: holds available virtual addresses for host mappings
|
||||
* with huge pages.
|
||||
* @dram_va_range: holds available virtual addresses for DRAM mappings.
|
||||
* @mem_hash_lock: protects the mem_hash.
|
||||
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
|
||||
|
@ -688,8 +683,9 @@ struct hl_ctx {
|
|||
struct hl_device *hdev;
|
||||
struct kref refcount;
|
||||
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
|
||||
struct hl_va_range host_va_range;
|
||||
struct hl_va_range dram_va_range;
|
||||
struct hl_va_range *host_va_range;
|
||||
struct hl_va_range *host_huge_va_range;
|
||||
struct hl_va_range *dram_va_range;
|
||||
struct mutex mem_hash_lock;
|
||||
struct mutex mmu_lock;
|
||||
struct list_head debugfs_list;
|
||||
|
@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts {
|
|||
* otherwise.
|
||||
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
|
||||
* @dram_default_page_mapping: is DRAM default page mapping enabled.
|
||||
* @pmmu_huge_range: is a different virtual addresses range used for PMMU with
|
||||
* huge pages.
|
||||
* @init_done: is the initialization of the device done.
|
||||
* @mmu_enable: is MMU enabled.
|
||||
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
|
||||
|
@ -1372,6 +1370,7 @@ struct hl_device {
|
|||
u8 reset_on_lockup;
|
||||
u8 dram_supports_virtual_memory;
|
||||
u8 dram_default_page_mapping;
|
||||
u8 pmmu_huge_range;
|
||||
u8 init_done;
|
||||
u8 device_cpu_disabled;
|
||||
u8 dma_mask;
|
||||
|
|
|
@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
|
|||
* or not, hence we continue with the biggest possible
|
||||
* granularity.
|
||||
*/
|
||||
page_size = hdev->asic_prop.pmmu.huge_page_size;
|
||||
page_size = hdev->asic_prop.pmmu_huge.page_size;
|
||||
else
|
||||
page_size = hdev->asic_prop.dmmu.page_size;
|
||||
|
||||
|
@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
|||
struct hl_userptr *userptr,
|
||||
struct hl_vm_phys_pg_pack **pphys_pg_pack)
|
||||
{
|
||||
struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t dma_addr;
|
||||
u64 page_mask, total_npages;
|
||||
u32 npages, page_size = PAGE_SIZE,
|
||||
huge_page_size = mmu_prop->huge_page_size;
|
||||
huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
|
||||
bool first = true, is_huge_page_opt = true;
|
||||
int rc, i, j;
|
||||
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
|
||||
|
@ -856,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct hl_userptr *userptr = NULL;
|
||||
struct hl_vm_hash_node *hnode;
|
||||
struct hl_va_range *va_range;
|
||||
enum vm_type_t *vm_type;
|
||||
u64 ret_vaddr, hint_addr;
|
||||
u32 handle = 0;
|
||||
|
@ -927,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
goto hnode_err;
|
||||
}
|
||||
|
||||
ret_vaddr = get_va_block(hdev,
|
||||
is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
|
||||
phys_pg_pack->total_size, hint_addr, is_userptr);
|
||||
if (is_userptr)
|
||||
if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
|
||||
va_range = ctx->host_va_range;
|
||||
else
|
||||
va_range = ctx->host_huge_va_range;
|
||||
else
|
||||
va_range = ctx->dram_va_range;
|
||||
|
||||
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
|
||||
hint_addr, is_userptr);
|
||||
if (!ret_vaddr) {
|
||||
dev_err(hdev->dev, "no available va block for handle %u\n",
|
||||
handle);
|
||||
|
@ -968,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
return 0;
|
||||
|
||||
map_err:
|
||||
if (add_va_block(hdev,
|
||||
is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
|
||||
ret_vaddr,
|
||||
ret_vaddr + phys_pg_pack->total_size - 1))
|
||||
if (add_va_block(hdev, va_range, ret_vaddr,
|
||||
ret_vaddr + phys_pg_pack->total_size - 1))
|
||||
dev_warn(hdev->dev,
|
||||
"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
|
||||
handle, ret_vaddr);
|
||||
|
@ -1033,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
|
|||
|
||||
if (*vm_type == VM_TYPE_USERPTR) {
|
||||
is_userptr = true;
|
||||
va_range = &ctx->host_va_range;
|
||||
userptr = hnode->ptr;
|
||||
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
|
||||
&phys_pg_pack);
|
||||
|
@ -1043,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
|
|||
vaddr);
|
||||
goto vm_type_err;
|
||||
}
|
||||
|
||||
if (phys_pg_pack->page_size ==
|
||||
hdev->asic_prop.pmmu.page_size)
|
||||
va_range = ctx->host_va_range;
|
||||
else
|
||||
va_range = ctx->host_huge_va_range;
|
||||
} else if (*vm_type == VM_TYPE_PHYS_PACK) {
|
||||
is_userptr = false;
|
||||
va_range = &ctx->dram_va_range;
|
||||
va_range = ctx->dram_va_range;
|
||||
phys_pg_pack = hnode->ptr;
|
||||
} else {
|
||||
dev_warn(hdev->dev,
|
||||
|
@ -1441,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
|
|||
}
|
||||
|
||||
/*
|
||||
* hl_va_range_init - initialize virtual addresses range
|
||||
*
|
||||
* @hdev : pointer to the habanalabs device structure
|
||||
* @va_range : pointer to the range to initialize
|
||||
* @start : range start address
|
||||
* @end : range end address
|
||||
* va_range_init - initialize virtual addresses range
|
||||
* @hdev: pointer to the habanalabs device structure
|
||||
* @va_range: pointer to the range to initialize
|
||||
* @start: range start address
|
||||
* @end: range end address
|
||||
*
|
||||
* This function does the following:
|
||||
* - Initializes the virtual addresses list of the given range with the given
|
||||
* addresses.
|
||||
*/
|
||||
static int hl_va_range_init(struct hl_device *hdev,
|
||||
struct hl_va_range *va_range, u64 start, u64 end)
|
||||
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
int rc;
|
||||
|
||||
|
@ -1488,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
|
|||
}
|
||||
|
||||
/*
|
||||
* hl_vm_ctx_init_with_ranges - initialize virtual memory for context
|
||||
* va_range_fini() - clear a virtual addresses range
|
||||
* @hdev: pointer to the habanalabs structure
|
||||
* @va_range: pointer to virtual addresses range
|
||||
*
|
||||
* @ctx : pointer to the habanalabs context structure
|
||||
* @host_range_start : host virtual addresses range start
|
||||
* @host_range_end : host virtual addresses range end
|
||||
* @dram_range_start : dram virtual addresses range start
|
||||
* @dram_range_end : dram virtual addresses range end
|
||||
* This function does the following:
|
||||
* - Frees the virtual addresses block list and its lock
|
||||
*/
|
||||
static void va_range_fini(struct hl_device *hdev,
|
||||
struct hl_va_range *va_range)
|
||||
{
|
||||
/* The block list is only touched while holding the range's own lock. */
mutex_lock(&va_range->lock);
|
||||
clear_va_list_locked(hdev, &va_range->list);
|
||||
mutex_unlock(&va_range->lock);
|
||||
|
||||
/* Nothing uses the lock past this point, so it can be torn down. */
mutex_destroy(&va_range->lock);
|
||||
/*
 * The range object itself is released here, so the caller must not
 * dereference va_range (or any alias of it) after this call returns.
 */
kfree(va_range);
|
||||
}
|
||||
|
||||
/*
|
||||
* vm_ctx_init_with_ranges() - initialize virtual memory for context
|
||||
* @ctx: pointer to the habanalabs context structure
|
||||
* @host_range_start: host virtual addresses range start.
|
||||
* @host_range_end: host virtual addresses range end.
|
||||
* @host_huge_range_start: host virtual addresses range start for memory
|
||||
* allocated with huge pages.
|
||||
* @host_huge_range_end: host virtual addresses range end for memory allocated
|
||||
* with huge pages.
|
||||
* @dram_range_start: dram virtual addresses range start.
|
||||
* @dram_range_end: dram virtual addresses range end.
|
||||
*
|
||||
* This function initializes the following:
|
||||
* - MMU for context
|
||||
* - Virtual address to area descriptor hashtable
|
||||
* - Virtual block list of available virtual memory
|
||||
*/
|
||||
static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
|
||||
u64 host_range_end, u64 dram_range_start,
|
||||
u64 dram_range_end)
|
||||
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
|
||||
u64 host_range_start,
|
||||
u64 host_range_end,
|
||||
u64 host_huge_range_start,
|
||||
u64 host_huge_range_end,
|
||||
u64 dram_range_start,
|
||||
u64 dram_range_end)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
int rc;
|
||||
|
||||
ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
|
||||
if (!ctx->host_va_range)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
|
||||
GFP_KERNEL);
|
||||
if (!ctx->host_huge_va_range) {
|
||||
rc = -ENOMEM;
|
||||
goto host_huge_va_range_err;
|
||||
}
|
||||
|
||||
ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
|
||||
if (!ctx->dram_va_range) {
|
||||
rc = -ENOMEM;
|
||||
goto dram_va_range_err;
|
||||
}
|
||||
|
||||
rc = hl_mmu_ctx_init(ctx);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
|
||||
return rc;
|
||||
goto mmu_ctx_err;
|
||||
}
|
||||
|
||||
mutex_init(&ctx->mem_hash_lock);
|
||||
hash_init(ctx->mem_hash);
|
||||
|
||||
mutex_init(&ctx->host_va_range.lock);
|
||||
mutex_init(&ctx->host_va_range->lock);
|
||||
|
||||
rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
|
||||
host_range_end);
|
||||
rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
|
||||
host_range_end);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to init host vm range\n");
|
||||
goto host_vm_err;
|
||||
goto host_page_range_err;
|
||||
}
|
||||
|
||||
mutex_init(&ctx->dram_va_range.lock);
|
||||
if (hdev->pmmu_huge_range) {
|
||||
mutex_init(&ctx->host_huge_va_range->lock);
|
||||
|
||||
rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
|
||||
rc = va_range_init(hdev, ctx->host_huge_va_range,
|
||||
host_huge_range_start,
|
||||
host_huge_range_end);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"failed to init host huge vm range\n");
|
||||
goto host_hpage_range_err;
|
||||
}
|
||||
} else {
|
||||
ctx->host_huge_va_range = ctx->host_va_range;
|
||||
}
|
||||
|
||||
mutex_init(&ctx->dram_va_range->lock);
|
||||
|
||||
rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
|
||||
dram_range_end);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "failed to init dram vm range\n");
|
||||
|
@ -1540,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
|
|||
return 0;
|
||||
|
||||
dram_vm_err:
|
||||
mutex_destroy(&ctx->dram_va_range.lock);
|
||||
mutex_destroy(&ctx->dram_va_range->lock);
|
||||
|
||||
mutex_lock(&ctx->host_va_range.lock);
|
||||
clear_va_list_locked(hdev, &ctx->host_va_range.list);
|
||||
mutex_unlock(&ctx->host_va_range.lock);
|
||||
host_vm_err:
|
||||
mutex_destroy(&ctx->host_va_range.lock);
|
||||
if (hdev->pmmu_huge_range) {
|
||||
mutex_lock(&ctx->host_huge_va_range->lock);
|
||||
clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
|
||||
mutex_unlock(&ctx->host_huge_va_range->lock);
|
||||
}
|
||||
host_hpage_range_err:
|
||||
if (hdev->pmmu_huge_range)
|
||||
mutex_destroy(&ctx->host_huge_va_range->lock);
|
||||
mutex_lock(&ctx->host_va_range->lock);
|
||||
clear_va_list_locked(hdev, &ctx->host_va_range->list);
|
||||
mutex_unlock(&ctx->host_va_range->lock);
|
||||
host_page_range_err:
|
||||
mutex_destroy(&ctx->host_va_range->lock);
|
||||
mutex_destroy(&ctx->mem_hash_lock);
|
||||
hl_mmu_ctx_fini(ctx);
|
||||
mmu_ctx_err:
|
||||
kfree(ctx->dram_va_range);
|
||||
dram_va_range_err:
|
||||
kfree(ctx->host_huge_va_range);
|
||||
host_huge_va_range_err:
|
||||
kfree(ctx->host_va_range);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -1556,8 +1637,8 @@ host_vm_err:
|
|||
int hl_vm_ctx_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
|
||||
u64 host_range_start, host_range_end, dram_range_start,
|
||||
dram_range_end;
|
||||
u64 host_range_start, host_range_end, host_huge_range_start,
|
||||
host_huge_range_end, dram_range_start, dram_range_end;
|
||||
|
||||
atomic64_set(&ctx->dram_phys_mem, 0);
|
||||
|
||||
|
@ -1569,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
|
|||
* address of the memory related to the given handle.
|
||||
*/
|
||||
if (ctx->hdev->mmu_enable) {
|
||||
dram_range_start = prop->va_space_dram_start_address;
|
||||
dram_range_end = prop->va_space_dram_end_address;
|
||||
host_range_start = prop->va_space_host_start_address;
|
||||
host_range_end = prop->va_space_host_end_address;
|
||||
dram_range_start = prop->dmmu.start_addr;
|
||||
dram_range_end = prop->dmmu.end_addr;
|
||||
host_range_start = prop->pmmu.start_addr;
|
||||
host_range_end = prop->pmmu.end_addr;
|
||||
host_huge_range_start = prop->pmmu_huge.start_addr;
|
||||
host_huge_range_end = prop->pmmu_huge.end_addr;
|
||||
} else {
|
||||
dram_range_start = prop->dram_user_base_address;
|
||||
dram_range_end = prop->dram_end_address;
|
||||
host_range_start = prop->dram_user_base_address;
|
||||
host_range_end = prop->dram_end_address;
|
||||
host_huge_range_start = prop->dram_user_base_address;
|
||||
host_huge_range_end = prop->dram_end_address;
|
||||
}
|
||||
|
||||
return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
|
||||
dram_range_start, dram_range_end);
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_va_range_fini - clear a virtual addresses range
|
||||
*
|
||||
* @hdev : pointer to the habanalabs structure
|
||||
* va_range : pointer to virtual addresses range
|
||||
*
|
||||
* This function does the following:
|
||||
* - Frees the virtual addresses block list and its lock
|
||||
*/
|
||||
static void hl_va_range_fini(struct hl_device *hdev,
|
||||
struct hl_va_range *va_range)
|
||||
{
|
||||
mutex_lock(&va_range->lock);
|
||||
clear_va_list_locked(hdev, &va_range->list);
|
||||
mutex_unlock(&va_range->lock);
|
||||
|
||||
mutex_destroy(&va_range->lock);
|
||||
return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
|
||||
host_huge_range_start,
|
||||
host_huge_range_end,
|
||||
dram_range_start,
|
||||
dram_range_end);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1667,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
|
|||
}
|
||||
spin_unlock(&vm->idr_lock);
|
||||
|
||||
hl_va_range_fini(hdev, &ctx->dram_va_range);
|
||||
hl_va_range_fini(hdev, &ctx->host_va_range);
|
||||
va_range_fini(hdev, ctx->dram_va_range);
|
||||
if (hdev->pmmu_huge_range)
|
||||
va_range_fini(hdev, ctx->host_huge_va_range);
|
||||
va_range_fini(hdev, ctx->host_va_range);
|
||||
|
||||
mutex_destroy(&ctx->mem_hash_lock);
|
||||
hl_mmu_ctx_fini(ctx);
|
||||
|
|
|
@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
|
|||
return phys_hop_addr + pte_offset;
|
||||
}
|
||||
|
||||
/*
 * is_dram_va() - check whether a device virtual address is a DRAM address
 * @hdev: pointer to the habanalabs device structure
 * @virt_addr: device virtual address to check
 *
 * Return: the result of hl_mem_area_inside_range() for an area of one DMMU
 * page (prop->dmmu.page_size) starting at @virt_addr, tested against the
 * range described by prop->dmmu.start_addr and prop->dmmu.end_addr.
 */
static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
|
||||
/* NOTE(review): exact boundary semantics are those of the helper — confirm */
return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
|
||||
prop->dmmu.start_addr,
|
||||
prop->dmmu.end_addr);
|
||||
}
|
||||
|
||||
static int dram_default_mapping_init(struct hl_ctx *ctx)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
|
|||
curr_pte;
|
||||
bool is_huge, clear_hop3 = true;
|
||||
|
||||
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
|
@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
|||
if (!hdev->mmu_enable)
|
||||
return 0;
|
||||
|
||||
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
|
||||
prop->va_space_dram_start_address,
|
||||
prop->va_space_dram_end_address);
|
||||
is_dram_addr = is_dram_va(hdev, virt_addr);
|
||||
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
if (is_dram_addr)
|
||||
mmu_prop = &prop->dmmu;
|
||||
else if ((page_size % prop->pmmu_huge.page_size) == 0)
|
||||
mmu_prop = &prop->pmmu_huge;
|
||||
else
|
||||
mmu_prop = &prop->pmmu;
|
||||
|
||||
/*
|
||||
* The H/W handles mapping of specific page sizes. Hence if the page
|
||||
* size is bigger, we break it to sub-pages and unmap them separately.
|
||||
*/
|
||||
if ((page_size % mmu_prop->huge_page_size) == 0) {
|
||||
real_page_size = mmu_prop->huge_page_size;
|
||||
} else if ((page_size % mmu_prop->page_size) == 0) {
|
||||
if ((page_size % mmu_prop->page_size) == 0) {
|
||||
real_page_size = mmu_prop->page_size;
|
||||
} else {
|
||||
dev_err(hdev->dev,
|
||||
"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
|
||||
page_size,
|
||||
mmu_prop->page_size >> 10,
|
||||
mmu_prop->huge_page_size >> 20);
|
||||
"page size of %u is not %uKB aligned, can't unmap\n",
|
||||
page_size, mmu_prop->page_size >> 10);
|
||||
|
||||
return -EFAULT;
|
||||
}
|
||||
|
@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
|||
hop4_new = false, is_huge;
|
||||
int rc = -ENOMEM;
|
||||
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
|
||||
/*
|
||||
* This mapping function can map a page or a huge page. For huge page
|
||||
* there are only 3 hops rather than 4. Currently the DRAM allocation
|
||||
|
@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
|||
* one of the two page sizes. Since this is a common code for all the
|
||||
* three cases, we need this huge page check.
|
||||
*/
|
||||
is_huge = page_size == mmu_prop->huge_page_size;
|
||||
|
||||
if (is_dram_addr && !is_huge) {
|
||||
dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
|
||||
return -EFAULT;
|
||||
if (is_dram_addr) {
|
||||
mmu_prop = &prop->dmmu;
|
||||
is_huge = true;
|
||||
} else if (page_size == prop->pmmu_huge.page_size) {
|
||||
mmu_prop = &prop->pmmu_huge;
|
||||
is_huge = true;
|
||||
} else {
|
||||
mmu_prop = &prop->pmmu;
|
||||
is_huge = false;
|
||||
}
|
||||
|
||||
hop0_addr = get_hop0_addr(ctx);
|
||||
|
@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
|
|||
if (!hdev->mmu_enable)
|
||||
return 0;
|
||||
|
||||
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
|
||||
prop->va_space_dram_start_address,
|
||||
prop->va_space_dram_end_address);
|
||||
is_dram_addr = is_dram_va(hdev, virt_addr);
|
||||
|
||||
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
|
||||
if (is_dram_addr)
|
||||
mmu_prop = &prop->dmmu;
|
||||
else if ((page_size % prop->pmmu_huge.page_size) == 0)
|
||||
mmu_prop = &prop->pmmu_huge;
|
||||
else
|
||||
mmu_prop = &prop->pmmu;
|
||||
|
||||
/*
|
||||
* The H/W handles mapping of specific page sizes. Hence if the page
|
||||
* size is bigger, we break it to sub-pages and map them separately.
|
||||
*/
|
||||
if ((page_size % mmu_prop->huge_page_size) == 0) {
|
||||
real_page_size = mmu_prop->huge_page_size;
|
||||
} else if ((page_size % mmu_prop->page_size) == 0) {
|
||||
if ((page_size % mmu_prop->page_size) == 0) {
|
||||
real_page_size = mmu_prop->page_size;
|
||||
} else {
|
||||
dev_err(hdev->dev,
|
||||
"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
|
||||
page_size,
|
||||
mmu_prop->page_size >> 10,
|
||||
mmu_prop->huge_page_size >> 20);
|
||||
"page size of %u is not %uKB aligned, can't unmap\n",
|
||||
page_size, mmu_prop->page_size >> 10);
|
||||
|
||||
return -EFAULT;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue