habanalabs: mmu map wrapper for sizes larger than a page
We introduce a new wrapper which allows us to mmu map any size to any host va_range available. In addition, we remove duplicated code from various places in the driver and use this new wrapper instead. This wrapper supports mapping only contiguous physical memory blocks and will be used for mappings that are done to the driver ASID. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
5e5867e51d
commit
5c05487f15
|
@ -67,9 +67,9 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
|
|||
bus_addr = cb->bus_address;
|
||||
offset = 0;
|
||||
list_for_each_entry(va_block, &cb->va_block_list, node) {
|
||||
rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
|
||||
list_is_last(&va_block->node,
|
||||
&cb->va_block_list));
|
||||
rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
|
||||
va_block->size, list_is_last(&va_block->node,
|
||||
&cb->va_block_list));
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
|
||||
va_block->start);
|
||||
|
@ -92,7 +92,7 @@ err_va_umap:
|
|||
list_for_each_entry(va_block, &cb->va_block_list, node) {
|
||||
if (offset <= 0)
|
||||
break;
|
||||
hl_mmu_unmap(ctx, va_block->start, va_block->size,
|
||||
hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
|
||||
offset <= va_block->size);
|
||||
offset -= va_block->size;
|
||||
}
|
||||
|
@ -119,7 +119,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
|
|||
mutex_lock(&ctx->mmu_lock);
|
||||
|
||||
list_for_each_entry(va_block, &cb->va_block_list, node)
|
||||
if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
|
||||
if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
|
||||
list_is_last(&va_block->node,
|
||||
&cb->va_block_list)))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
|
|
|
@ -2162,10 +2162,13 @@ int hl_mmu_init(struct hl_device *hdev);
|
|||
void hl_mmu_fini(struct hl_device *hdev);
|
||||
int hl_mmu_ctx_init(struct hl_ctx *ctx);
|
||||
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
|
||||
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
||||
int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
||||
u32 page_size, bool flush_pte);
|
||||
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
bool flush_pte);
|
||||
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
|
||||
u64 phys_addr, u32 size);
|
||||
int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
|
||||
void hl_mmu_swap_out(struct hl_ctx *ctx);
|
||||
void hl_mmu_swap_in(struct hl_ctx *ctx);
|
||||
int hl_mmu_if_set_funcs(struct hl_device *hdev);
|
||||
|
|
|
@ -843,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
|
|||
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
|
||||
paddr = phys_pg_pack->pages[i];
|
||||
|
||||
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
|
||||
rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
|
||||
(i + 1) == phys_pg_pack->npages);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
|
@ -862,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
|
|||
err:
|
||||
next_vaddr = vaddr;
|
||||
for (i = 0 ; i < mapped_pg_cnt ; i++) {
|
||||
if (hl_mmu_unmap(ctx, next_vaddr, page_size,
|
||||
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
|
||||
(i + 1) == mapped_pg_cnt))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
|
||||
|
@ -892,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
|
|||
next_vaddr = vaddr;
|
||||
|
||||
for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
|
||||
if (hl_mmu_unmap(ctx, next_vaddr, page_size,
|
||||
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
|
||||
(i + 1) == phys_pg_pack->npages))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"unmap failed for vaddr: 0x%llx\n", next_vaddr);
|
||||
|
|
|
@ -122,7 +122,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
|
|||
}
|
||||
|
||||
/*
|
||||
* hl_mmu_unmap - unmaps a virtual addr
|
||||
* hl_mmu_unmap_page - unmaps a virtual addr
|
||||
*
|
||||
* @ctx: pointer to the context structure
|
||||
* @virt_addr: virt addr to map from
|
||||
|
@ -142,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
|
|||
* For optimization reasons PCI flush may be requested once after unmapping of
|
||||
* large area.
|
||||
*/
|
||||
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
||||
bool flush_pte)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
|
@ -200,7 +200,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
|||
}
|
||||
|
||||
/*
|
||||
* hl_mmu_map - maps a virtual addr to physical addr
|
||||
* hl_mmu_map_page - maps a virtual addr to physical addr
|
||||
*
|
||||
* @ctx: pointer to the context structure
|
||||
* @virt_addr: virt addr to map from
|
||||
|
@ -221,8 +221,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
|
|||
* For optimization reasons PCI flush may be requested once after mapping of
|
||||
* large area.
|
||||
*/
|
||||
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
|
||||
bool flush_pte)
|
||||
int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
|
||||
u32 page_size, bool flush_pte)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
|
@ -302,6 +302,108 @@ err:
|
|||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
|
||||
* for mapping contiguous physical memory
|
||||
*
|
||||
* @ctx: pointer to the context structure
|
||||
* @virt_addr: virt addr to map from
|
||||
* @phys_addr: phys addr to map to
|
||||
* @size: size to map
|
||||
*
|
||||
*/
|
||||
int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
|
||||
u64 phys_addr, u32 size)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 curr_va, curr_pa;
|
||||
u32 page_size;
|
||||
bool flush_pte;
|
||||
int rc = 0, off;
|
||||
|
||||
if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->dmmu.start_addr, prop->dmmu.end_addr))
|
||||
page_size = prop->dmmu.page_size;
|
||||
else if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->pmmu.start_addr, prop->pmmu.end_addr))
|
||||
page_size = prop->pmmu.page_size;
|
||||
else if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
|
||||
page_size = prop->pmmu_huge.page_size;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
for (off = 0 ; off < size ; off += page_size) {
|
||||
curr_va = virt_addr + off;
|
||||
curr_pa = phys_addr + off;
|
||||
flush_pte = (off + page_size) >= size;
|
||||
rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
|
||||
flush_pte);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Map failed for va 0x%llx to pa 0x%llx\n",
|
||||
curr_va, curr_pa);
|
||||
goto unmap;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
||||
unmap:
|
||||
for (; off >= 0 ; off -= page_size) {
|
||||
curr_va = virt_addr + off;
|
||||
flush_pte = (off - (s32) page_size) < 0;
|
||||
if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap va 0x%llx\n", curr_va);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
|
||||
* for unmapping contiguous physical memory
|
||||
*
|
||||
* @ctx: pointer to the context structure
|
||||
* @virt_addr: virt addr to unmap
|
||||
* @size: size to unmap
|
||||
*
|
||||
*/
|
||||
int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 curr_va;
|
||||
u32 page_size;
|
||||
bool flush_pte;
|
||||
int rc = 0, off;
|
||||
|
||||
if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->dmmu.start_addr, prop->dmmu.end_addr))
|
||||
page_size = prop->dmmu.page_size;
|
||||
else if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->pmmu.start_addr, prop->pmmu.end_addr))
|
||||
page_size = prop->pmmu.page_size;
|
||||
else if (hl_mem_area_inside_range(virt_addr, size,
|
||||
prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
|
||||
page_size = prop->pmmu_huge.page_size;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
for (off = 0 ; off < size ; off += page_size) {
|
||||
curr_va = virt_addr + off;
|
||||
flush_pte = (off + page_size) >= size;
|
||||
rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte);
|
||||
if (rc)
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"Unmap failed for va 0x%llx\n", curr_va);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
|
||||
*
|
||||
|
|
|
@ -7755,9 +7755,6 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
|
|||
struct hl_ctx *ctx)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
bool flush_pte;
|
||||
u64 va, pa;
|
||||
s64 off;
|
||||
int min_alloc_order, rc, collective_cb_size;
|
||||
|
||||
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
|
||||
|
@ -7802,48 +7799,23 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
|
|||
goto destroy_internal_cb_pool;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
||||
/* The mapping is done page by page since we can't assure allocated ptr
|
||||
* is aligned to HOST_SPACE_INTERNAL_CB_SZ
|
||||
*/
|
||||
for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
|
||||
va = hdev->internal_cb_va_base + off;
|
||||
pa = hdev->internal_cb_pool_dma_addr + off;
|
||||
flush_pte = (off + PAGE_SIZE_4KB) >= HOST_SPACE_INTERNAL_CB_SZ;
|
||||
rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_4KB, flush_pte);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Map failed for va 0x%llx to pa 0x%llx\n",
|
||||
va, pa);
|
||||
goto unmap;
|
||||
}
|
||||
}
|
||||
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
|
||||
hdev->internal_cb_pool_dma_addr,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
if (rc)
|
||||
goto unreserve_internal_cb_pool;
|
||||
|
||||
return 0;
|
||||
|
||||
unmap:
|
||||
for (; off >= 0 ; off -= PAGE_SIZE_4KB) {
|
||||
va = hdev->internal_cb_va_base + off;
|
||||
flush_pte = (off - (s32) PAGE_SIZE_4KB) < 0;
|
||||
if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap va 0x%llx\n", va);
|
||||
}
|
||||
|
||||
unreserve_internal_cb_pool:
|
||||
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
destroy_internal_cb_pool:
|
||||
gen_pool_destroy(hdev->internal_cb_pool);
|
||||
|
||||
free_internal_cb_pool:
|
||||
hdev->asic_funcs->asic_dma_free_coherent(hdev,
|
||||
HOST_SPACE_INTERNAL_CB_SZ,
|
||||
|
@ -7857,30 +7829,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
|
|||
struct hl_ctx *ctx)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
bool flush_pte = false;
|
||||
u64 va, off;
|
||||
|
||||
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
|
||||
return;
|
||||
|
||||
mutex_lock(&ctx->mmu_lock);
|
||||
|
||||
for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
|
||||
va = hdev->internal_cb_va_base + off;
|
||||
|
||||
if (off + PAGE_SIZE_4KB >= HOST_SPACE_INTERNAL_CB_SZ)
|
||||
flush_pte = true;
|
||||
|
||||
if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap va 0x%llx\n", va);
|
||||
}
|
||||
|
||||
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
|
||||
HOST_SPACE_INTERNAL_CB_SZ);
|
||||
|
||||
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
|
||||
|
||||
mutex_unlock(&ctx->mmu_lock);
|
||||
|
||||
gen_pool_destroy(hdev->internal_cb_pool);
|
||||
|
|
|
@ -4906,9 +4906,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
|
|||
return 0;
|
||||
|
||||
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB,
|
||||
(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
|
||||
rc = hl_mmu_map_page(hdev->kernel_ctx,
|
||||
prop->dram_base_address + off,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB,
|
||||
(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Map failed for address 0x%llx\n",
|
||||
prop->dram_base_address + off);
|
||||
|
@ -4917,8 +4918,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
|
|||
}
|
||||
|
||||
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
|
||||
hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
|
||||
rc = hl_mmu_map_page(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR,
|
||||
hdev->cpu_accessible_dma_address,
|
||||
PAGE_SIZE_2MB, true);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
|
@ -4928,7 +4931,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
|
|||
}
|
||||
} else {
|
||||
for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx,
|
||||
rc = hl_mmu_map_page(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
|
||||
hdev->cpu_accessible_dma_address + cpu_off,
|
||||
PAGE_SIZE_4KB, true);
|
||||
|
@ -4955,7 +4958,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
|
|||
|
||||
unmap_cpu:
|
||||
for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
if (hl_mmu_unmap_page(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
|
||||
PAGE_SIZE_4KB, true))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
|
@ -4963,7 +4966,7 @@ unmap_cpu:
|
|||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
|
||||
unmap:
|
||||
for (; off >= 0 ; off -= PAGE_SIZE_2MB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
if (hl_mmu_unmap_page(hdev->kernel_ctx,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB,
|
||||
true))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
|
@ -4989,13 +4992,14 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
|
|||
WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
|
||||
|
||||
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
|
||||
if (hl_mmu_unmap_page(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR,
|
||||
PAGE_SIZE_2MB, true))
|
||||
dev_warn(hdev->dev,
|
||||
"Failed to unmap CPU accessible memory\n");
|
||||
} else {
|
||||
for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
if (hl_mmu_unmap_page(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
|
||||
PAGE_SIZE_4KB,
|
||||
(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
|
||||
|
@ -5005,7 +5009,7 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
|
|||
}
|
||||
|
||||
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
if (hl_mmu_unmap_page(hdev->kernel_ctx,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB,
|
||||
(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
|
|
Loading…
Reference in New Issue