OpenCloudOS-Kernel/drivers/accel/habanalabs/common/memory.c

3004 lines
81 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
#include <linux/pci-p2pdma.h>
MODULE_IMPORT_NS(DMA_BUF);
#define HL_MMU_DEBUG 0
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
#define DRAM_POOL_PAGE_SIZE SZ_8M
#define MEM_HANDLE_INVALID ULONG_MAX
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
struct hl_mem_in *args, u64 *handle);
static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 psize;
/*
* for ASIC that supports setting the allocation page size by user we will address
* user's choice only if it is not 0 (as 0 means taking the default page size)
*/
if (prop->supports_user_set_page_size && args->alloc.page_size) {
psize = args->alloc.page_size;
if (!is_power_of_2(psize)) {
dev_err(hdev->dev, "user page size (%#llx) is not power of 2\n", psize);
return -EINVAL;
}
} else {
psize = prop->device_mem_alloc_default_page_size;
}
*page_size = psize;
return 0;
}
/*
* The va ranges in context object contain a list with the available chunks of
* device virtual memory.
* There is one range for host allocations and one for DRAM allocations.
*
* On initialization each range contains one chunk of all of its available
* virtual range which is a half of the total device virtual range.
*
* On each mapping of physical pages, a suitable virtual range chunk (with a
* minimum size) is selected from the list. If the chunk size equals the
* requested size, the chunk is returned. Otherwise, the chunk is split into
* two chunks - one to return as result and a remainder to stay in the list.
*
* On each Unmapping of a virtual address, the relevant virtual chunk is
* returned to the list. The chunk is added to the list and if its edges match
* the edges of the adjacent chunks (means a contiguous chunk can be created),
* the chunks are merged.
*
* On finish, the list is checked to have only one chunk of all the relevant
* virtual range (which is a half of the device total virtual range).
* If not (means not all mappings were unmapped), a warning is printed.
*/
/*
* alloc_device_memory() - allocate device memory.
* @ctx: pointer to the context structure.
* @args: host parameters containing the requested size.
* @ret_handle: result handle.
*
* This function does the following:
* - Allocate the requested size rounded up to 'dram_page_size' pages.
* - Return unique handle for later map/unmap/free.
*/
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
u32 *ret_handle)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i;
u32 num_curr_pgs, page_size;
bool contiguous;
int handle, rc;
num_curr_pgs = 0;
rc = set_alloc_page_size(hdev, args, &page_size);
if (rc)
return rc;
num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
total_size = num_pgs * page_size;
if (!total_size) {
dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
return -EINVAL;
}
contiguous = args->flags & HL_MEM_CONTIGUOUS;
if (contiguous) {
if (is_power_of_2(page_size))
paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
total_size, NULL, page_size);
else
paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
"Cannot allocate %llu contiguous pages with total size of %llu\n",
num_pgs, total_size);
return -ENOMEM;
}
}
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
if (!phys_pg_pack) {
rc = -ENOMEM;
goto pages_pack_err;
}
phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
phys_pg_pack->asid = ctx->asid;
phys_pg_pack->npages = num_pgs;
phys_pg_pack->page_size = page_size;
phys_pg_pack->total_size = total_size;
phys_pg_pack->flags = args->flags;
phys_pg_pack->contiguous = contiguous;
phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
rc = -ENOMEM;
goto pages_arr_err;
}
if (phys_pg_pack->contiguous) {
for (i = 0 ; i < num_pgs ; i++)
phys_pg_pack->pages[i] = paddr + i * page_size;
} else {
for (i = 0 ; i < num_pgs ; i++) {
if (is_power_of_2(page_size))
phys_pg_pack->pages[i] =
(uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool,
page_size, NULL,
page_size);
else
phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
page_size);
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
"Cannot allocate device memory (out of memory)\n");
rc = -ENOMEM;
goto page_err;
}
num_curr_pgs++;
}
}
spin_lock(&vm->idr_lock);
handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
GFP_ATOMIC);
spin_unlock(&vm->idr_lock);
if (handle < 0) {
dev_err(hdev->dev, "Failed to get handle for page\n");
rc = -EFAULT;
goto idr_err;
}
for (i = 0 ; i < num_pgs ; i++)
kref_get(&vm->dram_pg_pool_refcount);
phys_pg_pack->handle = handle;
atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
*ret_handle = handle;
return 0;
idr_err:
page_err:
if (!phys_pg_pack->contiguous)
for (i = 0 ; i < num_curr_pgs ; i++)
gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
page_size);
kvfree(phys_pg_pack->pages);
pages_arr_err:
kfree(phys_pg_pack);
pages_pack_err:
if (contiguous)
gen_pool_free(vm->dram_pg_pool, paddr, total_size);
return rc;
}
/**
* dma_map_host_va() - DMA mapping of the given host virtual address.
* @hdev: habanalabs device structure.
* @addr: the host virtual address of the memory area.
* @size: the size of the memory area.
* @p_userptr: pointer to result userptr structure.
*
* This function does the following:
* - Allocate userptr structure.
* - Pin the given host memory using the userptr structure.
* - Perform DMA mapping to have the DMA addresses of the pages.
*/
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr **p_userptr)
{
struct hl_userptr *userptr;
int rc;
userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
if (!userptr) {
rc = -ENOMEM;
goto userptr_err;
}
rc = hl_pin_host_memory(hdev, addr, size, userptr);
if (rc) {
dev_err(hdev->dev, "Failed to pin host memory\n");
goto pin_err;
}
userptr->dma_mapped = true;
userptr->dir = DMA_BIDIRECTIONAL;
userptr->vm_type = VM_TYPE_USERPTR;
*p_userptr = userptr;
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto dma_map_err;
}
return 0;
dma_map_err:
hl_unpin_host_memory(hdev, userptr);
pin_err:
kfree(userptr);
userptr_err:
return rc;
}
/**
* dma_unmap_host_va() - DMA unmapping of the given host virtual address.
* @hdev: habanalabs device structure.
* @userptr: userptr to free.
*
* This function does the following:
* - Unpins the physical pages.
* - Frees the userptr structure.
*/
static void dma_unmap_host_va(struct hl_device *hdev,
struct hl_userptr *userptr)
{
hl_unpin_host_memory(hdev, userptr);
kfree(userptr);
}
/**
* dram_pg_pool_do_release() - free DRAM pages pool
* @ref: pointer to reference object.
*
* This function does the following:
* - Frees the idr structure of physical pages handles.
* - Frees the generic pool of DRAM physical pages.
*/
static void dram_pg_pool_do_release(struct kref *ref)
{
struct hl_vm *vm = container_of(ref, struct hl_vm,
dram_pg_pool_refcount);
/*
* free the idr here as only here we know for sure that there are no
* allocated physical pages and hence there are no handles in use
*/
idr_destroy(&vm->phys_pg_pack_handles);
gen_pool_destroy(vm->dram_pg_pool);
}
/**
* free_phys_pg_pack() - free physical page pack.
* @hdev: habanalabs device structure.
* @phys_pg_pack: physical page pack to free.
*
* This function does the following:
* - For DRAM memory only
* - iterate over the pack, free each physical block structure by
* returning it to the general pool.
* - Free the hl_vm_phys_pg_pack structure.
*/
static void free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_vm *vm = &hdev->vm;
u64 i;
if (phys_pg_pack->created_from_userptr)
goto end;
if (phys_pg_pack->contiguous) {
gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
phys_pg_pack->total_size);
for (i = 0; i < phys_pg_pack->npages ; i++)
kref_put(&vm->dram_pg_pool_refcount,
dram_pg_pool_do_release);
} else {
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
gen_pool_free(vm->dram_pg_pool,
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
kref_put(&vm->dram_pg_pool_refcount,
dram_pg_pool_do_release);
}
}
end:
kvfree(phys_pg_pack->pages);
kfree(phys_pg_pack);
return;
}
/**
* free_device_memory() - free device memory.
* @ctx: pointer to the context structure.
* @args: host parameters containing the requested size.
*
* This function does the following:
* - Free the device memory related to the given handle.
*/
static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
u32 handle = args->free.handle;
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
if (!phys_pg_pack) {
spin_unlock(&vm->idr_lock);
dev_err(hdev->dev, "free device memory failed, no match for handle %u\n", handle);
return -EINVAL;
}
if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
spin_unlock(&vm->idr_lock);
dev_err(hdev->dev, "handle %u is mapped, cannot free\n", handle);
return -EINVAL;
}
/* must remove from idr before the freeing of the physical pages as the refcount of the pool
* is also the trigger of the idr destroy
*/
idr_remove(&vm->phys_pg_pack_handles, handle);
spin_unlock(&vm->idr_lock);
atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
free_phys_pg_pack(hdev, phys_pg_pack);
return 0;
}
/**
* clear_va_list_locked() - free virtual addresses list.
* @hdev: habanalabs device structure.
* @va_list: list of virtual addresses to free.
*
* This function does the following:
* - Iterate over the list and free each virtual addresses block.
*
* This function should be called only when va_list lock is taken.
*/
static void clear_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
{
struct hl_vm_va_block *va_block, *tmp;
list_for_each_entry_safe(va_block, tmp, va_list, node) {
list_del(&va_block->node);
kfree(va_block);
}
}
/**
* print_va_list_locked() - print virtual addresses list.
* @hdev: habanalabs device structure.
* @va_list: list of virtual addresses to print.
*
* This function does the following:
* - Iterate over the list and print each virtual addresses block.
*
* This function should be called only when va_list lock is taken.
*/
static void print_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
{
#if HL_MMU_DEBUG
struct hl_vm_va_block *va_block;
dev_dbg(hdev->dev, "print va list:\n");
list_for_each_entry(va_block, va_list, node)
dev_dbg(hdev->dev,
"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
va_block->start, va_block->end, va_block->size);
#endif
}
/**
* merge_va_blocks_locked() - merge a virtual block if possible.
* @hdev: pointer to the habanalabs device structure.
* @va_list: pointer to the virtual addresses block list.
* @va_block: virtual block to merge with adjacent blocks.
*
* This function does the following:
* - Merge the given blocks with the adjacent blocks if their virtual ranges
* create a contiguous virtual range.
*
* This Function should be called only when va_list lock is taken.
*/
static void merge_va_blocks_locked(struct hl_device *hdev,
struct list_head *va_list, struct hl_vm_va_block *va_block)
{
struct hl_vm_va_block *prev, *next;
prev = list_prev_entry(va_block, node);
if (&prev->node != va_list && prev->end + 1 == va_block->start) {
prev->end = va_block->end;
prev->size = prev->end - prev->start + 1;
list_del(&va_block->node);
kfree(va_block);
va_block = prev;
}
next = list_next_entry(va_block, node);
if (&next->node != va_list && va_block->end + 1 == next->start) {
next->start = va_block->start;
next->size = next->end - next->start + 1;
list_del(&va_block->node);
kfree(va_block);
}
}
/**
* add_va_block_locked() - add a virtual block to the virtual addresses list.
* @hdev: pointer to the habanalabs device structure.
* @va_list: pointer to the virtual addresses block list.
* @start: start virtual address.
* @end: end virtual address.
*
* This function does the following:
* - Add the given block to the virtual blocks list and merge with other blocks
* if a contiguous virtual block can be created.
*
* This Function should be called only when va_list lock is taken.
*/
static int add_va_block_locked(struct hl_device *hdev,
struct list_head *va_list, u64 start, u64 end)
{
struct hl_vm_va_block *va_block, *res = NULL;
u64 size = end - start + 1;
print_va_list_locked(hdev, va_list);
list_for_each_entry(va_block, va_list, node) {
/* TODO: remove upon matureness */
if (hl_mem_area_crosses_range(start, size, va_block->start,
va_block->end)) {
dev_err(hdev->dev,
"block crossing ranges at start 0x%llx, end 0x%llx\n",
va_block->start, va_block->end);
return -EINVAL;
}
if (va_block->end < start)
res = va_block;
}
va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
if (!va_block)
return -ENOMEM;
va_block->start = start;
va_block->end = end;
va_block->size = size;
if (!res)
list_add(&va_block->node, va_list);
else
list_add(&va_block->node, &res->node);
merge_va_blocks_locked(hdev, va_list, va_block);
print_va_list_locked(hdev, va_list);
return 0;
}
/**
* add_va_block() - wrapper for add_va_block_locked.
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range object.
* @start: start virtual address.
* @end: end virtual address.
*
* This function does the following:
* - Takes the list lock and calls add_va_block_locked.
*/
static inline int add_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 start, u64 end)
{
int rc;
mutex_lock(&va_range->lock);
rc = add_va_block_locked(hdev, &va_range->list, start, end);
mutex_unlock(&va_range->lock);
return rc;
}
/**
* is_hint_crossing_range() - check if hint address crossing specified reserved.
* @range_type: virtual space range type.
* @start_addr: start virtual address.
* @size: block size.
* @prop: asic properties structure to retrieve reserved ranges from.
*/
static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
bool range_cross;
if (range_type == HL_VA_RANGE_TYPE_DRAM)
range_cross =
hl_mem_area_crosses_range(start_addr, size,
prop->hints_dram_reserved_va_range.start_addr,
prop->hints_dram_reserved_va_range.end_addr);
else if (range_type == HL_VA_RANGE_TYPE_HOST)
range_cross =
hl_mem_area_crosses_range(start_addr, size,
prop->hints_host_reserved_va_range.start_addr,
prop->hints_host_reserved_va_range.end_addr);
else
range_cross =
hl_mem_area_crosses_range(start_addr, size,
prop->hints_host_hpage_reserved_va_range.start_addr,
prop->hints_host_hpage_reserved_va_range.end_addr);
return range_cross;
}
/**
* get_va_block() - get a virtual block for the given size and alignment.
*
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range.
* @size: requested block size.
* @hint_addr: hint for requested address by the user.
* @va_block_align: required alignment of the virtual block start address.
* @range_type: va range type (host, dram)
* @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
* given size, hint address and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
static u64 get_va_block(struct hl_device *hdev,
struct hl_va_range *va_range,
u64 size, u64 hint_addr, u32 va_block_align,
enum hl_va_range_type range_type,
u32 flags)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
dram_hint_mask = prop->dram_hints_align_mask;
bool add_prev = false;
bool is_align_pow_2 = is_power_of_2(va_range->page_size);
bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
bool force_hint = flags & HL_MEM_FORCE_HINT;
if (is_align_pow_2)
align_mask = ~((u64)va_block_align - 1);
else
/*
* with non-power-of-2 range we work only with page granularity
* and the start address is page aligned,
* so no need for alignment checking.
*/
size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
va_range->page_size;
tmp_hint_addr = hint_addr & ~dram_hint_mask;
/* Check if we need to ignore hint address */
if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
(!is_align_pow_2 && is_hint_dram_addr &&
do_div(tmp_hint_addr, va_range->page_size))) {
if (force_hint) {
/* Hint must be respected, so here we just fail */
dev_err(hdev->dev,
"Hint address 0x%llx is not page aligned - cannot be respected\n",
hint_addr);
return 0;
}
dev_dbg(hdev->dev,
"Hint address 0x%llx will be ignored because it is not aligned\n",
hint_addr);
hint_addr = 0;
}
mutex_lock(&va_range->lock);
print_va_list_locked(hdev, &va_range->list);
list_for_each_entry(va_block, &va_range->list, node) {
/* Calc the first possible aligned addr */
valid_start = va_block->start;
if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
valid_start &= align_mask;
valid_start += va_block_align;
if (valid_start > va_block->end)
continue;
}
valid_size = va_block->end - valid_start + 1;
if (valid_size < size)
continue;
/*
* In case hint address is 0, and hints_range_reservation
* property enabled, then avoid allocating va blocks from the
* range reserved for hint addresses
*/
if (prop->hints_range_reservation && !hint_addr)
if (is_hint_crossing_range(range_type, valid_start,
size, prop))
continue;
/* Pick the minimal length block which has the required size */
if (!new_va_block || (valid_size < reserved_valid_size)) {
new_va_block = va_block;
reserved_valid_start = valid_start;
reserved_valid_size = valid_size;
}
if (hint_addr && hint_addr >= valid_start &&
(hint_addr + size) <= va_block->end) {
new_va_block = va_block;
reserved_valid_start = hint_addr;
reserved_valid_size = valid_size;
break;
}
}
if (!new_va_block) {
dev_err(hdev->dev, "no available va block for size %llu\n",
size);
goto out;
}
if (force_hint && reserved_valid_start != hint_addr) {
/* Hint address must be respected. If we are here - this means
* we could not respect it.
*/
dev_err(hdev->dev,
"Hint address 0x%llx could not be respected\n",
hint_addr);
reserved_valid_start = 0;
goto out;
}
/*
* Check if there is some leftover range due to reserving the new
* va block, then return it to the main virtual addresses list.
*/
if (reserved_valid_start > new_va_block->start) {
prev_start = new_va_block->start;
prev_end = reserved_valid_start - 1;
new_va_block->start = reserved_valid_start;
new_va_block->size = reserved_valid_size;
add_prev = true;
}
if (new_va_block->size > size) {
new_va_block->start += size;
new_va_block->size = new_va_block->end - new_va_block->start + 1;
} else {
list_del(&new_va_block->node);
kfree(new_va_block);
}
if (add_prev)
add_va_block_locked(hdev, &va_range->list, prev_start,
prev_end);
print_va_list_locked(hdev, &va_range->list);
out:
mutex_unlock(&va_range->lock);
return reserved_valid_start;
}
/*
* hl_reserve_va_block() - reserve a virtual block of a given size.
* @hdev: pointer to the habanalabs device structure.
* @ctx: current context
* @type: virtual addresses range type.
* @size: requested block size.
* @alignment: required alignment in bytes of the virtual block start address,
* 0 means no alignment.
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
* given size and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_va_range_type type, u64 size, u32 alignment)
{
return get_va_block(hdev, ctx->va_range[type], size, 0,
max(alignment, ctx->va_range[type]->page_size),
type, 0);
}
/**
* hl_get_va_range_type() - get va_range type for the given address and size.
* @ctx: context to fetch va_range from.
* @address: the start address of the area we want to validate.
* @size: the size in bytes of the area we want to validate.
* @type: returned va_range type.
*
* Return: true if the area is inside a valid range, false otherwise.
*/
static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
enum hl_va_range_type *type)
{
int i;
for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) {
if (hl_mem_area_inside_range(address, size,
ctx->va_range[i]->start_addr,
ctx->va_range[i]->end_addr)) {
*type = i;
return 0;
}
}
return -EINVAL;
}
/**
* hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
* @hdev: pointer to the habanalabs device structure
* @ctx: pointer to the context structure.
* @start_addr: start virtual address.
* @size: number of bytes to unreserve.
*
* This function does the following:
* - Takes the list lock and calls add_va_block_locked.
*/
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
u64 start_addr, u64 size)
{
enum hl_va_range_type type;
int rc;
rc = hl_get_va_range_type(ctx, start_addr, size, &type);
if (rc) {
dev_err(hdev->dev,
"cannot find va_range for va %#llx size %llu",
start_addr, size);
return rc;
}
rc = add_va_block(hdev, ctx->va_range[type], start_addr,
start_addr + size - 1);
if (rc)
dev_warn(hdev->dev,
"add va block failed for vaddr: 0x%llx\n", start_addr);
return rc;
}
/**
* init_phys_pg_pack_from_userptr() - initialize physical page pack from host
* memory
* @ctx: pointer to the context structure.
* @userptr: userptr to initialize from.
* @pphys_pg_pack: result pointer.
* @force_regular_page: tell the function to ignore huge page optimization,
* even if possible. Needed for cases where the device VA
* is allocated before we know the composition of the
* physical pages
*
* This function does the following:
* - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
* virtual block.
*/
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
struct hl_vm_phys_pg_pack **pphys_pg_pack,
bool force_regular_page)
{
u32 npages, page_size = PAGE_SIZE,
huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
struct hl_vm_phys_pg_pack *phys_pg_pack;
bool first = true, is_huge_page_opt;
u64 page_mask, total_npages;
struct scatterlist *sg;
dma_addr_t dma_addr;
int rc, i, j;
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
if (!phys_pg_pack)
return -ENOMEM;
phys_pg_pack->vm_type = userptr->vm_type;
phys_pg_pack->created_from_userptr = true;
phys_pg_pack->asid = ctx->asid;
atomic_set(&phys_pg_pack->mapping_cnt, 1);
is_huge_page_opt = (force_regular_page ? false : true);
/* Only if all dma_addrs are aligned to 2MB and their
* sizes is at least 2MB, we can use huge page mapping.
* We limit the 2MB optimization to this condition,
* since later on we acquire the related VA range as one
* consecutive block.
*/
total_npages = 0;
for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
total_npages += npages;
if ((npages % pgs_in_huge_page) ||
(dma_addr & (huge_page_size - 1)))
is_huge_page_opt = false;
}
if (is_huge_page_opt) {
page_size = huge_page_size;
do_div(total_npages, pgs_in_huge_page);
}
page_mask = ~(((u64) page_size) - 1);
phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
GFP_KERNEL);
if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
rc = -ENOMEM;
goto page_pack_arr_mem_err;
}
phys_pg_pack->npages = total_npages;
phys_pg_pack->page_size = page_size;
phys_pg_pack->total_size = total_npages * page_size;
j = 0;
for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
npages = hl_get_sg_info(sg, &dma_addr);
/* align down to physical page size and save the offset */
if (first) {
first = false;
phys_pg_pack->offset = dma_addr & (page_size - 1);
dma_addr &= page_mask;
}
while (npages) {
phys_pg_pack->pages[j++] = dma_addr;
dma_addr += page_size;
if (is_huge_page_opt)
npages -= pgs_in_huge_page;
else
npages--;
}
}
*pphys_pg_pack = phys_pg_pack;
return 0;
page_pack_arr_mem_err:
kfree(phys_pg_pack);
return rc;
}
/**
* map_phys_pg_pack() - maps the physical page pack..
* @ctx: pointer to the context structure.
* @vaddr: start address of the virtual area to map from.
* @phys_pg_pack: the pack of physical pages to map to.
*
* This function does the following:
* - Maps each chunk of virtual memory to matching physical chunk.
* - Stores number of successful mappings in the given argument.
* - Returns 0 on success, error code otherwise.
*/
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
u32 page_size = phys_pg_pack->page_size;
int rc = 0;
bool is_host_addr;
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
(i + 1) == phys_pg_pack->npages);
if (rc) {
dev_err(hdev->dev,
"map failed for handle %u, npages: %llu, mapped: %llu",
phys_pg_pack->handle, phys_pg_pack->npages,
mapped_pg_cnt);
goto err;
}
mapped_pg_cnt++;
next_vaddr += page_size;
}
return 0;
err:
is_host_addr = !hl_is_dram_va(hdev, vaddr);
next_vaddr = vaddr;
for (i = 0 ; i < mapped_pg_cnt ; i++) {
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
(i + 1) == mapped_pg_cnt))
dev_warn_ratelimited(hdev->dev,
"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
phys_pg_pack->handle, next_vaddr,
phys_pg_pack->pages[i], page_size);
next_vaddr += page_size;
/*
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
*
* In addition, on host num of pages could be huge,
* because page size could be 4KB, so when unmapping host
* pages sleep every 32K pages to avoid soft lockup
*/
if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
usleep_range(50, 200);
}
return rc;
}
/**
* unmap_phys_pg_pack() - unmaps the physical page pack.
* @ctx: pointer to the context structure.
* @vaddr: start address of the virtual area to unmap.
* @phys_pg_pack: the pack of physical pages to unmap.
*/
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr, i;
bool is_host_addr;
u32 page_size;
is_host_addr = !hl_is_dram_va(hdev, vaddr);
page_size = phys_pg_pack->page_size;
next_vaddr = vaddr;
for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
(i + 1) == phys_pg_pack->npages))
dev_warn_ratelimited(hdev->dev,
"unmap failed for vaddr: 0x%llx\n", next_vaddr);
/*
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
*
* In addition, on host num of pages could be huge,
* because page size could be 4KB, so when unmapping host
* pages sleep every 32K pages to avoid soft lockup
*/
if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
usleep_range(50, 200);
}
}
static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
u64 *paddr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
u32 handle;
handle = lower_32_bits(args->map_device.handle);
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
if (!phys_pg_pack) {
spin_unlock(&vm->idr_lock);
dev_err(hdev->dev, "no match for handle %u\n", handle);
return -EINVAL;
}
*paddr = phys_pg_pack->pages[0];
spin_unlock(&vm->idr_lock);
return 0;
}
/**
* map_device_va() - map the given memory.
* @ctx: pointer to the context structure.
* @args: host parameters with handle/host virtual address.
* @device_addr: pointer to result device virtual address.
*
* This function does the following:
* - If given a physical device memory handle, map to a device virtual block
* and return the start address of this block.
* - If given a host virtual address and size, find the related physical pages,
* map a device virtual block to this pages and return the start address of
* this block.
*/
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
{
struct hl_vm_phys_pg_pack *phys_pg_pack;
enum hl_va_range_type va_range_type = 0;
struct hl_device *hdev = ctx->hdev;
struct hl_userptr *userptr = NULL;
u32 handle = 0, va_block_align;
struct hl_vm_hash_node *hnode;
struct hl_vm *vm = &hdev->vm;
struct hl_va_range *va_range;
bool is_userptr, do_prefetch;
u64 ret_vaddr, hint_addr;
enum vm_type *vm_type;
int rc;
/* set map flags */
is_userptr = args->flags & HL_MEM_USERPTR;
do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH);
/* Assume failure */
*device_addr = 0;
if (is_userptr) {
u64 addr = args->map_host.host_virt_addr,
size = args->map_host.mem_size;
u32 page_size = hdev->asic_prop.pmmu.page_size,
huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
rc = dma_map_host_va(hdev, addr, size, &userptr);
if (rc) {
dev_err(hdev->dev, "failed to get userptr from va\n");
return rc;
}
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack, false);
if (rc) {
dev_err(hdev->dev,
"unable to init page pack for vaddr 0x%llx\n",
addr);
goto init_page_pack_err;
}
vm_type = (enum vm_type *) userptr;
hint_addr = args->map_host.hint_addr;
handle = phys_pg_pack->handle;
/* get required alignment */
if (phys_pg_pack->page_size == page_size) {
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
va_range_type = HL_VA_RANGE_TYPE_HOST;
/*
* huge page alignment may be needed in case of regular
* page mapping, depending on the host VA alignment
*/
if (addr & (huge_page_size - 1))
va_block_align = page_size;
else
va_block_align = huge_page_size;
} else {
/*
* huge page alignment is needed in case of huge page
* mapping
*/
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
va_block_align = huge_page_size;
}
} else {
handle = lower_32_bits(args->map_device.handle);
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
if (!phys_pg_pack) {
spin_unlock(&vm->idr_lock);
dev_err(hdev->dev,
"no match for handle %u\n", handle);
return -EINVAL;
}
/* increment now to avoid freeing device memory while mapping */
atomic_inc(&phys_pg_pack->mapping_cnt);
spin_unlock(&vm->idr_lock);
vm_type = (enum vm_type *) phys_pg_pack;
hint_addr = args->map_device.hint_addr;
/* DRAM VA alignment is the same as the MMU page size */
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
va_range_type = HL_VA_RANGE_TYPE_DRAM;
va_block_align = hdev->asic_prop.dmmu.page_size;
}
/*
* relevant for mapping device physical memory only, as host memory is
* implicitly shared
*/
if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
phys_pg_pack->asid != ctx->asid) {
dev_err(hdev->dev,
"Failed to map memory, handle %u is not shared\n",
handle);
rc = -EPERM;
goto shared_err;
}
hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
if (!hnode) {
rc = -ENOMEM;
goto hnode_err;
}
if (hint_addr && phys_pg_pack->offset) {
if (args->flags & HL_MEM_FORCE_HINT) {
/* Fail if hint must be respected but it can't be */
dev_err(hdev->dev,
"Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
hint_addr, phys_pg_pack->offset);
rc = -EINVAL;
goto va_block_err;
}
dev_dbg(hdev->dev,
"Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
hint_addr, phys_pg_pack->offset);
}
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
hint_addr, va_block_align,
va_range_type, args->flags);
if (!ret_vaddr) {
dev_err(hdev->dev, "no available va block for handle %u\n",
handle);
rc = -ENOMEM;
goto va_block_err;
}
mutex_lock(&hdev->mmu_lock);
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
mutex_unlock(&hdev->mmu_lock);
goto map_err;
}
rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, ret_vaddr, phys_pg_pack->total_size);
mutex_unlock(&hdev->mmu_lock);
if (rc)
goto map_err;
/*
* prefetch is done upon user's request. it is performed in WQ as and so can
* be outside the MMU lock. the operation itself is already protected by the mmu lock
*/
if (do_prefetch) {
rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr,
phys_pg_pack->total_size);
if (rc)
goto map_err;
}
ret_vaddr += phys_pg_pack->offset;
hnode->ptr = vm_type;
hnode->vaddr = ret_vaddr;
hnode->handle = is_userptr ? MEM_HANDLE_INVALID : handle;
mutex_lock(&ctx->mem_hash_lock);
hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
mutex_unlock(&ctx->mem_hash_lock);
*device_addr = ret_vaddr;
if (is_userptr)
free_phys_pg_pack(hdev, phys_pg_pack);
return rc;
map_err:
if (add_va_block(hdev, va_range, ret_vaddr,
ret_vaddr + phys_pg_pack->total_size - 1))
dev_warn(hdev->dev,
"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
handle, ret_vaddr);
va_block_err:
kfree(hnode);
hnode_err:
shared_err:
atomic_dec(&phys_pg_pack->mapping_cnt);
if (is_userptr)
free_phys_pg_pack(hdev, phys_pg_pack);
init_page_pack_err:
if (is_userptr)
dma_unmap_host_va(hdev, userptr);
return rc;
}
/**
* unmap_device_va() - unmap the given device virtual address.
* @ctx: pointer to the context structure.
* @args: host parameters with device virtual address to unmap.
* @ctx_free: true if in context free flow, false otherwise.
*
* This function does the following:
* - unmap the physical pages related to the given virtual address.
* - return the device virtual block to the virtual block list.
*/
static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
bool ctx_free)
{
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
u64 vaddr = args->unmap.device_virt_addr;
struct hl_vm_hash_node *hnode = NULL;
struct asic_fixed_properties *prop;
struct hl_device *hdev = ctx->hdev;
struct hl_userptr *userptr = NULL;
struct hl_va_range *va_range;
enum vm_type *vm_type;
bool is_userptr;
int rc = 0;
prop = &hdev->asic_prop;
/* protect from double entrance */
mutex_lock(&ctx->mem_hash_lock);
hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
if (vaddr == hnode->vaddr)
break;
if (!hnode) {
mutex_unlock(&ctx->mem_hash_lock);
dev_err(hdev->dev,
"unmap failed, no mem hnode for vaddr 0x%llx\n",
vaddr);
return -EINVAL;
}
if (hnode->export_cnt) {
mutex_unlock(&ctx->mem_hash_lock);
dev_err(hdev->dev, "failed to unmap %#llx, memory is exported\n", vaddr);
return -EINVAL;
}
hash_del(&hnode->node);
mutex_unlock(&ctx->mem_hash_lock);
vm_type = hnode->ptr;
if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true;
userptr = hnode->ptr;
rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
false);
if (rc) {
dev_err(hdev->dev,
"unable to init page pack for vaddr 0x%llx\n",
vaddr);
goto vm_type_err;
}
if (phys_pg_pack->page_size ==
hdev->asic_prop.pmmu.page_size)
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
else
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
} else if (*vm_type == VM_TYPE_PHYS_PACK) {
is_userptr = false;
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
phys_pg_pack = hnode->ptr;
} else {
dev_warn(hdev->dev,
"unmap failed, unknown vm desc for vaddr 0x%llx\n",
vaddr);
rc = -EFAULT;
goto vm_type_err;
}
if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
rc = -EINVAL;
goto mapping_cnt_err;
}
if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
vaddr = prop->dram_base_address +
DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
phys_pg_pack->page_size) *
phys_pg_pack->page_size;
else
vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
mutex_lock(&hdev->mmu_lock);
unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
/*
* During context free this function is called in a loop to clean all
* the context mappings. Hence the cache invalidation can be called once
* at the loop end rather than for each iteration
*/
if (!ctx_free)
rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
phys_pg_pack->total_size);
mutex_unlock(&hdev->mmu_lock);
/*
* If the context is closing we don't need to check for the MMU cache
* invalidation return code and update the VA free list as in this flow
* we invalidate the MMU cache outside of this unmap function and the VA
* free list will be freed anyway.
*/
if (!ctx_free) {
int tmp_rc;
tmp_rc = add_va_block(hdev, va_range, vaddr,
vaddr + phys_pg_pack->total_size - 1);
if (tmp_rc) {
dev_warn(hdev->dev,
"add va block failed for vaddr: 0x%llx\n",
vaddr);
if (!rc)
rc = tmp_rc;
}
}
atomic_dec(&phys_pg_pack->mapping_cnt);
kfree(hnode);
if (is_userptr) {
free_phys_pg_pack(hdev, phys_pg_pack);
dma_unmap_host_va(hdev, userptr);
}
return rc;
mapping_cnt_err:
if (is_userptr)
free_phys_pg_pack(hdev, phys_pg_pack);
vm_type_err:
mutex_lock(&ctx->mem_hash_lock);
hash_add(ctx->mem_hash, &hnode->node, vaddr);
mutex_unlock(&ctx->mem_hash_lock);
return rc;
}
static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{
u32 block_id;
int rc;
*handle = 0;
if (size)
*size = 0;
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
if (rc)
return rc;
*handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT;
return 0;
}
static void hw_block_vm_close(struct vm_area_struct *vma)
{
struct hl_vm_hw_block_list_node *lnode =
(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
struct hl_ctx *ctx = lnode->ctx;
long new_mmap_size;
new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
if (new_mmap_size > 0) {
lnode->mapped_size = new_mmap_size;
return;
}
mutex_lock(&ctx->hw_block_list_lock);
list_del(&lnode->node);
mutex_unlock(&ctx->hw_block_list_lock);
hl_ctx_put(ctx);
kfree(lnode);
vma->vm_private_data = NULL;
}
static const struct vm_operations_struct hw_block_vm_ops = {
.close = hw_block_vm_close
};
/**
* hl_hw_block_mmap() - mmap a hw block to user.
* @hpriv: pointer to the private data of the fd
* @vma: pointer to vm_area_struct of the process
*
* Driver increments context reference for every HW block mapped in order
* to prevent user from closing FD without unmapping first
*/
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
struct hl_vm_hw_block_list_node *lnode;
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u32 block_id, block_size;
int rc;
/* We use the page offset to hold the block id and thus we need to clear
* it before doing the mmap itself
*/
block_id = vma->vm_pgoff;
vma->vm_pgoff = 0;
/* Driver only allows mapping of a complete HW block */
block_size = vma->vm_end - vma->vm_start;
if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
dev_err(hdev->dev,
"user pointer is invalid - 0x%lx\n",
vma->vm_start);
return -EINVAL;
}
lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
if (!lnode)
return -ENOMEM;
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
kfree(lnode);
return rc;
}
hl_ctx_get(ctx);
lnode->ctx = ctx;
lnode->vaddr = vma->vm_start;
lnode->block_size = block_size;
lnode->mapped_size = lnode->block_size;
lnode->id = block_id;
vma->vm_private_data = lnode;
vma->vm_ops = &hw_block_vm_ops;
mutex_lock(&ctx->hw_block_list_lock);
list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
mutex_unlock(&ctx->hw_block_list_lock);
vma->vm_pgoff = block_id;
return 0;
}
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
struct device *dev, enum dma_data_direction dir)
{
dma_addr_t addr;
int rc;
addr = dma_map_resource(dev, bar_address, chunk_size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
rc = dma_mapping_error(dev, addr);
if (rc)
return rc;
sg_set_page(sg, NULL, chunk_size, 0);
sg_dma_address(sg) = addr;
sg_dma_len(sg) = chunk_size;
return 0;
}
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
u64 page_size, u64 exported_size,
struct device *dev, enum dma_data_direction dir)
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
{
u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
struct asic_fixed_properties *prop;
int rc, i, j, nents, cur_page;
struct scatterlist *sg;
struct sg_table *sgt;
prop = &hdev->asic_prop;
dma_max_seg_size = dma_get_max_seg_size(dev);
/* We would like to align the max segment size to PAGE_SIZE, so the
* SGL will contain aligned addresses that can be easily mapped to
* an MMU
*/
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE) {
dev_err_ratelimited(hdev->dev,
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
dma_max_seg_size);
return ERR_PTR(-EINVAL);
}
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt)
return ERR_PTR(-ENOMEM);
/* remove export size restrictions in case not explicitly defined */
cur_size_to_export = exported_size ? exported_size : (npages * page_size);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
/* If the size of each page is larger than the dma max segment size,
* then we can't combine pages and the number of entries in the SGL
* will just be the
* <number of pages> * <chunks of max segment size in each page>
*/
if (page_size > dma_max_seg_size) {
/* we should limit number of pages according to the exported size */
cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
} else {
cur_npages = npages;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
/* Get number of non-contiguous chunks */
for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (pages[i - 1] + page_size != pages[i] ||
chunk_size + page_size > dma_max_seg_size) {
nents++;
chunk_size = page_size;
continue;
}
chunk_size += page_size;
}
}
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
if (rc)
goto error_free;
cur_page = 0;
if (page_size > dma_max_seg_size) {
u64 size_left, cur_device_address = 0;
size_left = page_size;
/* Need to split each page into the number of chunks of
* dma_max_seg_size
*/
for_each_sgtable_dma_sg(sgt, sg, i) {
if (size_left == page_size)
cur_device_address =
pages[cur_page] - prop->dram_base_address;
else
cur_device_address += dma_max_seg_size;
/* make sure not to export over exported size */
chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
bar_address = hdev->dram_pci_bar_start + cur_device_address;
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
cur_size_to_export -= chunk_size;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (size_left > dma_max_seg_size) {
size_left -= dma_max_seg_size;
} else {
cur_page++;
size_left = page_size;
}
}
} else {
/* Merge pages and put them into the scatterlist */
for_each_sgtable_dma_sg(sgt, sg, i) {
chunk_size = page_size;
for (j = cur_page + 1 ; j < cur_npages ; j++) {
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (pages[j - 1] + page_size != pages[j] ||
chunk_size + page_size > dma_max_seg_size)
break;
chunk_size += page_size;
}
bar_address = hdev->dram_pci_bar_start +
(pages[cur_page] - prop->dram_base_address);
/* make sure not to export over exported size */
chunk_size = min(chunk_size, cur_size_to_export);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
cur_size_to_export -= chunk_size;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
cur_page = j;
}
}
/* Because we are not going to include a CPU list we want to have some
* chance that other users will detect this by setting the orig_nents
* to 0 and using only nents (length of DMA list) when going over the
* sgl
*/
sgt->orig_nents = 0;
return sgt;
error_unmap:
for_each_sgtable_dma_sg(sgt, sg, i) {
if (!sg_dma_len(sg))
continue;
dma_unmap_resource(dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
sg_free_table(sgt);
error_free:
kfree(sgt);
return ERR_PTR(rc);
}
static int hl_dmabuf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment)
{
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
int rc;
hl_dmabuf = dmabuf->priv;
hdev = hl_dmabuf->ctx->hdev;
rc = pci_p2pdma_distance(hdev->pdev, attachment->dev, true);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (rc < 0)
attachment->peer2peer = false;
return 0;
}
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
struct dma_buf *dma_buf = attachment->dmabuf;
struct hl_vm_phys_pg_pack *phys_pg_pack;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
struct sg_table *sgt;
hl_dmabuf = dma_buf->priv;
hdev = hl_dmabuf->ctx->hdev;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (!attachment->peer2peer) {
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
return ERR_PTR(-EPERM);
}
if (phys_pg_pack)
sgt = alloc_sgt_from_device_pages(hdev,
phys_pg_pack->pages,
phys_pg_pack->npages,
phys_pg_pack->page_size,
phys_pg_pack->exported_size,
attachment->dev,
dir);
else
sgt = alloc_sgt_from_device_pages(hdev,
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
&hl_dmabuf->device_address,
1,
hl_dmabuf->dmabuf->size,
0,
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
attachment->dev,
dir);
if (IS_ERR(sgt))
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
return sgt;
}
static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
struct sg_table *sgt,
enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
/* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
* only in the 'device' domain (after all, it maps a PCI bar address which points to the
* device memory).
*
* Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
* a sync of the memory to the CPU's cache, as it never resided inside that cache.
*/
for_each_sgtable_dma_sg(sgt, sg, i)
dma_unmap_resource(attachment->dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
/* Need to restore orig_nents because sg_free_table use that field */
sgt->orig_nents = sgt->nents;
sg_free_table(sgt);
kfree(sgt);
}
static void hl_release_dmabuf(struct dma_buf *dmabuf)
{
struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
struct hl_ctx *ctx;
if (!hl_dmabuf)
return;
ctx = hl_dmabuf->ctx;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (hl_dmabuf->memhash_hnode) {
mutex_lock(&ctx->mem_hash_lock);
hl_dmabuf->memhash_hnode->export_cnt--;
mutex_unlock(&ctx->mem_hash_lock);
}
hl_ctx_put(ctx);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
kfree(hl_dmabuf);
}
static const struct dma_buf_ops habanalabs_dmabuf_ops = {
.attach = hl_dmabuf_attach,
.map_dma_buf = hl_map_dmabuf,
.unmap_dma_buf = hl_unmap_dmabuf,
.release = hl_release_dmabuf,
};
static int export_dmabuf(struct hl_ctx *ctx,
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
struct hl_dmabuf_priv *hl_dmabuf,
u64 total_size, int flags, int *dmabuf_fd)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct hl_device *hdev = ctx->hdev;
int rc, fd;
exp_info.ops = &habanalabs_dmabuf_ops;
exp_info.size = total_size;
exp_info.flags = flags;
exp_info.priv = hl_dmabuf;
hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
if (IS_ERR(hl_dmabuf->dmabuf)) {
dev_err(hdev->dev, "failed to export dma-buf\n");
return PTR_ERR(hl_dmabuf->dmabuf);
}
fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
if (fd < 0) {
dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
rc = fd;
goto err_dma_buf_put;
}
hl_dmabuf->ctx = ctx;
hl_ctx_get(hl_dmabuf->ctx);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
*dmabuf_fd = fd;
return 0;
err_dma_buf_put:
hl_dmabuf->dmabuf->priv = NULL;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
dma_buf_put(hl_dmabuf->dmabuf);
return rc;
}
static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
{
if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
dev_dbg(hdev->dev,
"exported device memory address 0x%llx should be aligned to 0x%lx\n",
device_addr, PAGE_SIZE);
return -EINVAL;
}
if (size < PAGE_SIZE) {
dev_dbg(hdev->dev,
"exported device memory size %llu should be equal to or greater than %lu\n",
size, PAGE_SIZE);
return -EINVAL;
}
return 0;
}
static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr, u64 size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 bar_address;
int rc;
rc = validate_export_params_common(hdev, device_addr, size);
if (rc)
return rc;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (device_addr < prop->dram_user_base_address ||
(device_addr + size) > prop->dram_end_address ||
(device_addr + size) < device_addr) {
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
device_addr, size);
return -EINVAL;
}
bar_address = hdev->dram_pci_bar_start + (device_addr - prop->dram_base_address);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if ((bar_address + size) > (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
(bar_address + size) < bar_address) {
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
device_addr, size);
return -EINVAL;
}
return 0;
}
static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 size, u64 offset,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 bar_address;
int i, rc;
rc = validate_export_params_common(hdev, device_addr, size);
if (rc)
return rc;
if ((offset + size) > phys_pg_pack->total_size) {
dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
offset, size, phys_pg_pack->total_size);
return -EINVAL;
}
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
bar_address = hdev->dram_pci_bar_start +
(phys_pg_pack->pages[i] - prop->dram_base_address);
if ((bar_address + phys_pg_pack->page_size) >
(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
(bar_address + phys_pg_pack->page_size) < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
return -EINVAL;
}
}
return 0;
}
static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm_hash_node *hnode;
/* get the memory handle */
mutex_lock(&ctx->mem_hash_lock);
hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)addr)
if (addr == hnode->vaddr)
break;
if (!hnode) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "map address %#llx not found\n", addr);
return ERR_PTR(-EINVAL);
}
if (upper_32_bits(hnode->handle)) {
mutex_unlock(&ctx->mem_hash_lock);
dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n",
hnode->handle, addr);
return ERR_PTR(-EINVAL);
}
/*
* node found, increase export count so this memory cannot be unmapped
* and the hash node cannot be deleted.
*/
hnode->export_cnt++;
mutex_unlock(&ctx->mem_hash_lock);
return hnode;
}
static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode)
{
mutex_lock(&ctx->mem_hash_lock);
hnode->export_cnt--;
mutex_unlock(&ctx->mem_hash_lock);
}
static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev,
struct hl_vm_hash_node *hnode)
{
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_vm *vm = &hdev->vm;
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) hnode->handle);
if (!phys_pg_pack) {
spin_unlock(&vm->idr_lock);
dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) hnode->handle);
return ERR_PTR(-EINVAL);
}
spin_unlock(&vm->idr_lock);
if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", hnode->handle);
return ERR_PTR(-EINVAL);
}
return phys_pg_pack;
}
/**
* export_dmabuf_from_addr() - export a dma-buf object for the given memory
* address and size.
* @ctx: pointer to the context structure.
* @addr: device address.
* @size: size of device memory to export.
* @offset: the offset into the buffer from which to start exporting
* @flags: DMA-BUF file/FD flags.
* @dmabuf_fd: pointer to result FD that represents the dma-buf object.
*
* Create and export a dma-buf object for an existing memory allocation inside
* the device memory, and return a FD which is associated with the dma-buf
* object.
*
* Return: 0 on success, non-zero for failure.
*/
static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 offset,
int flags, int *dmabuf_fd)
{
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
struct hl_vm_hash_node *hnode = NULL;
struct asic_fixed_properties *prop;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
u64 export_addr;
int rc;
hdev = ctx->hdev;
prop = &hdev->asic_prop;
/* offset must be 0 in devices without virtual memory support */
if (!prop->dram_supports_virtual_memory && offset) {
dev_dbg(hdev->dev, "offset is not allowed in device without virtual memory\n");
return -EINVAL;
}
export_addr = addr + offset;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf)
return -ENOMEM;
if (prop->dram_supports_virtual_memory) {
hnode = memhash_node_export_get(ctx, addr);
if (IS_ERR(hnode)) {
rc = PTR_ERR(hnode);
goto err_free_dmabuf_wrapper;
}
phys_pg_pack = get_phys_pg_pack_from_hash_node(hdev, hnode);
if (IS_ERR(phys_pg_pack)) {
rc = PTR_ERR(phys_pg_pack);
goto dec_memhash_export_cnt;
}
rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
if (rc)
goto dec_memhash_export_cnt;
phys_pg_pack->exported_size = size;
hl_dmabuf->phys_pg_pack = phys_pg_pack;
hl_dmabuf->memhash_hnode = hnode;
} else {
rc = validate_export_params_no_mmu(hdev, export_addr, size);
if (rc)
goto err_free_dmabuf_wrapper;
}
hl_dmabuf->device_address = export_addr;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
if (rc)
goto dec_memhash_export_cnt;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
return 0;
dec_memhash_export_cnt:
if (prop->dram_supports_virtual_memory)
memhash_node_export_put(ctx, hnode);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
err_free_dmabuf_wrapper:
kfree(hl_dmabuf);
return rc;
}
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
u64 block_handle, device_addr = 0;
struct hl_ctx *ctx = hpriv->ctx;
u32 handle = 0, block_size;
int rc;
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
if (args->in.alloc.mem_size == 0) {
dev_err(hdev->dev, "alloc size must be larger than 0\n");
rc = -EINVAL;
goto out;
}
/* Force contiguous as there are no real MMU
* translations to overcome physical memory gaps
*/
args->in.flags |= HL_MEM_CONTIGUOUS;
rc = alloc_device_memory(ctx, &args->in, &handle);
memset(args, 0, sizeof(*args));
args->out.handle = (__u64) handle;
break;
case HL_MEM_OP_FREE:
rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
if (args->in.flags & HL_MEM_USERPTR) {
dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
rc = -EPERM;
} else {
rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
memset(args, 0, sizeof(*args));
args->out.device_virt_addr = device_addr;
}
break;
case HL_MEM_OP_UNMAP:
rc = 0;
break;
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
case HL_MEM_OP_EXPORT_DMABUF_FD:
dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
rc = -EPERM;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
break;
case HL_MEM_OP_TS_ALLOC:
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
break;
}
out:
return rc;
}
static void ts_buff_release(struct hl_mmap_mem_buf *buf)
{
struct hl_ts_buff *ts_buff = buf->private;
vfree(ts_buff->kernel_buff_address);
vfree(ts_buff->user_buff_address);
kfree(ts_buff);
}
static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
{
struct hl_ts_buff *ts_buff = buf->private;
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
}
static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
struct hl_ts_buff *ts_buff = NULL;
u32 num_elements;
size_t size;
void *p;
num_elements = *(u32 *)args;
ts_buff = kzalloc(sizeof(*ts_buff), gfp);
if (!ts_buff)
return -ENOMEM;
/* Allocate the user buffer */
size = num_elements * sizeof(u64);
p = vmalloc_user(size);
if (!p)
goto free_mem;
ts_buff->user_buff_address = p;
buf->mappable_size = size;
/* Allocate the internal kernel buffer */
size = num_elements * sizeof(struct hl_user_pending_interrupt);
p = vzalloc(size);
if (!p)
goto free_user_buff;
ts_buff->kernel_buff_address = p;
ts_buff->kernel_buff_size = size;
buf->private = ts_buff;
return 0;
free_user_buff:
vfree(ts_buff->user_buff_address);
free_mem:
kfree(ts_buff);
return -ENOMEM;
}
static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
.topic = "TS",
.mem_id = HL_MMAP_TYPE_TS_BUFF,
.mmap = hl_ts_mmap,
.alloc = hl_ts_alloc_buf,
.release = ts_buff_release,
};
/**
* allocate_timestamps_buffers() - allocate timestamps buffers
* This function will allocate ts buffer that will later on be mapped to the user
* in order to be able to read the timestamp.
* in additon it'll allocate an extra buffer for registration management.
* since we cannot fail during registration for out-of-memory situation, so
* we'll prepare a pool which will be used as user interrupt nodes and instead
* of dynamically allocating nodes while registration we'll pick the node from
* this pool. in addtion it'll add node to the mapping hash which will be used
* to map user ts buffer to the internal kernel ts buffer.
* @hpriv: pointer to the private data of the fd
* @args: ioctl input
* @handle: user timestamp buffer handle as an output
*/
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
{
struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
struct hl_mmap_mem_buf *buf;
if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
args->num_of_elements, TS_MAX_ELEMENTS_NUM);
return -EINVAL;
}
buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
if (!buf)
return -ENOMEM;
*handle = buf->handle;
return 0;
}
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
enum hl_device_status status;
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
u32 handle = 0, block_size;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
int rc, dmabuf_fd = -EBADF;
if (!hl_device_operational(hdev, &status)) {
dev_dbg_ratelimited(hdev->dev,
"Device is %s. Can't execute MEMORY IOCTL\n",
hdev->status[status]);
return -EBUSY;
}
if (!hdev->mmu_enable)
return mem_ioctl_no_mmu(hpriv, args);
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
if (args->in.alloc.mem_size == 0) {
dev_err(hdev->dev,
"alloc size must be larger than 0\n");
rc = -EINVAL;
goto out;
}
/* If DRAM does not support virtual memory the driver won't
* handle the allocation/freeing of that memory. However, for
* system administration/monitoring purposes, the driver will
* keep track of the amount of DRAM memory that is allocated
* and freed by the user. Because this code totally relies on
* the user's input, the driver can't ensure the validity
* of this accounting.
*/
if (!hdev->asic_prop.dram_supports_virtual_memory) {
atomic64_add(args->in.alloc.mem_size,
&ctx->dram_phys_mem);
atomic64_add(args->in.alloc.mem_size,
&hdev->dram_used_mem);
dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
rc = 0;
memset(args, 0, sizeof(*args));
args->out.handle = 0;
goto out;
}
rc = alloc_device_memory(ctx, &args->in, &handle);
memset(args, 0, sizeof(*args));
args->out.handle = (__u64) handle;
break;
case HL_MEM_OP_FREE:
/* If DRAM does not support virtual memory the driver won't
* handle the allocation/freeing of that memory. However, for
* system administration/monitoring purposes, the driver will
* keep track of the amount of DRAM memory that is allocated
* and freed by the user. Because this code totally relies on
* the user's input, the driver can't ensure the validity
* of this accounting.
*/
if (!hdev->asic_prop.dram_supports_virtual_memory) {
atomic64_sub(args->in.alloc.mem_size,
&ctx->dram_phys_mem);
atomic64_sub(args->in.alloc.mem_size,
&hdev->dram_used_mem);
dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
rc = 0;
goto out;
}
rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
rc = map_device_va(ctx, &args->in, &device_addr);
memset(args, 0, sizeof(*args));
args->out.device_virt_addr = device_addr;
break;
case HL_MEM_OP_UNMAP:
rc = unmap_device_va(ctx, &args->in, false);
break;
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
case HL_MEM_OP_EXPORT_DMABUF_FD:
rc = export_dmabuf_from_addr(ctx,
args->in.export_dmabuf_fd.addr,
args->in.export_dmabuf_fd.mem_size,
args->in.export_dmabuf_fd.offset,
args->in.flags,
&dmabuf_fd);
habanalabs: add support for dma-buf exporter Implement the calls to the dma-buf kernel api to create a dma-buf object backed by FD. We block the option to mmap the DMA-BUF object because we don't support DIRECT_IO and implicit P2P. We only implement support for explicit P2P through importing the FD of the DMA-BUF. In the export phase, we provide to the DMA-BUF object an array of pages that represent the device's memory area. During the map callback, we convert the array of pages into an SGT. We split/merge the pages according to the dma max segment size of the importer. To get the DMA address of the PCI bar, we use the dma_map_resources() kernel API, because our device memory is not backed by page struct and this API doesn't need page struct to map the physical address to a DMA address. We set the orig_nents member of the SGT to be 0, to indicate to other drivers that we don't support CPU mappings. Note that in Habanalabs's ASICs, the device memory is pinned and immutable. Therefore, there is no need for dynamic mappings and pinning callbacks. Also note that in GAUDI we don't have an MMU towards the device memory and the user works on physical addresses. Therefore, the user doesn't pass through the kernel driver to allocate memory there. As a result, only for GAUDI we receive from the user a device memory physical address (instead of a handle) and a size. We check the p2p distance using pci_p2pdma_distance_many() and refusing to map dmabuf in case the distance doesn't allow p2p. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Gal Pressman <galpress@amazon.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
2021-06-15 15:55:43 +08:00
memset(args, 0, sizeof(*args));
args->out.fd = dmabuf_fd;
break;
case HL_MEM_OP_TS_ALLOC:
rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
break;
}
out:
return rc;
}
static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
u32 npages, u64 start, u32 offset,
struct hl_userptr *userptr)
{
int rc;
if (!access_ok((void __user *) (uintptr_t) addr, size)) {
dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
return -EFAULT;
}
userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
if (!userptr->pages)
return -ENOMEM;
rc = pin_user_pages_fast(start, npages, FOLL_WRITE | FOLL_LONGTERM,
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
userptr->pages);
if (rc != npages) {
dev_err(hdev->dev,
"Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n",
rc, addr, size, npages);
if (rc < 0)
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
goto destroy_pages;
npages = rc;
rc = -EFAULT;
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
goto put_pages;
}
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
userptr->npages = npages;
rc = sg_alloc_table_from_pages(userptr->sgt,
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
userptr->pages,
npages, offset, size, GFP_KERNEL);
if (rc < 0) {
dev_err(hdev->dev, "failed to create SG table from pages\n");
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
goto put_pages;
}
return 0;
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
put_pages:
unpin_user_pages(userptr->pages, npages);
destroy_pages:
kvfree(userptr->pages);
return rc;
}
/**
* hl_pin_host_memory() - pins a chunk of host memory.
* @hdev: pointer to the habanalabs device structure.
* @addr: the host virtual address of the memory area.
* @size: the size of the memory area.
* @userptr: pointer to hl_userptr structure.
*
* This function does the following:
* - Pins the physical pages.
* - Create an SG list from those pages.
*/
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr *userptr)
{
u64 start, end;
u32 npages, offset;
int rc;
if (!size) {
dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
return -EINVAL;
}
/*
* If the combination of the address and size requested for this memory
* region causes an integer overflow, return error.
*/
if (((addr + size) < addr) ||
PAGE_ALIGN(addr + size) < (addr + size)) {
dev_err(hdev->dev,
"user pointer 0x%llx + %llu causes integer overflow\n",
addr, size);
return -EINVAL;
}
userptr->pid = current->pid;
userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
if (!userptr->sgt)
return -ENOMEM;
start = addr & PAGE_MASK;
offset = addr & ~PAGE_MASK;
end = PAGE_ALIGN(addr + size);
npages = (end - start) >> PAGE_SHIFT;
userptr->size = size;
userptr->addr = addr;
userptr->dma_mapped = false;
INIT_LIST_HEAD(&userptr->job_node);
rc = get_user_memory(hdev, addr, size, npages, start, offset,
userptr);
if (rc) {
dev_err(hdev->dev,
"failed to get user memory for address 0x%llx\n",
addr);
goto free_sgt;
}
hl_debugfs_add_userptr(hdev, userptr);
return 0;
free_sgt:
kfree(userptr->sgt);
return rc;
}
/*
* hl_unpin_host_memory - unpins a chunk of host memory.
* @hdev: pointer to the habanalabs device structure
* @userptr: pointer to hl_userptr structure
*
* This function does the following:
* - Unpins the physical pages related to the host memory
* - Free the SG list
*/
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
misc/habana: Stop using frame_vector helpers All we need are a pages array, pin_user_pages_fast can give us that directly. Plus this avoids the entire raw pfn side of get_vaddr_frames. Note that pin_user_pages_fast is a safe replacement despite the seeming lack of checking for vma->vm_flasg & (VM_IO | VM_PFNMAP). Such ptes are marked with pte_mkspecial (which pup_fast rejects in the fastpath), and only architectures supporting that support the pin_user_pages_fast fastpath. Reviewed-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Dan Williams <dan.j.williams@intel.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: Oded Gabbay <oded.gabbay@gmail.com> Cc: Omer Shpigelman <oshpigelman@habana.ai> Cc: Ofir Bitton <obitton@habana.ai> Cc: Tomer Tayar <ttayar@habana.ai> Cc: Moti Haimovski <mhaimovski@habana.ai> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Pawel Piskorski <ppiskorski@habana.ai> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20201127164131.2244124-4-daniel.vetter@ffwll.ch
2020-11-28 00:41:17 +08:00
unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
kvfree(userptr->pages);
list_del(&userptr->job_node);
sg_free_table(userptr->sgt);
kfree(userptr->sgt);
}
/**
* hl_userptr_delete_list() - clear userptr list.
* @hdev: pointer to the habanalabs device structure.
* @userptr_list: pointer to the list to clear.
*
* This function does the following:
* - Iterates over the list and unpins the host memory and frees the userptr
* structure.
*/
void hl_userptr_delete_list(struct hl_device *hdev,
struct list_head *userptr_list)
{
struct hl_userptr *userptr, *tmp;
list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
hl_unpin_host_memory(hdev, userptr);
kfree(userptr);
}
INIT_LIST_HEAD(userptr_list);
}
/**
* hl_userptr_is_pinned() - returns whether the given userptr is pinned.
* @hdev: pointer to the habanalabs device structure.
* @addr: user address to check.
* @size: user block size to check.
* @userptr_list: pointer to the list to clear.
* @userptr: pointer to userptr to check.
*
* This function does the following:
* - Iterates over the list and checks if the given userptr is in it, means is
* pinned. If so, returns true, otherwise returns false.
*/
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
u32 size, struct list_head *userptr_list,
struct hl_userptr **userptr)
{
list_for_each_entry((*userptr), userptr_list, job_node) {
if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
return true;
}
return false;
}
/**
* va_range_init() - initialize virtual addresses range.
* @hdev: pointer to the habanalabs device structure.
* @va_ranges: pointer to va_ranges array.
* @range_type: virtual address range type.
* @start: range start address, inclusive.
* @end: range end address, inclusive.
* @page_size: page size for this va_range.
*
* This function does the following:
* - Initializes the virtual addresses list of the given range with the given
* addresses.
*/
static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges,
enum hl_va_range_type range_type, u64 start,
u64 end, u32 page_size)
{
struct hl_va_range *va_range = va_ranges[range_type];
int rc;
INIT_LIST_HEAD(&va_range->list);
/*
* PAGE_SIZE alignment
* it is the caller's responsibility to align the addresses if the
* page size is not a power of 2
*/
if (is_power_of_2(page_size)) {
start = round_up(start, page_size);
/*
* The end of the range is inclusive, hence we need to align it
* to the end of the last full page in the range. For example if
* end = 0x3ff5 with page size 0x1000, we need to align it to
* 0x2fff. The remaining 0xff5 bytes do not form a full page.
*/
end = round_down(end + 1, page_size) - 1;
}
if (start >= end) {
dev_err(hdev->dev, "too small vm range for va list\n");
return -EFAULT;
}
rc = add_va_block(hdev, va_range, start, end);
if (rc) {
dev_err(hdev->dev, "Failed to init host va list\n");
return rc;
}
va_range->start_addr = start;
va_range->end_addr = end;
va_range->page_size = page_size;
return 0;
}
/**
* va_range_fini() - clear a virtual addresses range.
* @hdev: pointer to the habanalabs structure.
* @va_range: pointer to virtual addresses range.
*
* This function does the following:
* - Frees the virtual addresses block list and its lock.
*/
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
mutex_lock(&va_range->lock);
clear_va_list_locked(hdev, &va_range->list);
mutex_unlock(&va_range->lock);
mutex_destroy(&va_range->lock);
kfree(va_range);
}
/**
* vm_ctx_init_with_ranges() - initialize virtual memory for context.
* @ctx: pointer to the habanalabs context structure.
* @host_range_start: host virtual addresses range start.
* @host_range_end: host virtual addresses range end.
* @host_page_size: host page size.
* @host_huge_range_start: host virtual addresses range start for memory
* allocated with huge pages.
* @host_huge_range_end: host virtual addresses range end for memory allocated
* with huge pages.
* @host_huge_page_size: host huge page size.
* @dram_range_start: dram virtual addresses range start.
* @dram_range_end: dram virtual addresses range end.
* @dram_page_size: dram page size.
*
* This function initializes the following:
* - MMU for context.
* - Virtual address to area descriptor hashtable.
* - Virtual block list of available virtual memory.
*/
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
u64 host_range_start,
u64 host_range_end,
u32 host_page_size,
u64 host_huge_range_start,
u64 host_huge_range_end,
u32 host_huge_page_size,
u64 dram_range_start,
u64 dram_range_end,
u32 dram_page_size)
{
struct hl_device *hdev = ctx->hdev;
int i, rc;
for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
ctx->va_range[i] =
kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
if (!ctx->va_range[i]) {
rc = -ENOMEM;
goto free_va_range;
}
}
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
goto free_va_range;
}
mutex_init(&ctx->mem_hash_lock);
hash_init(ctx->mem_hash);
mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_HOST,
host_range_start, host_range_end, host_page_size);
if (rc) {
dev_err(hdev->dev, "failed to init host vm range\n");
goto mmu_ctx_fini;
}
if (hdev->pmmu_huge_range) {
mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
rc = va_range_init(hdev,
ctx->va_range, HL_VA_RANGE_TYPE_HOST_HUGE,
host_huge_range_start, host_huge_range_end,
host_huge_page_size);
if (rc) {
dev_err(hdev->dev,
"failed to init host huge vm range\n");
goto clear_host_va_range;
}
} else {
kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
ctx->va_range[HL_VA_RANGE_TYPE_HOST];
}
mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_DRAM,
dram_range_start, dram_range_end, dram_page_size);
if (rc) {
dev_err(hdev->dev, "failed to init dram vm range\n");
goto clear_host_huge_va_range;
}
hl_debugfs_add_ctx_mem_hash(hdev, ctx);
return 0;
clear_host_huge_va_range:
mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
if (hdev->pmmu_huge_range) {
mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
clear_va_list_locked(hdev,
&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
}
clear_host_va_range:
if (hdev->pmmu_huge_range)
mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
mmu_ctx_fini:
mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
free_va_range:
for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
kfree(ctx->va_range[i]);
return rc;
}
int hl_vm_ctx_init(struct hl_ctx *ctx)
{
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
u64 host_range_start, host_range_end, host_huge_range_start,
host_huge_range_end, dram_range_start, dram_range_end;
u32 host_page_size, host_huge_page_size, dram_page_size;
atomic64_set(&ctx->dram_phys_mem, 0);
/*
* - If MMU is enabled, init the ranges as usual.
* - If MMU is disabled, in case of host mapping, the returned address
* is the given one.
* In case of DRAM mapping, the returned address is the physical
* address of the memory related to the given handle.
*/
if (!ctx->hdev->mmu_enable)
return 0;
dram_range_start = prop->dmmu.start_addr;
dram_range_end = prop->dmmu.end_addr - 1;
dram_page_size = prop->dram_page_size ?
prop->dram_page_size : prop->dmmu.page_size;
host_range_start = prop->pmmu.start_addr;
host_range_end = prop->pmmu.end_addr - 1;
host_page_size = prop->pmmu.page_size;
host_huge_range_start = prop->pmmu_huge.start_addr;
host_huge_range_end = prop->pmmu_huge.end_addr - 1;
host_huge_page_size = prop->pmmu_huge.page_size;
return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
host_page_size, host_huge_range_start,
host_huge_range_end, host_huge_page_size,
dram_range_start, dram_range_end, dram_page_size);
}
/**
* hl_vm_ctx_fini() - virtual memory teardown of context.
* @ctx: pointer to the habanalabs context structure.
*
* This function perform teardown the following:
* - Virtual block list of available virtual memory.
* - Virtual address to area descriptor hashtable.
* - MMU for context.
*
* In addition this function does the following:
* - Unmaps the existing hashtable nodes if the hashtable is not empty. The
* hashtable should be empty as no valid mappings should exist at this
* point.
* - Frees any existing physical page list from the idr which relates to the
* current context asid.
* - This function checks the virtual block list for correctness. At this point
* the list should contain one element which describes the whole virtual
* memory range of the context. Otherwise, a warning is printed.
*/
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
struct hl_device *hdev = ctx->hdev;
struct hl_vm_hash_node *hnode;
struct hl_vm *vm = &hdev->vm;
struct hlist_node *tmp_node;
struct list_head free_list;
struct hl_mem_in args;
int i;
if (!hdev->mmu_enable)
return;
hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
/*
* Clearly something went wrong on hard reset so no point in printing
* another side effect error
*/
if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash))
dev_dbg(hdev->dev,
"user released device without removing its memory mappings\n");
hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
dev_dbg(hdev->dev,
"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
hnode->vaddr, ctx->asid);
args.unmap.device_virt_addr = hnode->vaddr;
unmap_device_va(ctx, &args, true);
}
mutex_lock(&hdev->mmu_lock);
/* invalidate the cache once after the unmapping loop */
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
mutex_unlock(&hdev->mmu_lock);
INIT_LIST_HEAD(&free_list);
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
dev_dbg(hdev->dev,
"page list 0x%px of asid %d is still alive\n",
phys_pg_list, ctx->asid);
atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
idr_remove(&vm->phys_pg_pack_handles, i);
list_add(&phys_pg_list->node, &free_list);
}
spin_unlock(&vm->idr_lock);
list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
free_phys_pg_pack(hdev, phys_pg_list);
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
if (hdev->pmmu_huge_range)
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
/* In this case we need to clear the global accounting of DRAM usage
* because the user notifies us on allocations. If the user is no more,
* all DRAM is available
*/
if (ctx->asid != HL_KERNEL_ASID_ID &&
!hdev->asic_prop.dram_supports_virtual_memory)
atomic64_set(&hdev->dram_used_mem, 0);
}
/**
* hl_vm_init() - initialize virtual memory module.
* @hdev: pointer to the habanalabs device structure.
*
* This function initializes the following:
* - MMU module.
* - DRAM physical pages pool of 2MB.
* - Idr for device memory allocation handles.
*/
int hl_vm_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm *vm = &hdev->vm;
int rc;
if (is_power_of_2(prop->dram_page_size))
vm->dram_pg_pool =
gen_pool_create(__ffs(prop->dram_page_size), -1);
else
vm->dram_pg_pool =
gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
if (!vm->dram_pg_pool) {
dev_err(hdev->dev, "Failed to create dram page pool\n");
return -ENOMEM;
}
kref_init(&vm->dram_pg_pool_refcount);
rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
prop->dram_end_address - prop->dram_user_base_address,
-1);
if (rc) {
dev_err(hdev->dev,
"Failed to add memory to dram page pool %d\n", rc);
goto pool_add_err;
}
spin_lock_init(&vm->idr_lock);
idr_init(&vm->phys_pg_pack_handles);
atomic64_set(&hdev->dram_used_mem, 0);
vm->init_done = true;
return 0;
pool_add_err:
gen_pool_destroy(vm->dram_pg_pool);
return rc;
}
/**
* hl_vm_fini() - virtual memory module teardown.
* @hdev: pointer to the habanalabs device structure.
*
* This function perform teardown to the following:
* - Idr for device memory allocation handles.
* - DRAM physical pages pool of 2MB.
* - MMU module.
*/
void hl_vm_fini(struct hl_device *hdev)
{
struct hl_vm *vm = &hdev->vm;
if (!vm->init_done)
return;
/*
* At this point all the contexts should be freed and hence no DRAM
* memory should be in use. Hence the DRAM pool should be freed here.
*/
if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
__func__);
vm->init_done = false;
}
/**
* hl_hw_block_mem_init() - HW block memory initialization.
* @ctx: pointer to the habanalabs context structure.
*
* This function initializes the HW block virtual mapped addresses list and
* it's lock.
*/
void hl_hw_block_mem_init(struct hl_ctx *ctx)
{
mutex_init(&ctx->hw_block_list_lock);
INIT_LIST_HEAD(&ctx->hw_block_mem_list);
}
/**
* hl_hw_block_mem_fini() - HW block memory teardown.
* @ctx: pointer to the habanalabs context structure.
*
* This function clears the HW block virtual mapped addresses list and destroys
* it's lock.
*/
void hl_hw_block_mem_fini(struct hl_ctx *ctx)
{
struct hl_vm_hw_block_list_node *lnode, *tmp;
if (!list_empty(&ctx->hw_block_mem_list))
dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
list_del(&lnode->node);
kfree(lnode);
}
mutex_destroy(&ctx->hw_block_list_lock);
}