Misc driver patches for 5.15-rc1, second round

Here is a second round of misc driver patches for 5.15-rc1. In here
are only updates for the Habanalabs driver. This request is late
because the previously-objected-to dma-buf patches are all removed and
some fixes that you and others found are now included in here as well.

All of these have been in linux-next for well over a week with no
reports of problems, and they are all self-contained to only this one
driver.

Full details are in the shortlog.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

-----BEGIN PGP SIGNATURE-----
iG0EABECAC0WIQT0tgzFv3jCIUoxPcsxR9QN2y37KQUCYTtnnQ8cZ3JlZ0Brcm9h
aC5jb20ACgkQMUfUDdst+yldxgCfeXoRaaCBSzpbejgf48CQRd/m/v0AmgMRVUxB
kJ58eqBrDMdeExOkvkh8
=6d/9
-----END PGP SIGNATURE-----

Merge tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull habanalabs updates from Greg KH:
 "Here is another round of misc driver patches for 5.15-rc1. In here
  are only updates for the Habanalabs driver. This request is late
  because the previously-objected-to dma-buf patches are all removed
  and some fixes that you and others found are now included in here as
  well.

  All of these have been in linux-next for well over a week with no
  reports of problems, and they are all self-contained to only this one
  driver.

  Full details are in the shortlog"

* tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits)
  habanalabs/gaudi: hwmon default card name
  habanalabs: add support for f/w reset
  habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC
  habanalabs: cannot sleep while holding spinlock
  habanalabs: never copy_from_user inside spinlock
  habanalabs: remove unnecessary device status check
  habanalabs: disable IRQ in user interrupts spinlock
  habanalabs: add "in device creation" status
  habanalabs/gaudi: invalidate PMMU mem cache on init
  habanalabs/gaudi: size should be printed in decimal
  habanalabs/gaudi: define DC POWER for secured PMC
  habanalabs/gaudi: unmask out of bounds SLM access interrupt
  habanalabs: add userptr_lookup node in debugfs
  habanalabs/gaudi: fetch TPC/MME ECC errors from F/W
  habanalabs: modify multi-CS to wait on stream masters
  habanalabs/gaudi: add monitored SOBs to state dump
  habanalabs/gaudi: restore user registers when context opens
  habanalabs/gaudi: increase boot fit timeout
  habanalabs: update to latest firmware headers
  habanalabs/gaudi: minimize number of register reads
  ...
commit 5ffc06ebea
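
Several entries in the shortlog above ("habanalabs: cannot sleep while holding
spinlock", "habanalabs: never copy_from_user inside spinlock") fix the same
class of bug: code holding a spinlock must not call anything that can sleep,
and both copy_from_user() and GFP_KERNEL allocations can sleep. The sketch
below only illustrates the general safe pattern; the struct and function names
are made up for the example and are not the driver's code.

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Hypothetical state object, for illustration only. */
struct demo_state {
	spinlock_t lock;
	size_t len;
	char buf[256];
};

static int demo_update(struct demo_state *st, const void __user *ubuf,
			size_t len)
{
	char *tmp;

	if (len > sizeof(st->buf))
		return -EINVAL;

	/* The GFP_KERNEL allocation and copy_from_user() may sleep, so
	 * both happen before the spinlock is taken.
	 */
	tmp = kmalloc(len, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	if (copy_from_user(tmp, ubuf, len)) {
		kfree(tmp);
		return -EFAULT;
	}

	spin_lock(&st->lock);		/* critical section: nothing that sleeps */
	memcpy(st->buf, tmp, len);
	st->len = len;
	spin_unlock(&st->lock);

	kfree(tmp);
	return 0;
}
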
@@ -215,6 +215,17 @@ Description: Sets the skip reset on timeout option for the device. Value of
                "0" means device will be reset in case some CS has timed out,
                otherwise it will not be reset.

What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
Date:           Oct 2021
KernelVersion:  5.15
Contact:        ynudelman@habana.ai
Description:    Gets the state dump occurring on a CS timeout or failure.
                State dump is used for debug and is created each time in case of
                a problem in a CS execution, before reset.
                Reading from the node returns the newest state dump available.
                Writing an integer X discards X state dumps, so that the
                next read would return X+1-st newest state dump.

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
@@ -230,6 +241,14 @@ Description: Displays a list with information about the currently user
                pointers (user virtual addresses) that are pinned and mapped
                to DMA addresses

What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
Date:           Aug 2021
KernelVersion:  5.15
Contact:        ogabbay@kernel.org
Description:    Allows to search for specific user pointers (user virtual
                addresses) that are pinned and mapped to DMA addresses, and see
                their resolution to the specific dma address.

What:           /sys/kernel/debug/habanalabs/hl<n>/vm
Date:           Jan 2019
KernelVersion:  5.1

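The state_dump node documented above is a plain debugfs file: a read returns
the newest saved dump, and writing a decimal count discards that many dumps so
the next read returns an older one. A hypothetical userspace sketch of that
interaction (the device index hl0 and the buffer size are assumptions for the
example):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static char buf[1 << 20];
	int fd = open("/sys/kernel/debug/habanalabs/hl0/state_dump", O_RDWR);
	ssize_t n;

	if (fd < 0)
		return 1;

	n = read(fd, buf, sizeof(buf) - 1);	/* newest state dump */
	if (n > 0)
		fwrite(buf, 1, n, stdout);

	if (write(fd, "1", 1) < 0)	/* discard one dump; next read returns the 2nd newest */
		perror("write");

	close(fd);
	return 0;
}
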
@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/asid.o common/habanalabs_ioctl.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
		common/command_submission.o common/firmware_if.o
		common/command_submission.o common/firmware_if.o \
		common/state_dump.o

@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,

	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
	if (rc < 0)
		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
	spin_unlock(&mgr->cb_lock);

	if (rc < 0) {
@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)

	vma->vm_private_data = cb;

	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
	rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
			cb->bus_address, cb->size);
	if (rc) {
		spin_lock(&cb->lock);

File diff suppressed because it is too large
@@ -9,16 +9,70 @@

#include <linux/slab.h>

void hl_encaps_handle_do_release(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *handle =
		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	struct hl_ctx *ctx = handle->hdev->compute_ctx;
	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;

	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, handle->id);
	spin_unlock(&mgr->lock);

	kfree(handle);
}

static void hl_encaps_handle_do_release_sob(struct kref *ref)
{
	struct hl_cs_encaps_sig_handle *handle =
		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
	struct hl_ctx *ctx = handle->hdev->compute_ctx;
	struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;

	/* if we're here, then there was a signals reservation but cs with
	 * encaps signals wasn't submitted, so need to put refcount
	 * to hw_sob taken at the reservation.
	 */
	hw_sob_put(handle->hw_sob);

	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, handle->id);
	spin_unlock(&mgr->lock);

	kfree(handle);
}

static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
{
	spin_lock_init(&mgr->lock);
	idr_init(&mgr->handles);
}

static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
			struct hl_encaps_signals_mgr *mgr)
{
	struct hl_cs_encaps_sig_handle *handle;
	struct idr *idp;
	u32 id;

	idp = &mgr->handles;

	if (!idr_is_empty(idp)) {
		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
		idr_for_each_entry(idp, handle, id)
			kref_put(&handle->refcount,
					hl_encaps_handle_do_release_sob);
	}

	idr_destroy(&mgr->handles);
}

static void hl_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	int i;

	/* Release all allocated pending cb's, those cb's were never
	 * scheduled so it is safe to release them here
	 */
	hl_pending_cb_list_flush(ctx);

	/* Release all allocated HW block mapped list entries and destroy
	 * the mutex.
	 */
@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
	hl_cb_va_pool_fini(ctx);
	hl_vm_ctx_fini(ctx);
	hl_asid_free(hdev, ctx->asid);
	hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);

	/* Scrub both SRAM and DRAM */
	hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
		return;

	dev_warn(hdev->dev,
		"user process released device but its command submissions are still executing\n");
}

int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
	kref_init(&ctx->refcount);

	ctx->cs_sequence = 1;
	INIT_LIST_HEAD(&ctx->pending_cb_list);
	spin_lock_init(&ctx->pending_cb_lock);
	spin_lock_init(&ctx->cs_lock);
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	atomic_set(&ctx->thread_pending_cb_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;
	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
			sizeof(struct hl_fence *),
@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
		goto err_cb_va_pool_fini;
	}

	hl_encaps_sig_mgr_init(&ctx->sig_mgr);

	dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
}

@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx)
	return kref_put(&ctx->refcount, hl_ctx_do_release);
}

struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
/*
 * hl_ctx_get_fence_locked - get CS fence under CS lock
 *
 * @ctx: pointer to the context structure.
 * @seq: CS sequences number
 *
 * @return valid fence pointer on success, NULL if fence is gone, otherwise
 * error pointer.
 *
 * NOTE: this function shall be called with cs_lock locked
 */
static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
{
	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);

	if (seq >= ctx->cs_sequence) {
		spin_unlock(&ctx->cs_lock);
	if (seq >= ctx->cs_sequence)
		return ERR_PTR(-EINVAL);
	}

	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
		spin_unlock(&ctx->cs_lock);
	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
		return NULL;
	}

	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
	hl_fence_get(fence);
	return fence;
}

struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);

	fence = hl_ctx_get_fence_locked(ctx, seq);

	spin_unlock(&ctx->cs_lock);

	return fence;
}

/*
 * hl_ctx_get_fences - get multiple CS fences under the same CS lock
 *
 * @ctx: pointer to the context structure.
 * @seq_arr: array of CS sequences to wait for
 * @fence: fence array to store the CS fences
 * @arr_len: length of seq_arr and fence_arr
 *
 * @return 0 on success, otherwise non 0 error code
 */
int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
			struct hl_fence **fence, u32 arr_len)
{
	struct hl_fence **fence_arr_base = fence;
	int i, rc = 0;

	spin_lock(&ctx->cs_lock);

	for (i = 0; i < arr_len; i++, fence++) {
		u64 seq = seq_arr[i];

		*fence = hl_ctx_get_fence_locked(ctx, seq);

		if (IS_ERR(*fence)) {
			dev_err(ctx->hdev->dev,
				"Failed to get fence for CS with seq 0x%llx\n",
				seq);
			rc = PTR_ERR(*fence);
			break;
		}
	}

	spin_unlock(&ctx->cs_lock);

	if (rc)
		hl_fences_put(fence_arr_base, i);

	return rc;
}

/*
 * hl_ctx_mgr_init - initialize the context manager
 *

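hl_ctx_get_fences() above exists so that a whole array of CS sequence numbers
can be resolved to fences under a single cs_lock acquisition, which is why the
lookup itself was factored out into hl_ctx_get_fence_locked(). A hypothetical
caller sketch, not the driver's actual multi-CS wait path, might use it roughly
like this:

#include "habanalabs.h"

/* Hypothetical caller: resolve several CS sequence numbers to fences in one
 * shot, then drop the references when done.
 */
static int wait_on_cs_batch(struct hl_ctx *ctx, u64 *seqs, u32 num)
{
	struct hl_fence **fences;
	int rc;

	fences = kcalloc(num, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	rc = hl_ctx_get_fences(ctx, seqs, fences, num);	/* one cs_lock round trip */
	if (rc)
		goto free;

	/* ... wait on / inspect the fences here ... */

	hl_fences_put(fences, num);	/* drop the references taken above */
free:
	kfree(fences);
	return rc;
}
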
@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data)
|
|||
if (first) {
|
||||
first = false;
|
||||
seq_puts(s, "\n");
|
||||
seq_puts(s, " user virtual address size dma dir\n");
|
||||
seq_puts(s, " pid user virtual address size dma dir\n");
|
||||
seq_puts(s, "----------------------------------------------------------\n");
|
||||
}
|
||||
seq_printf(s,
|
||||
" 0x%-14llx %-10u %-30s\n",
|
||||
userptr->addr, userptr->size, dma_dir[userptr->dir]);
|
||||
seq_printf(s, " %-7d 0x%-14llx %-10llu %-30s\n",
|
||||
userptr->pid, userptr->addr, userptr->size,
|
||||
dma_dir[userptr->dir]);
|
||||
}
|
||||
|
||||
spin_unlock(&dev_entry->userptr_spinlock);
|
||||
|
@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data)
|
|||
struct hl_vm_hash_node *hnode;
|
||||
struct hl_userptr *userptr;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
|
||||
enum vm_type_t *vm_type;
|
||||
enum vm_type *vm_type;
|
||||
bool once = true;
|
||||
u64 j;
|
||||
int i;
|
||||
|
@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
|
|||
if (*vm_type == VM_TYPE_USERPTR) {
|
||||
userptr = hnode->ptr;
|
||||
seq_printf(s,
|
||||
" 0x%-14llx %-10u\n",
|
||||
" 0x%-14llx %-10llu\n",
|
||||
hnode->vaddr, userptr->size);
|
||||
} else {
|
||||
phys_pg_pack = hnode->ptr;
|
||||
|
@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int userptr_lookup_show(struct seq_file *s, void *data)
|
||||
{
|
||||
struct hl_debugfs_entry *entry = s->private;
|
||||
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
|
||||
struct scatterlist *sg;
|
||||
struct hl_userptr *userptr;
|
||||
bool first = true;
|
||||
u64 total_npages, npages, sg_start, sg_end;
|
||||
dma_addr_t dma_addr;
|
||||
int i;
|
||||
|
||||
spin_lock(&dev_entry->userptr_spinlock);
|
||||
|
||||
list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
|
||||
if (dev_entry->userptr_lookup >= userptr->addr &&
|
||||
dev_entry->userptr_lookup < userptr->addr + userptr->size) {
|
||||
total_npages = 0;
|
||||
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
|
||||
i) {
|
||||
npages = hl_get_sg_info(sg, &dma_addr);
|
||||
sg_start = userptr->addr +
|
||||
total_npages * PAGE_SIZE;
|
||||
sg_end = userptr->addr +
|
||||
(total_npages + npages) * PAGE_SIZE;
|
||||
|
||||
if (dev_entry->userptr_lookup >= sg_start &&
|
||||
dev_entry->userptr_lookup < sg_end) {
|
||||
dma_addr += (dev_entry->userptr_lookup -
|
||||
sg_start);
|
||||
if (first) {
|
||||
first = false;
|
||||
seq_puts(s, "\n");
|
||||
seq_puts(s, " user virtual address dma address pid region start region size\n");
|
||||
seq_puts(s, "---------------------------------------------------------------------------------------\n");
|
||||
}
|
||||
seq_printf(s, " 0x%-18llx 0x%-16llx %-8u 0x%-16llx %-12llu\n",
|
||||
dev_entry->userptr_lookup,
|
||||
(u64)dma_addr, userptr->pid,
|
||||
userptr->addr, userptr->size);
|
||||
}
|
||||
total_npages += npages;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&dev_entry->userptr_spinlock);
|
||||
|
||||
if (!first)
|
||||
seq_puts(s, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *f_pos)
|
||||
{
|
||||
struct seq_file *s = file->private_data;
|
||||
struct hl_debugfs_entry *entry = s->private;
|
||||
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
|
||||
ssize_t rc;
|
||||
u64 value;
|
||||
|
||||
rc = kstrtoull_from_user(buf, count, 16, &value);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
dev_entry->userptr_lookup = value;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static int mmu_show(struct seq_file *s, void *data)
|
||||
{
|
||||
struct hl_debugfs_entry *entry = s->private;
|
||||
|
@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
|
||||
hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
|
||||
|
||||
if (hops_info.scrambled_vaddr &&
|
||||
(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
|
||||
|
@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
|
|||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct hl_ctx *ctx = hdev->compute_ctx;
|
||||
struct hl_vm_hash_node *hnode;
|
||||
u64 end_address, range_size;
|
||||
struct hl_userptr *userptr;
|
||||
enum vm_type_t *vm_type;
|
||||
enum vm_type *vm_type;
|
||||
bool valid = false;
|
||||
u64 end_address;
|
||||
u32 range_size;
|
||||
int i, rc = 0;
|
||||
|
||||
if (!ctx) {
|
||||
|
@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
|
||||
ssize_t rc;
|
||||
|
||||
down_read(&entry->state_dump_sem);
|
||||
if (!entry->state_dump[entry->state_dump_head])
|
||||
rc = 0;
|
||||
else
|
||||
rc = simple_read_from_buffer(
|
||||
buf, count, ppos,
|
||||
entry->state_dump[entry->state_dump_head],
|
||||
strlen(entry->state_dump[entry->state_dump_head]));
|
||||
up_read(&entry->state_dump_sem);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
|
||||
struct hl_device *hdev = entry->hdev;
|
||||
ssize_t rc;
|
||||
u32 size;
|
||||
int i;
|
||||
|
||||
rc = kstrtouint_from_user(buf, count, 10, &size);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
|
||||
dev_err(hdev->dev, "Invalid number of dumps to skip\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (entry->state_dump[entry->state_dump_head]) {
|
||||
down_write(&entry->state_dump_sem);
|
||||
for (i = 0; i < size; ++i) {
|
||||
vfree(entry->state_dump[entry->state_dump_head]);
|
||||
entry->state_dump[entry->state_dump_head] = NULL;
|
||||
if (entry->state_dump_head > 0)
|
||||
entry->state_dump_head--;
|
||||
else
|
||||
entry->state_dump_head =
|
||||
ARRAY_SIZE(entry->state_dump) - 1;
|
||||
}
|
||||
up_write(&entry->state_dump_sem);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct file_operations hl_data32b_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = hl_data_read32,
|
||||
|
@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = {
|
|||
.read = hl_security_violations_read
|
||||
};
|
||||
|
||||
static const struct file_operations hl_state_dump_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = hl_state_dump_read,
|
||||
.write = hl_state_dump_write
|
||||
};
|
||||
|
||||
static const struct hl_info_list hl_debugfs_list[] = {
|
||||
{"command_buffers", command_buffers_show, NULL},
|
||||
{"command_submission", command_submission_show, NULL},
|
||||
{"command_submission_jobs", command_submission_jobs_show, NULL},
|
||||
{"userptr", userptr_show, NULL},
|
||||
{"vm", vm_show, NULL},
|
||||
{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
|
||||
{"mmu", mmu_show, mmu_asid_va_write},
|
||||
{"engines", engines_show, NULL}
|
||||
};
|
||||
|
@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
|
|||
INIT_LIST_HEAD(&dev_entry->userptr_list);
|
||||
INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
|
||||
mutex_init(&dev_entry->file_mutex);
|
||||
init_rwsem(&dev_entry->state_dump_sem);
|
||||
spin_lock_init(&dev_entry->cb_spinlock);
|
||||
spin_lock_init(&dev_entry->cs_spinlock);
|
||||
spin_lock_init(&dev_entry->cs_job_spinlock);
|
||||
|
@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
|
|||
dev_entry->root,
|
||||
&hdev->skip_reset_on_timeout);
|
||||
|
||||
debugfs_create_file("state_dump",
|
||||
0600,
|
||||
dev_entry->root,
|
||||
dev_entry,
|
||||
&hl_state_dump_fops);
|
||||
|
||||
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
|
||||
debugfs_create_file(hl_debugfs_list[i].name,
|
||||
0444,
|
||||
|
@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
|
|||
void hl_debugfs_remove_device(struct hl_device *hdev)
|
||||
{
|
||||
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
|
||||
int i;
|
||||
|
||||
debugfs_remove_recursive(entry->root);
|
||||
|
||||
|
@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
|
|||
|
||||
vfree(entry->blob_desc.data);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
|
||||
vfree(entry->state_dump[i]);
|
||||
|
||||
kfree(entry->entry_arr);
|
||||
}
|
||||
|
||||
|
@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
|
|||
spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_debugfs_set_state_dump - register state dump making it accessible via
|
||||
* debugfs
|
||||
* @hdev: pointer to the device structure
|
||||
* @data: the actual dump data
|
||||
* @length: the length of the data
|
||||
*/
|
||||
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
unsigned long length)
|
||||
{
|
||||
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
|
||||
|
||||
down_write(&dev_entry->state_dump_sem);
|
||||
|
||||
dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
|
||||
ARRAY_SIZE(dev_entry->state_dump);
|
||||
vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
|
||||
dev_entry->state_dump[dev_entry->state_dump_head] = data;
|
||||
|
||||
up_write(&dev_entry->state_dump_sem);
|
||||
}
|
||||
|
||||
void __init hl_debugfs_init(void)
|
||||
{
|
||||
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
|
||||
|
|
|
@ -7,11 +7,11 @@
|
|||
|
||||
#define pr_fmt(fmt) "habanalabs: " fmt
|
||||
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
#include "habanalabs.h"
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/hwmon.h>
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
|
||||
enum hl_device_status hl_device_status(struct hl_device *hdev)
|
||||
{
|
||||
|
@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
|
|||
status = HL_DEVICE_STATUS_NEEDS_RESET;
|
||||
else if (hdev->disabled)
|
||||
status = HL_DEVICE_STATUS_MALFUNCTION;
|
||||
else if (!hdev->init_done)
|
||||
status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
|
||||
else
|
||||
status = HL_DEVICE_STATUS_OPERATIONAL;
|
||||
|
||||
|
@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
|
|||
case HL_DEVICE_STATUS_NEEDS_RESET:
|
||||
return false;
|
||||
case HL_DEVICE_STATUS_OPERATIONAL:
|
||||
case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
|
|||
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
|
||||
|
||||
if (!hl_hpriv_put(hpriv))
|
||||
dev_warn(hdev->dev,
|
||||
"Device is still in use because there are live CS and/or memory mappings\n");
|
||||
dev_notice(hdev->dev,
|
||||
"User process closed FD but device still in use\n");
|
||||
|
||||
hdev->last_open_session_duration_jif =
|
||||
jiffies - hdev->last_successful_open_jif;
|
||||
|
@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work)
|
|||
container_of(work, struct hl_device_reset_work,
|
||||
reset_work.work);
|
||||
struct hl_device *hdev = device_reset_work->hdev;
|
||||
u32 flags;
|
||||
int rc;
|
||||
|
||||
rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
|
||||
flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
|
||||
|
||||
if (device_reset_work->fw_reset)
|
||||
flags |= HL_RESET_FW;
|
||||
|
||||
rc = hl_device_reset(hdev, flags);
|
||||
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
|
||||
dev_info(hdev->dev,
|
||||
"Could not reset device. will try again in %u seconds",
|
||||
|
@ -682,6 +691,44 @@ out:
|
|||
return rc;
|
||||
}
|
||||
|
||||
static void take_release_locks(struct hl_device *hdev)
|
||||
{
|
||||
/* Flush anyone that is inside the critical section of enqueue
|
||||
* jobs to the H/W
|
||||
*/
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
/* Flush processes that are sending message to CPU */
|
||||
mutex_lock(&hdev->send_cpu_message_lock);
|
||||
mutex_unlock(&hdev->send_cpu_message_lock);
|
||||
|
||||
/* Flush anyone that is inside device open */
|
||||
mutex_lock(&hdev->fpriv_list_lock);
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
}
|
||||
|
||||
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
|
||||
{
|
||||
if (hard_reset)
|
||||
device_late_fini(hdev);
|
||||
|
||||
/*
|
||||
* Halt the engines and disable interrupts so we won't get any more
|
||||
* completions from H/W and we won't have any accesses from the
|
||||
* H/W to the host machine
|
||||
*/
|
||||
hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
|
||||
|
||||
/* Go over all the queues, release all CS and their jobs */
|
||||
hl_cs_rollback_all(hdev);
|
||||
|
||||
/* Release all pending user interrupts, each pending user interrupt
|
||||
* holds a reference to user context
|
||||
*/
|
||||
hl_release_pending_user_interrupts(hdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* hl_device_suspend - initiate device suspend
|
||||
*
|
||||
|
@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev)
|
|||
/* This blocks all other stuff that is not blocked by in_reset */
|
||||
hdev->disabled = true;
|
||||
|
||||
/*
|
||||
* Flush anyone that is inside the critical section of enqueue
|
||||
* jobs to the H/W
|
||||
*/
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
/* Flush processes that are sending message to CPU */
|
||||
mutex_lock(&hdev->send_cpu_message_lock);
|
||||
mutex_unlock(&hdev->send_cpu_message_lock);
|
||||
take_release_locks(hdev);
|
||||
|
||||
rc = hdev->asic_funcs->suspend(hdev);
|
||||
if (rc)
|
||||
|
@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
|
|||
usleep_range(1000, 10000);
|
||||
|
||||
put_task_struct(task);
|
||||
} else {
|
||||
dev_warn(hdev->dev,
|
||||
"Can't get task struct for PID so giving up on killing process\n");
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
return -ETIME;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
|
|||
int hl_device_reset(struct hl_device *hdev, u32 flags)
|
||||
{
|
||||
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
|
||||
bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
|
||||
bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
|
||||
int i, rc;
|
||||
|
||||
if (!hdev->init_done) {
|
||||
|
@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
|
|||
return 0;
|
||||
}
|
||||
|
||||
hard_reset = (flags & HL_RESET_HARD) != 0;
|
||||
from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
|
||||
hard_reset = !!(flags & HL_RESET_HARD);
|
||||
from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
|
||||
fw_reset = !!(flags & HL_RESET_FW);
|
||||
|
||||
if (!hard_reset && !hdev->supports_soft_reset) {
|
||||
hard_instead_soft = true;
|
||||
|
@ -947,11 +991,13 @@ do_reset:
|
|||
else
|
||||
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
|
||||
|
||||
/*
|
||||
* if reset is due to heartbeat, device CPU is no responsive in
|
||||
* which case no point sending PCI disable message to it
|
||||
/* If reset is due to heartbeat, device CPU is no responsive in
|
||||
* which case no point sending PCI disable message to it.
|
||||
*
|
||||
* If F/W is performing the reset, no need to send it a message to disable
|
||||
* PCI access
|
||||
*/
|
||||
if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
|
||||
if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
|
||||
/* Disable PCI access from device F/W so he won't send
|
||||
* us additional interrupts. We disable MSI/MSI-X at
|
||||
* the halt_engines function and we can't have the F/W
|
||||
|
@ -970,15 +1016,7 @@ do_reset:
|
|||
/* This also blocks future CS/VM/JOB completion operations */
|
||||
hdev->disabled = true;
|
||||
|
||||
/* Flush anyone that is inside the critical section of enqueue
|
||||
* jobs to the H/W
|
||||
*/
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
/* Flush anyone that is inside device open */
|
||||
mutex_lock(&hdev->fpriv_list_lock);
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
take_release_locks(hdev);
|
||||
|
||||
dev_err(hdev->dev, "Going to RESET device!\n");
|
||||
}
|
||||
|
@ -989,6 +1027,8 @@ again:
|
|||
|
||||
hdev->process_kill_trial_cnt = 0;
|
||||
|
||||
hdev->device_reset_work.fw_reset = fw_reset;
|
||||
|
||||
/*
|
||||
* Because the reset function can't run from heartbeat work,
|
||||
* we need to call the reset function from a dedicated work.
|
||||
|
@ -999,31 +1039,7 @@ again:
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (hard_reset) {
|
||||
device_late_fini(hdev);
|
||||
|
||||
/*
|
||||
* Now that the heartbeat thread is closed, flush processes
|
||||
* which are sending messages to CPU
|
||||
*/
|
||||
mutex_lock(&hdev->send_cpu_message_lock);
|
||||
mutex_unlock(&hdev->send_cpu_message_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Halt the engines and disable interrupts so we won't get any more
|
||||
* completions from H/W and we won't have any accesses from the
|
||||
* H/W to the host machine
|
||||
*/
|
||||
hdev->asic_funcs->halt_engines(hdev, hard_reset);
|
||||
|
||||
/* Go over all the queues, release all CS and their jobs */
|
||||
hl_cs_rollback_all(hdev);
|
||||
|
||||
/* Release all pending user interrupts, each pending user interrupt
|
||||
* holds a reference to user context
|
||||
*/
|
||||
hl_release_pending_user_interrupts(hdev);
|
||||
cleanup_resources(hdev, hard_reset, fw_reset);
|
||||
|
||||
kill_processes:
|
||||
if (hard_reset) {
|
||||
|
@ -1057,12 +1073,15 @@ kill_processes:
|
|||
}
|
||||
|
||||
/* Reset the H/W. It will be in idle state after this returns */
|
||||
hdev->asic_funcs->hw_fini(hdev, hard_reset);
|
||||
hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
|
||||
|
||||
if (hard_reset) {
|
||||
hdev->fw_loader.linux_loaded = false;
|
||||
|
||||
/* Release kernel context */
|
||||
if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
|
||||
hdev->kernel_ctx = NULL;
|
||||
|
||||
hl_vm_fini(hdev);
|
||||
hl_mmu_fini(hdev);
|
||||
hl_eq_reset(hdev, &hdev->event_queue);
|
||||
|
@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
|||
if (rc)
|
||||
goto user_interrupts_fini;
|
||||
|
||||
|
||||
/* initialize completion structure for multi CS wait */
|
||||
hl_multi_cs_completion_init(hdev);
|
||||
|
||||
/*
|
||||
* Initialize the H/W queues. Must be done before hw_init, because
|
||||
* there the addresses of the kernel queue are being written to the
|
||||
|
@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
|
|||
|
||||
hdev->compute_ctx = NULL;
|
||||
|
||||
hdev->asic_funcs->state_dump_init(hdev);
|
||||
|
||||
hl_debugfs_add_device(hdev);
|
||||
|
||||
/* debugfs nodes are created in hl_ctx_init so it must be called after
|
||||
|
@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev)
|
|||
/* Mark device as disabled */
|
||||
hdev->disabled = true;
|
||||
|
||||
/* Flush anyone that is inside the critical section of enqueue
|
||||
* jobs to the H/W
|
||||
*/
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
/* Flush anyone that is inside device open */
|
||||
mutex_lock(&hdev->fpriv_list_lock);
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
take_release_locks(hdev);
|
||||
|
||||
hdev->hard_reset_pending = true;
|
||||
|
||||
hl_hwmon_fini(hdev);
|
||||
|
||||
device_late_fini(hdev);
|
||||
|
||||
/*
|
||||
* Halt the engines and disable interrupts so we won't get any more
|
||||
* completions from H/W and we won't have any accesses from the
|
||||
* H/W to the host machine
|
||||
*/
|
||||
hdev->asic_funcs->halt_engines(hdev, true);
|
||||
|
||||
/* Go over all the queues, release all CS and their jobs */
|
||||
hl_cs_rollback_all(hdev);
|
||||
cleanup_resources(hdev, true, false);
|
||||
|
||||
/* Kill processes here after CS rollback. This is because the process
|
||||
* can't really exit until all its CSs are done, which is what we
|
||||
|
@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev)
|
|||
hl_cb_pool_fini(hdev);
|
||||
|
||||
/* Reset the H/W. It will be in idle state after this returns */
|
||||
hdev->asic_funcs->hw_fini(hdev, true);
|
||||
hdev->asic_funcs->hw_fini(hdev, true, false);
|
||||
|
||||
hdev->fw_loader.linux_loaded = false;
|
||||
|
||||
/* Release kernel context */
|
||||
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2021 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
|
@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
|||
/* set fence to a non valid value */
|
||||
pkt->fence = cpu_to_le32(UINT_MAX);
|
||||
|
||||
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* The CPU queue is a synchronous queue with an effective depth of
|
||||
* a single entry (although it is allocated with room for multiple
|
||||
* entries). We lock on it using 'send_cpu_message_lock' which
|
||||
* serializes accesses to the CPU queue.
|
||||
* Which means that we don't need to lock the access to the entire H/W
|
||||
* queues module when submitting a JOB to the CPU queue.
|
||||
*/
|
||||
hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
|
||||
|
||||
if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
|
||||
expected_ack_val = queue->pi;
|
||||
|
@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
|
|||
hdev->event_queue.check_eqe_index = false;
|
||||
|
||||
/* Read FW application security bits again */
|
||||
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
|
||||
hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
|
||||
RREG32(sts_boot_dev_sts0_reg);
|
||||
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
|
||||
if (prop->fw_cpu_boot_dev_sts0_valid) {
|
||||
prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
|
||||
if (prop->fw_app_cpu_boot_dev_sts0 &
|
||||
CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
|
||||
hdev->event_queue.check_eqe_index = true;
|
||||
}
|
||||
|
||||
if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
|
||||
hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
|
||||
RREG32(sts_boot_dev_sts1_reg);
|
||||
if (prop->fw_cpu_boot_dev_sts1_valid)
|
||||
prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
|
||||
|
||||
out:
|
||||
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
|
||||
|
@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
|
|||
} else {
|
||||
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
|
||||
msleep(static_loader->cpu_reset_wait_msec);
|
||||
|
||||
/* Must clear this register in order to prevent preboot
|
||||
* from reading WFE after reboot
|
||||
*/
|
||||
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
|
||||
}
|
||||
|
||||
hdev->device_cpu_is_halted = true;
|
||||
|
@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
|
|||
dev_err(hdev->dev,
|
||||
"Device boot progress - Thermal Sensor initialization failed\n");
|
||||
break;
|
||||
case CPU_BOOT_STATUS_SECURITY_READY:
|
||||
dev_err(hdev->dev,
|
||||
"Device boot progress - Stuck in preboot after security initialization\n");
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev,
|
||||
"Device boot progress - Invalid status code %d\n",
|
||||
|
@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
|
|||
* b. Check whether hard reset is done by boot cpu
|
||||
* 3. FW application - a. Fetch fw application security status
|
||||
* b. Check whether hard reset is done by fw app
|
||||
*
|
||||
* Preboot:
|
||||
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
|
||||
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
|
||||
* If set, then mark GIC controller to be disabled.
|
||||
*/
|
||||
prop->hard_reset_done_by_fw =
|
||||
!!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
|
||||
|
@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
|
|||
if (!hdev->asic_prop.gic_interrupts_enable &&
|
||||
!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
|
||||
CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
|
||||
dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
|
||||
dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
|
||||
dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
|
||||
dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
|
||||
|
||||
dev_warn(hdev->dev,
|
||||
"Using a single interrupt interface towards cpucp");
|
||||
|
@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
|
|||
|
||||
/* Read FW application security bits */
|
||||
if (prop->fw_cpu_boot_dev_sts0_valid) {
|
||||
prop->fw_app_cpu_boot_dev_sts0 =
|
||||
RREG32(cpu_boot_dev_sts0_reg);
|
||||
prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
|
||||
|
||||
if (prop->fw_app_cpu_boot_dev_sts0 &
|
||||
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
|
||||
|
@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
|
|||
}
|
||||
|
||||
if (prop->fw_cpu_boot_dev_sts1_valid) {
|
||||
prop->fw_app_cpu_boot_dev_sts1 =
|
||||
RREG32(cpu_boot_dev_sts1_reg);
|
||||
prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
|
||||
|
||||
dev_dbg(hdev->dev,
|
||||
"Firmware application CPU status1 %#x\n",
|
||||
|
@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
|
|||
dev_info(hdev->dev,
|
||||
"Loading firmware to device, may take some time...\n");
|
||||
|
||||
/*
|
||||
* In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
|
||||
* It will be updated from FW after hl_fw_dynamic_request_descriptor().
|
||||
*/
|
||||
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
|
||||
|
||||
rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <linux/scatterlist.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/genalloc.h>
|
||||
#include <linux/sched/signal.h>
|
||||
|
@ -65,6 +66,11 @@
|
|||
|
||||
#define HL_COMMON_USER_INTERRUPT_ID 0xFFF
|
||||
|
||||
#define HL_STATE_DUMP_HIST_LEN 5
|
||||
|
||||
#define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
|
||||
#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
|
||||
|
||||
/* Memory */
|
||||
#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
|
||||
|
||||
|
@ -122,12 +128,17 @@ enum hl_mmu_page_table_location {
|
|||
*
|
||||
* - HL_RESET_DEVICE_RELEASE
|
||||
* Set if reset is due to device release
|
||||
*
|
||||
* - HL_RESET_FW
|
||||
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
|
||||
* only when running with secured f/w
|
||||
*/
|
||||
#define HL_RESET_HARD (1 << 0)
|
||||
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
|
||||
#define HL_RESET_HEARTBEAT (1 << 2)
|
||||
#define HL_RESET_TDR (1 << 3)
|
||||
#define HL_RESET_DEVICE_RELEASE (1 << 4)
|
||||
#define HL_RESET_FW (1 << 5)
|
||||
|
||||
#define HL_MAX_SOBS_PER_MONITOR 8
|
||||
|
||||
|
@ -236,7 +247,9 @@ enum hl_cs_type {
|
|||
CS_TYPE_DEFAULT,
|
||||
CS_TYPE_SIGNAL,
|
||||
CS_TYPE_WAIT,
|
||||
CS_TYPE_COLLECTIVE_WAIT
|
||||
CS_TYPE_COLLECTIVE_WAIT,
|
||||
CS_RESERVE_SIGNALS,
|
||||
CS_UNRESERVE_SIGNALS
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -281,13 +294,17 @@ enum queue_cb_alloc_flags {
|
|||
* @hdev: habanalabs device structure.
|
||||
* @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
|
||||
* @sob_id: id of this SOB.
|
||||
* @sob_addr: the sob offset from the base address.
|
||||
* @q_idx: the H/W queue that uses this SOB.
|
||||
* @need_reset: reset indication set when switching to the other sob.
|
||||
*/
|
||||
struct hl_hw_sob {
|
||||
struct hl_device *hdev;
|
||||
struct kref kref;
|
||||
u32 sob_id;
|
||||
u32 sob_addr;
|
||||
u32 q_idx;
|
||||
bool need_reset;
|
||||
};
|
||||
|
||||
enum hl_collective_mode {
|
||||
|
@ -317,11 +334,11 @@ struct hw_queue_properties {
|
|||
};
|
||||
|
||||
/**
|
||||
* enum vm_type_t - virtual memory mapping request information.
|
||||
* enum vm_type - virtual memory mapping request information.
|
||||
* @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
|
||||
* @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
|
||||
*/
|
||||
enum vm_type_t {
|
||||
enum vm_type {
|
||||
VM_TYPE_USERPTR = 0x1,
|
||||
VM_TYPE_PHYS_PACK = 0x2
|
||||
};
|
||||
|
@ -381,6 +398,16 @@ struct hl_mmu_properties {
|
|||
u8 host_resident;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_hints_range - hint addresses reserved va range.
|
||||
* @start_addr: start address of the va range.
|
||||
* @end_addr: end address of the va range.
|
||||
*/
|
||||
struct hl_hints_range {
|
||||
u64 start_addr;
|
||||
u64 end_addr;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct asic_fixed_properties - ASIC specific immutable properties.
|
||||
* @hw_queues_props: H/W queues properties.
|
||||
|
@ -392,6 +419,10 @@ struct hl_mmu_properties {
|
|||
* @pmmu: PCI (host) MMU address translation properties.
|
||||
* @pmmu_huge: PCI (host) MMU address translation properties for memory
|
||||
* allocated with huge pages.
|
||||
* @hints_dram_reserved_va_range: dram hint addresses reserved range.
|
||||
* @hints_host_reserved_va_range: host hint addresses reserved range.
|
||||
* @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
|
||||
* range.
|
||||
* @sram_base_address: SRAM physical start address.
|
||||
* @sram_end_address: SRAM physical end address.
|
||||
* @sram_user_base_address - SRAM physical start address for user access.
|
||||
|
@ -412,6 +443,10 @@ struct hl_mmu_properties {
|
|||
* to the device's MMU.
|
||||
* @cb_va_end_addr: virtual end address of command buffers which are mapped to
|
||||
* the device's MMU.
|
||||
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used
|
||||
* for hints validity check.
|
||||
* device_dma_offset_for_host_access: the offset to add to host DMA addresses
|
||||
* to enable the device to access them.
|
||||
* @mmu_pgt_size: MMU page tables total size.
|
||||
* @mmu_pte_size: PTE size in MMU page tables.
|
||||
* @mmu_hop_table_size: MMU hop table size.
|
||||
|
@ -459,6 +494,8 @@ struct hl_mmu_properties {
|
|||
* reserved for the user
|
||||
* @first_available_cq: first available CQ for the user.
|
||||
* @user_interrupt_count: number of user interrupts.
|
||||
* @server_type: Server type that the ASIC is currently installed in.
|
||||
* The value is according to enum hl_server_type in uapi file.
|
||||
* @tpc_enabled_mask: which TPCs are enabled.
|
||||
* @completion_queues_count: number of completion queues.
|
||||
* @fw_security_enabled: true if security measures are enabled in firmware,
|
||||
|
@ -470,6 +507,7 @@ struct hl_mmu_properties {
|
|||
* @dram_supports_virtual_memory: is there an MMU towards the DRAM
|
||||
* @hard_reset_done_by_fw: true if firmware is handling hard reset flow
|
||||
* @num_functional_hbms: number of functional HBMs in each DCORE.
|
||||
* @hints_range_reservation: device support hint addresses range reservation.
|
||||
* @iatu_done_by_fw: true if iATU configuration is being done by FW.
|
||||
* @dynamic_fw_load: is dynamic FW load is supported.
|
||||
* @gic_interrupts_enable: true if FW is not blocking GIC controller,
|
||||
|
@ -483,6 +521,9 @@ struct asic_fixed_properties {
|
|||
struct hl_mmu_properties dmmu;
|
||||
struct hl_mmu_properties pmmu;
|
||||
struct hl_mmu_properties pmmu_huge;
|
||||
struct hl_hints_range hints_dram_reserved_va_range;
|
||||
struct hl_hints_range hints_host_reserved_va_range;
|
||||
struct hl_hints_range hints_host_hpage_reserved_va_range;
|
||||
u64 sram_base_address;
|
||||
u64 sram_end_address;
|
||||
u64 sram_user_base_address;
|
||||
|
@ -500,6 +541,8 @@ struct asic_fixed_properties {
|
|||
u64 mmu_dram_default_page_addr;
|
||||
u64 cb_va_start_addr;
|
||||
u64 cb_va_end_addr;
|
||||
u64 dram_hints_align_mask;
|
||||
u64 device_dma_offset_for_host_access;
|
||||
u32 mmu_pgt_size;
|
||||
u32 mmu_pte_size;
|
||||
u32 mmu_hop_table_size;
|
||||
|
@ -534,6 +577,7 @@ struct asic_fixed_properties {
|
|||
u16 first_available_user_msix_interrupt;
|
||||
u16 first_available_cq[HL_MAX_DCORES];
|
||||
u16 user_interrupt_count;
|
||||
u16 server_type;
|
||||
u8 tpc_enabled_mask;
|
||||
u8 completion_queues_count;
|
||||
u8 fw_security_enabled;
|
||||
|
@ -542,6 +586,7 @@ struct asic_fixed_properties {
|
|||
u8 dram_supports_virtual_memory;
|
||||
u8 hard_reset_done_by_fw;
|
||||
u8 num_functional_hbms;
|
||||
u8 hints_range_reservation;
|
||||
u8 iatu_done_by_fw;
|
||||
u8 dynamic_fw_load;
|
||||
u8 gic_interrupts_enable;
|
||||
|
@ -552,40 +597,45 @@ struct asic_fixed_properties {
|
|||
* @completion: fence is implemented using completion
|
||||
* @refcount: refcount for this fence
|
||||
* @cs_sequence: sequence of the corresponding command submission
|
||||
* @stream_master_qid_map: streams masters QID bitmap to represent all streams
|
||||
* masters QIDs that multi cs is waiting on
|
||||
* @error: mark this fence with error
|
||||
* @timestamp: timestamp upon completion
|
||||
*
|
||||
*/
|
||||
struct hl_fence {
|
||||
struct completion completion;
|
||||
struct kref refcount;
|
||||
u64 cs_sequence;
|
||||
u32 stream_master_qid_map;
|
||||
int error;
|
||||
ktime_t timestamp;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_cs_compl - command submission completion object.
|
||||
* @sob_reset_work: workqueue object to run SOB reset flow.
|
||||
* @base_fence: hl fence object.
|
||||
* @lock: spinlock to protect fence.
|
||||
* @hdev: habanalabs device structure.
|
||||
* @hw_sob: the H/W SOB used in this signal/wait CS.
|
||||
* @encaps_sig_hdl: encaps signals hanlder.
|
||||
* @cs_seq: command submission sequence number.
|
||||
* @type: type of the CS - signal/wait.
|
||||
* @sob_val: the SOB value that is used in this signal/wait CS.
|
||||
* @sob_group: the SOB group that is used in this collective wait CS.
|
||||
* @encaps_signals: indication whether it's a completion object of cs with
|
||||
* encaps signals or not.
|
||||
*/
|
||||
struct hl_cs_compl {
|
||||
struct work_struct sob_reset_work;
|
||||
struct hl_fence base_fence;
|
||||
spinlock_t lock;
|
||||
struct hl_device *hdev;
|
||||
struct hl_hw_sob *hw_sob;
|
||||
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
||||
u64 cs_seq;
|
||||
enum hl_cs_type type;
|
||||
u16 sob_val;
|
||||
u16 sob_group;
|
||||
bool encaps_signals;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -697,6 +747,17 @@ struct hl_sync_stream_properties {
|
|||
u8 curr_sob_offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
|
||||
* handlers manager
|
||||
* @lock: protects handles.
|
||||
* @handles: an idr to hold all encapsulated signals handles.
|
||||
*/
|
||||
struct hl_encaps_signals_mgr {
|
||||
spinlock_t lock;
|
||||
struct idr handles;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_hw_queue - describes a H/W transport queue.
|
||||
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
|
||||
|
@ -875,7 +936,7 @@ struct pci_mem_region {
|
|||
u64 region_base;
|
||||
u64 region_size;
|
||||
u64 bar_size;
|
||||
u32 offset_in_bar;
|
||||
u64 offset_in_bar;
|
||||
u8 bar_id;
|
||||
u8 used;
|
||||
};
|
||||
|
@ -996,7 +1057,7 @@ struct fw_load_mgr {
|
|||
* hw_fini and before CS rollback.
|
||||
* @suspend: handles IP specific H/W or SW changes for suspend.
|
||||
* @resume: handles IP specific H/W or SW changes for resume.
|
||||
* @cb_mmap: maps a CB.
|
||||
* @mmap: maps a memory.
|
||||
* @ring_doorbell: increment PI on a given QMAN.
|
||||
* @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
|
||||
* function because the PQs are located in different memory areas
|
||||
|
@ -1101,6 +1162,10 @@ struct fw_load_mgr {
|
|||
* generic f/w compatible PLL Indexes
|
||||
* @init_firmware_loader: initialize data for FW loader.
|
||||
* @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
|
||||
* @state_dump_init: initialize constants required for state dump
|
||||
* @get_sob_addr: get SOB base address offset.
|
||||
* @set_pci_memory_regions: setting properties of PCI memory regions
|
||||
* @get_stream_master_qid_arr: get pointer to stream masters QID array
|
||||
*/
|
||||
struct hl_asic_funcs {
|
||||
int (*early_init)(struct hl_device *hdev);
|
||||
|
@ -1110,11 +1175,11 @@ struct hl_asic_funcs {
|
|||
int (*sw_init)(struct hl_device *hdev);
|
||||
int (*sw_fini)(struct hl_device *hdev);
|
||||
int (*hw_init)(struct hl_device *hdev);
|
||||
void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
|
||||
void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
|
||||
void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
|
||||
void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
|
||||
int (*suspend)(struct hl_device *hdev);
|
||||
int (*resume)(struct hl_device *hdev);
|
||||
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
|
||||
void *cpu_addr, dma_addr_t dma_addr, size_t size);
|
||||
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
|
||||
void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
|
||||
|
@ -1210,10 +1275,11 @@ struct hl_asic_funcs {
|
|||
void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
|
||||
void (*set_dma_mask_from_fw)(struct hl_device *hdev);
|
||||
u64 (*get_device_time)(struct hl_device *hdev);
|
||||
void (*collective_wait_init_cs)(struct hl_cs *cs);
|
||||
int (*collective_wait_init_cs)(struct hl_cs *cs);
|
||||
int (*collective_wait_create_jobs)(struct hl_device *hdev,
|
||||
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
|
||||
u32 collective_engine_id);
|
||||
struct hl_ctx *ctx, struct hl_cs *cs,
|
||||
u32 wait_queue_id, u32 collective_engine_id,
|
||||
u32 encaps_signal_offset);
|
||||
u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
|
||||
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
|
||||
void (*ack_protection_bits_errors)(struct hl_device *hdev);
|
||||
|
@ -1226,6 +1292,10 @@ struct hl_asic_funcs {
|
|||
int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
|
||||
void (*init_firmware_loader)(struct hl_device *hdev);
|
||||
void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
|
||||
void (*state_dump_init)(struct hl_device *hdev);
|
||||
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
|
||||
void (*set_pci_memory_regions)(struct hl_device *hdev);
|
||||
u32* (*get_stream_master_qid_arr)(void);
|
||||
};
|
||||
|
||||
|
||||
|
@ -1282,20 +1352,6 @@ struct hl_cs_counters_atomic {
|
|||
atomic64_t validation_drop_cnt;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_pending_cb - pending command buffer structure
|
||||
* @cb_node: cb node in pending cb list
|
||||
* @cb: command buffer to send in next submission
|
||||
* @cb_size: command buffer size
|
||||
* @hw_queue_id: destination queue id
|
||||
*/
|
||||
struct hl_pending_cb {
|
||||
struct list_head cb_node;
|
||||
struct hl_cb *cb;
|
||||
u32 cb_size;
|
||||
u32 hw_queue_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_ctx - user/kernel context.
|
||||
* @mem_hash: holds mapping from virtual address to virtual memory area
|
||||
|
@ -1312,28 +1368,21 @@ struct hl_pending_cb {
|
|||
* MMU hash or walking the PGT requires talking this lock.
|
||||
* @hw_block_list_lock: protects the HW block memory list.
|
||||
* @debugfs_list: node in debugfs list of contexts.
|
||||
* pending_cb_list: list of pending command buffers waiting to be sent upon
|
||||
* next user command submission context.
|
||||
* @hw_block_mem_list: list of HW block virtual mapped addresses.
|
||||
* @cs_counters: context command submission counters.
|
||||
* @cb_va_pool: device VA pool for command buffers which are mapped to the
|
||||
* device's MMU.
|
||||
* @sig_mgr: encaps signals handle manager.
|
||||
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
|
||||
* to user so user could inquire about CS. It is used as
|
||||
* index to cs_pending array.
|
||||
* @dram_default_hops: array that holds all hops addresses needed for default
|
||||
* DRAM mapping.
|
||||
* @pending_cb_lock: spinlock to protect pending cb list
|
||||
* @cs_lock: spinlock to protect cs_sequence.
|
||||
* @dram_phys_mem: amount of used physical DRAM memory by this context.
|
||||
* @thread_ctx_switch_token: token to prevent multiple threads of the same
|
||||
* context from running the context switch phase.
|
||||
* Only a single thread should run it.
|
||||
* @thread_pending_cb_token: token to prevent multiple threads from processing
|
||||
* the pending CB list. Only a single thread should
|
||||
* process the list since it is protected by a
|
||||
* spinlock and we don't want to halt the entire
|
||||
* command submission sequence.
|
||||
* @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
|
||||
* the context switch phase from moving to their
|
||||
* execution phase before the context switch phase
|
||||
|
@ -1353,17 +1402,15 @@ struct hl_ctx {
|
|||
struct mutex mmu_lock;
|
||||
struct mutex hw_block_list_lock;
|
||||
struct list_head debugfs_list;
|
||||
struct list_head pending_cb_list;
|
||||
struct list_head hw_block_mem_list;
|
||||
struct hl_cs_counters_atomic cs_counters;
|
||||
struct gen_pool *cb_va_pool;
|
||||
struct hl_encaps_signals_mgr sig_mgr;
|
||||
u64 cs_sequence;
|
||||
u64 *dram_default_hops;
|
||||
spinlock_t pending_cb_lock;
|
||||
spinlock_t cs_lock;
|
||||
atomic64_t dram_phys_mem;
|
||||
atomic_t thread_ctx_switch_token;
|
||||
atomic_t thread_pending_cb_token;
|
||||
u32 thread_ctx_switch_wait_token;
|
||||
u32 asid;
|
||||
u32 handle;
|
||||
|
@ -1394,20 +1441,22 @@ struct hl_ctx_mgr {
|
|||
* @sgt: pointer to the scatter-gather table that holds the pages.
|
||||
* @dir: for DMA unmapping, the direction must be supplied, so save it.
|
||||
* @debugfs_list: node in debugfs list of command submissions.
|
||||
* @pid: the pid of the user process owning the memory
|
||||
* @addr: user-space virtual address of the start of the memory area.
|
||||
* @size: size of the memory area to pin & map.
|
||||
* @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
|
||||
*/
|
||||
struct hl_userptr {
|
||||
enum vm_type_t vm_type; /* must be first */
|
||||
enum vm_type vm_type; /* must be first */
|
||||
struct list_head job_node;
|
||||
struct page **pages;
|
||||
unsigned int npages;
|
||||
struct sg_table *sgt;
|
||||
enum dma_data_direction dir;
|
||||
struct list_head debugfs_list;
|
||||
pid_t pid;
|
||||
u64 addr;
|
||||
u32 size;
|
||||
u64 size;
|
||||
u8 dma_mapped;
|
||||
};
|
||||
|
||||
|
@ -1426,12 +1475,14 @@ struct hl_userptr {
|
|||
* @mirror_node : node in device mirror list of command submissions.
|
||||
* @staged_cs_node: node in the staged cs list.
|
||||
* @debugfs_list: node in debugfs list of command submissions.
|
||||
* @encaps_sig_hdl: holds the encaps signals handle.
|
||||
* @sequence: the sequence number of this CS.
|
||||
* @staged_sequence: the sequence of the staged submission this CS is part of,
|
||||
* relevant only if staged_cs is set.
|
||||
* @timeout_jiffies: cs timeout in jiffies.
|
||||
* @submission_time_jiffies: submission time of the cs
|
||||
* @type: CS_TYPE_*.
|
||||
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
|
||||
* @submitted: true if CS was submitted to H/W.
|
||||
* @completed: true if CS was completed by device.
|
||||
* @timedout : true if CS was timedout.
|
||||
|
@ -1445,6 +1496,7 @@ struct hl_userptr {
|
|||
* @staged_cs: true if this CS is part of a staged submission.
|
||||
* @skip_reset_on_timeout: true if we shall not reset the device in case
|
||||
* timeout occurs (debug scenario).
|
||||
* @encaps_signals: true if this CS has encaps reserved signals.
|
||||
*/
|
||||
struct hl_cs {
|
||||
u16 *jobs_in_queue_cnt;
|
||||
|
@ -1459,11 +1511,13 @@ struct hl_cs {
|
|||
struct list_head mirror_node;
|
||||
struct list_head staged_cs_node;
|
||||
struct list_head debugfs_list;
|
||||
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
||||
u64 sequence;
|
||||
u64 staged_sequence;
|
||||
u64 timeout_jiffies;
|
||||
u64 submission_time_jiffies;
|
||||
enum hl_cs_type type;
|
||||
u32 encaps_sig_hdl_id;
|
||||
u8 submitted;
|
||||
u8 completed;
|
||||
u8 timedout;
|
||||
|
@ -1474,6 +1528,7 @@ struct hl_cs {
|
|||
u8 staged_first;
|
||||
u8 staged_cs;
|
||||
u8 skip_reset_on_timeout;
|
||||
u8 encaps_signals;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1493,6 +1548,8 @@ struct hl_cs {
|
|||
* @hw_queue_id: the id of the H/W queue this job is submitted to.
|
||||
* @user_cb_size: the actual size of the CB we got from the user.
|
||||
* @job_cb_size: the actual size of the CB that we put on the queue.
|
||||
* @encaps_sig_wait_offset: encapsulated signals offset, which allows the user
|
||||
* to wait on part of the reserved signals.
|
||||
* @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
|
||||
* handle to a kernel-allocated CB object, false
|
||||
* otherwise (SRAM/DRAM/host address).
|
||||
|
@ -1517,6 +1574,7 @@ struct hl_cs_job {
|
|||
u32 hw_queue_id;
|
||||
u32 user_cb_size;
|
||||
u32 job_cb_size;
|
||||
u32 encaps_sig_wait_offset;
|
||||
u8 is_kernel_allocated_cb;
|
||||
u8 contains_dma_pkt;
|
||||
};
|
||||
|
@ -1613,7 +1671,7 @@ struct hl_vm_hw_block_list_node {
|
|||
* @created_from_userptr: is product of host virtual address.
|
||||
*/
|
||||
struct hl_vm_phys_pg_pack {
|
||||
enum vm_type_t vm_type; /* must be first */
|
||||
enum vm_type vm_type; /* must be first */
|
||||
u64 *pages;
|
||||
u64 npages;
|
||||
u64 total_size;
|
||||
|
@ -1759,9 +1817,13 @@ struct hl_debugfs_entry {
|
|||
* @ctx_mem_hash_list: list of available contexts with MMU mappings.
|
||||
* @ctx_mem_hash_spinlock: protects ctx_mem_hash_list.
|
||||
* @blob_desc: descriptor of blob
|
||||
* @state_dump: data of the system states in case of a bad cs.
|
||||
* @state_dump_sem: protects state_dump.
|
||||
* @addr: next address to read/write from/to in read/write32.
|
||||
* @mmu_addr: next virtual address to translate to physical address in mmu_show.
|
||||
* @userptr_lookup: the target user ptr to look up for on demand.
|
||||
* @mmu_asid: ASID to use while translating in mmu_show.
|
||||
* @state_dump_head: index of the latest state dump
|
||||
* @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
|
||||
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
|
||||
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
|
||||
|
@ -1783,14 +1845,149 @@ struct hl_dbg_device_entry {
|
|||
struct list_head ctx_mem_hash_list;
|
||||
spinlock_t ctx_mem_hash_spinlock;
|
||||
struct debugfs_blob_wrapper blob_desc;
|
||||
char *state_dump[HL_STATE_DUMP_HIST_LEN];
|
||||
struct rw_semaphore state_dump_sem;
|
||||
u64 addr;
|
||||
u64 mmu_addr;
|
||||
u64 userptr_lookup;
|
||||
u32 mmu_asid;
|
||||
u32 state_dump_head;
|
||||
u8 i2c_bus;
|
||||
u8 i2c_addr;
|
||||
u8 i2c_reg;
|
||||
};
|
||||
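To make the head/array/semaphore interplay above concrete, here is a hedged sketch of storing a fresh dump in the history ring; the in-tree entry point is hl_debugfs_set_state_dump(), and the helper below is only an illustrative reimplementation under that assumption:

static void store_state_dump(struct hl_dbg_device_entry *entry, char *data)
{
	down_write(&entry->state_dump_sem);

	/* advance the head and replace the oldest entry in the ring */
	entry->state_dump_head = (entry->state_dump_head + 1) %
					HL_STATE_DUMP_HIST_LEN;
	vfree(entry->state_dump[entry->state_dump_head]);
	entry->state_dump[entry->state_dump_head] = data;

	up_write(&entry->state_dump_sem);
}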
|
||||
/**
|
||||
* struct hl_hw_obj_name_entry - single hw object name, member of
|
||||
* hl_state_dump_specs
|
||||
* @node: link to the containing hash table
|
||||
* @name: hw object name
|
||||
* @id: object identifier
|
||||
*/
|
||||
struct hl_hw_obj_name_entry {
|
||||
struct hlist_node node;
|
||||
const char *name;
|
||||
u32 id;
|
||||
};
|
||||
|
||||
enum hl_state_dump_specs_props {
|
||||
SP_SYNC_OBJ_BASE_ADDR,
|
||||
SP_NEXT_SYNC_OBJ_ADDR,
|
||||
SP_SYNC_OBJ_AMOUNT,
|
||||
SP_MON_OBJ_WR_ADDR_LOW,
|
||||
SP_MON_OBJ_WR_ADDR_HIGH,
|
||||
SP_MON_OBJ_WR_DATA,
|
||||
SP_MON_OBJ_ARM_DATA,
|
||||
SP_MON_OBJ_STATUS,
|
||||
SP_MONITORS_AMOUNT,
|
||||
SP_TPC0_CMDQ,
|
||||
SP_TPC0_CFG_SO,
|
||||
SP_NEXT_TPC,
|
||||
SP_MME_CMDQ,
|
||||
SP_MME_CFG_SO,
|
||||
SP_NEXT_MME,
|
||||
SP_DMA_CMDQ,
|
||||
SP_DMA_CFG_SO,
|
||||
SP_DMA_QUEUES_OFFSET,
|
||||
SP_NUM_OF_MME_ENGINES,
|
||||
SP_SUB_MME_ENG_NUM,
|
||||
SP_NUM_OF_DMA_ENGINES,
|
||||
SP_NUM_OF_TPC_ENGINES,
|
||||
SP_ENGINE_NUM_OF_QUEUES,
|
||||
SP_ENGINE_NUM_OF_STREAMS,
|
||||
SP_ENGINE_NUM_OF_FENCES,
|
||||
SP_FENCE0_CNT_OFFSET,
|
||||
SP_FENCE0_RDATA_OFFSET,
|
||||
SP_CP_STS_OFFSET,
|
||||
SP_NUM_CORES,
|
||||
|
||||
SP_MAX
|
||||
};
|
||||
|
||||
enum hl_sync_engine_type {
|
||||
ENGINE_TPC,
|
||||
ENGINE_DMA,
|
||||
ENGINE_MME,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_mon_state_dump - represents a state dump of a single monitor
|
||||
* @id: monitor id
|
||||
* @wr_addr_low: address monitor will write to, low bits
|
||||
* @wr_addr_high: address monitor will write to, high bits
|
||||
* @wr_data: data monitor will write
|
||||
* @arm_data: register value containing monitor configuration
|
||||
* @status: monitor status
|
||||
*/
|
||||
struct hl_mon_state_dump {
|
||||
u32 id;
|
||||
u32 wr_addr_low;
|
||||
u32 wr_addr_high;
|
||||
u32 wr_data;
|
||||
u32 arm_data;
|
||||
u32 status;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
|
||||
* @engine_type: type of the engine
|
||||
* @engine_id: id of the engine
|
||||
* @sync_id: id of the sync object
|
||||
*/
|
||||
struct hl_sync_to_engine_map_entry {
|
||||
struct hlist_node node;
|
||||
enum hl_sync_engine_type engine_type;
|
||||
u32 engine_id;
|
||||
u32 sync_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_sync_to_engine_map - maps sync object id to associated engine id
|
||||
* @tb: hash table containing the mapping, each element is of type
|
||||
* struct hl_sync_to_engine_map_entry
|
||||
*/
|
||||
struct hl_sync_to_engine_map {
|
||||
DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS);
|
||||
};
|
||||
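A hedged sketch of how code that populates this map might add one entry to the hash table; the helper name and error handling are assumptions, only the structures above are used:

static int map_add_entry(struct hl_sync_to_engine_map *map,
			enum hl_sync_engine_type engine_type,
			u32 engine_id, u32 sync_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = sync_id;

	/* keyed by sync_id, so lookups by sync object id hit this entry */
	hash_add(map->tb, &entry->node, sync_id);

	return 0;
}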
|
||||
/**
|
||||
* struct hl_state_dump_specs_funcs - virtual functions used by the state dump
|
||||
* @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine
|
||||
* @print_single_monitor: format monitor data as string
|
||||
* @monitor_valid: return true if given monitor dump is valid
|
||||
* @print_fences_single_engine: format fences data as string
|
||||
*/
|
||||
struct hl_state_dump_specs_funcs {
|
||||
int (*gen_sync_to_engine_map)(struct hl_device *hdev,
|
||||
struct hl_sync_to_engine_map *map);
|
||||
int (*print_single_monitor)(char **buf, size_t *size, size_t *offset,
|
||||
struct hl_device *hdev,
|
||||
struct hl_mon_state_dump *mon);
|
||||
int (*monitor_valid)(struct hl_mon_state_dump *mon);
|
||||
int (*print_fences_single_engine)(struct hl_device *hdev,
|
||||
u64 base_offset,
|
||||
u64 status_base_offset,
|
||||
enum hl_sync_engine_type engine_type,
|
||||
u32 engine_id, char **buf,
|
||||
size_t *size, size_t *offset);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_state_dump_specs - defines ASIC known hw objects names
|
||||
* @so_id_to_str_tb: sync objects names index table
|
||||
* @monitor_id_to_str_tb: monitors names index table
|
||||
* @funcs: virtual functions used for state dump
|
||||
* @sync_namager_names: readable names for sync manager if available (ex: N_E)
|
||||
* @props: pointer to a per asic const props array required for state dump
|
||||
*/
|
||||
struct hl_state_dump_specs {
|
||||
DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
|
||||
DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
|
||||
struct hl_state_dump_specs_funcs funcs;
|
||||
const char * const *sync_namager_names;
|
||||
s64 *props;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* DEVICES
|
||||
|
@ -1798,7 +1995,7 @@ struct hl_dbg_device_entry {
|
|||
|
||||
#define HL_STR_MAX 32
|
||||
|
||||
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
|
||||
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
|
||||
|
||||
/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
|
||||
* x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
|
||||
|
@ -1946,11 +2143,13 @@ struct hwmon_chip_info;
|
|||
* @wq: work queue for device reset procedure.
|
||||
* @reset_work: reset work to be done.
|
||||
* @hdev: habanalabs device structure.
|
||||
* @fw_reset: whether f/w will do the reset without us sending them a message to do it.
|
||||
*/
|
||||
struct hl_device_reset_work {
|
||||
struct workqueue_struct *wq;
|
||||
struct delayed_work reset_work;
|
||||
struct hl_device *hdev;
|
||||
bool fw_reset;
|
||||
};
|
||||
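For illustration only (the actual scheduling site is not in this hunk), queuing the reset work described above could look like the following, assuming just the fields of struct hl_device_reset_work:

static void schedule_device_reset(struct hl_device_reset_work *work,
				bool fw_reset, unsigned long delay)
{
	work->fw_reset = fw_reset;
	queue_delayed_work(work->wq, &work->reset_work, delay);
}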
|
||||
/**
|
||||
|
@ -2064,6 +2263,58 @@ struct hl_mmu_funcs {
|
|||
u64 virt_addr, struct hl_mmu_hop_info *hops);
|
||||
};
|
||||
|
||||
/**
|
||||
* number of user contexts allowed to call wait_for_multi_cs ioctl in
|
||||
* parallel
|
||||
*/
|
||||
#define MULTI_CS_MAX_USER_CTX 2
|
||||
|
||||
/**
|
||||
* struct multi_cs_completion - multi CS wait completion.
|
||||
* @completion: completion of any of the CS in the list
|
||||
* @lock: spinlock for the completion structure
|
||||
* @timestamp: timestamp for the multi-CS completion
|
||||
* @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
|
||||
* is waiting
|
||||
* @used: 1 if in use, otherwise 0
|
||||
*/
|
||||
struct multi_cs_completion {
|
||||
struct completion completion;
|
||||
spinlock_t lock;
|
||||
s64 timestamp;
|
||||
u32 stream_master_qid_map;
|
||||
u8 used;
|
||||
};
|
||||
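As a hedged sketch (helper name hypothetical, not code from this series, and assuming the multi_cs_completion array in struct hl_device shown further below), a free completion object could be claimed from the fixed pool of MULTI_CS_MAX_USER_CTX entries like this:

static struct multi_cs_completion *pick_multi_cs_slot(struct hl_device *hdev)
{
	struct multi_cs_completion *mcs;
	int i;

	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
		mcs = &hdev->multi_cs_completion[i];

		spin_lock(&mcs->lock);
		if (!mcs->used) {
			mcs->used = 1;
			mcs->timestamp = 0;
			reinit_completion(&mcs->completion);
			spin_unlock(&mcs->lock);
			return mcs;
		}
		spin_unlock(&mcs->lock);
	}

	return NULL;	/* all slots are busy */
}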
|
||||
/**
|
||||
* struct multi_cs_data - internal data for multi CS call
|
||||
* @ctx: pointer to the context structure
|
||||
* @fence_arr: array of fences of all CSs
|
||||
* @seq_arr: array of CS sequence numbers
|
||||
* @timeout_us: timeout in usec for waiting for CS to complete
|
||||
* @timestamp: timestamp of first completed CS
|
||||
* @wait_status: wait for CS status
|
||||
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
|
||||
* @stream_master_qid_map: bitmap of all stream master QIDs on which the
|
||||
* multi-CS is waiting
|
||||
* @arr_len: fence_arr and seq_arr array length
|
||||
* @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
|
||||
* @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
|
||||
*/
|
||||
struct multi_cs_data {
|
||||
struct hl_ctx *ctx;
|
||||
struct hl_fence **fence_arr;
|
||||
u64 *seq_arr;
|
||||
s64 timeout_us;
|
||||
s64 timestamp;
|
||||
long wait_status;
|
||||
u32 completion_bitmap;
|
||||
u32 stream_master_qid_map;
|
||||
u8 arr_len;
|
||||
u8 gone_cs;
|
||||
u8 update_ts;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_device - habanalabs device structure.
|
||||
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
|
||||
|
@ -2129,6 +2380,8 @@ struct hl_mmu_funcs {
|
|||
* @mmu_func: device-related MMU functions.
|
||||
* @fw_loader: FW loader manager.
|
||||
* @pci_mem_region: array of memory regions in the PCI
|
||||
* @state_dump_specs: constants and dictionaries needed to dump system state.
|
||||
* @multi_cs_completion: array of multi-CS completion.
|
||||
* @dram_used_mem: current DRAM memory consumption.
|
||||
* @timeout_jiffies: device CS timeout value.
|
||||
* @max_power: the max power of the device, as configured by the sysadmin. This
|
||||
|
@ -2205,6 +2458,7 @@ struct hl_mmu_funcs {
|
|||
* halted. We can't halt it again because the COMMS
|
||||
* protocol will throw an error. Relevant only for
|
||||
* cases where Linux was not loaded to device CPU
|
||||
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
|
||||
*/
|
||||
struct hl_device {
|
||||
struct pci_dev *pdev;
|
||||
|
@ -2273,6 +2527,11 @@ struct hl_device {
|
|||
|
||||
struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER];
|
||||
|
||||
struct hl_state_dump_specs state_dump_specs;
|
||||
|
||||
struct multi_cs_completion multi_cs_completion[
|
||||
MULTI_CS_MAX_USER_CTX];
|
||||
u32 *stream_master_qid_arr;
|
||||
atomic64_t dram_used_mem;
|
||||
u64 timeout_jiffies;
|
||||
u64 max_power;
|
||||
|
@ -2322,6 +2581,8 @@ struct hl_device {
|
|||
u8 curr_reset_cause;
|
||||
u8 skip_reset_on_timeout;
|
||||
u8 device_cpu_is_halted;
|
||||
u8 supports_wait_for_multi_cs;
|
||||
u8 stream_master_qid_arr_size;
|
||||
|
||||
/* Parameters for bring-up */
|
||||
u64 nic_ports_mask;
|
||||
|
@ -2343,6 +2604,29 @@ struct hl_device {
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
* struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
|
||||
* @refcount: refcount used to protect removing this id when several
|
||||
* wait cs are used to wait of the reserved encaps signals.
|
||||
* @hdev: pointer to habanalabs device structure.
|
||||
* @hw_sob: pointer to H/W SOB used in the reservation.
|
||||
* @cs_seq: staged cs sequence which contains encapsulated signals
|
||||
* @id: idr handler id to be used to fetch the handler info
|
||||
* @q_idx: stream queue index
|
||||
* @pre_sob_val: current SOB value before reservation
|
||||
* @count: signals number
|
||||
*/
|
||||
struct hl_cs_encaps_sig_handle {
|
||||
struct kref refcount;
|
||||
struct hl_device *hdev;
|
||||
struct hl_hw_sob *hw_sob;
|
||||
u64 cs_seq;
|
||||
u32 id;
|
||||
u32 q_idx;
|
||||
u32 pre_sob_val;
|
||||
u32 count;
|
||||
};
|
||||
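A hedged sketch of the handle lifetime implied by @refcount above: additional wait CSs take a reference and the last put invokes the release callback hl_encaps_handle_do_release() (declared later in this header); the wrapper names are hypothetical:

static void encaps_handle_get(struct hl_cs_encaps_sig_handle *handle)
{
	kref_get(&handle->refcount);
}

static void encaps_handle_put(struct hl_cs_encaps_sig_handle *handle)
{
	/* the last put invokes the release callback */
	kref_put(&handle->refcount, hl_encaps_handle_do_release);
}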
|
||||
/*
|
||||
* IOCTLs
|
||||
*/
|
||||
|
@ -2372,6 +2656,23 @@ struct hl_ioctl_desc {
|
|||
* Kernel module functions that can be accessed by entire module
|
||||
*/
|
||||
|
||||
/**
|
||||
* hl_get_sg_info() - get number of pages and the DMA address from SG list.
|
||||
* @sg: the SG list.
|
||||
* @dma_addr: pointer to DMA address to return.
|
||||
*
|
||||
* Calculate the number of consecutive pages described by the SG list. Take the
|
||||
* offset of the address in the first page, add to it the length and round it up
|
||||
* to the number of needed pages.
|
||||
*/
|
||||
static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
|
||||
{
|
||||
*dma_addr = sg_dma_address(sg);
|
||||
|
||||
return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
|
||||
(PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
}
|
||||
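A minimal usage sketch for hl_get_sg_info() (the helper name count_total_pages() is hypothetical): summing the pages spanned by an already DMA-mapped scatter-gather table, using the same loop shape as the memory-mapping code later in this series:

static u64 count_total_pages(struct sg_table *sgt)
{
	struct scatterlist *sg;
	dma_addr_t dma_addr;
	u64 total_npages = 0;
	int i;

	for_each_sg(sgt->sgl, sg, sgt->nents, i)
		total_npages += hl_get_sg_info(sg, &dma_addr);

	return total_npages;
}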
|
||||
/**
|
||||
* hl_mem_area_inside_range() - Checks whether address+size are inside a range.
|
||||
* @address: The start address of the area we want to validate.
|
||||
|
@ -2436,7 +2737,9 @@ void destroy_hdev(struct hl_device *hdev);
|
|||
int hl_hw_queues_create(struct hl_device *hdev);
|
||||
void hl_hw_queues_destroy(struct hl_device *hdev);
|
||||
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
||||
u32 cb_size, u64 cb_ptr);
|
||||
u32 cb_size, u64 cb_ptr);
|
||||
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
u32 ctl, u32 len, u64 ptr);
|
||||
int hl_hw_queue_schedule_cs(struct hl_cs *cs);
|
||||
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
|
||||
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
|
||||
|
@ -2470,6 +2773,8 @@ void hl_ctx_do_release(struct kref *ref);
|
|||
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
int hl_ctx_put(struct hl_ctx *ctx);
|
||||
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
|
||||
int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
|
||||
struct hl_fence **fence, u32 arr_len);
|
||||
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
|
||||
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
|
||||
|
||||
|
@ -2511,18 +2816,19 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
|
|||
void hl_cb_va_pool_fini(struct hl_ctx *ctx);
|
||||
|
||||
void hl_cs_rollback_all(struct hl_device *hdev);
|
||||
void hl_pending_cb_list_flush(struct hl_ctx *ctx);
|
||||
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
|
||||
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
|
||||
void hl_sob_reset_error(struct kref *ref);
|
||||
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
|
||||
void hl_fence_put(struct hl_fence *fence);
|
||||
void hl_fences_put(struct hl_fence **fence, int len);
|
||||
void hl_fence_get(struct hl_fence *fence);
|
||||
void cs_get(struct hl_cs *cs);
|
||||
bool cs_needs_completion(struct hl_cs *cs);
|
||||
bool cs_needs_timeout(struct hl_cs *cs);
|
||||
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
|
||||
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
|
||||
void hl_multi_cs_completion_init(struct hl_device *hdev);
|
||||
|
||||
void goya_set_asic_funcs(struct hl_device *hdev);
|
||||
void gaudi_set_asic_funcs(struct hl_device *hdev);
|
||||
|
@ -2650,9 +2956,25 @@ int hl_set_voltage(struct hl_device *hdev,
|
|||
int sensor_index, u32 attr, long value);
|
||||
int hl_set_current(struct hl_device *hdev,
|
||||
int sensor_index, u32 attr, long value);
|
||||
void hw_sob_get(struct hl_hw_sob *hw_sob);
|
||||
void hw_sob_put(struct hl_hw_sob *hw_sob);
|
||||
void hl_encaps_handle_do_release(struct kref *ref);
|
||||
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
||||
struct hl_cs *cs, struct hl_cs_job *job,
|
||||
struct hl_cs_compl *cs_cmpl);
|
||||
void hl_release_pending_user_interrupts(struct hl_device *hdev);
|
||||
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
|
||||
struct hl_hw_sob **hw_sob, u32 count);
|
||||
struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
|
||||
|
||||
int hl_state_dump(struct hl_device *hdev);
|
||||
const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
|
||||
const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
|
||||
struct hl_mon_state_dump *mon);
|
||||
void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map);
|
||||
__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
|
||||
const char *format, ...);
|
||||
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
|
||||
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
|
@ -2673,6 +2995,8 @@ void hl_debugfs_remove_userptr(struct hl_device *hdev,
|
|||
struct hl_userptr *userptr);
|
||||
void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
|
||||
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
|
||||
unsigned long length);
|
||||
|
||||
#else
|
||||
|
||||
|
@ -2746,6 +3070,11 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
|
|||
{
|
||||
}
|
||||
|
||||
static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
|
||||
char *data, unsigned long length)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* IOCTLs */
|
||||
|
|
|
@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
|||
hl_cb_mgr_init(&hpriv->cb_mgr);
|
||||
hl_ctx_mgr_init(&hpriv->ctx_mgr);
|
||||
|
||||
hpriv->taskpid = find_get_pid(current->pid);
|
||||
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
|
||||
|
||||
mutex_lock(&hdev->fpriv_list_lock);
|
||||
|
||||
|
@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
|||
|
||||
out_err:
|
||||
mutex_unlock(&hdev->fpriv_list_lock);
|
||||
|
||||
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
|
||||
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
|
||||
filp->private_data = NULL;
|
||||
|
@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
|||
hdev->asic_prop.fw_security_enabled = false;
|
||||
|
||||
/* Assign status description string */
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
|
||||
"disabled", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
|
||||
"operational", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
|
||||
"in reset", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
|
||||
"disabled", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
|
||||
"needs reset", HL_STR_MAX);
|
||||
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
|
||||
"in device creation", HL_STR_MAX);
|
||||
|
||||
hdev->major = hl_major;
|
||||
hdev->reset_on_lockup = reset_on_lockup;
|
||||
|
@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
|
|||
result = PCI_ERS_RESULT_NONE;
|
||||
}
|
||||
|
||||
hdev->asic_funcs->halt_engines(hdev, true);
|
||||
hdev->asic_funcs->halt_engines(hdev, true, false);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
|
|||
|
||||
hw_ip.first_available_interrupt_id =
|
||||
prop->first_available_user_msix_interrupt;
|
||||
hw_ip.server_type = prop->server_type;
|
||||
|
||||
return copy_to_user(out, &hw_ip,
|
||||
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
|
|||
}
|
||||
|
||||
/*
|
||||
* ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
|
||||
* hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
|
||||
* H/W queue.
|
||||
* @hdev: pointer to habanalabs device structure
|
||||
* @q: pointer to habanalabs queue structure
|
||||
|
@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
|
|||
* This function must be called when the scheduler mutex is taken
|
||||
*
|
||||
*/
|
||||
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
|
||||
struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
|
||||
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
|
||||
u32 ctl, u32 len, u64 ptr)
|
||||
{
|
||||
struct hl_bd *bd;
|
||||
|
||||
|
@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
|
|||
* @cb_size: size of CB
|
||||
* @cb_ptr: pointer to CB location
|
||||
*
|
||||
* This function sends a single CB, that must NOT generate a completion entry
|
||||
*
|
||||
* This function sends a single CB, that must NOT generate a completion entry.
|
||||
* Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
|
||||
*/
|
||||
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
||||
u32 cb_size, u64 cb_ptr)
|
||||
|
@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
|||
struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
|
||||
int rc = 0;
|
||||
|
||||
/*
|
||||
* The CPU queue is a synchronous queue with an effective depth of
|
||||
* a single entry (although it is allocated with room for multiple
|
||||
* entries). Therefore, there is a different lock, called
|
||||
* send_cpu_message_lock, that serializes accesses to the CPU queue.
|
||||
* As a result, we don't need to lock the access to the entire H/W
|
||||
* queues module when submitting a JOB to the CPU queue
|
||||
*/
|
||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
|
||||
if (hdev->disabled) {
|
||||
rc = -EPERM;
|
||||
|
@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
|
|||
goto out;
|
||||
}
|
||||
|
||||
ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
|
||||
hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
|
||||
|
||||
out:
|
||||
if (q->queue_type != QUEUE_TYPE_CPU)
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
|
|||
cq->pi = hl_cq_inc_ptr(cq->pi);
|
||||
|
||||
submit_bd:
|
||||
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||
hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
|
|||
else
|
||||
ptr = (u64) (uintptr_t) job->user_cb;
|
||||
|
||||
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||
hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
|
||||
}
|
||||
|
||||
static int init_signal_cs(struct hl_device *hdev,
|
||||
|
@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev,
|
|||
cs_cmpl->sob_val = prop->next_sob_val;
|
||||
|
||||
dev_dbg(hdev->dev,
|
||||
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
|
||||
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
|
||||
"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
|
||||
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
|
||||
cs_cmpl->cs_seq);
|
||||
|
||||
/* we set an EB since we must make sure all operations are done
|
||||
* when sending the signal
|
||||
|
@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev,
|
|||
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
|
||||
cs_cmpl->hw_sob->sob_id, 0, true);
|
||||
|
||||
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
|
||||
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
|
||||
false);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
|
||||
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
||||
struct hl_cs *cs, struct hl_cs_job *job,
|
||||
struct hl_cs_compl *cs_cmpl)
|
||||
{
|
||||
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
|
||||
|
||||
cs_cmpl->hw_sob = handle->hw_sob;
|
||||
|
||||
/* Note that encaps_sig_wait_offset was validated earlier in the flow
* against the maximum reserved signal count.
* Always decrement the offset by 1: when the user sets offset 1 he
* means to wait only for the first signal, whose value is pre_sob_val;
* offset 2 requires (pre_sob_val + 1), and so on.
*/
|
||||
cs_cmpl->sob_val = handle->pre_sob_val +
|
||||
(job->encaps_sig_wait_offset - 1);
|
||||
}
|
||||
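A short worked example of the offset arithmetic above, with illustrative values only:

/*
 * Example: a reservation with pre_sob_val = 100 and count = 4 covers SOB
 * values 100..103. A wait with encaps_sig_wait_offset = 1 targets
 * sob_val = 100 + (1 - 1) = 100; offset = 3 targets 100 + (3 - 1) = 102.
 */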
|
||||
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
|
||||
struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
|
||||
{
|
||||
struct hl_cs_compl *signal_cs_cmpl;
|
||||
struct hl_sync_stream_properties *prop;
|
||||
struct hl_gen_wait_properties wait_prop;
|
||||
struct hl_sync_stream_properties *prop;
|
||||
struct hl_cs_compl *signal_cs_cmpl;
|
||||
u32 q_idx;
|
||||
|
||||
q_idx = job->hw_queue_id;
|
||||
|
@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
|
|||
struct hl_cs_compl,
|
||||
base_fence);
|
||||
|
||||
/* copy the SOB id and value of the signal CS */
|
||||
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
|
||||
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
|
||||
if (cs->encaps_signals) {
|
||||
/* use the encaps signal handle stored earlier in the flow
|
||||
* and set the SOB information from the encaps
|
||||
* signals handle
|
||||
*/
|
||||
hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
|
||||
|
||||
dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
|
||||
cs->encaps_sig_hdl->q_idx,
|
||||
cs->encaps_sig_hdl->cs_seq,
|
||||
cs_cmpl->sob_val,
|
||||
job->encaps_sig_wait_offset);
|
||||
} else {
|
||||
/* Copy the SOB id and value of the signal CS */
|
||||
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
|
||||
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
|
||||
}
|
||||
|
||||
/* Check again if the signal CS has already completed.
* If it has, don't send any wait CS, since the hw_sob
* could already be in reset. If the signal has not completed,
* take a refcount on the hw_sob to prevent resetting it
* while the wait CS is not yet submitted.
* Note that this check is protected by two locks:
* the hw queue lock and the completion object lock.
* The same completion object lock also protects
* the hw_sob reset handler function.
* The hw_queue lock prevents the hw_sob refcount value,
* changed by the signal/wait flows, from going out of sync.
*/
|
||||
spin_lock(&signal_cs_cmpl->lock);
|
||||
|
||||
if (completion_done(&cs->signal_fence->completion)) {
|
||||
spin_unlock(&signal_cs_cmpl->lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kref_get(&cs_cmpl->hw_sob->kref);
|
||||
|
||||
spin_unlock(&signal_cs_cmpl->lock);
|
||||
|
||||
dev_dbg(hdev->dev,
|
||||
"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
|
||||
"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
|
||||
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
|
||||
prop->base_mon_id, q_idx);
|
||||
prop->base_mon_id, q_idx, cs->sequence);
|
||||
|
||||
wait_prop.data = (void *) job->patched_cb;
|
||||
wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
|
||||
|
@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
|
|||
wait_prop.mon_id = prop->base_mon_id;
|
||||
wait_prop.q_idx = q_idx;
|
||||
wait_prop.size = 0;
|
||||
|
||||
hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
|
||||
|
||||
kref_get(&cs_cmpl->hw_sob->kref);
|
||||
/*
|
||||
* Must put the signal fence after the SOB refcnt increment so
|
||||
* the SOB refcnt won't turn 0 and reset the SOB before the
|
||||
* wait CS was submitted.
|
||||
*/
|
||||
mb();
|
||||
hl_fence_put(cs->signal_fence);
|
||||
cs->signal_fence = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs)
|
|||
if (cs->type & CS_TYPE_SIGNAL)
|
||||
rc = init_signal_cs(hdev, job, cs_cmpl);
|
||||
else if (cs->type & CS_TYPE_WAIT)
|
||||
init_wait_cs(hdev, cs, job, cs_cmpl);
|
||||
rc = init_wait_cs(hdev, cs, job, cs_cmpl);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int encaps_sig_first_staged_cs_handler
|
||||
(struct hl_device *hdev, struct hl_cs *cs)
|
||||
{
|
||||
struct hl_cs_compl *cs_cmpl =
|
||||
container_of(cs->fence,
|
||||
struct hl_cs_compl, base_fence);
|
||||
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
||||
struct hl_encaps_signals_mgr *mgr;
|
||||
int rc = 0;
|
||||
|
||||
mgr = &hdev->compute_ctx->sig_mgr;
|
||||
|
||||
spin_lock(&mgr->lock);
|
||||
encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
|
||||
if (encaps_sig_hdl) {
|
||||
/*
|
||||
* Set handler CS sequence,
|
||||
* the CS which contains the encapsulated signals.
|
||||
*/
|
||||
encaps_sig_hdl->cs_seq = cs->sequence;
|
||||
/* store the handle and set encaps signal indication,
|
||||
* to be used later in cs_do_release to put the last
|
||||
* reference to encaps signals handlers.
|
||||
*/
|
||||
cs_cmpl->encaps_signals = true;
|
||||
cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
|
||||
|
||||
/* set hw_sob pointer in completion object
|
||||
* since it's used in cs_do_release flow to put
|
||||
* refcount to sob
|
||||
*/
|
||||
cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
|
||||
cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
|
||||
encaps_sig_hdl->count;
|
||||
|
||||
dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
|
||||
cs->sequence, encaps_sig_hdl->id,
|
||||
encaps_sig_hdl->count,
|
||||
encaps_sig_hdl->q_idx,
|
||||
cs_cmpl->hw_sob->sob_id,
|
||||
cs_cmpl->sob_val);
|
||||
|
||||
} else {
|
||||
dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
|
||||
cs->encaps_sig_hdl_id);
|
||||
rc = -EINVAL;
|
||||
}
|
||||
|
||||
spin_unlock(&mgr->lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
|||
|
||||
if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
|
||||
rc = init_signal_wait_cs(cs);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to submit signal cs\n");
|
||||
if (rc)
|
||||
goto unroll_cq_resv;
|
||||
}
|
||||
} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
|
||||
hdev->asic_funcs->collective_wait_init_cs(cs);
|
||||
} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
|
||||
rc = hdev->asic_funcs->collective_wait_init_cs(cs);
|
||||
if (rc)
|
||||
goto unroll_cq_resv;
|
||||
}
|
||||
|
||||
|
||||
if (cs->encaps_signals && cs->staged_first) {
|
||||
rc = encaps_sig_first_staged_cs_handler(hdev, cs);
|
||||
if (rc)
|
||||
goto unroll_cq_resv;
|
||||
}
|
||||
|
||||
spin_lock(&hdev->cs_mirror_lock);
|
||||
|
||||
/* Verify staged CS exists and add to the staged list */
|
||||
|
@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
|||
}
|
||||
|
||||
list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
|
||||
|
||||
/* update stream map of the first CS */
|
||||
if (hdev->supports_wait_for_multi_cs)
|
||||
staged_cs->fence->stream_master_qid_map |=
|
||||
cs->fence->stream_master_qid_map;
|
||||
}
|
||||
|
||||
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
|
||||
|
@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
|
|||
hw_sob = &sync_stream_prop->hw_sob[sob];
|
||||
hw_sob->hdev = hdev;
|
||||
hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
|
||||
hw_sob->sob_addr =
|
||||
hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
|
||||
hw_sob->q_idx = q_idx;
|
||||
kref_init(&hw_sob->kref);
|
||||
}
|
||||
|
|
|
@ -124,7 +124,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
|
||||
spin_lock(&vm->idr_lock);
|
||||
handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
|
||||
GFP_KERNEL);
|
||||
GFP_ATOMIC);
|
||||
spin_unlock(&vm->idr_lock);
|
||||
|
||||
if (handle < 0) {
|
||||
|
@ -528,6 +528,33 @@ static inline int add_va_block(struct hl_device *hdev,
|
|||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* is_hint_crossing_range() - check if hint address crossing specified reserved
|
||||
* range.
|
||||
*/
|
||||
static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
|
||||
u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
|
||||
bool range_cross;
|
||||
|
||||
if (range_type == HL_VA_RANGE_TYPE_DRAM)
|
||||
range_cross =
|
||||
hl_mem_area_crosses_range(start_addr, size,
|
||||
prop->hints_dram_reserved_va_range.start_addr,
|
||||
prop->hints_dram_reserved_va_range.end_addr);
|
||||
else if (range_type == HL_VA_RANGE_TYPE_HOST)
|
||||
range_cross =
|
||||
hl_mem_area_crosses_range(start_addr, size,
|
||||
prop->hints_host_reserved_va_range.start_addr,
|
||||
prop->hints_host_reserved_va_range.end_addr);
|
||||
else
|
||||
range_cross =
|
||||
hl_mem_area_crosses_range(start_addr, size,
|
||||
prop->hints_host_hpage_reserved_va_range.start_addr,
|
||||
prop->hints_host_hpage_reserved_va_range.end_addr);
|
||||
|
||||
return range_cross;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_va_block() - get a virtual block for the given size and alignment.
|
||||
*
|
||||
|
@ -536,6 +563,8 @@ static inline int add_va_block(struct hl_device *hdev,
|
|||
* @size: requested block size.
|
||||
* @hint_addr: hint for requested address by the user.
|
||||
* @va_block_align: required alignment of the virtual block start address.
|
||||
* @range_type: va range type (host, dram)
|
||||
* @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
|
||||
*
|
||||
* This function does the following:
|
||||
* - Iterate on the virtual block list to find a suitable virtual block for the
|
||||
|
@ -545,13 +574,19 @@ static inline int add_va_block(struct hl_device *hdev,
|
|||
*/
|
||||
static u64 get_va_block(struct hl_device *hdev,
|
||||
struct hl_va_range *va_range,
|
||||
u64 size, u64 hint_addr, u32 va_block_align)
|
||||
u64 size, u64 hint_addr, u32 va_block_align,
|
||||
enum hl_va_range_type range_type,
|
||||
u32 flags)
|
||||
{
|
||||
struct hl_vm_va_block *va_block, *new_va_block = NULL;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
|
||||
align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
|
||||
align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
|
||||
dram_hint_mask = prop->dram_hints_align_mask;
|
||||
bool add_prev = false;
|
||||
bool is_align_pow_2 = is_power_of_2(va_range->page_size);
|
||||
bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
|
||||
bool force_hint = flags & HL_MEM_FORCE_HINT;
|
||||
|
||||
if (is_align_pow_2)
|
||||
align_mask = ~((u64)va_block_align - 1);
|
||||
|
@ -564,12 +599,20 @@ static u64 get_va_block(struct hl_device *hdev,
|
|||
size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
|
||||
va_range->page_size;
|
||||
|
||||
tmp_hint_addr = hint_addr;
|
||||
tmp_hint_addr = hint_addr & ~dram_hint_mask;
|
||||
|
||||
/* Check if we need to ignore hint address */
|
||||
if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
|
||||
(!is_align_pow_2 &&
|
||||
do_div(tmp_hint_addr, va_range->page_size))) {
|
||||
(!is_align_pow_2 && is_hint_dram_addr &&
|
||||
do_div(tmp_hint_addr, va_range->page_size))) {
|
||||
|
||||
if (force_hint) {
|
||||
/* Hint must be respected, so here we just fail */
|
||||
dev_err(hdev->dev,
|
||||
"Hint address 0x%llx is not page aligned - cannot be respected\n",
|
||||
hint_addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dev_dbg(hdev->dev,
|
||||
"Hint address 0x%llx will be ignored because it is not aligned\n",
|
||||
|
@ -596,6 +639,16 @@ static u64 get_va_block(struct hl_device *hdev,
|
|||
if (valid_size < size)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the hint address is 0 and the hints_range_reservation
* property is enabled, avoid allocating va blocks from the
|
||||
* range reserved for hint addresses
|
||||
*/
|
||||
if (prop->hints_range_reservation && !hint_addr)
|
||||
if (is_hint_crossing_range(range_type, valid_start,
|
||||
size, prop))
|
||||
continue;
|
||||
|
||||
/* Pick the minimal length block which has the required size */
|
||||
if (!new_va_block || (valid_size < reserved_valid_size)) {
|
||||
new_va_block = va_block;
|
||||
|
@ -618,6 +671,17 @@ static u64 get_va_block(struct hl_device *hdev,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (force_hint && reserved_valid_start != hint_addr) {
|
||||
/* Hint address must be respected. If we are here - this means
|
||||
* we could not respect it.
|
||||
*/
|
||||
dev_err(hdev->dev,
|
||||
"Hint address 0x%llx could not be respected\n",
|
||||
hint_addr);
|
||||
reserved_valid_start = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there is some leftover range due to reserving the new
|
||||
* va block, then return it to the main virtual addresses list.
|
||||
|
@ -670,7 +734,8 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
|
|||
enum hl_va_range_type type, u32 size, u32 alignment)
|
||||
{
|
||||
return get_va_block(hdev, ctx->va_range[type], size, 0,
|
||||
max(alignment, ctx->va_range[type]->page_size));
|
||||
max(alignment, ctx->va_range[type]->page_size),
|
||||
type, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -731,29 +796,16 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
|
|||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_sg_info() - get number of pages and the DMA address from SG list.
|
||||
* @sg: the SG list.
|
||||
* @dma_addr: pointer to DMA address to return.
|
||||
*
|
||||
* Calculate the number of consecutive pages described by the SG list. Take the
|
||||
* offset of the address in the first page, add to it the length and round it up
|
||||
* to the number of needed pages.
|
||||
*/
|
||||
static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
|
||||
{
|
||||
*dma_addr = sg_dma_address(sg);
|
||||
|
||||
return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
|
||||
(PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* init_phys_pg_pack_from_userptr() - initialize physical page pack from host
|
||||
* memory
|
||||
* @ctx: pointer to the context structure.
|
||||
* @userptr: userptr to initialize from.
|
||||
* @pphys_pg_pack: result pointer.
|
||||
* @force_regular_page: tell the function to ignore huge page optimization,
|
||||
* even if possible. Needed for cases where the device VA
|
||||
* is allocated before we know the composition of the
|
||||
* physical pages
|
||||
*
|
||||
* This function does the following:
|
||||
* - Pin the physical pages related to the given virtual block.
|
||||
|
@ -762,17 +814,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
|
|||
*/
|
||||
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
||||
struct hl_userptr *userptr,
|
||||
struct hl_vm_phys_pg_pack **pphys_pg_pack)
|
||||
struct hl_vm_phys_pg_pack **pphys_pg_pack,
|
||||
bool force_regular_page)
|
||||
{
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t dma_addr;
|
||||
u64 page_mask, total_npages;
|
||||
u32 npages, page_size = PAGE_SIZE,
|
||||
huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
|
||||
bool first = true, is_huge_page_opt = true;
|
||||
int rc, i, j;
|
||||
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack;
|
||||
bool first = true, is_huge_page_opt;
|
||||
u64 page_mask, total_npages;
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t dma_addr;
|
||||
int rc, i, j;
|
||||
|
||||
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
|
||||
if (!phys_pg_pack)
|
||||
|
@ -783,6 +836,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
|||
phys_pg_pack->asid = ctx->asid;
|
||||
atomic_set(&phys_pg_pack->mapping_cnt, 1);
|
||||
|
||||
is_huge_page_opt = (force_regular_page ? false : true);
|
||||
|
||||
/* Only if all dma_addrs are aligned to 2MB and their
|
||||
* sizes is at least 2MB, we can use huge page mapping.
|
||||
* We limit the 2MB optimization to this condition,
|
||||
|
@ -791,7 +846,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
|||
*/
|
||||
total_npages = 0;
|
||||
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
|
||||
npages = get_sg_info(sg, &dma_addr);
|
||||
npages = hl_get_sg_info(sg, &dma_addr);
|
||||
|
||||
total_npages += npages;
|
||||
|
||||
|
@ -820,7 +875,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
|
|||
|
||||
j = 0;
|
||||
for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
|
||||
npages = get_sg_info(sg, &dma_addr);
|
||||
npages = hl_get_sg_info(sg, &dma_addr);
|
||||
|
||||
/* align down to physical page size and save the offset */
|
||||
if (first) {
|
||||
|
@ -1001,11 +1056,12 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
struct hl_userptr *userptr = NULL;
|
||||
struct hl_vm_hash_node *hnode;
|
||||
struct hl_va_range *va_range;
|
||||
enum vm_type_t *vm_type;
|
||||
enum vm_type *vm_type;
|
||||
u64 ret_vaddr, hint_addr;
|
||||
u32 handle = 0, va_block_align;
|
||||
int rc;
|
||||
bool is_userptr = args->flags & HL_MEM_USERPTR;
|
||||
enum hl_va_range_type va_range_type = 0;
|
||||
|
||||
/* Assume failure */
|
||||
*device_addr = 0;
|
||||
|
@ -1023,7 +1079,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
}
|
||||
|
||||
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
|
||||
&phys_pg_pack);
|
||||
&phys_pg_pack, false);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"unable to init page pack for vaddr 0x%llx\n",
|
||||
|
@ -1031,14 +1087,14 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
goto init_page_pack_err;
|
||||
}
|
||||
|
||||
vm_type = (enum vm_type_t *) userptr;
|
||||
vm_type = (enum vm_type *) userptr;
|
||||
hint_addr = args->map_host.hint_addr;
|
||||
handle = phys_pg_pack->handle;
|
||||
|
||||
/* get required alignment */
|
||||
if (phys_pg_pack->page_size == page_size) {
|
||||
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
|
||||
|
||||
va_range_type = HL_VA_RANGE_TYPE_HOST;
|
||||
/*
|
||||
* huge page alignment may be needed in case of regular
|
||||
* page mapping, depending on the host VA alignment
|
||||
|
@ -1053,6 +1109,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
* mapping
|
||||
*/
|
||||
va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
|
||||
va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
|
||||
va_block_align = huge_page_size;
|
||||
}
|
||||
} else {
|
||||
|
@ -1072,12 +1129,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
|
||||
spin_unlock(&vm->idr_lock);
|
||||
|
||||
vm_type = (enum vm_type_t *) phys_pg_pack;
|
||||
vm_type = (enum vm_type *) phys_pg_pack;
|
||||
|
||||
hint_addr = args->map_device.hint_addr;
|
||||
|
||||
/* DRAM VA alignment is the same as the MMU page size */
|
||||
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
|
||||
va_range_type = HL_VA_RANGE_TYPE_DRAM;
|
||||
va_block_align = hdev->asic_prop.dmmu.page_size;
|
||||
}
|
||||
|
||||
|
@ -1100,8 +1158,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
goto hnode_err;
|
||||
}
|
||||
|
||||
if (hint_addr && phys_pg_pack->offset) {
|
||||
if (args->flags & HL_MEM_FORCE_HINT) {
|
||||
/* Fail if hint must be respected but it can't be */
|
||||
dev_err(hdev->dev,
|
||||
"Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
|
||||
hint_addr, phys_pg_pack->offset);
|
||||
rc = -EINVAL;
|
||||
goto va_block_err;
|
||||
}
|
||||
dev_dbg(hdev->dev,
|
||||
"Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
|
||||
hint_addr, phys_pg_pack->offset);
|
||||
}
|
||||
|
||||
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
|
||||
hint_addr, va_block_align);
|
||||
hint_addr, va_block_align,
|
||||
va_range_type, args->flags);
|
||||
if (!ret_vaddr) {
|
||||
dev_err(hdev->dev, "no available va block for handle %u\n",
|
||||
handle);
|
||||
|
@ -1181,17 +1254,19 @@ init_page_pack_err:
|
|||
static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
||||
bool ctx_free)
|
||||
{
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
|
||||
u64 vaddr = args->unmap.device_virt_addr;
|
||||
struct hl_vm_hash_node *hnode = NULL;
|
||||
struct asic_fixed_properties *prop;
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct hl_userptr *userptr = NULL;
|
||||
struct hl_va_range *va_range;
|
||||
u64 vaddr = args->unmap.device_virt_addr;
|
||||
enum vm_type_t *vm_type;
|
||||
enum vm_type *vm_type;
|
||||
bool is_userptr;
|
||||
int rc = 0;
|
||||
|
||||
prop = &hdev->asic_prop;
|
||||
|
||||
/* protect from double entrance */
|
||||
mutex_lock(&ctx->mem_hash_lock);
|
||||
hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
|
||||
|
@ -1214,8 +1289,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
if (*vm_type == VM_TYPE_USERPTR) {
|
||||
is_userptr = true;
|
||||
userptr = hnode->ptr;
|
||||
rc = init_phys_pg_pack_from_userptr(ctx, userptr,
|
||||
&phys_pg_pack);
|
||||
|
||||
rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
|
||||
false);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"unable to init page pack for vaddr 0x%llx\n",
|
||||
|
@ -1299,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
|
|||
kfree(hnode);
|
||||
|
||||
if (is_userptr) {
|
||||
rc = free_phys_pg_pack(hdev, phys_pg_pack);
|
||||
free_phys_pg_pack(hdev, phys_pg_pack);
|
||||
dma_unmap_host_va(hdev, userptr);
|
||||
}
|
||||
|
||||
|
@ -1669,6 +1745,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
userptr->pid = current->pid;
|
||||
userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
|
||||
if (!userptr->sgt)
|
||||
return -ENOMEM;
|
||||
|
@ -2033,7 +2110,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
|
|||
* another side effect error
|
||||
*/
|
||||
if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
|
||||
dev_notice(hdev->dev,
|
||||
dev_dbg(hdev->dev,
|
||||
"user released device without removing its memory mappings\n");
|
||||
|
||||
hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
|
||||
|
|
|
@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
|
|||
if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
|
||||
kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
|
||||
gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
|
||||
}
|
||||
|
||||
/* Make sure that if we arrive here again without init was called we
|
||||
* won't cause kernel panic. This can happen for example if we fail
|
||||
* during hard reset code at certain points
|
||||
*/
|
||||
hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
|
||||
/* Make sure that if we arrive here again without init being
* called, we won't cause a kernel panic. This can happen for
|
||||
* example if we fail during hard reset code at certain points
|
||||
*/
|
||||
hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
|
|||
goto unmap_pci_bars;
|
||||
}
|
||||
|
||||
dma_set_max_seg_size(&pdev->dev, U32_MAX);
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_pci_bars:
|
||||
|
|
|
@ -0,0 +1,718 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2021 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
#include <uapi/misc/habanalabs.h>
|
||||
#include "habanalabs.h"
|
||||
|
||||
/**
|
||||
* hl_format_as_binary - helper function, format an integer as binary
|
||||
* using supplied scratch buffer
|
||||
* @buf: the buffer to use
|
||||
* @buf_len: buffer capacity
|
||||
* @n: number to format
|
||||
*
|
||||
* Returns pointer to buffer
|
||||
*/
|
||||
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n)
|
||||
{
|
||||
int i;
|
||||
u32 bit;
|
||||
bool leading0 = true;
|
||||
char *wrptr = buf;
|
||||
|
||||
if (buf_len > 0 && buf_len < 3) {
|
||||
*wrptr = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
wrptr[0] = '0';
|
||||
wrptr[1] = 'b';
|
||||
wrptr += 2;
|
||||
/* Remove 3 characters from length for '0b' and '\0' termination */
|
||||
buf_len -= 3;
|
||||
|
||||
for (i = 0; i < sizeof(n) * BITS_PER_BYTE && buf_len; ++i, n <<= 1) {
|
||||
/* Writing bit calculation in one line would cause a false
|
||||
* positive static code analysis error, so splitting.
|
||||
*/
|
||||
bit = n & (1 << (sizeof(n) * BITS_PER_BYTE - 1));
|
||||
bit = !!bit;
|
||||
leading0 &= !bit;
|
||||
if (!leading0) {
|
||||
*wrptr = '0' + bit;
|
||||
++wrptr;
|
||||
}
|
||||
}
|
||||
|
||||
*wrptr = '\0';
|
||||
|
||||
return buf;
|
||||
}
|
||||
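A hedged usage example for hl_format_as_binary() (the surrounding function is hypothetical): formatting a 32-bit queue-id map for a debug print using a small scratch buffer:

static void print_qid_map(struct hl_device *hdev, u32 qid_map)
{
	char scratch[64];	/* enough for "0b" + 32 bits + '\0' */

	dev_dbg(hdev->dev, "stream master qid map: %s\n",
		hl_format_as_binary(scratch, sizeof(scratch), qid_map));
}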
|
||||
/**
|
||||
* resize_to_fit - helper function, resize buffer to fit given amount of data
|
||||
* @buf: destination buffer double pointer
|
||||
* @size: pointer to the size container
|
||||
* @desired_size: size the buffer must contain
|
||||
*
|
||||
* Returns 0 on success or error code on failure.
|
||||
* On success, the size of buffer is at least desired_size. Buffer is allocated
|
||||
* via vmalloc and must be freed with vfree.
|
||||
*/
|
||||
static int resize_to_fit(char **buf, size_t *size, size_t desired_size)
|
||||
{
|
||||
char *resized_buf;
|
||||
size_t new_size;
|
||||
|
||||
if (*size >= desired_size)
|
||||
return 0;
|
||||
|
||||
/* Not enough space to print all, have to resize */
|
||||
new_size = max_t(size_t, PAGE_SIZE, round_up(desired_size, PAGE_SIZE));
|
||||
resized_buf = vmalloc(new_size);
|
||||
if (!resized_buf)
|
||||
return -ENOMEM;
|
||||
memcpy(resized_buf, *buf, *size);
|
||||
vfree(*buf);
|
||||
*buf = resized_buf;
|
||||
*size = new_size;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_snprintf_resize() - print formatted data to buffer, resize as needed
|
||||
* @buf: buffer double pointer, to be written to and resized, must be either
|
||||
* NULL or allocated with vmalloc.
|
||||
* @size: current size of the buffer
|
||||
* @offset: current offset to write to
|
||||
* @format: format of the data
|
||||
*
|
||||
* This function writes formatted data into the buffer. If the buffer is not
* large enough, it is resized using vmalloc. The size may be modified if the
* buffer was resized; the offset is advanced by the number of bytes written,
* not including the terminating character.
|
||||
*
|
||||
* Returns 0 on success or error code on failure
|
||||
*
|
||||
* Note that the buffer has to be manually released using vfree.
|
||||
*/
|
||||
int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
|
||||
const char *format, ...)
|
||||
{
|
||||
va_list args;
|
||||
size_t length;
|
||||
int rc;
|
||||
|
||||
if (*buf == NULL && (*size != 0 || *offset != 0))
|
||||
return -EINVAL;
|
||||
|
||||
va_start(args, format);
|
||||
length = vsnprintf(*buf + *offset, *size - *offset, format, args);
|
||||
va_end(args);
|
||||
|
||||
rc = resize_to_fit(buf, size, *offset + length + 1);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
else if (rc > 0) {
|
||||
/* Resize was needed, write again */
|
||||
va_start(args, format);
|
||||
length = vsnprintf(*buf + *offset, *size - *offset, format,
|
||||
args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
*offset += length;
|
||||
|
||||
return 0;
|
||||
}
|
||||
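A hedged usage sketch for hl_snprintf_resize() (the function name and contents are illustrative): growing a report string across several calls; the resulting buffer must be released with vfree():

static char *build_report(void)
{
	char *buf = NULL;
	size_t size = 0, offset = 0;
	int i;

	for (i = 0; i < 3; i++)
		if (hl_snprintf_resize(&buf, &size, &offset, "line %d\n", i))
			break;

	return buf;	/* caller must vfree() the returned buffer */
}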
|
||||
/**
|
||||
* hl_sync_engine_to_string - convert engine type enum to string literal
|
||||
* @engine_type: engine type (TPC/MME/DMA)
|
||||
*
|
||||
* Return the resolved string literal
|
||||
*/
|
||||
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type)
|
||||
{
|
||||
switch (engine_type) {
|
||||
case ENGINE_DMA:
|
||||
return "DMA";
|
||||
case ENGINE_MME:
|
||||
return "MME";
|
||||
case ENGINE_TPC:
|
||||
return "TPC";
|
||||
}
|
||||
return "Invalid Engine Type";
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_print_resize_sync_engine - helper function, format engine name and ID
|
||||
* using hl_snprintf_resize
|
||||
* @buf: destination buffer double pointer to be used with hl_snprintf_resize
|
||||
* @size: pointer to the size container
|
||||
* @offset: pointer to the offset container
|
||||
* @engine_type: engine type (TPC/MME/DMA)
|
||||
* @engine_id: engine numerical id
|
||||
*
|
||||
* Returns 0 on success or error code on failure
|
||||
*/
|
||||
static int hl_print_resize_sync_engine(char **buf, size_t *size, size_t *offset,
|
||||
enum hl_sync_engine_type engine_type,
|
||||
u32 engine_id)
|
||||
{
|
||||
return hl_snprintf_resize(buf, size, offset, "%s%u",
|
||||
hl_sync_engine_to_string(engine_type), engine_id);
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_state_dump_get_sync_name - transform sync object id to name if available
|
||||
* @hdev: pointer to the device
|
||||
* @sync_id: sync object id
|
||||
*
|
||||
* Returns a name literal or NULL if not resolved.
|
||||
* Note: returning NULL shall not be considered as a failure, as not all
|
||||
* sync objects are named.
|
||||
*/
|
||||
const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id)
|
||||
{
|
||||
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||
struct hl_hw_obj_name_entry *entry;
|
||||
|
||||
hash_for_each_possible(sds->so_id_to_str_tb, entry,
|
||||
node, sync_id)
|
||||
if (sync_id == entry->id)
|
||||
return entry->name;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_state_dump_get_monitor_name - transform monitor object dump to monitor
|
||||
* name if available
|
||||
* @hdev: pointer to the device
|
||||
* @mon: monitor state dump
|
||||
*
|
||||
* Returns a name literal or NULL if not resolved.
|
||||
* Note: returning NULL shall not be considered as a failure, as not all
|
||||
* monitors are named.
|
||||
*/
|
||||
const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
|
||||
struct hl_mon_state_dump *mon)
|
||||
{
|
||||
struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
|
||||
struct hl_hw_obj_name_entry *entry;
|
||||
|
||||
hash_for_each_possible(sds->monitor_id_to_str_tb,
|
||||
entry, node, mon->id)
|
||||
if (mon->id == entry->id)
|
||||
return entry->name;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_state_dump_free_sync_to_engine_map - free sync object to engine map
|
||||
* @map: sync object to engine map
|
||||
*
|
||||
* Note: generic free implementation, the allocation is implemented per ASIC.
|
||||
*/
|
||||
void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map)
|
||||
{
|
||||
struct hl_sync_to_engine_map_entry *entry;
|
||||
struct hlist_node *tmp_node;
|
||||
int i;
|
||||
|
||||
hash_for_each_safe(map->tb, i, tmp_node, entry, node) {
|
||||
hash_del(&entry->node);
|
||||
kfree(entry);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* hl_state_dump_get_sync_to_engine - transform sync_id to
|
||||
* hl_sync_to_engine_map_entry if available for current id
|
||||
* @map: sync object to engine map
|
||||
* @sync_id: sync object id
|
||||
*
|
||||
* Returns the translation entry if found or NULL if not.
|
||||
* Note, returned NULL shall not be considered as a failure as the map
|
||||
* does not cover all possible, it is a best effort sync ids.
|
||||
*/
|
||||
static struct hl_sync_to_engine_map_entry *
|
||||
hl_state_dump_get_sync_to_engine(struct hl_sync_to_engine_map *map, u32 sync_id)
|
||||
{
|
||||
struct hl_sync_to_engine_map_entry *entry;
|
||||
|
||||
hash_for_each_possible(map->tb, entry, node, sync_id)
|
||||
if (entry->sync_id == sync_id)
|
||||
return entry;
|
||||
return NULL;
|
||||
}
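
/*
 * A hedged sketch of how an ASIC-specific gen_sync_to_engine_map() callback
 * could populate the map consumed by the lookup above. The helper that
 * resolves a SOB register address (example_tpc0_sob_addr) and the single
 * hard-coded entry are illustrative assumptions, not the driver's actual
 * implementation.
 */
static int example_gen_sync_to_engine_map(struct hl_device *hdev,
					struct hl_sync_to_engine_map *map)
{
	struct hl_sync_to_engine_map_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->engine_type = ENGINE_TPC;
	entry->engine_id = 0;
	/* the key is the SOB address, matching the (u32)sync_object_addr lookup */
	entry->sync_id = example_tpc0_sob_addr(hdev);	/* assumed helper */
	hash_add(map->tb, &entry->node, entry->sync_id);

	return 0;
}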

/**
 * hl_state_dump_read_sync_objects - read sync objects array
 * @hdev: pointer to the device
 * @index: sync manager block index starting with E_N
 *
 * Returns array of size SP_SYNC_OBJ_AMOUNT on success or NULL on failure
 */
static u32 *hl_state_dump_read_sync_objects(struct hl_device *hdev, u32 index)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	u32 *sync_objects;
	s64 base_addr; /* Base addr can be negative */
	int i;

	base_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
			sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;

	sync_objects = vmalloc(sds->props[SP_SYNC_OBJ_AMOUNT] * sizeof(u32));
	if (!sync_objects)
		return NULL;

	for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i)
		sync_objects[i] = RREG32(base_addr + i * sizeof(u32));

	return sync_objects;
}

/**
 * hl_state_dump_free_sync_objects - free sync objects array allocated by
 * hl_state_dump_read_sync_objects
 * @sync_objects: sync objects array
 */
static void hl_state_dump_free_sync_objects(u32 *sync_objects)
{
	vfree(sync_objects);
}


/**
 * hl_state_dump_print_syncs_single_block - print active sync objects on a
 * single block
 * @hdev: pointer to the device
 * @index: sync manager block index starting with E_N
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 * @map: sync engines names map
 *
 * Returns 0 on success or error code on failure
 */
static int
hl_state_dump_print_syncs_single_block(struct hl_device *hdev, u32 index,
				char **buf, size_t *size, size_t *offset,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	const char *sync_name;
	u32 *sync_objects = NULL;
	int rc = 0, i;

	if (sds->sync_namager_names) {
		rc = hl_snprintf_resize(
			buf, size, offset, "%s\n",
			sds->sync_namager_names[index]);
		if (rc)
			goto out;
	}

	sync_objects = hl_state_dump_read_sync_objects(hdev, index);
	if (!sync_objects) {
		rc = -ENOMEM;
		goto out;
	}

	for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) {
		struct hl_sync_to_engine_map_entry *entry;
		u64 sync_object_addr;

		if (!sync_objects[i])
			continue;

		sync_object_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
				sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index +
				i * sizeof(u32);

		rc = hl_snprintf_resize(buf, size, offset, "sync id: %u", i);
		if (rc)
			goto free_sync_objects;
		sync_name = hl_state_dump_get_sync_name(hdev, i);
		if (sync_name) {
			rc = hl_snprintf_resize(buf, size, offset, " %s",
						sync_name);
			if (rc)
				goto free_sync_objects;
		}
		rc = hl_snprintf_resize(buf, size, offset, ", value: %u",
					sync_objects[i]);
		if (rc)
			goto free_sync_objects;

		/* Append engine string */
		entry = hl_state_dump_get_sync_to_engine(map,
			(u32)sync_object_addr);
		if (entry) {
			rc = hl_snprintf_resize(buf, size, offset,
						", Engine: ");
			if (rc)
				goto free_sync_objects;
			rc = hl_print_resize_sync_engine(buf, size, offset,
						entry->engine_type,
						entry->engine_id);
			if (rc)
				goto free_sync_objects;
		}

		rc = hl_snprintf_resize(buf, size, offset, "\n");
		if (rc)
			goto free_sync_objects;
	}

free_sync_objects:
	hl_state_dump_free_sync_objects(sync_objects);
out:
	return rc;
}

/**
 * hl_state_dump_print_syncs - print active sync objects
 * @hdev: pointer to the device
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 *
 * Returns 0 on success or error code on failure
 */
static int hl_state_dump_print_syncs(struct hl_device *hdev,
					char **buf, size_t *size,
					size_t *offset)

{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct hl_sync_to_engine_map *map;
	u32 index;
	int rc = 0;

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	rc = sds->funcs.gen_sync_to_engine_map(hdev, map);
	if (rc)
		goto free_map_mem;

	rc = hl_snprintf_resize(buf, size, offset, "Non zero sync objects:\n");
	if (rc)
		goto out;

	if (sds->sync_namager_names) {
		for (index = 0; sds->sync_namager_names[index]; ++index) {
			rc = hl_state_dump_print_syncs_single_block(
				hdev, index, buf, size, offset, map);
			if (rc)
				goto out;
		}
	} else {
		for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
			rc = hl_state_dump_print_syncs_single_block(
				hdev, index, buf, size, offset, map);
			if (rc)
				goto out;
		}
	}

out:
	hl_state_dump_free_sync_to_engine_map(map);
free_map_mem:
	kfree(map);

	return rc;
}

/**
 * hl_state_dump_alloc_read_sm_block_monitors - read monitors for a specific
 * block
 * @hdev: pointer to the device
 * @index: sync manager block index starting with E_N
 *
 * Returns an array of monitor data of size SP_MONITORS_AMOUNT or NULL
 * on error
 */
static struct hl_mon_state_dump *
hl_state_dump_alloc_read_sm_block_monitors(struct hl_device *hdev, u32 index)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct hl_mon_state_dump *monitors;
	s64 base_addr; /* Base addr can be negative */
	int i;

	monitors = vmalloc(sds->props[SP_MONITORS_AMOUNT] *
			   sizeof(struct hl_mon_state_dump));
	if (!monitors)
		return NULL;

	base_addr = sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;

	for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
		monitors[i].id = i;
		monitors[i].wr_addr_low =
			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_LOW] +
				i * sizeof(u32));

		monitors[i].wr_addr_high =
			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_HIGH] +
				i * sizeof(u32));

		monitors[i].wr_data =
			RREG32(base_addr + sds->props[SP_MON_OBJ_WR_DATA] +
				i * sizeof(u32));

		monitors[i].arm_data =
			RREG32(base_addr + sds->props[SP_MON_OBJ_ARM_DATA] +
				i * sizeof(u32));

		monitors[i].status =
			RREG32(base_addr + sds->props[SP_MON_OBJ_STATUS] +
				i * sizeof(u32));
	}

	return monitors;
}

/**
 * hl_state_dump_free_monitors - free the monitors structure
 * @monitors: monitors array created with
 *            hl_state_dump_alloc_read_sm_block_monitors
 */
static void hl_state_dump_free_monitors(struct hl_mon_state_dump *monitors)
{
	vfree(monitors);
}

/**
 * hl_state_dump_print_monitors_single_block - print active monitors on a
 * single block
 * @hdev: pointer to the device
 * @index: sync manager block index starting with E_N
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 *
 * Returns 0 on success or error code on failure
 */
static int hl_state_dump_print_monitors_single_block(struct hl_device *hdev,
						u32 index,
						char **buf, size_t *size,
						size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct hl_mon_state_dump *monitors = NULL;
	int rc = 0, i;

	if (sds->sync_namager_names) {
		rc = hl_snprintf_resize(
			buf, size, offset, "%s\n",
			sds->sync_namager_names[index]);
		if (rc)
			goto out;
	}

	monitors = hl_state_dump_alloc_read_sm_block_monitors(hdev, index);
	if (!monitors) {
		rc = -ENOMEM;
		goto out;
	}

	for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
		if (!(sds->funcs.monitor_valid(&monitors[i])))
			continue;

		/* Monitor is valid, dump it */
		rc = sds->funcs.print_single_monitor(buf, size, offset, hdev,
							&monitors[i]);
		if (rc)
			goto free_monitors;

		hl_snprintf_resize(buf, size, offset, "\n");
	}

free_monitors:
	hl_state_dump_free_monitors(monitors);
out:
	return rc;
}

/**
 * hl_state_dump_print_monitors - print active monitors
 * @hdev: pointer to the device
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 *
 * Returns 0 on success or error code on failure
 */
static int hl_state_dump_print_monitors(struct hl_device *hdev,
					char **buf, size_t *size,
					size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	u32 index;
	int rc = 0;

	rc = hl_snprintf_resize(buf, size, offset,
		"Valid (armed) monitor objects:\n");
	if (rc)
		goto out;

	if (sds->sync_namager_names) {
		for (index = 0; sds->sync_namager_names[index]; ++index) {
			rc = hl_state_dump_print_monitors_single_block(
				hdev, index, buf, size, offset);
			if (rc)
				goto out;
		}
	} else {
		for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
			rc = hl_state_dump_print_monitors_single_block(
				hdev, index, buf, size, offset);
			if (rc)
				goto out;
		}
	}

out:
	return rc;
}

/**
 * hl_state_dump_print_engine_fences - print active fences for a specific
 * engine
 * @hdev: pointer to the device
 * @engine_type: engine type to use
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 */
static int
hl_state_dump_print_engine_fences(struct hl_device *hdev,
				  enum hl_sync_engine_type engine_type,
				  char **buf, size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = 0, i, n_fences;
	u64 base_addr, next_fence;

	switch (engine_type) {
	case ENGINE_TPC:
		n_fences = sds->props[SP_NUM_OF_TPC_ENGINES];
		base_addr = sds->props[SP_TPC0_CMDQ];
		next_fence = sds->props[SP_NEXT_TPC];
		break;
	case ENGINE_MME:
		n_fences = sds->props[SP_NUM_OF_MME_ENGINES];
		base_addr = sds->props[SP_MME_CMDQ];
		next_fence = sds->props[SP_NEXT_MME];
		break;
	case ENGINE_DMA:
		n_fences = sds->props[SP_NUM_OF_DMA_ENGINES];
		base_addr = sds->props[SP_DMA_CMDQ];
		next_fence = sds->props[SP_DMA_QUEUES_OFFSET];
		break;
	default:
		return -EINVAL;
	}
	for (i = 0; i < n_fences; ++i) {
		rc = sds->funcs.print_fences_single_engine(
			hdev,
			base_addr + next_fence * i +
				sds->props[SP_FENCE0_CNT_OFFSET],
			base_addr + next_fence * i +
				sds->props[SP_CP_STS_OFFSET],
			engine_type, i, buf, size, offset);
		if (rc)
			goto out;
	}
out:
	return rc;
}

/**
 * hl_state_dump_print_fences - print active fences
 * @hdev: pointer to the device
 * @buf: destination buffer double pointer to be used with hl_snprintf_resize
 * @size: pointer to the size container
 * @offset: pointer to the offset container
 */
static int hl_state_dump_print_fences(struct hl_device *hdev, char **buf,
				      size_t *size, size_t *offset)
{
	int rc = 0;

	rc = hl_snprintf_resize(buf, size, offset, "Valid (armed) fences:\n");
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_TPC, buf, size, offset);
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_MME, buf, size, offset);
	if (rc)
		goto out;

	rc = hl_state_dump_print_engine_fences(hdev, ENGINE_DMA, buf, size, offset);
	if (rc)
		goto out;

out:
	return rc;
}

/**
 * hl_state_dump() - dump system state
 * @hdev: pointer to device structure
 */
int hl_state_dump(struct hl_device *hdev)
{
	char *buf = NULL;
	size_t offset = 0, size = 0;
	int rc;

	rc = hl_snprintf_resize(&buf, &size, &offset,
				"Timestamp taken on: %llu\n\n",
				ktime_to_ns(ktime_get()));
	if (rc)
		goto err;

	rc = hl_state_dump_print_syncs(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	rc = hl_state_dump_print_monitors(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	rc = hl_state_dump_print_fences(hdev, &buf, &size, &offset);
	if (rc)
		goto err;

	hl_snprintf_resize(&buf, &size, &offset, "\n");

	hl_debugfs_set_state_dump(hdev, buf, size);

	return 0;
err:
	vfree(buf);
	return rc;
}
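
The finished text buffer is handed to debugfs by hl_debugfs_set_state_dump(), so the newest dump can be read back from user space. A minimal sketch of such a read is below; the device index (hl0) and the fixed debugfs mount point are assumptions for illustration.

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/habanalabs/hl0/state_dump", "r");

		if (!f)
			return 1;
		/* prints "Timestamp taken on: ..." followed by syncs, monitors and fences */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}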
@@ -9,8 +9,7 @@

 #include <linux/pci.h>

long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
				bool curr)
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;

@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
	return (long) result;
}

void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
				u64 freq)
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;

@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
				char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	char *str;
	char str[HL_STR_MAX];

	if (atomic_read(&hdev->in_reset))
		str = "In reset";
	else if (hdev->disabled)
		str = "Malfunction";
	else if (hdev->needs_reset)
		str = "Needs Reset";
	else
		str = "Operational";
	strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);

	/* use uppercase for backward compatibility */
	str[0] = 'A' + (str[0] - 'a');

	return sprintf(buf, "%s\n", str);
}
@@ -36,6 +36,8 @@
 #define NUMBER_OF_INTERRUPTS	(NUMBER_OF_CMPLT_QUEUES + \
					NUMBER_OF_CPU_HW_QUEUES)

#define GAUDI_STREAM_MASTER_ARR_SIZE	8

#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif

@@ -50,6 +52,8 @@
#define DC_POWER_DEFAULT_PCI		60000		/* 60W */
#define DC_POWER_DEFAULT_PMC		60000		/* 60W */

#define DC_POWER_DEFAULT_PMC_SEC	97000		/* 97W */

#define GAUDI_CPU_TIMEOUT_USEC		30000000	/* 30s */

#define TPC_ENABLED_MASK		0xFF

@@ -62,7 +66,7 @@

#define DMA_MAX_TRANSFER_SIZE		U32_MAX

#define GAUDI_DEFAULT_CARD_NAME		"HL2000"
#define GAUDI_DEFAULT_CARD_NAME		"HL205"

#define GAUDI_MAX_PENDING_CS		SZ_16K


@@ -117,6 +121,7 @@
	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)

#define MONITOR_MAX_SOBS	8

/* DRAM Memory Map */


@@ -200,6 +205,18 @@
#define HW_CAP_TPC_MASK		GENMASK(31, 24)
#define HW_CAP_TPC_SHIFT	24

#define NEXT_SYNC_OBJ_ADDR_INTERVAL \
	(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \
	 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)
#define NUM_OF_MME_ENGINES	2
#define NUM_OF_MME_SUB_ENGINES	2
#define NUM_OF_TPC_ENGINES	8
#define NUM_OF_DMA_ENGINES	8
#define NUM_OF_QUEUES		5
#define NUM_OF_STREAMS		4
#define NUM_OF_FENCES		4
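
/*
 * Illustrative use of NEXT_SYNC_OBJ_ADDR_INTERVAL as the stride between sync
 * manager instances, mirroring the state-dump read loop. Using the E_N block
 * as the base and the names "index"/"sob_id" are assumptions for the example.
 */
u64 sob_addr = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
		index * NEXT_SYNC_OBJ_ADDR_INTERVAL +
		sob_id * sizeof(u32);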

#define GAUDI_CPU_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 39)) >> 39)
#define GAUDI_PCI_TO_CPU_ADDR(addr)			\
	do {						\


@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev,
		return -EINVAL;
	}

	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER,
					hdev->compute_ctx->asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER,
					hdev->compute_ctx->asid);

	msb = upper_32_bits(input->buffer_address) >> 8;
	msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK;
	WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb);


@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
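
/*
 * Illustrative note on the bit arithmetic above (not part of the patch):
 * each protection-bits word covers a 0x80-byte window of 32 registers that
 * are 4 bytes apart, so the bit index of a register inside the current
 * window is its byte offset within the window divided by 4.
 */
u32 bit = (mmTPC0_CFG_TPC_STALL & 0x7F) >> 2;	/* (offset within 0x80 window) / 4 */
mask |= 1U << bit;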

@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
	mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
	mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2);

@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static s64 goya_state_dump_specs_props[SP_MAX] = {0};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);

@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;

@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	return 0;
}

@@ -649,14 +654,14 @@ pci_init:
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	if (!hdev->pldm) {

@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev)
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;
	hdev->allow_external_soft_reset = true;
	hdev->supports_wait_for_multi_cs = false;

	goya_set_pci_memory_regions(hdev);
	hdev->asic_funcs->set_pci_memory_regions(hdev);

	return 0;

@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev)
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;


@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->linux_loaded = false;
	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;

@@ -2696,14 +2703,7 @@ disable_queues:
	return rc;
}

/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev)
	return goya_init_iatu(hdev);
}

static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
		return;
	}

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;


@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev)
	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static void goya_collective_wait_init_cs(struct hl_cs *cs)
static int goya_collective_wait_init_cs(struct hl_cs *cs)
{

	return 0;
}

static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
		u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
	}
}

static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}


static int goya_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}


static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
	.monitor_valid = goya_monitor_valid,
	.print_single_monitor = goya_print_single_monitor,
	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
	.print_fences_single_engine = goya_print_fences_single_engine,
};

static void goya_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = goya_state_dump_specs_props;
	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}

static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *goya_get_stream_master_qid_arr(void)
{
	return NULL;
}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,

@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = {
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.mmap = goya_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,

@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = {
	.enable_events_from_fw = goya_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
	.init_firmware_loader = goya_init_firmware_loader,
	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
	.state_dump_init = goya_state_dump_init,
	.get_sob_addr = &goya_get_sob_addr,
	.set_pci_memory_regions = goya_set_pci_memory_regions,
	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
};

/*

@@ -98,6 +98,18 @@ struct hl_eq_fw_alive {
	__u8 pad[7];
};

enum hl_pcie_addr_dec_cause {
	PCIE_ADDR_DEC_HBW_ERR_RESP,
	PCIE_ADDR_DEC_LBW_ERR_RESP,
	PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
};

struct hl_eq_pcie_addr_dec_data {
	/* enum hl_pcie_addr_dec_cause */
	__u8 addr_dec_cause;
	__u8 pad[7];
};

struct hl_eq_entry {
	struct hl_eq_header hdr;
	union {

@@ -106,6 +118,7 @@ struct hl_eq_entry {
		struct hl_eq_sm_sei_data sm_sei_data;
		struct cpucp_pkt_sync_err pkt_sync_err;
		struct hl_eq_fw_alive fw_alive;
		struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
		__le64 data[7];
	};
};

@@ -116,7 +129,7 @@ struct hl_eq_entry {
#define EQ_CTL_READY_MASK		0x80000000

#define EQ_CTL_EVENT_TYPE_SHIFT		16
#define EQ_CTL_EVENT_TYPE_MASK		0x03FF0000
#define EQ_CTL_EVENT_TYPE_MASK		0x0FFF0000

#define EQ_CTL_INDEX_SHIFT		0
#define EQ_CTL_INDEX_MASK		0x0000FFFF
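
/*
 * Sketch of decoding an EQ entry control word with the widened event-type
 * mask, mirroring the extraction done in goya_handle_eqe() above; reading
 * ctl from the entry header is an assumption of this example.
 */
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 event_type = (ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT;
u32 index = (ctl & EQ_CTL_INDEX_MASK) >> EQ_CTL_INDEX_SHIFT;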

@@ -300,7 +313,7 @@ enum pq_init_status {
 *       The packet's arguments specify the desired sensor and the field to
 *       set.
 *
 * CPUCP_PACKET_PCIE_THROUGHPUT_GET
 * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
 *       Get throughput of PCIe.
 *       The packet's arguments specify the transaction direction (TX/RX).
 *       The window measurement is 10[msec], and the return value is in KB/sec.

@@ -309,19 +322,19 @@ enum pq_init_status {
 *       Replay count measures number of "replay" events, which is basicly
 *       number of retries done by PCIe.
 *
 * CPUCP_PACKET_TOTAL_ENERGY_GET
 * CPUCP_PACKET_TOTAL_ENERGY_GET -
 *       Total Energy is measurement of energy from the time FW Linux
 *       is loaded. It is calculated by multiplying the average power
 *       by time (passed from armcp start). The units are in MilliJouls.
 *
 * CPUCP_PACKET_PLL_INFO_GET
 * CPUCP_PACKET_PLL_INFO_GET -
 *       Fetch frequencies of PLL from the required PLL IP.
 *       The packet's arguments specify the device PLL type
 *       Pll type is the PLL from device pll_index enum.
 *       The result is composed of 4 outputs, each is 16-bit
 *       frequency in MHz.
 *
 * CPUCP_PACKET_POWER_GET
 * CPUCP_PACKET_POWER_GET -
 *       Fetch the present power consumption of the device (Current * Voltage).
 *
 * CPUCP_PACKET_NIC_PFC_SET -

@@ -345,6 +358,24 @@ enum pq_init_status {
 * CPUCP_PACKET_MSI_INFO_SET -
 *       set the index number for each supported msi type going from
 *       host to device
 *
 * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
 *       Fetch the un/correctable counters values from the NIC MAC.
 *
 * CPUCP_PACKET_NIC_STAT_REGS_GET -
 *       Fetch various NIC MAC counters from the NIC STAT.
 *
 * CPUCP_PACKET_NIC_STAT_REGS_CLR -
 *       Clear the various NIC MAC counters in the NIC STAT.
 *
 * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
 *       Fetch all NIC MAC counters from the NIC STAT.
 *
 * CPUCP_PACKET_IS_IDLE_CHECK -
 *       Check if the device is IDLE in regard to the DMA/compute engines
 *       and QMANs. The f/w will return a bitmask where each bit represents
 *       a different engine or QMAN according to enum cpucp_idle_mask.
 *       The bit will be 1 if the engine is NOT idle.
 */

enum cpucp_packet_id {

@@ -385,6 +416,11 @@ enum cpucp_packet_id {
	CPUCP_PACKET_NIC_LPBK_SET,		/* internal */
	CPUCP_PACKET_NIC_MAC_CFG,		/* internal */
	CPUCP_PACKET_MSI_INFO_SET,		/* internal */
	CPUCP_PACKET_NIC_XPCS91_REGS_GET,	/* internal */
	CPUCP_PACKET_NIC_STAT_REGS_GET,		/* internal */
	CPUCP_PACKET_NIC_STAT_REGS_CLR,		/* internal */
	CPUCP_PACKET_NIC_STAT_REGS_ALL_GET,	/* internal */
	CPUCP_PACKET_IS_IDLE_CHECK,		/* internal */
};

#define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5

@@ -414,6 +450,11 @@ enum cpucp_packet_id {
#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT	1
#define CPUCP_PKT_VAL_LPBK_IN2_MASK	0x000000000000001Eull

#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT	0
#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK	0x0000000000000001ull
#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT	1
#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK	0x00000000FFFFFFFEull

/* heartbeat status bits */
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT	0
#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK	0x00000001

@@ -467,7 +508,8 @@ struct cpucp_packet {
		__le32 status_mask;
	};

	__le32 reserved;
	/* For NIC requests */
	__le32 port_index;
};

struct cpucp_unmask_irq_arr_packet {

@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet {
	__le32 irqs[0];
};

struct cpucp_nic_status_packet {
	struct cpucp_packet cpucp_pkt;
	__le32 length;
	__le32 data[0];
};

struct cpucp_array_data_packet {
	struct cpucp_packet cpucp_pkt;
	__le32 length;

@@ -595,6 +643,18 @@ enum pll_index {
	PLL_MAX
};

enum rl_index {
	TPC_RL = 0,
	MME_RL,
};

enum pvt_index {
	PVT_SW,
	PVT_SE,
	PVT_NW,
	PVT_NE
};

/* Event Queue Packets */

struct eq_generic_event {

@@ -700,6 +760,15 @@ struct cpucp_mac_addr {
	__u8 mac_addr[ETH_ALEN];
};

enum cpucp_serdes_type {
	TYPE_1_SERDES_TYPE,
	TYPE_2_SERDES_TYPE,
	HLS1_SERDES_TYPE,
	HLS1H_SERDES_TYPE,
	UNKNOWN_SERDES_TYPE,
	MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
};

struct cpucp_nic_info {
	struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
	__le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];

@@ -708,6 +777,40 @@ struct cpucp_nic_info {
	__le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
	__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
	__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
	__le16 serdes_type; /* enum cpucp_serdes_type */
	__u8 reserved[6];
};

/*
 * struct cpucp_nic_status - describes the status of a NIC port.
 * @port: NIC port index.
 * @bad_format_cnt: e.g. CRC.
 * @responder_out_of_sequence_psn_cnt: e.g NAK.
 * @high_ber_reinit_cnt: link reinit due to high BER.
 * @correctable_err_cnt: e.g. bit-flip.
 * @uncorrectable_err_cnt: e.g. MAC errors.
 * @retraining_cnt: re-training counter.
 * @up: is port up.
 * @pcs_link: has PCS link.
 * @phy_ready: is PHY ready.
 * @auto_neg: is Autoneg enabled.
 * @timeout_retransmission_cnt: timeout retransmission events
 * @high_ber_cnt: high ber events
 */
struct cpucp_nic_status {
	__le32 port;
	__le32 bad_format_cnt;
	__le32 responder_out_of_sequence_psn_cnt;
	__le32 high_ber_reinit;
	__le32 correctable_err_cnt;
	__le32 uncorrectable_err_cnt;
	__le32 retraining_cnt;
	__u8 up;
	__u8 pcs_link;
	__u8 phy_ready;
	__u8 auto_neg;
	__le32 timeout_retransmission_cnt;
	__le32 high_ber_cnt;
};

#endif /* CPUCP_IF_H */
@@ -78,6 +78,26 @@
 * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	Device is unusable and customer support
 *					should be contacted.
 *
 * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	HALT ACK from ARC0 is not received
 *					within specified retries after issuing
 *					HALT request. ARC0 appears to be in bad
 *					reset.
 *
 * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	HALT ACK from ARC1 is not received
 *					within specified retries after issuing
 *					HALT request. ARC1 appears to be in bad
 *					reset.
 *
 * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	RUN ACK from ARC0 is not received
 *					within specified timeout after issuing
 *					RUN request. ARC0 appears to be in bad
 *					reset.
 *
 * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	RUN ACK from ARC1 is not received
 *					within specified timeout after issuing
 *					RUN request. ARC1 appears to be in bad
 *					reset.
 *
 * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
 *					This is a main indication that the
 *					running FW populates the error

@@ -98,6 +118,10 @@
#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << 11)
#define CPU_BOOT_ERR0_PLL_FAIL			(1 << 12)
#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << 13)
#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	(1 << 14)
#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	(1 << 15)
#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	(1 << 16)
#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	(1 << 17)
#define CPU_BOOT_ERR0_ENABLED			(1 << 31)
#define CPU_BOOT_ERR1_ENABLED			(1 << 31)


@@ -186,6 +210,10 @@
 *					configured and is ready for use.
 *					Initialized in: ppboot
 *
 * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN	NIC MAC channels init is done by FW and
 *					any access to them is done via the FW.
 *					Initialized in: linux
 *
 * CPU_BOOT_DEV_STS0_DYN_PLL_EN		Dynamic PLL configuration is enabled.
 *					FW sends to host a bitmap of supported
 *					PLLs.

@@ -209,6 +237,21 @@
 *					prevent IRQs overriding each other.
 *					Initialized in: linux
 *
 * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
 *					NIC STAT and XPCS91 access is restricted
 *					and is done via FW only.
 *					Initialized in: linux
 *
 * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
 *					NIC STAT get all is supported.
 *					Initialized in: linux
 *
 * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
 *					F/W checks if the device is idle by reading defined set
 *					of registers. It returns a bitmask of all the engines,
 *					where a bit is set if the engine is not idle.
 *					Initialized in: linux
 *
 * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
 *					This is a main indication that the
 *					running FW populates the device status

@@ -236,10 +279,14 @@
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN			(1 << 15)
#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN			(1 << 16)
#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN		(1 << 17)
#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN			(1 << 18)
#define CPU_BOOT_DEV_STS0_DYN_PLL_EN			(1 << 19)
#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN		(1 << 20)
#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN			(1 << 21)
#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN		(1 << 22)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN		(1 << 23)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN		(1 << 24)
#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN		(1 << 25)
#define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
#define CPU_BOOT_DEV_STS1_ENABLED			(1 << 31)


@@ -313,10 +360,7 @@ struct cpu_dyn_regs {
	__le32 hw_state;
	__le32 kmd_msg_to_cpu;
	__le32 cpu_cmd_status_to_host;
	union {
		__le32 gic_host_irq_ctrl;
		__le32 gic_host_pi_upd_irq;
	};
	__le32 gic_host_pi_upd_irq;
	__le32 gic_tpc_qm_irq_ctrl;
	__le32 gic_mme_qm_irq_ctrl;
	__le32 gic_dma_qm_irq_ctrl;

@@ -324,7 +368,9 @@ struct cpu_dyn_regs {
	__le32 gic_dma_core_irq_ctrl;
	__le32 gic_host_halt_irq;
	__le32 gic_host_ints_irq;
	__le32 reserved1[24];		/* reserve for future use */
	__le32 gic_host_soft_rst_irq;
	__le32 gic_rot_qm_irq_ctrl;
	__le32 reserved1[22];		/* reserve for future use */
};

/* TODO: remove the desc magic after the code is updated to use message */

@@ -462,6 +508,11 @@ struct lkd_fw_comms_msg {
 *			Do not wait for BMC response.
 *
 * COMMS_LOW_PLL_OPP	Initialize PLLs for low OPP.
 *
 * COMMS_PREP_DESC_ELBI	Same as COMMS_PREP_DESC only that the memory
 *			space is allocated in a ELBI access only
 *			address range.
 *
 */
enum comms_cmd {
	COMMS_NOOP = 0,

@@ -474,6 +525,7 @@ enum comms_cmd {
	COMMS_GOTO_WFE = 7,
	COMMS_SKIP_BMC = 8,
	COMMS_LOW_PLL_OPP = 9,
	COMMS_PREP_DESC_ELBI = 10,
	COMMS_INVLD_LAST
};


@@ -126,6 +126,9 @@
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1	0x4F2004
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047	0x4F3FFC
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0	0x4F4000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0	0x4F4800
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0	0x4F5000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0	0x4F5800
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0	0x4F6000
#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511	0x4F67FC


@@ -449,4 +449,21 @@ enum axi_id {
#define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK		0x1
#define PCIE_AUX_FLR_CTRL_INT_MASK_MASK		0x2

#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT	0
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK	0x1
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT	1
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK	0x1FE
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT	0
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK		0xFF
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT	8
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK	0xFF00
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT	16
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK		0x10000
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT	17
#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK		0xFFFE0000
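
/*
 * Sketch of decoding a monitor ARM register, as read into the arm_data field
 * of the state dump, using the shift/mask pairs defined above.
 */
u32 arm = mon->arm_data;
u32 sid = (arm & SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK) >>
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT;
u32 sod = (arm & SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK) >>
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT;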
#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT			20
#define TPC0_QM_CP_STS_0_FENCE_ID_MASK			0x300000
#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT	22
#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK		0x400000

#endif /* GAUDI_MASKS_H_ */


@@ -12,8 +12,6 @@
 * PSOC scratch-pad registers
 */
#define mmHW_STATE			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
#define mmGIC_HOST_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_HOST_PI_UPD_IRQ_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmGIC_MME_QM_IRQ_CTRL_POLL_REG	mmPSOC_GLOBAL_CONF_SCRATCHPAD_3

@@ -276,7 +276,17 @@ enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL,
	HL_DEVICE_STATUS_IN_RESET,
	HL_DEVICE_STATUS_MALFUNCTION,
	HL_DEVICE_STATUS_NEEDS_RESET
	HL_DEVICE_STATUS_NEEDS_RESET,
	HL_DEVICE_STATUS_IN_DEVICE_CREATION,
	HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
};

enum hl_server_type {
	HL_SERVER_TYPE_UNKNOWN = 0,
	HL_SERVER_GAUDI_HLS1 = 1,
	HL_SERVER_GAUDI_HLS1H = 2,
	HL_SERVER_GAUDI_TYPE1 = 3,
	HL_SERVER_GAUDI_TYPE2 = 4
};

/* Opcode for management ioctl

@@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN		128
#define HL_INFO_CARD_NAME_MAX_LEN	16

/**
 * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
 * @sram_base_address: The first SRAM physical base address that is free to be
 *                     used by the user.
 * @dram_base_address: The first DRAM virtual or physical base address that is
 *                     free to be used by the user.
 * @dram_size: The DRAM size that is available to the user.
 * @sram_size: The SRAM size that is available to the user.
 * @num_of_events: The number of events that can be received from the f/w. This
 *                 is needed so the user can what is the size of the h/w events
 *                 array he needs to pass to the kernel when he wants to fetch
 *                 the event counters.
 * @device_id: PCI device ID of the ASIC.
 * @module_id: Module ID of the ASIC for mezzanine cards in servers
 *             (From OCP spec).
 * @first_available_interrupt_id: The first available interrupt ID for the user
 *                                to be used when it works with user interrupts.
 * @server_type: Server type that the Gaudi ASIC is currently installed in.
 *               The value is according to enum hl_server_type
 * @cpld_version: CPLD version on the board.
 * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
 * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
 *                           in some ASICs.
 * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
 *                    for Goya/Gaudi only.
 * @dram_enabled: Whether the DRAM is enabled.
 * @cpucp_version: The CPUCP f/w version.
 * @card_name: The card name as passed by the f/w.
 * @dram_page_size: The DRAM physical page size.
 */
struct hl_info_hw_ip_info {
	__u64 sram_base_address;
	__u64 dram_base_address;
	__u64 dram_size;
	__u32 sram_size;
	__u32 num_of_events;
	__u32 device_id; /* PCI Device ID */
	__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
	__u32 device_id;
	__u32 module_id;
	__u32 reserved;
	__u16 first_available_interrupt_id;
	__u16 reserved2;
	__u16 server_type;
	__u32 cpld_version;
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;

@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
	__u8 pad[2];
	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
	__u64 reserved3;
	__u64 reserved2;
	__u64 dram_page_size;
};

@@ -628,12 +670,21 @@ struct hl_cs_chunk {
		__u64 cb_handle;

		/* Relevant only when HL_CS_FLAGS_WAIT or
		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set
		 * This holds address of array of u64 values that contain
		 * signal CS sequence numbers. The wait described by this job
		 * will listen on all those signals (wait event per signal)
		 * signal CS sequence numbers. The wait described by
		 * this job will listen on all those signals
		 * (wait event per signal)
		 */
		__u64 signal_seq_arr;

		/*
		 * Relevant only when HL_CS_FLAGS_WAIT or
		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set
		 * along with HL_CS_FLAGS_ENCAP_SIGNALS.
		 * This is the CS sequence which has the encapsulated signals.
		 */
		__u64 encaps_signal_seq;
	};

	/* Index of queue to put the CB on */

@@ -651,6 +702,17 @@ struct hl_cs_chunk {
		 * Number of entries in signal_seq_arr
		 */
		__u32 num_signal_seq_arr;

		/* Relevant only when HL_CS_FLAGS_WAIT or
		 * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
		 * with HL_CS_FLAGS_ENCAP_SIGNALS
		 * This set the signals range that the user want to wait for
		 * out of the whole reserved signals range.
		 * e.g if the signals range is 20, and user don't want
		 * to wait for signal 8, so he set this offset to 7, then
		 * he call the API again with 9 and so on till 20.
		 */
		__u32 encaps_signal_offset;
	};

	/* HL_CS_CHUNK_FLAGS_* */

@@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT	0x400

/*
 * The encapsulated signals CS is merged into the existing CS ioctls.
 * In order to use this feature need to follow the below procedure:
 * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
 *    the output of this API will be the SOB offset from CFG_BASE.
 *    this address will be used to patch CB cmds to do the signaling for this
 *    SOB by incrementing it's value.
 *    for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
 *    CS type, note that this might fail if out-of-sync happened to the SOB
 *    value, in case other signaling request to the same SOB occurred between
 *    reserve-unreserve calls.
 * 2. Use the staged CS to do the encapsulated signaling jobs.
 *    use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
 *    along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
 *    field. This offset allows app to wait on part of the reserved signals.
 * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
 *    to wait for the encapsulated signals.
 */
#define HL_CS_FLAGS_ENCAP_SIGNALS		0x800
#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY	0x1000
#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY	0x2000
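
/*
 * Hypothetical user-space sketch of the three-step flow described in the
 * comment above. Field and flag names follow this header, but the name of
 * the flags member (cs_flags), the CS ioctl invocation and all surrounding
 * setup (device fd, CB patching, chunk arrays) are assumptions and omitted.
 */
union hl_cs_args args = {0};

/* 1. Reserve a block of signals on a stream (queue index is an assumption) */
args.in.cs_flags = HL_CS_FLAGS_RESERVE_SIGNALS_ONLY;
args.in.encaps_signals_count = 16;
args.in.encaps_signals_q_idx = 0;
/* after the CS ioctl: args.out.handle_id and args.out.sob_base_addr_offset */

/* 2. Submit the staged CS whose CBs signal the reserved SOB */
args.in.cs_flags = HL_CS_FLAGS_STAGED_SUBMISSION |
		   HL_CS_FLAGS_STAGED_SUBMISSION_FIRST |
		   HL_CS_FLAGS_ENCAP_SIGNALS;
args.in.encaps_sig_handle_id = handle_id;

/* 3. Wait on part of the reserved signals via a wait chunk */
chunk.encaps_signal_seq = cs_seq;
chunk.encaps_signal_offset = 7;		/* wait up to the 8th reserved signal */
args.in.cs_flags = HL_CS_FLAGS_WAIT | HL_CS_FLAGS_ENCAP_SIGNALS;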

#define HL_CS_STATUS_SUCCESS		0

#define HL_MAX_JOBS_PER_CS		512

@@ -690,10 +774,35 @@ struct hl_cs_in {
	/* holds address of array of hl_cs_chunk for execution phase */
	__u64 chunks_execute;

	/* Sequence number of a staged submission CS
	 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
	 */
	__u64 seq;
	union {
		/*
		 * Sequence number of a staged submission CS
		 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
		 * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
		 */
		__u64 seq;

		/*
		 * Encapsulated signals handle id
		 * Valid for two flows:
		 * 1. CS with encapsulated signals:
		 *    when HL_CS_FLAGS_STAGED_SUBMISSION and
		 *    HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
		 *    and HL_CS_FLAGS_ENCAP_SIGNALS are set.
		 * 2. unreserve signals:
		 *    valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
		 */
		__u32 encaps_sig_handle_id;

		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
		struct {
			/* Encapsulated signals number */
			__u32 encaps_signals_count;

			/* Encapsulated signals queue index (stream) */
			__u32 encaps_signals_q_idx;
		};
	};

	/* Number of chunks in restore phase array. Maximum number is
	 * HL_MAX_JOBS_PER_CS

@@ -718,14 +827,31 @@ struct hl_cs_in {
};

struct hl_cs_out {
	/*
	 * seq holds the sequence number of the CS to pass to wait ioctl. All
	 * values are valid except for 0 and ULLONG_MAX
	 */
	__u64 seq;
	/* HL_CS_STATUS_* */
	union {
		/*
		 * seq holds the sequence number of the CS to pass to wait
		 * ioctl. All values are valid except for 0 and ULLONG_MAX
		 */
		__u64 seq;

		/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
		struct {
			/* This is the resereved signal handle id */
			__u32 handle_id;

			/* This is the signals count */
			__u32 count;
		};
	};

	/* HL_CS_STATUS */
	__u32 status;
	__u32 pad;

	/*
	 * SOB base address offset
	 * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
	 */
	__u32 sob_base_addr_offset;
};

union hl_cs_args {

@@ -735,11 +861,18 @@ union hl_cs_args {

#define HL_WAIT_CS_FLAGS_INTERRUPT	0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
#define HL_WAIT_CS_FLAGS_MULTI_CS	0x4

#define HL_WAIT_MULTI_CS_LIST_MAX_LEN	32

struct hl_wait_cs_in {
	union {
		struct {
			/* Command submission sequence number */
			/*
			 * In case of wait_cs holds the CS sequence number.
			 * In case of wait for multi CS hold a user pointer to
			 * an array of CS sequence numbers
			 */
			__u64 seq;
			/* Absolute timeout to wait for command submission
			 * in microseconds

@@ -767,12 +900,17 @@ struct hl_wait_cs_in {

	/* Context ID - Currently not in use */
	__u32 ctx_id;

	/* HL_WAIT_CS_FLAGS_*
	 * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
	 * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
	 * not to specify an interrupt id ,set mask to all 1s.
	 */
	__u32 flags;

	/* Multi CS API info- valid entries in multi-CS array */
	__u8 seq_arr_len;
	__u8 pad[7];
};

#define HL_WAIT_CS_STATUS_COMPLETED	0

@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
	__u32 status;
	/* HL_WAIT_CS_STATUS_FLAG* */
	__u32 flags;
	/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
	/*
	 * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
	 * for wait_cs: timestamp of CS completion
	 * for wait_multi_cs: timestamp of FIRST CS completion
	 */
	__s64 timestamp_nsec;
	/* multi CS completion bitmap */
	__u32 cs_completion_map;
	__u32 pad;
};
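
/*
 * Hypothetical sketch of a multi-CS wait using the fields above; the wait
 * ioctl invocation itself and the two example sequence numbers are assumed
 * and omitted, error handling skipped.
 */
__u64 seq_arr[2] = { cs_seq_a, cs_seq_b };
union hl_wait_cs_args wait = {0};

wait.in.seq = (__u64) (uintptr_t) seq_arr;	/* user pointer to the CS sequence array */
wait.in.seq_arr_len = 2;
wait.in.flags = HL_WAIT_CS_FLAGS_MULTI_CS;
/* after the wait ioctl: bit i of wait.out.cs_completion_map is set if seq_arr[i] completed */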

union hl_wait_cs_args {

@@ -813,6 +958,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS	0x1
#define HL_MEM_SHARED		0x2
#define HL_MEM_USERPTR		0x4
#define HL_MEM_FORCE_HINT	0x8

struct hl_mem_in {
	union {