habanalabs: fix cs counters structure

Fix cs counters structure in uapi to be one flat structure instead
of two instances of the same nested structure.
Use atomic read/increment for the context counters so we can use
one structure for both the aggregated and the context counters.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
farah kassabri 2020-10-12 14:30:26 +03:00 committed by Oded Gabbay
parent 9bb86b63d8
commit e753643d51
6 changed files with 95 additions and 75 deletions

View File

@ -462,7 +462,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
if (other && !completion_done(&other->completion)) { if (other && !completion_done(&other->completion)) {
dev_dbg_ratelimited(hdev->dev, dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n"); "Rejecting CS because of too many in-flights CS\n");
ctx->cs_counters.max_cs_in_flight_drop_cnt++; atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
rc = -EAGAIN; rc = -EAGAIN;
goto free_fence; goto free_fence;
@ -720,7 +720,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = validate_queue_index(hdev, chunk, &queue_type, rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb); &is_kernel_allocated_cb);
if (rc) { if (rc) {
hpriv->ctx->cs_counters.parsing_drop_cnt++; atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt);
goto free_cs_object; goto free_cs_object;
} }
@ -728,7 +728,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
if (is_kernel_allocated_cb) { if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) { if (!cb) {
hpriv->ctx->cs_counters.parsing_drop_cnt++; atomic64_inc(
&hpriv->ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt);
rc = -EINVAL; rc = -EINVAL;
goto free_cs_object; goto free_cs_object;
@ -743,7 +744,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
job = hl_cs_allocate_job(hdev, queue_type, job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb); is_kernel_allocated_cb);
if (!job) { if (!job) {
hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; atomic64_inc(
&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM; rc = -ENOMEM;
@ -777,7 +779,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = cs_parser(hpriv, job); rc = cs_parser(hpriv, job);
if (rc) { if (rc) {
hpriv->ctx->cs_counters.parsing_drop_cnt++; atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev, dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@ -787,7 +789,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
} }
if (int_queues_only) { if (int_queues_only) {
hpriv->ctx->cs_counters.parsing_drop_cnt++; atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev, dev_err(hdev->dev,
"Reject CS %d.%llu because only internal queues jobs are present\n", "Reject CS %d.%llu because only internal queues jobs are present\n",
@ -880,7 +882,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
job = hl_cs_allocate_job(hdev, q_type, true); job = hl_cs_allocate_job(hdev, q_type, true);
if (!job) { if (!job) {
ctx->cs_counters.out_of_mem_drop_cnt++; atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
return -ENOMEM; return -ENOMEM;
@ -894,7 +896,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
cb = hl_cb_kernel_create(hdev, cb_size, cb = hl_cb_kernel_create(hdev, cb_size,
q_type == QUEUE_TYPE_HW && hdev->mmu_enable); q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
if (!cb) { if (!cb) {
ctx->cs_counters.out_of_mem_drop_cnt++; atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt);
kfree(job); kfree(job);
return -EFAULT; return -EFAULT;

View File

@ -936,6 +936,22 @@ struct hl_va_range {
u64 end_addr; u64 end_addr;
}; };
/**
* struct hl_cs_counters_atomic - command submission counters
* @out_of_mem_drop_cnt: dropped due to memory allocation issue
* @parsing_drop_cnt: dropped due to error in packet parsing
* @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
*/
struct hl_cs_counters_atomic {
atomic64_t out_of_mem_drop_cnt;
atomic64_t parsing_drop_cnt;
atomic64_t queue_full_drop_cnt;
atomic64_t device_in_reset_drop_cnt;
atomic64_t max_cs_in_flight_drop_cnt;
};
/** /**
* struct hl_ctx - user/kernel context. * struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area * @mem_hash: holds mapping from virtual address to virtual memory area
@ -954,6 +970,7 @@ struct hl_va_range {
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
* MMU hash or walking the PGT requires talking this lock. * MMU hash or walking the PGT requires talking this lock.
* @debugfs_list: node in debugfs list of contexts. * @debugfs_list: node in debugfs list of contexts.
* @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the * @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU. * device's MMU.
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
@ -976,26 +993,26 @@ struct hl_va_range {
struct hl_ctx { struct hl_ctx {
DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
struct hl_fpriv *hpriv; struct hl_fpriv *hpriv;
struct hl_device *hdev; struct hl_device *hdev;
struct kref refcount; struct kref refcount;
struct hl_fence **cs_pending; struct hl_fence **cs_pending;
struct hl_va_range *host_va_range; struct hl_va_range *host_va_range;
struct hl_va_range *host_huge_va_range; struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range; struct hl_va_range *dram_va_range;
struct mutex mem_hash_lock; struct mutex mem_hash_lock;
struct mutex mmu_lock; struct mutex mmu_lock;
struct list_head debugfs_list; struct list_head debugfs_list;
struct hl_cs_counters cs_counters; struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool; struct gen_pool *cb_va_pool;
u64 cs_sequence; u64 cs_sequence;
u64 *dram_default_hops; u64 *dram_default_hops;
spinlock_t cs_lock; spinlock_t cs_lock;
atomic64_t dram_phys_mem; atomic64_t dram_phys_mem;
atomic_t thread_ctx_switch_token; atomic_t thread_ctx_switch_token;
u32 thread_ctx_switch_wait_token; u32 thread_ctx_switch_wait_token;
u32 asid; u32 asid;
u32 handle; u32 handle;
}; };
/** /**
@ -1164,22 +1181,6 @@ struct hl_cs_parser {
u8 contains_dma_pkt; u8 contains_dma_pkt;
}; };
/**
* struct hl_info_cs_counters - command submission counters
* @out_of_mem_drop_cnt: dropped due to memory allocation issue
* @parsing_drop_cnt: dropped due to error in packet parsing
* @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
*/
struct hl_cs_counters_atomic {
atomic64_t out_of_mem_drop_cnt;
atomic64_t parsing_drop_cnt;
atomic64_t queue_full_drop_cnt;
atomic64_t device_in_reset_drop_cnt;
atomic64_t max_cs_in_flight_drop_cnt;
};
/* /*
* MEMORY STRUCTURE * MEMORY STRUCTURE
*/ */

View File

@ -315,7 +315,7 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{ {
void __user *out = (void __user *) (uintptr_t) args->return_pointer; void __user *out = (void __user *) (uintptr_t) args->return_pointer;
struct hl_info_cs_counters cs_counters = { {0} }; struct hl_info_cs_counters cs_counters = {0};
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_cs_counters_atomic *cntr; struct hl_cs_counters_atomic *cntr;
u32 max_size = args->return_size; u32 max_size = args->return_size;
@ -325,23 +325,34 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out)) if ((!max_size) || (!out))
return -EINVAL; return -EINVAL;
memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters, cs_counters.total_out_of_mem_drop_cnt =
sizeof(struct hl_cs_counters));
cs_counters.cs_counters.out_of_mem_drop_cnt =
atomic64_read(&cntr->out_of_mem_drop_cnt); atomic64_read(&cntr->out_of_mem_drop_cnt);
cs_counters.cs_counters.parsing_drop_cnt = cs_counters.total_parsing_drop_cnt =
atomic64_read(&cntr->parsing_drop_cnt); atomic64_read(&cntr->parsing_drop_cnt);
cs_counters.cs_counters.queue_full_drop_cnt = cs_counters.total_queue_full_drop_cnt =
atomic64_read(&cntr->queue_full_drop_cnt); atomic64_read(&cntr->queue_full_drop_cnt);
cs_counters.cs_counters.device_in_reset_drop_cnt = cs_counters.total_device_in_reset_drop_cnt =
atomic64_read(&cntr->device_in_reset_drop_cnt); atomic64_read(&cntr->device_in_reset_drop_cnt);
cs_counters.cs_counters.max_cs_in_flight_drop_cnt = cs_counters.total_max_cs_in_flight_drop_cnt =
atomic64_read(&cntr->max_cs_in_flight_drop_cnt); atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
if (hpriv->ctx) if (hpriv->ctx) {
memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters, cs_counters.ctx_out_of_mem_drop_cnt =
sizeof(struct hl_cs_counters)); atomic64_read(
&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
cs_counters.ctx_parsing_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.parsing_drop_cnt);
cs_counters.ctx_queue_full_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.queue_full_drop_cnt);
cs_counters.ctx_device_in_reset_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.device_in_reset_drop_cnt);
cs_counters.ctx_max_cs_in_flight_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
}
return copy_to_user(out, &cs_counters, return copy_to_user(out, &cs_counters,
min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;

View File

@ -523,7 +523,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
hdev->asic_funcs->hw_queues_lock(hdev); hdev->asic_funcs->hw_queues_lock(hdev);
if (hl_device_disabled_or_in_reset(hdev)) { if (hl_device_disabled_or_in_reset(hdev)) {
ctx->cs_counters.device_in_reset_drop_cnt++; atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
atomic64_inc(&cntr->device_in_reset_drop_cnt); atomic64_inc(&cntr->device_in_reset_drop_cnt);
dev_err(hdev->dev, dev_err(hdev->dev,
"device is disabled or in reset, CS rejected!\n"); "device is disabled or in reset, CS rejected!\n");
@ -557,7 +557,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
} }
if (rc) { if (rc) {
ctx->cs_counters.queue_full_drop_cnt++; atomic64_inc(
&ctx->cs_counters.queue_full_drop_cnt);
atomic64_inc(&cntr->queue_full_drop_cnt); atomic64_inc(&cntr->queue_full_drop_cnt);
goto unroll_cq_resv; goto unroll_cq_resv;
} }

View File

@ -1137,7 +1137,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
if (!job) { if (!job) {
ctx->cs_counters.out_of_mem_drop_cnt++; atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
return -ENOMEM; return -ENOMEM;
@ -1147,7 +1147,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
cb = hl_cb_kernel_create(hdev, cb_size, cb = hl_cb_kernel_create(hdev, cb_size,
hdev->mmu_enable && !patched_cb); hdev->mmu_enable && !patched_cb);
if (!cb) { if (!cb) {
ctx->cs_counters.out_of_mem_drop_cnt++; atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt);
kfree(job); kfree(job);
return -EFAULT; return -EFAULT;

View File

@ -407,23 +407,28 @@ struct hl_info_sync_manager {
/** /**
* struct hl_info_cs_counters - command submission counters * struct hl_info_cs_counters - command submission counters
* @out_of_mem_drop_cnt: dropped due to memory allocation issue * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue
* @parsing_drop_cnt: dropped due to error in packet parsing * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue
* @queue_full_drop_cnt: dropped due to queue full * @total_parsing_drop_cnt: total dropped due to error in packet parsing
* @device_in_reset_drop_cnt: dropped due to device in reset * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight * @total_queue_full_drop_cnt: total dropped due to queue full
* @ctx_queue_full_drop_cnt: context dropped due to queue full
* @total_device_in_reset_drop_cnt: total dropped due to device in reset
* @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
* @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
* @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
*/ */
struct hl_cs_counters {
__u64 out_of_mem_drop_cnt;
__u64 parsing_drop_cnt;
__u64 queue_full_drop_cnt;
__u64 device_in_reset_drop_cnt;
__u64 max_cs_in_flight_drop_cnt;
};
struct hl_info_cs_counters { struct hl_info_cs_counters {
struct hl_cs_counters cs_counters; __u64 total_out_of_mem_drop_cnt;
struct hl_cs_counters ctx_cs_counters; __u64 ctx_out_of_mem_drop_cnt;
__u64 total_parsing_drop_cnt;
__u64 ctx_parsing_drop_cnt;
__u64 total_queue_full_drop_cnt;
__u64 ctx_queue_full_drop_cnt;
__u64 total_device_in_reset_drop_cnt;
__u64 ctx_device_in_reset_drop_cnt;
__u64 total_max_cs_in_flight_drop_cnt;
__u64 ctx_max_cs_in_flight_drop_cnt;
}; };
enum gaudi_dcores { enum gaudi_dcores {