habanalabs: change aggregate cs counters to atomic
In case we have multiple contexts/processes, we can't just increment the aggregated counters. We need to make them atomic, as they can be incremented by multiple processes. Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
6c38e7c60f
commit
23c15ae615
|
@ -242,20 +242,6 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
|
|||
kfree(job);
|
||||
}
|
||||
|
||||
/*
 * cs_counters_aggregate - fold a context's CS drop counters into the device
 * @hdev: device whose aggregated_cs_counters are updated
 * @ctx: context whose cs_counters are read
 *
 * Adds each of the five per-context drop counters (device in reset, out of
 * memory, parsing, queue full, max CS in-flight) to the field of the same
 * name in hdev->aggregated_cs_counters. Every update is a plain "+="
 * read-modify-write on the device-wide structure; no locking or atomic
 * operation is performed in this function.
 */
static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
|
||||
{
|
||||
hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
|
||||
ctx->cs_counters.device_in_reset_drop_cnt;
|
||||
hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
|
||||
ctx->cs_counters.out_of_mem_drop_cnt;
|
||||
hdev->aggregated_cs_counters.parsing_drop_cnt +=
|
||||
ctx->cs_counters.parsing_drop_cnt;
|
||||
hdev->aggregated_cs_counters.queue_full_drop_cnt +=
|
||||
ctx->cs_counters.queue_full_drop_cnt;
|
||||
hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
|
||||
ctx->cs_counters.max_cs_in_flight_drop_cnt;
|
||||
}
|
||||
|
||||
static void cs_do_release(struct kref *ref)
|
||||
{
|
||||
struct hl_cs *cs = container_of(ref, struct hl_cs,
|
||||
|
@ -358,7 +344,6 @@ static void cs_do_release(struct kref *ref)
|
|||
|
||||
complete_all(&cs->fence->completion);
|
||||
hl_fence_put(cs->fence);
|
||||
cs_counters_aggregate(hdev, cs->ctx);
|
||||
|
||||
kfree(cs->jobs_in_queue_cnt);
|
||||
kfree(cs);
|
||||
|
@ -397,11 +382,14 @@ static void cs_timedout(struct work_struct *work)
|
|||
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
||||
enum hl_cs_type cs_type, struct hl_cs **cs_new)
|
||||
{
|
||||
struct hl_cs_compl *cs_cmpl;
|
||||
struct hl_cs_counters_atomic *cntr;
|
||||
struct hl_fence *other = NULL;
|
||||
struct hl_cs_compl *cs_cmpl;
|
||||
struct hl_cs *cs;
|
||||
int rc;
|
||||
|
||||
cntr = &hdev->aggregated_cs_counters;
|
||||
|
||||
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
|
||||
if (!cs)
|
||||
return -ENOMEM;
|
||||
|
@ -436,6 +424,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
|
|||
dev_dbg_ratelimited(hdev->dev,
|
||||
"Rejecting CS because of too many in-flights CS\n");
|
||||
ctx->cs_counters.max_cs_in_flight_drop_cnt++;
|
||||
atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
|
||||
rc = -EAGAIN;
|
||||
goto free_fence;
|
||||
}
|
||||
|
@ -610,6 +599,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_cs_chunk *cs_chunk_array;
|
||||
struct hl_cs_counters_atomic *cntr;
|
||||
struct hl_cs_job *job;
|
||||
struct hl_cs *cs;
|
||||
struct hl_cb *cb;
|
||||
|
@ -617,6 +607,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
u32 size_to_copy;
|
||||
int rc, i;
|
||||
|
||||
cntr = &hdev->aggregated_cs_counters;
|
||||
*cs_seq = ULLONG_MAX;
|
||||
|
||||
if (num_chunks > HL_MAX_JOBS_PER_CS) {
|
||||
|
@ -664,6 +655,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
&is_kernel_allocated_cb);
|
||||
if (rc) {
|
||||
hpriv->ctx->cs_counters.parsing_drop_cnt++;
|
||||
atomic64_inc(&cntr->parsing_drop_cnt);
|
||||
goto free_cs_object;
|
||||
}
|
||||
|
||||
|
@ -671,6 +663,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
|
||||
if (!cb) {
|
||||
hpriv->ctx->cs_counters.parsing_drop_cnt++;
|
||||
atomic64_inc(&cntr->parsing_drop_cnt);
|
||||
rc = -EINVAL;
|
||||
goto free_cs_object;
|
||||
}
|
||||
|
@ -685,6 +678,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
is_kernel_allocated_cb);
|
||||
if (!job) {
|
||||
hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
|
||||
atomic64_inc(&cntr->out_of_mem_drop_cnt);
|
||||
dev_err(hdev->dev, "Failed to allocate a new job\n");
|
||||
rc = -ENOMEM;
|
||||
if (is_kernel_allocated_cb)
|
||||
|
@ -718,6 +712,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
rc = cs_parser(hpriv, job);
|
||||
if (rc) {
|
||||
hpriv->ctx->cs_counters.parsing_drop_cnt++;
|
||||
atomic64_inc(&cntr->parsing_drop_cnt);
|
||||
dev_err(hdev->dev,
|
||||
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
|
||||
cs->ctx->asid, cs->sequence, job->id, rc);
|
||||
|
@ -727,6 +722,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
|
||||
if (int_queues_only) {
|
||||
hpriv->ctx->cs_counters.parsing_drop_cnt++;
|
||||
atomic64_inc(&cntr->parsing_drop_cnt);
|
||||
dev_err(hdev->dev,
|
||||
"Reject CS %d.%llu because only internal queues jobs are present\n",
|
||||
cs->ctx->asid, cs->sequence);
|
||||
|
@ -768,20 +764,22 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
|||
void __user *chunks, u32 num_chunks,
|
||||
u64 *cs_seq)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_ctx *ctx = hpriv->ctx;
|
||||
u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
|
||||
struct hl_cs_chunk *cs_chunk_array, *chunk;
|
||||
struct hw_queue_properties *hw_queue_prop;
|
||||
u64 *signal_seq_arr = NULL, signal_seq;
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_cs_counters_atomic *cntr;
|
||||
struct hl_fence *sig_fence = NULL;
|
||||
struct hl_ctx *ctx = hpriv->ctx;
|
||||
enum hl_queue_type q_type;
|
||||
struct hl_cs_job *job;
|
||||
struct hl_cs *cs;
|
||||
struct hl_cb *cb;
|
||||
enum hl_queue_type q_type;
|
||||
u64 *signal_seq_arr = NULL, signal_seq;
|
||||
u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
|
||||
int rc;
|
||||
|
||||
*cs_seq = ULLONG_MAX;
|
||||
cntr = &hdev->aggregated_cs_counters;
|
||||
|
||||
if (num_chunks > HL_MAX_JOBS_PER_CS) {
|
||||
dev_err(hdev->dev,
|
||||
|
@ -920,6 +918,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
|||
job = hl_cs_allocate_job(hdev, q_type, true);
|
||||
if (!job) {
|
||||
ctx->cs_counters.out_of_mem_drop_cnt++;
|
||||
atomic64_inc(&cntr->out_of_mem_drop_cnt);
|
||||
dev_err(hdev->dev, "Failed to allocate a new job\n");
|
||||
rc = -ENOMEM;
|
||||
goto put_cs;
|
||||
|
@ -934,6 +933,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
|||
q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
|
||||
if (!cb) {
|
||||
ctx->cs_counters.out_of_mem_drop_cnt++;
|
||||
atomic64_inc(&cntr->out_of_mem_drop_cnt);
|
||||
kfree(job);
|
||||
rc = -EFAULT;
|
||||
goto put_cs;
|
||||
|
|
|
@ -1067,6 +1067,21 @@ struct hl_cs_parser {
|
|||
u8 contains_dma_pkt;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct hl_cs_counters_atomic - command submission counters
|
||||
* @out_of_mem_drop_cnt: dropped due to memory allocation issue
|
||||
* @parsing_drop_cnt: dropped due to error in packet parsing
|
||||
* @queue_full_drop_cnt: dropped due to queue full
|
||||
* @device_in_reset_drop_cnt: dropped due to device in reset
|
||||
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
|
||||
*/
|
||||
struct hl_cs_counters_atomic {
|
||||
atomic64_t out_of_mem_drop_cnt;
|
||||
atomic64_t parsing_drop_cnt;
|
||||
atomic64_t queue_full_drop_cnt;
|
||||
atomic64_t device_in_reset_drop_cnt;
|
||||
atomic64_t max_cs_in_flight_drop_cnt;
|
||||
};
|
||||
|
||||
/*
|
||||
* MEMORY STRUCTURE
|
||||
|
@ -1649,7 +1664,7 @@ struct hl_device {
|
|||
|
||||
struct hl_device_idle_busy_ts *idle_busy_ts_arr;
|
||||
|
||||
struct hl_cs_counters aggregated_cs_counters;
|
||||
struct hl_cs_counters_atomic aggregated_cs_counters;
|
||||
|
||||
struct hl_mmu_priv mmu_priv;
|
||||
struct hl_mmu_funcs mmu_func;
|
||||
|
|
|
@ -314,10 +314,13 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
|||
|
||||
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
||||
{
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_info_cs_counters cs_counters = { {0} };
|
||||
u32 max_size = args->return_size;
|
||||
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
|
||||
struct hl_info_cs_counters cs_counters = { {0} };
|
||||
struct hl_device *hdev = hpriv->hdev;
|
||||
struct hl_cs_counters_atomic *cntr;
|
||||
u32 max_size = args->return_size;
|
||||
|
||||
cntr = &hdev->aggregated_cs_counters;
|
||||
|
||||
if ((!max_size) || (!out))
|
||||
return -EINVAL;
|
||||
|
@ -325,6 +328,17 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
|
|||
memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
|
||||
sizeof(struct hl_cs_counters));
|
||||
|
||||
cs_counters.cs_counters.out_of_mem_drop_cnt =
|
||||
atomic64_read(&cntr->out_of_mem_drop_cnt);
|
||||
cs_counters.cs_counters.parsing_drop_cnt =
|
||||
atomic64_read(&cntr->parsing_drop_cnt);
|
||||
cs_counters.cs_counters.queue_full_drop_cnt =
|
||||
atomic64_read(&cntr->queue_full_drop_cnt);
|
||||
cs_counters.cs_counters.device_in_reset_drop_cnt =
|
||||
atomic64_read(&cntr->device_in_reset_drop_cnt);
|
||||
cs_counters.cs_counters.max_cs_in_flight_drop_cnt =
|
||||
atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
|
||||
|
||||
if (hpriv->ctx)
|
||||
memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
|
||||
sizeof(struct hl_cs_counters));
|
||||
|
|
|
@ -484,17 +484,21 @@ static void init_signal_wait_cs(struct hl_cs *cs)
|
|||
*/
|
||||
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
||||
{
|
||||
struct hl_cs_counters_atomic *cntr;
|
||||
struct hl_ctx *ctx = cs->ctx;
|
||||
struct hl_device *hdev = ctx->hdev;
|
||||
struct hl_cs_job *job, *tmp;
|
||||
struct hl_hw_queue *q;
|
||||
u32 max_queues;
|
||||
int rc = 0, i, cq_cnt;
|
||||
u32 max_queues;
|
||||
|
||||
cntr = &hdev->aggregated_cs_counters;
|
||||
|
||||
hdev->asic_funcs->hw_queues_lock(hdev);
|
||||
|
||||
if (hl_device_disabled_or_in_reset(hdev)) {
|
||||
ctx->cs_counters.device_in_reset_drop_cnt++;
|
||||
atomic64_inc(&cntr->device_in_reset_drop_cnt);
|
||||
dev_err(hdev->dev,
|
||||
"device is disabled or in reset, CS rejected!\n");
|
||||
rc = -EPERM;
|
||||
|
@ -528,6 +532,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
|
|||
|
||||
if (rc) {
|
||||
ctx->cs_counters.queue_full_drop_cnt++;
|
||||
atomic64_inc(&cntr->queue_full_drop_cnt);
|
||||
goto unroll_cq_resv;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue