habanalabs: optimize command submission completion timestamp
The completion timestamp is currently taken during the actual command submission release. Because the release happens in a work queue, the timestamp taken there is not accurate. Hence, take the timestamp in the interrupt handler itself and propagate it to the release function.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
9a7d530a80
commit
75b6984ef6
|
@ -398,8 +398,16 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
|
||||||
* flow by calling 'hl_hw_queue_update_ci'.
|
* flow by calling 'hl_hw_queue_update_ci'.
|
||||||
*/
|
*/
|
||||||
if (cs_needs_completion(cs) &&
|
if (cs_needs_completion(cs) &&
|
||||||
(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW))
|
(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
|
||||||
|
|
||||||
|
/* In CS based completions, the timestamp is already available,
|
||||||
|
* so no need to extract it from job
|
||||||
|
*/
|
||||||
|
if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
|
||||||
|
cs->completion_timestamp = job->timestamp;
|
||||||
|
|
||||||
cs_put(cs);
|
cs_put(cs);
|
||||||
|
}
|
||||||
|
|
||||||
hl_cs_job_put(job);
|
hl_cs_job_put(job);
|
||||||
}
|
}
|
||||||
|
@ -776,7 +784,7 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cs->timestamp) {
|
if (cs->timestamp) {
|
||||||
cs->fence->timestamp = ktime_get();
|
cs->fence->timestamp = cs->completion_timestamp;
|
||||||
hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
|
hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
|
||||||
cs->fence->timestamp, cs->fence->error);
|
cs->fence->timestamp, cs->fence->error);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1940,6 +1940,7 @@ struct hl_userptr {
|
||||||
* @type: CS_TYPE_*.
|
* @type: CS_TYPE_*.
|
||||||
* @jobs_cnt: counter of submitted jobs on all queues.
|
* @jobs_cnt: counter of submitted jobs on all queues.
|
||||||
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
|
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
|
||||||
|
* @completion_timestamp: timestamp of the last completed cs job.
|
||||||
* @sob_addr_offset: sob offset from the configuration base address.
|
* @sob_addr_offset: sob offset from the configuration base address.
|
||||||
* @initial_sob_count: count of completed signals in SOB before current submission of signal or
|
* @initial_sob_count: count of completed signals in SOB before current submission of signal or
|
||||||
* cs with encaps signals.
|
* cs with encaps signals.
|
||||||
|
@ -1972,6 +1973,7 @@ struct hl_cs {
|
||||||
struct list_head staged_cs_node;
|
struct list_head staged_cs_node;
|
||||||
struct list_head debugfs_list;
|
struct list_head debugfs_list;
|
||||||
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
|
||||||
|
ktime_t completion_timestamp;
|
||||||
u64 sequence;
|
u64 sequence;
|
||||||
u64 staged_sequence;
|
u64 staged_sequence;
|
||||||
u64 timeout_jiffies;
|
u64 timeout_jiffies;
|
||||||
|
@ -2007,6 +2009,7 @@ struct hl_cs {
|
||||||
* @debugfs_list: node in debugfs list of command submission jobs.
|
* @debugfs_list: node in debugfs list of command submission jobs.
|
||||||
* @refcount: reference counter for usage of the CS job.
|
* @refcount: reference counter for usage of the CS job.
|
||||||
* @queue_type: the type of the H/W queue this job is submitted to.
|
* @queue_type: the type of the H/W queue this job is submitted to.
|
||||||
|
* @timestamp: timestamp upon job completion
|
||||||
* @id: the id of this job inside a CS.
|
* @id: the id of this job inside a CS.
|
||||||
* @hw_queue_id: the id of the H/W queue this job is submitted to.
|
* @hw_queue_id: the id of the H/W queue this job is submitted to.
|
||||||
* @user_cb_size: the actual size of the CB we got from the user.
|
* @user_cb_size: the actual size of the CB we got from the user.
|
||||||
|
@ -2033,6 +2036,7 @@ struct hl_cs_job {
|
||||||
struct list_head debugfs_list;
|
struct list_head debugfs_list;
|
||||||
struct kref refcount;
|
struct kref refcount;
|
||||||
enum hl_queue_type queue_type;
|
enum hl_queue_type queue_type;
|
||||||
|
ktime_t timestamp;
|
||||||
u32 id;
|
u32 id;
|
||||||
u32 hw_queue_id;
|
u32 hw_queue_id;
|
||||||
u32 user_cb_size;
|
u32 user_cb_size;
|
||||||
|
|
|
@ -72,15 +72,17 @@ static void irq_handle_eqe(struct work_struct *work)
|
||||||
* @hdev: pointer to device structure
|
* @hdev: pointer to device structure
|
||||||
* @cs_seq: command submission sequence
|
* @cs_seq: command submission sequence
|
||||||
* @cq: completion queue
|
* @cq: completion queue
|
||||||
|
* @timestamp: interrupt timestamp
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
|
static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp)
|
||||||
{
|
{
|
||||||
struct hl_hw_queue *queue;
|
struct hl_hw_queue *queue;
|
||||||
struct hl_cs_job *job;
|
struct hl_cs_job *job;
|
||||||
|
|
||||||
queue = &hdev->kernel_queues[cq->hw_queue_id];
|
queue = &hdev->kernel_queues[cq->hw_queue_id];
|
||||||
job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
|
job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
|
||||||
|
job->timestamp = timestamp;
|
||||||
queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
|
queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
|
||||||
|
|
||||||
atomic_inc(&queue->ci);
|
atomic_inc(&queue->ci);
|
||||||
|
@ -91,9 +93,10 @@ static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
|
||||||
*
|
*
|
||||||
* @hdev: pointer to device structure
|
* @hdev: pointer to device structure
|
||||||
* @cs_seq: command submission sequence
|
* @cs_seq: command submission sequence
|
||||||
|
* @timestamp: interrupt timestamp
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static void cs_finish(struct hl_device *hdev, u16 cs_seq)
|
static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp)
|
||||||
{
|
{
|
||||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||||
struct hl_hw_queue *queue;
|
struct hl_hw_queue *queue;
|
||||||
|
@ -113,6 +116,7 @@ static void cs_finish(struct hl_device *hdev, u16 cs_seq)
|
||||||
atomic_inc(&queue->ci);
|
atomic_inc(&queue->ci);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cs->completion_timestamp = timestamp;
|
||||||
queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
|
queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,6 +134,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
||||||
bool shadow_index_valid, entry_ready;
|
bool shadow_index_valid, entry_ready;
|
||||||
u16 shadow_index;
|
u16 shadow_index;
|
||||||
struct hl_cq_entry *cq_entry, *cq_base;
|
struct hl_cq_entry *cq_entry, *cq_base;
|
||||||
|
ktime_t timestamp = ktime_get();
|
||||||
|
|
||||||
if (hdev->disabled) {
|
if (hdev->disabled) {
|
||||||
dev_dbg(hdev->dev,
|
dev_dbg(hdev->dev,
|
||||||
|
@ -171,9 +176,9 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
|
||||||
if (shadow_index_valid && !hdev->disabled) {
|
if (shadow_index_valid && !hdev->disabled) {
|
||||||
if (hdev->asic_prop.completion_mode ==
|
if (hdev->asic_prop.completion_mode ==
|
||||||
HL_COMPLETION_MODE_CS)
|
HL_COMPLETION_MODE_CS)
|
||||||
cs_finish(hdev, shadow_index);
|
cs_finish(hdev, shadow_index, timestamp);
|
||||||
else
|
else
|
||||||
job_finish(hdev, shadow_index, cq);
|
job_finish(hdev, shadow_index, cq, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Clear CQ entry ready bit */
|
/* Clear CQ entry ready bit */
|
||||||
|
|
Loading…
Reference in New Issue