habanalabs: optimize command submission completion timestamp

The completion timestamp is currently taken during the actual command
submission release. Because the release runs from a work queue, it is
deferred, so the timestamp taken there is not accurate. Hence, take the
timestamp in the interrupt handler itself and propagate it to the release
function.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2023-01-10 11:41:39 +02:00 committed by Oded Gabbay
parent 9a7d530a80
commit 75b6984ef6
3 changed files with 23 additions and 6 deletions

View File

@ -398,8 +398,16 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
* flow by calling 'hl_hw_queue_update_ci'. * flow by calling 'hl_hw_queue_update_ci'.
*/ */
if (cs_needs_completion(cs) && if (cs_needs_completion(cs) &&
(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
/* In CS based completions, the timestamp is already available,
* so no need to extract it from job
*/
if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
cs->completion_timestamp = job->timestamp;
cs_put(cs); cs_put(cs);
}
hl_cs_job_put(job); hl_cs_job_put(job);
} }
@ -776,7 +784,7 @@ out:
} }
if (cs->timestamp) { if (cs->timestamp) {
cs->fence->timestamp = ktime_get(); cs->fence->timestamp = cs->completion_timestamp;
hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
cs->fence->timestamp, cs->fence->error); cs->fence->timestamp, cs->fence->error);
} }

View File

@ -1940,6 +1940,7 @@ struct hl_userptr {
* @type: CS_TYPE_*. * @type: CS_TYPE_*.
* @jobs_cnt: counter of submitted jobs on all queues. * @jobs_cnt: counter of submitted jobs on all queues.
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs. * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
* @completion_timestamp: timestamp of the last completed cs job.
* @sob_addr_offset: sob offset from the configuration base address. * @sob_addr_offset: sob offset from the configuration base address.
* @initial_sob_count: count of completed signals in SOB before current submission of signal or * @initial_sob_count: count of completed signals in SOB before current submission of signal or
* cs with encaps signals. * cs with encaps signals.
@ -1972,6 +1973,7 @@ struct hl_cs {
struct list_head staged_cs_node; struct list_head staged_cs_node;
struct list_head debugfs_list; struct list_head debugfs_list;
struct hl_cs_encaps_sig_handle *encaps_sig_hdl; struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
ktime_t completion_timestamp;
u64 sequence; u64 sequence;
u64 staged_sequence; u64 staged_sequence;
u64 timeout_jiffies; u64 timeout_jiffies;
@ -2007,6 +2009,7 @@ struct hl_cs {
* @debugfs_list: node in debugfs list of command submission jobs. * @debugfs_list: node in debugfs list of command submission jobs.
* @refcount: reference counter for usage of the CS job. * @refcount: reference counter for usage of the CS job.
* @queue_type: the type of the H/W queue this job is submitted to. * @queue_type: the type of the H/W queue this job is submitted to.
* @timestamp: timestamp upon job completion
* @id: the id of this job inside a CS. * @id: the id of this job inside a CS.
* @hw_queue_id: the id of the H/W queue this job is submitted to. * @hw_queue_id: the id of the H/W queue this job is submitted to.
* @user_cb_size: the actual size of the CB we got from the user. * @user_cb_size: the actual size of the CB we got from the user.
@ -2033,6 +2036,7 @@ struct hl_cs_job {
struct list_head debugfs_list; struct list_head debugfs_list;
struct kref refcount; struct kref refcount;
enum hl_queue_type queue_type; enum hl_queue_type queue_type;
ktime_t timestamp;
u32 id; u32 id;
u32 hw_queue_id; u32 hw_queue_id;
u32 user_cb_size; u32 user_cb_size;

View File

@ -72,15 +72,17 @@ static void irq_handle_eqe(struct work_struct *work)
* @hdev: pointer to device structure * @hdev: pointer to device structure
* @cs_seq: command submission sequence * @cs_seq: command submission sequence
* @cq: completion queue * @cq: completion queue
* @timestamp: interrupt timestamp
* *
*/ */
static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq) static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp)
{ {
struct hl_hw_queue *queue; struct hl_hw_queue *queue;
struct hl_cs_job *job; struct hl_cs_job *job;
queue = &hdev->kernel_queues[cq->hw_queue_id]; queue = &hdev->kernel_queues[cq->hw_queue_id];
job = queue->shadow_queue[hl_pi_2_offset(cs_seq)]; job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
job->timestamp = timestamp;
queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
atomic_inc(&queue->ci); atomic_inc(&queue->ci);
@ -91,9 +93,10 @@ static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
* *
* @hdev: pointer to device structure * @hdev: pointer to device structure
* @cs_seq: command submission sequence * @cs_seq: command submission sequence
* @timestamp: interrupt timestamp
* *
*/ */
static void cs_finish(struct hl_device *hdev, u16 cs_seq) static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp)
{ {
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_hw_queue *queue; struct hl_hw_queue *queue;
@ -113,6 +116,7 @@ static void cs_finish(struct hl_device *hdev, u16 cs_seq)
atomic_inc(&queue->ci); atomic_inc(&queue->ci);
} }
cs->completion_timestamp = timestamp;
queue_work(hdev->cs_cmplt_wq, &cs->finish_work); queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
} }
@ -130,6 +134,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
bool shadow_index_valid, entry_ready; bool shadow_index_valid, entry_ready;
u16 shadow_index; u16 shadow_index;
struct hl_cq_entry *cq_entry, *cq_base; struct hl_cq_entry *cq_entry, *cq_base;
ktime_t timestamp = ktime_get();
if (hdev->disabled) { if (hdev->disabled) {
dev_dbg(hdev->dev, dev_dbg(hdev->dev,
@ -171,9 +176,9 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
if (shadow_index_valid && !hdev->disabled) { if (shadow_index_valid && !hdev->disabled) {
if (hdev->asic_prop.completion_mode == if (hdev->asic_prop.completion_mode ==
HL_COMPLETION_MODE_CS) HL_COMPLETION_MODE_CS)
cs_finish(hdev, shadow_index); cs_finish(hdev, shadow_index, timestamp);
else else
job_finish(hdev, shadow_index, cq); job_finish(hdev, shadow_index, cq, timestamp);
} }
/* Clear CQ entry ready bit */ /* Clear CQ entry ready bit */