habanalabs: wait again for multi-CS if no CS completed

The original multi-CS design assumed that stream masters are used
exclusively (i.e. a multi-CS with a set of stream master QIDs will not get
completed by a CS that is not from the multi-CS set). This assumption is
inaccurate.

Thus, multi-CS behavior is now modified so that such a case is not treated
as an error.

Instead, if we get a multi-CS completion but detect that no CS from
the list has actually completed, we do another multi-CS wait (with a
modified timeout).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ohad Sharabi 2021-12-01 10:52:27 +02:00 committed by Oded Gabbay
parent 5b90e59d55
commit b02220536c
2 changed files with 51 additions and 52 deletions

View File

@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
* mcs fences.
*/
fence->mcs_handling_done = true;
/*
* Since CS (and its related fence) can be associated with only one
* multi CS context, once it triggered multi CS completion no need to
* continue checking other multi CS contexts.
*/
spin_unlock(&mcs_compl->lock);
break;
}
spin_unlock(&mcs_compl->lock);
@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
/*
* hl_usecs64_to_jiffies - convert a 64-bit microsecond count to jiffies
*
* @usecs: time span in microseconds
*
* Values up to U32_MAX are handed to usecs_to_jiffies(). Larger values are
* scaled to nanoseconds and handed to nsecs_to_jiffies(), with the nanosecond
* value saturated at U64_MAX so the multiplication cannot wrap.
*
* @return the time span expressed in jiffies
*/
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
/*
* Small values take the microsecond helper directly
* (presumably because usecs_to_jiffies() takes a 32-bit argument - confirm).
*/
if (usecs <= U32_MAX)
return usecs_to_jiffies(usecs);
/*
* If the value in nanoseconds would not fit in 64 bits, use the largest
* 64-bit value.
*/
if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
return nsecs_to_jiffies(U64_MAX);
/* Safe: usecs * NSEC_PER_USEC is below U64_MAX after the check above. */
return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
/*
* hl_wait_multi_cs_completion_init - init completion structure
*
@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
}
if (i == MULTI_CS_MAX_USER_CTX) {
dev_err(hdev->dev,
"no available multi-CS completion structure\n");
dev_err(hdev->dev, "no available multi-CS completion structure\n");
return ERR_PTR(-ENOMEM);
}
return mcs_compl;
@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini(
*
* @return 0 on success, otherwise non 0 error code
*/
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
struct multi_cs_completion *mcs_compl)
{
struct hl_device *hdev = mcs_data->ctx->hdev;
struct multi_cs_completion *mcs_compl;
long completion_rc;
mcs_compl = hl_wait_multi_cs_completion_init(hdev,
mcs_data->stream_master_qid_map);
if (IS_ERR(mcs_compl))
return PTR_ERR(mcs_compl);
completion_rc = wait_for_completion_interruptible_timeout(
&mcs_compl->completion,
usecs_to_jiffies(mcs_data->timeout_us));
completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
mcs_data->timeout_jiffies);
/* update timestamp */
if (completion_rc > 0)
mcs_data->timestamp = mcs_compl->timestamp;
hl_wait_multi_cs_completion_fini(mcs_compl);
mcs_data->wait_status = completion_rc;
return 0;
@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
*/
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev;
struct multi_cs_data mcs_data = {0};
union hl_wait_cs_args *args = data;
@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
goto put_ctx;
/* wait (with timeout) for the first CS to be completed */
mcs_data.timeout_us = args->in.timeout_us;
rc = hl_wait_multi_cs_completion(&mcs_data);
if (rc)
goto put_ctx;
mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map);
if (IS_ERR(mcs_compl)) {
rc = PTR_ERR(mcs_compl);
goto put_ctx;
}
while (true) {
rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
if (rc || (mcs_data.wait_status == 0))
break;
if (mcs_data.wait_status > 0) {
/*
* poll fences once again to update the CS map.
* no timestamp should be updated this time.
@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
mcs_data.update_ts = false;
rc = hl_cs_poll_fences(&mcs_data);
if (mcs_data.completion_bitmap)
break;
/*
* if hl_wait_multi_cs_completion returned before timeout (i.e.
* it got a completion) we expect to see at least one CS
* completed after the poll function.
* it got a completion) it either got completed by CS in the multi CS list
* (in which case the indication will be non empty completion_bitmap) or it
* got completed by CS submitted to one of the shared stream master but
* not in the multi CS list (in which case we should wait again but reinit
* the completion, modify the timeout and set timestamp as zero to let a CS
* related to the current multi-CS set a new, relevant, timestamp)
*/
if (!mcs_data.completion_bitmap) {
dev_warn_ratelimited(hdev->dev,
"Multi-CS got completion on wait but no CS completed\n");
rc = -EFAULT;
}
/* wait again with modified timeout */
mcs_data.timeout_jiffies = mcs_data.wait_status;
reinit_completion(&mcs_compl->completion);
mcs_compl->timestamp = 0;
}
hl_wait_multi_cs_completion_fini(mcs_compl);
put_ctx:
hl_ctx_put(ctx);
kfree(fence_arr);
@ -2741,7 +2755,7 @@ free_seq_arr:
}
/* update if some CS was gone */
if (mcs_data.timestamp)
if (!mcs_data.timestamp)
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
} else {
args->out.status = HL_WAIT_CS_STATUS_BUSY;
@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
/*
* hl_usecs64_to_jiffies - convert a 64-bit microsecond count to jiffies
*
* @usecs: time span in microseconds
*
* Values up to U32_MAX are handed to usecs_to_jiffies(). Larger values are
* scaled to nanoseconds and handed to nsecs_to_jiffies(), with the nanosecond
* value saturated at U64_MAX so the multiplication cannot wrap.
*
* @return the time span expressed in jiffies
*/
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
/*
* Small values take the microsecond helper directly
* (presumably because usecs_to_jiffies() takes a 32-bit argument - confirm).
*/
if (usecs <= U32_MAX)
return usecs_to_jiffies(usecs);
/*
* If the value in nanoseconds would not fit in 64 bits, use the largest
* 64-bit value.
*/
if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
return nsecs_to_jiffies(U64_MAX);
/* Safe: usecs * NSEC_PER_USEC is below U64_MAX after the check above. */
return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u64 timeout_us, u64 user_address,
u64 target_value, struct hl_user_interrupt *interrupt,

View File

@ -2362,7 +2362,7 @@ struct multi_cs_completion {
* @ctx: pointer to the context structure
* @fence_arr: array of fences of all CSs
* @seq_arr: array of CS sequence numbers
* @timeout_us: timeout in usec for waiting for CS to complete
* @timeout_jiffies: timeout in jiffies for waiting for CS to complete
* @timestamp: timestamp of first completed CS
* @wait_status: wait for CS status
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
@ -2376,7 +2376,7 @@ struct multi_cs_data {
struct hl_ctx *ctx;
struct hl_fence **fence_arr;
u64 *seq_arr;
s64 timeout_us;
s64 timeout_jiffies;
s64 timestamp;
long wait_status;
u32 completion_bitmap;