Merge tag 'misc-habanalabs-fixes-2019-07-29' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes: This tag contains two fixes when running in BE architecture: - Fix for F/W download. The F/W is in LE so use a function that doesn't do byte-swapping. - Fix for polling on host memory locations that are written by the device. The device always works in LE, so we need to do byte-swap when polling on those locations. * tag 'misc-habanalabs-fixes-2019-07-29' of git://people.freedesktop.org/~gabbayo/linux: habanalabs: fix host memory polling in BE architecture habanalabs: fix F/W download in BE architecture
This commit is contained in:
commit
d21a95e0ef
|
@ -683,7 +683,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
|
|||
|
||||
rc = hl_poll_timeout_memory(hdev,
|
||||
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
|
||||
100, jiffies_to_usecs(hdev->timeout_jiffies));
|
||||
100, jiffies_to_usecs(hdev->timeout_jiffies), false);
|
||||
|
||||
if (rc == -ETIMEDOUT) {
|
||||
dev_err(hdev->dev,
|
||||
|
|
|
@ -24,7 +24,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
|
|||
{
|
||||
const struct firmware *fw;
|
||||
const u64 *fw_data;
|
||||
size_t fw_size, i;
|
||||
size_t fw_size;
|
||||
int rc;
|
||||
|
||||
rc = request_firmware(&fw, fw_name, hdev->dev);
|
||||
|
@ -45,22 +45,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
|
|||
|
||||
fw_data = (const u64 *) fw->data;
|
||||
|
||||
if ((fw->size % 8) != 0)
|
||||
fw_size -= 8;
|
||||
|
||||
for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
|
||||
if (!(i & (0x80000 - 1))) {
|
||||
dev_dbg(hdev->dev,
|
||||
"copied so far %zu out of %zu for %s firmware",
|
||||
i, fw_size, fw_name);
|
||||
usleep_range(20, 100);
|
||||
}
|
||||
|
||||
writeq(*fw_data, dst);
|
||||
}
|
||||
|
||||
if ((fw->size % 8) != 0)
|
||||
writel(*(const u32 *) fw_data, dst);
|
||||
memcpy_toio(dst, fw_data, fw_size);
|
||||
|
||||
out:
|
||||
release_firmware(fw);
|
||||
|
@ -112,7 +97,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
|
|||
}
|
||||
|
||||
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
|
||||
(tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);
|
||||
(tmp == ARMCP_PACKET_FENCE_VAL), 1000,
|
||||
timeout, true);
|
||||
|
||||
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
|
||||
|
||||
|
|
|
@ -2864,7 +2864,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
|
|||
}
|
||||
|
||||
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
|
||||
(tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);
|
||||
(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
|
||||
timeout, true);
|
||||
|
||||
hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
|
||||
|
||||
|
@ -2945,7 +2946,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
|
|||
}
|
||||
|
||||
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
|
||||
1000, GOYA_TEST_QUEUE_WAIT_USEC);
|
||||
1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
|
||||
|
||||
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
|
||||
|
||||
|
|
|
@ -1062,9 +1062,17 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
|||
/*
|
||||
* address in this macro points always to a memory location in the
|
||||
* host's (server's) memory. That location is updated asynchronously
|
||||
* either by the direct access of the device or by another core
|
||||
* either by the direct access of the device or by another core.
|
||||
*
|
||||
* To work both in LE and BE architectures, we need to distinguish between the
|
||||
* two states (device or another core updates the memory location). Therefore,
|
||||
* if mem_written_by_device is true, the host memory being polled will be
|
||||
* updated directly by the device. If false, the host memory being polled will
|
||||
* be updated by host CPU. Required so host knows whether or not the memory
|
||||
* might need to be byte-swapped before returning value to caller.
|
||||
*/
|
||||
#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
|
||||
#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
|
||||
mem_written_by_device) \
|
||||
({ \
|
||||
ktime_t __timeout; \
|
||||
/* timeout should be longer when working with simulator */ \
|
||||
|
@ -1077,10 +1085,14 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
|
|||
/* Verify we read updates done by other cores or by device */ \
|
||||
mb(); \
|
||||
(val) = *((u32 *) (uintptr_t) (addr)); \
|
||||
if (mem_written_by_device) \
|
||||
(val) = le32_to_cpu(val); \
|
||||
if (cond) \
|
||||
break; \
|
||||
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
|
||||
(val) = *((u32 *) (uintptr_t) (addr)); \
|
||||
if (mem_written_by_device) \
|
||||
(val) = le32_to_cpu(val); \
|
||||
break; \
|
||||
} \
|
||||
if (sleep_us) \
|
||||
|
|
Loading…
Reference in New Issue