Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to request the driver to export
  a DMA-BUF object that represents a memory region on the device's DRAM.
  This is needed to enable peer-to-peer over PCIe between a habana device
  and an RDMA adapter (e.g. an mlx5 or efa RDMA adapter).
- Add a debugfs node to dynamically configure the CS timeout. Until now it
  was configurable only through a kernel module parameter.
- Fetch more comprehensive power information from the firmware.
- Always take a timestamp when waiting for a user interrupt, as the user
  needs that information to optimize the graph runtime compilation.
- Modify the user interrupt to look at a 64-bit user value as the fence,
  instead of a 32-bit value.
- Bypass reset in case of a repeated h/w error event after device reset,
  to prevent an endless loop of device resets.
- Fix several bugs in the multi-CS completion code.
- Fix a race condition in fd close/open.
- Update to the latest firmware headers.
- Add select CRC32 in Kconfig.
- Small fixes, cosmetics.

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
commit be24dd486d
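The interrupt-wait uAPI touched in the diff below now treats the user fence as a 64-bit value and reports a completion timestamp back to the caller. As a rough illustration of how userspace might drive it — a minimal sketch, assuming the v5.16 uapi header <misc/habanalabs.h> with its HL_IOCTL_WAIT_CS ioctl and an HL_WAIT_CS_FLAGS_INTERRUPT flag; the in/out field names are taken from the diff itself, while wait_on_fence() and the hardcoded timeout are hypothetical:

	/* Hedged sketch: wait until the 64-bit value at "addr" reaches "target",
	 * then report the driver-recorded completion timestamp.
	 * HL_WAIT_CS_FLAGS_INTERRUPT and the exact struct layout are assumptions;
	 * verify against include/uapi/misc/habanalabs.h for the kernel in use.
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/habanalabs.h>

	static int wait_on_fence(int dev_fd, uint64_t addr, uint64_t target)
	{
		union hl_wait_cs_args args;

		memset(&args, 0, sizeof(args));
		args.in.addr = addr;			/* user address polled as a fence */
		args.in.target = target;		/* now a full 64-bit compare value */
		args.in.interrupt_timeout_us = 100000;	/* arbitrary example timeout */
		args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT;

		if (ioctl(dev_fd, HL_IOCTL_WAIT_CS, &args))
			return -1;

		if (args.out.status == HL_WAIT_CS_STATUS_COMPLETED &&
		    (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD))
			printf("fence completed at %llu ns\n",
				(unsigned long long)args.out.timestamp_nsec);

		return 0;
	}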
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -226,6 +226,12 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+Date:           Sep 2021
+KernelVersion:  5.16
+Contact:        obitton@habana.ai
+Description:    Sets the command submission timeout value in seconds.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
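A userspace tool could drive the new node like any other debugfs attribute. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and an hl0 device instance; the 30-second value is an arbitrary example:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Writing a decimal number of seconds updates the CS timeout;
		 * per the write handler below, writing 0 disables it
		 * (MAX_SCHEDULE_TIMEOUT in the driver).
		 */
		const char *node = "/sys/kernel/debug/habanalabs/hl0/timeout_locked";
		int fd = open(node, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "30", strlen("30")) < 0)
			perror("write");
		close(fd);
		return 0;
	}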
diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig
@@ -8,6 +8,8 @@ config HABANA_AI
 	depends on PCI && HAS_IOMEM
 	select GENERIC_ALLOCATOR
 	select HWMON
+	select DMA_SHARED_BUFFER
+	select CRC32
 	help
 	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
 	  designed to accelerate Deep Learning inference and training workloads.
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
 		common/command_submission.o common/firmware_if.o \
-		common/state_dump.o
+		common/state_dump.o common/hwmgr.o
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 	fence->cs_sequence = sequence;
 	fence->error = 0;
 	fence->timestamp = ktime_set(0, 0);
+	fence->mcs_handling_done = false;
 	init_completion(&fence->completion);
 }
 
@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 	/* Don't cancel TDR in case this CS was timedout because we might be
 	 * running from the TDR context
 	 */
-	if (cs && (cs->timedout ||
-			hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
 		return;
 
-	if (cs && cs->tdr_active)
+	if (cs->tdr_active)
 		cancel_delayed_work_sync(&cs->work_tdr);
 
 	spin_lock(&hdev->cs_mirror_lock);
@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
 			mcs_compl->timestamp =
 					ktime_to_ns(fence->timestamp);
 			complete_all(&mcs_compl->completion);
+
+			/*
+			 * Setting mcs_handling_done inside the lock ensures
+			 * at least one fence have mcs_handling_done set to
+			 * true before wait for mcs finish. This ensures at
+			 * least one CS will be set as completed when polling
+			 * mcs fences.
+			 */
+			fence->mcs_handling_done = true;
 		}
 
 		spin_unlock(&mcs_compl->lock);
 	}
+	/* In case CS completed without mcs completion initialized */
+	fence->mcs_handling_done = true;
 }
 
 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
 			break;
 		}
 
-		mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
-
-		if (status == CS_WAIT_STATUS_BUSY)
-			continue;
-
-		mcs_data->completion_bitmap |= BIT(i);
-
-		/*
-		 * best effort to extract timestamp. few notes:
-		 * - if even single fence is gone we cannot extract timestamp
-		 *   (as fence not exist anymore)
-		 * - for all completed CSs we take the earliest timestamp.
-		 *   for this we have to validate that:
-		 *       1. given timestamp was indeed set
-		 *       2. the timestamp is earliest of all timestamps so far
-		 */
-
-		if (status == CS_WAIT_STATUS_GONE) {
-			mcs_data->update_ts = false;
-			mcs_data->gone_cs = true;
-		} else if (mcs_data->update_ts &&
-			(ktime_compare(fence->timestamp,
-						ktime_set(0, 0)) > 0) &&
-			(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
-			first_cs_time = fence->timestamp;
+		switch (status) {
+		case CS_WAIT_STATUS_BUSY:
+			/* CS did not finished, keep waiting on its QID*/
+			mcs_data->stream_master_qid_map |=
+					fence->stream_master_qid_map;
+			break;
+		case CS_WAIT_STATUS_COMPLETED:
+			/*
+			 * Using mcs_handling_done to avoid possibility of mcs_data
+			 * returns to user indicating CS completed before it finished
+			 * all of its mcs handling, to avoid race the next time the
+			 * user waits for mcs.
+			 */
+			if (!fence->mcs_handling_done)
+				break;
+
+			mcs_data->completion_bitmap |= BIT(i);
+			/*
+			 * For all completed CSs we take the earliest timestamp.
+			 * For this we have to validate that the timestamp is
+			 * earliest of all timestamps so far.
+			 */
+			if (mcs_data->update_ts &&
+					(ktime_compare(fence->timestamp, first_cs_time) < 0))
+				first_cs_time = fence->timestamp;
+			break;
+		case CS_WAIT_STATUS_GONE:
+			mcs_data->update_ts = false;
+			mcs_data->gone_cs = true;
+			/*
+			 * It is possible to get an old sequence numbers from user
+			 * which related to already completed CSs and their fences
+			 * already gone. In this case, CS set as completed but
+			 * no need to consider its QID for mcs completion.
+			 */
+			mcs_data->completion_bitmap |= BIT(i);
+			break;
+		default:
+			dev_err(hdev->dev, "Invalid fence status\n");
+			return -EINVAL;
 		}
+
 	}
 
 	hl_fences_put(mcs_data->fence_arr, arr_len);
@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				u32 timeout_us, u64 user_address,
-				u32 target_value, u16 interrupt_offset,
-				enum hl_cs_wait_status *status)
+				u64 target_value, u16 interrupt_offset,
+				enum hl_cs_wait_status *status,
+				u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
 	struct hl_user_interrupt *interrupt;
 	unsigned long timeout, flags;
-	u32 completion_value;
+	u64 completion_value;
 	long completion_rc;
 	int rc = 0;
 
@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
 	 */
-	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 		dev_err(hdev->dev, "Failed to copy completion value from user\n");
 		rc = -EFAULT;
 		goto remove_pending_user_interrupt;
 	}
 
-	if (completion_value >= target_value)
+	if (completion_value >= target_value) {
 		*status = CS_WAIT_STATUS_COMPLETED;
-	else
+		/* There was no interrupt, we assume the completion is now. */
+		pend->fence.timestamp = ktime_get();
+	} else
 		*status = CS_WAIT_STATUS_BUSY;
 
 	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
@@ -2812,7 +2842,7 @@ wait_again:
 	reinit_completion(&pend->fence.completion);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
-	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 		dev_err(hdev->dev, "Failed to copy completion value from user\n");
 		rc = -EFAULT;
 
@@ -2839,6 +2869,8 @@ remove_pending_user_interrupt:
 	list_del(&pend->wait_list_node);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
+	*timestamp = ktime_to_ns(pend->fence.timestamp);
+
 	kfree(pend);
 	hl_ctx_put(ctx);
 
@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	struct asic_fixed_properties *prop;
 	union hl_wait_cs_args *args = data;
 	enum hl_cs_wait_status status;
+	u64 timestamp;
 	int rc;
 
 	prop = &hdev->asic_prop;
@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 
 	rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
 			args->in.interrupt_timeout_us, args->in.addr,
-			args->in.target, interrupt_offset, &status);
+			args->in.target, interrupt_offset, &status,
+			&timestamp);
 
 	if (rc) {
 		if (rc != -EINTR)
@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 
 	memset(args, 0, sizeof(*args));
 
+	if (timestamp) {
+		args->out.timestamp_nsec = timestamp;
+		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+	}
+
 	switch (status) {
 	case CS_WAIT_STATUS_COMPLETED:
 		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
@@ -181,12 +181,6 @@ out_err:
 	return rc;
 }
 
-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
-		return;
-}
-
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 {
 	int rc = 0;
@@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
 	idp = &mgr->ctx_handles;
 
 	idr_for_each_entry(idp, ctx, id)
-		hl_ctx_free(hdev, ctx);
+		kref_put(&ctx->refcount, hl_ctx_do_release);
 
 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->ctx_lock);
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
 	return count;
 }
 
+static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	char tmp_buf[200];
+	ssize_t rc;
+
+	if (*ppos)
+		return 0;
+
+	sprintf(tmp_buf, "%d\n",
+		jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
+	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+			strlen(tmp_buf) + 1);
+
+	return rc;
+}
+
+static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	u32 value;
+	ssize_t rc;
+
+	rc = kstrtouint_from_user(buf, count, 10, &value);
+	if (rc)
+		return rc;
+
+	if (value)
+		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+	else
+		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+	return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,
@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
 	.write = hl_state_dump_write
 };
 
+static const struct file_operations hl_timeout_locked_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_timeout_locked_read,
+	.write = hl_timeout_locked_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},
@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_state_dump_fops);
 
+	debugfs_create_file("timeout_locked",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_timeout_locked_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
@@ -69,13 +69,6 @@ static void hpriv_release(struct kref *ref)
 
 	mutex_destroy(&hpriv->restore_phase_mutex);
 
-	mutex_lock(&hdev->fpriv_list_lock);
-	list_del(&hpriv->dev_node);
-	hdev->compute_ctx = NULL;
-	mutex_unlock(&hdev->fpriv_list_lock);
-
-	kfree(hpriv);
-
 	if ((!hdev->pldm) && (hdev->pdev) &&
 			(!hdev->asic_funcs->is_device_idle(hdev,
 				idle_mask,
@@ -87,9 +80,32 @@ static void hpriv_release(struct kref *ref)
 		device_is_idle = false;
 	}
 
+	/* We need to remove the user from the list to make sure the reset process won't
+	 * try to kill the user process. Because, if we got here, it means there are no
+	 * more driver/device resources that the user process is occupying so there is
+	 * no need to kill it
+	 *
+	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
+	 * a race between the release and opening the device again. We don't want to let
+	 * a user open the device while there a reset is about to happen.
+	 */
+	mutex_lock(&hdev->fpriv_list_lock);
+	list_del(&hpriv->dev_node);
+	mutex_unlock(&hdev->fpriv_list_lock);
+
 	if ((hdev->reset_if_device_not_idle && !device_is_idle)
 			|| hdev->reset_upon_device_release)
 		hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
 
+	/* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
+	 * thread, we don't care because the in_reset is marked so if a user will try to open
+	 * the device it will fail on that, even if compute_ctx is NULL.
+	 */
+	mutex_lock(&hdev->fpriv_list_lock);
+	hdev->compute_ctx = NULL;
+	mutex_unlock(&hdev->fpriv_list_lock);
+
+	kfree(hpriv);
 }
 
 void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -530,6 +546,19 @@ static void hl_device_heartbeat(struct work_struct *work)
 	return;
 
 reschedule:
+	/*
+	 * prev_reset_trigger tracks consecutive fatal h/w errors until first
+	 * heartbeat immediately post reset.
+	 * If control reached here, then at least one heartbeat work has been
+	 * scheduled since last reset/init cycle.
+	 * So if the device is not already in reset cycle, reset the flag
+	 * prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
+	 * status for at least one heartbeat. From this point driver restarts
+	 * tracking future consecutive fatal errors.
+	 */
+	if (!(atomic_read(&hdev->in_reset)))
+		hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+
 	schedule_delayed_work(&hdev->work_heartbeat,
 			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 }
@@ -909,6 +938,65 @@ static void device_disable_open_processes(struct hl_device *hdev)
 	mutex_unlock(&hdev->fpriv_list_lock);
 }
 
+static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
+{
+	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+
+	/*
+	 * 'reset cause' is being updated here, because getting here
+	 * means that it's the 1st time and the last time we're here
+	 * ('in_reset' makes sure of it). This makes sure that
+	 * 'reset_cause' will continue holding its 1st recorded reason!
+	 */
+	if (flags & HL_RESET_HEARTBEAT) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
+		cur_reset_trigger = HL_RESET_HEARTBEAT;
+	} else if (flags & HL_RESET_TDR) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
+		cur_reset_trigger = HL_RESET_TDR;
+	} else if (flags & HL_RESET_FW_FATAL_ERR) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+		cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
+	} else {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+	}
+
+	/*
+	 * If reset cause is same twice, then reset_trigger_repeated
+	 * is set and if this reset is due to a fatal FW error
+	 * device is set to an unstable state.
+	 */
+	if (hdev->prev_reset_trigger != cur_reset_trigger) {
+		hdev->prev_reset_trigger = cur_reset_trigger;
+		hdev->reset_trigger_repeated = 0;
+	} else {
+		hdev->reset_trigger_repeated = 1;
+	}
+
+	/* If reset is due to heartbeat, device CPU is no responsive in
+	 * which case no point sending PCI disable message to it.
+	 *
+	 * If F/W is performing the reset, no need to send it a message to disable
+	 * PCI access
+	 */
+	if ((flags & HL_RESET_HARD) &&
+			!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
+		/* Disable PCI access from device F/W so he won't send
+		 * us additional interrupts. We disable MSI/MSI-X at
+		 * the halt_engines function and we can't have the F/W
+		 * sending us interrupts after that. We need to disable
+		 * the access here because if the device is marked
+		 * disable, the message won't be send. Also, in case
+		 * of heartbeat, the device CPU is marked as disable
+		 * so this message won't be sent
+		 */
+		if (hl_fw_send_pci_access_msg(hdev,
+				CPUCP_PACKET_DISABLE_PCI_ACCESS))
+			dev_warn(hdev->dev,
+				"Failed to disable PCI access by F/W\n");
+	}
+}
+
 /*
  * hl_device_reset - reset the device
  *
@@ -954,7 +1042,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		goto do_reset;
 	}
 
-	if (!hard_reset && !hdev->allow_external_soft_reset) {
+	if (!hard_reset && !hdev->allow_inference_soft_reset) {
 		hard_instead_soft = true;
 		hard_reset = true;
 	}
@@ -978,47 +1066,21 @@ do_reset:
 	if (rc)
 		return 0;
 
-	/*
-	 * 'reset cause' is being updated here, because getting here
-	 * means that it's the 1st time and the last time we're here
-	 * ('in_reset' makes sure of it). This makes sure that
-	 * 'reset_cause' will continue holding its 1st recorded reason!
-	 */
-	if (flags & HL_RESET_HEARTBEAT)
-		hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
-	else if (flags & HL_RESET_TDR)
-		hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
-	else
-		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
-
-	/* If reset is due to heartbeat, device CPU is no responsive in
-	 * which case no point sending PCI disable message to it.
-	 *
-	 * If F/W is performing the reset, no need to send it a message to disable
-	 * PCI access
-	 */
-	if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
-		/* Disable PCI access from device F/W so he won't send
-		 * us additional interrupts. We disable MSI/MSI-X at
-		 * the halt_engines function and we can't have the F/W
-		 * sending us interrupts after that. We need to disable
-		 * the access here because if the device is marked
-		 * disable, the message won't be send. Also, in case
-		 * of heartbeat, the device CPU is marked as disable
-		 * so this message won't be sent
-		 */
-		if (hl_fw_send_pci_access_msg(hdev,
-				CPUCP_PACKET_DISABLE_PCI_ACCESS))
-			dev_warn(hdev->dev,
-				"Failed to disable PCI access by F/W\n");
-	}
+	handle_reset_trigger(hdev, flags);
 
 	/* This also blocks future CS/VM/JOB completion operations */
 	hdev->disabled = true;
 
 	take_release_locks(hdev);
 
-	dev_err(hdev->dev, "Going to RESET device!\n");
+	if (hard_reset)
+		dev_info(hdev->dev, "Going to reset device\n");
+	else if (flags & HL_RESET_DEVICE_RELEASE)
+		dev_info(hdev->dev,
+			"Going to reset device after it was released by user\n");
+	else
+		dev_info(hdev->dev,
+			"Going to reset compute engines of inference device\n");
 }
 
 again:
@@ -1108,6 +1170,17 @@ kill_processes:
 		hdev->device_cpu_disabled = false;
 		hdev->hard_reset_pending = false;
 
+		if (hdev->reset_trigger_repeated &&
+				(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
+			/* if there 2 back to back resets from FW,
+			 * ensure driver puts the driver in a unusable state
+			 */
+			dev_crit(hdev->dev,
+				"Consecutive FW fatal errors received, stopping hard reset\n");
+			rc = -EIO;
+			goto out_err;
+		}
+
 		if (hdev->kernel_ctx) {
 			dev_crit(hdev->dev,
 				"kernel ctx was alive during hard reset, something is terribly wrong\n");
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2162,18 +2162,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 }
 
 /**
- * hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause
- * of the newly triggered hard reset
+ * hl_fw_dynamic_send_msg - send a COMMS message with attached data
  *
  * @hdev: pointer to the habanalabs device structure
  * @fw_loader: managing structure for loading device's FW
- * @reset_cause: enumerated cause for the recent hard reset
+ * @msg_type: message type
+ * @data: data to be sent
  *
  * @return 0 on success, otherwise non-zero error code
  */
-static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
-		struct fw_load_mgr *fw_loader,
-		enum comms_reset_cause reset_cause)
+static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
+		struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
 {
 	struct lkd_msg_comms msg;
 	int rc;
@@ -2181,11 +2180,20 @@ static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
 	memset(&msg, 0, sizeof(msg));
 
 	/* create message to be sent */
-	msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
+	msg.header.type = msg_type;
 	msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header));
 	msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
 
-	msg.reset_cause = reset_cause;
+	switch (msg_type) {
+	case HL_COMMS_RESET_CAUSE_TYPE:
+		msg.reset_cause = *(__u8 *) data;
+		break;
+	default:
+		dev_err(hdev->dev,
+			"Send COMMS message - invalid message type %u\n",
+			msg_type);
+		return -EINVAL;
+	}
 
 	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
 			sizeof(struct lkd_msg_comms));
@@ -2252,8 +2260,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 		goto protocol_err;
 
 	if (hdev->curr_reset_cause) {
-		rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader,
-				hdev->curr_reset_cause);
+		rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
+			HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause);
 		if (rc)
 			goto protocol_err;
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
@@ -26,6 +26,7 @@
 #include <linux/sched/signal.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/coresight.h>
+#include <linux/dma-buf.h>
 
 #define HL_NAME				"habanalabs"
 
@@ -68,6 +69,9 @@
 
 #define HL_STATE_DUMP_HIST_LEN		5
 
+/* Default value for device reset trigger , an invalid value */
+#define HL_RESET_TRIGGER_DEFAULT	0xFF
+
 #define OBJ_NAMES_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
 #define SYNC_TO_ENGINE_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
 
@@ -132,13 +136,18 @@ enum hl_mmu_page_table_location {
 * - HL_RESET_FW
 *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
 *       only when running with secured f/w
+ *
+ * - HL_RESET_FW_FATAL_ERR
+ *       Set if reset is due to a fatal error from FW
 */
 
 #define HL_RESET_HARD			(1 << 0)
 #define HL_RESET_FROM_RESET_THREAD	(1 << 1)
 #define HL_RESET_HEARTBEAT		(1 << 2)
 #define HL_RESET_TDR			(1 << 3)
 #define HL_RESET_DEVICE_RELEASE		(1 << 4)
 #define HL_RESET_FW			(1 << 5)
+#define HL_RESET_FW_FATAL_ERR		(1 << 6)
 
 #define HL_MAX_SOBS_PER_MONITOR	8
 
@@ -447,6 +456,9 @@ struct hl_hints_range {
 *                          for hints validity check.
 * device_dma_offset_for_host_access: the offset to add to host DMA addresses
 *                                    to enable the device to access them.
+ * @max_freq_value: current max clk frequency.
+ * @clk_pll_index: clock PLL index that specify which PLL determines the clock
+ *                 we display to the user
 * @mmu_pgt_size: MMU page tables total size.
 * @mmu_pte_size: PTE size in MMU page tables.
 * @mmu_hop_table_size: MMU hop table size.
@@ -543,6 +555,8 @@ struct asic_fixed_properties {
 	u64				cb_va_end_addr;
 	u64				dram_hints_align_mask;
 	u64				device_dma_offset_for_host_access;
+	u64				max_freq_value;
+	u32				clk_pll_index;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
 	u32				mmu_hop_table_size;
@@ -601,6 +615,9 @@ struct asic_fixed_properties {
 *                          masters QIDs that multi cs is waiting on
 * @error: mark this fence with error
 * @timestamp: timestamp upon completion
+ * @mcs_handling_done: indicates that corresponding command submission has
+ *                     finished msc handling, this does not mean it was part
+ *                     of the mcs
 */
 struct hl_fence {
 	struct completion	completion;
@@ -609,6 +626,7 @@ struct hl_fence {
 	u32			stream_master_qid_map;
 	int			error;
 	ktime_t			timestamp;
+	u8			mcs_handling_done;
 };
 
 /**
@@ -1352,6 +1370,23 @@ struct hl_cs_counters_atomic {
 	atomic64_t validation_drop_cnt;
 };
 
+/**
+ * struct hl_dmabuf_priv - a dma-buf private object.
+ * @dmabuf: pointer to dma-buf object.
+ * @ctx: pointer to the dma-buf owner's context.
+ * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported for
+ *                memory allocation handle.
+ * @device_address: physical address of the device's memory. Relevant only
+ *                  if phys_pg_pack is NULL (dma-buf was exported from address).
+ *                  The total size can be taken from the dmabuf object.
+ */
+struct hl_dmabuf_priv {
+	struct dma_buf			*dmabuf;
+	struct hl_ctx			*ctx;
+	struct hl_vm_phys_pg_pack	*phys_pg_pack;
+	uint64_t			device_address;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -1662,6 +1697,7 @@ struct hl_vm_hw_block_list_node {
 * @npages: num physical pages in the pack.
 * @total_size: total size of all the pages in this list.
 * @mapping_cnt: number of shared mappings.
+ * @exporting_cnt: number of dma-buf exporting.
 * @asid: the context related to this list.
 * @page_size: size of each page in the pack.
 * @flags: HL_MEM_* flags related to this list.
@@ -1676,6 +1712,7 @@ struct hl_vm_phys_pg_pack {
 	u64			npages;
 	u64			total_size;
 	atomic_t		mapping_cnt;
+	u32			exporting_cnt;
 	u32			asid;
 	u32			page_size;
 	u32			flags;
@@ -2396,6 +2433,7 @@ struct multi_cs_data {
 *                        the error will be ignored by the driver during
 *                        device initialization. Mainly used to debug and
 *                        workaround firmware bugs
+ * @dram_pci_bar_start: start bus address of PCIe bar towards DRAM.
 * @last_successful_open_jif: timestamp (jiffies) of the last successful
 *                            device open.
 * @last_open_session_duration_jif: duration (jiffies) of the last device open
@@ -2440,8 +2478,12 @@ struct multi_cs_data {
 * @collective_mon_idx: helper index for collective initialization
 * @supports_coresight: is CoreSight supported.
 * @supports_soft_reset: is soft reset supported.
- * @allow_external_soft_reset: true if soft reset initiated by user or TDR is
- *                             allowed.
+ * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
+ *                              initiated by user or TDR. This is only true
+ *                              in inference ASICs, as there is no real-world
+ *                              use-case of doing soft-reset in training (due
+ *                              to the fact that training runs on multiple
+ *                              devices)
 * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
 * @needs_reset: true if reset_on_lockup is false and device should be reset
 *               due to lockup.
@@ -2452,6 +2494,10 @@ struct multi_cs_data {
 * @supports_staged_submission: true if staged submissions are supported
 * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
 *                    triggered, and cleared after it is shared with preboot.
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
+ *                      with a new value on next reset
+ * @reset_trigger_repeated: set if device reset is triggered more than once with
+ *                          same cause.
 * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
 *                         complete instead.
 * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
@@ -2537,6 +2583,7 @@ struct hl_device {
 	u64				max_power;
 	u64				clock_gating_mask;
 	u64				boot_error_status_mask;
+	u64				dram_pci_bar_start;
 	u64				last_successful_open_jif;
 	u64				last_open_session_duration_jif;
 	u64				open_counter;
@@ -2572,13 +2619,15 @@ struct hl_device {
 	u8				collective_mon_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
-	u8				allow_external_soft_reset;
+	u8				allow_inference_soft_reset;
 	u8				supports_cb_mapping;
 	u8				needs_reset;
 	u8				process_kill_trial_cnt;
 	u8				device_fini_pending;
 	u8				supports_staged_submission;
 	u8				curr_reset_cause;
+	u8				prev_reset_trigger;
+	u8				reset_trigger_repeated;
 	u8				skip_reset_on_timeout;
 	u8				device_cpu_is_halted;
 	u8				supports_wait_for_multi_cs;
@@ -2956,6 +3005,15 @@ int hl_set_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
+int hl_set_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value);
+int hl_get_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value);
+int hl_get_clk_rate(struct hl_device *hdev,
+			u32 *cur_clk, u32 *max_clk);
+void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
+void hl_add_device_attr(struct hl_device *hdev,
+			struct attribute_group *dev_attr_grp);
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
 void hl_encaps_handle_do_release(struct kref *ref);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -225,6 +225,17 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	if (!hpriv)
 		return -ENOMEM;
 
+	/* Prevent other routines from reading partial hpriv data by
+	 * initializing hpriv fields before inserting it to the list
+	 */
+	hpriv->hdev = hdev;
+	filp->private_data = hpriv;
+	hpriv->filp = filp;
+	hpriv->is_control = true;
+	nonseekable_open(inode, filp);
+
+	hpriv->taskpid = find_get_pid(current->pid);
+
 	mutex_lock(&hdev->fpriv_list_lock);
 
 	if (!hl_device_operational(hdev, NULL)) {
@@ -238,19 +249,15 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	list_add(&hpriv->dev_node, &hdev->fpriv_list);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
-	hpriv->hdev = hdev;
-	filp->private_data = hpriv;
-	hpriv->filp = filp;
-	hpriv->is_control = true;
-	nonseekable_open(inode, filp);
-
-	hpriv->taskpid = find_get_pid(current->pid);
-
 	return 0;
 
 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
 	filp->private_data = NULL;
+	put_pid(hpriv->taskpid);
+
 	kfree(hpriv);
 
 	return rc;
 }
@@ -339,6 +346,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	set_driver_behavior_per_device(hdev);
 
 	hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+	hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
 
 	if (timeout_locked)
 		hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c
@@ -1,29 +1,26 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2018 HabanaLabs, Ltd.
+ * Copyright 2019-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
-#include "gaudiP.h"
-#include "../include/gaudi/gaudi_fw_if.h"
+#include "habanalabs.h"
 
-void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
+void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
-
 	if (freq == PLL_LAST)
-		hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
+		hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+				hdev->asic_prop.max_freq_value);
 }
 
-int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 {
 	long value;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
@@ -33,7 +30,7 @@ int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev,
@@ -51,15 +48,14 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	long value;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
 
-	gaudi->max_freq_value = value;
+	hdev->asic_prop.max_freq_value = value;
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
@@ -68,7 +64,6 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 	u64 value;
 
@@ -83,9 +78,10 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 		goto fail;
 	}
 
-	gaudi->max_freq_value = value * 1000 * 1000;
+	hdev->asic_prop.max_freq_value = value * 1000 * 1000;
 
-	hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
+	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+			hdev->asic_prop.max_freq_value);
 
 fail:
 	return count;
@@ -100,7 +96,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
@@ -108,14 +104,14 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 static DEVICE_ATTR_RW(clk_max_freq_mhz);
 static DEVICE_ATTR_RO(clk_cur_freq_mhz);
 
-static struct attribute *gaudi_dev_attrs[] = {
+static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_clk_max_freq_mhz.attr,
 	&dev_attr_clk_cur_freq_mhz.attr,
 	NULL,
 };
 
-void gaudi_add_device_attr(struct hl_device *hdev,
+void hl_add_device_attr(struct hl_device *hdev,
 		struct attribute_group *dev_attr_grp)
 {
-	dev_attr_grp->attrs = gaudi_dev_attrs;
+	dev_attr_grp->attrs = hl_dev_attrs;
 }
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
@@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	int rc;
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
@@ -121,65 +124,134 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_input:
+			cpucp_attr = cpucp_temp_input;
+			break;
 		case hwmon_temp_max:
+			cpucp_attr = cpucp_temp_max;
+			break;
 		case hwmon_temp_crit:
+			cpucp_attr = cpucp_temp_crit;
+			break;
 		case hwmon_temp_max_hyst:
+			cpucp_attr = cpucp_temp_max_hyst;
+			break;
 		case hwmon_temp_crit_hyst:
+			cpucp_attr = cpucp_temp_crit_hyst;
+			break;
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_highest:
+			cpucp_attr = cpucp_temp_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_temperature(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_temperature(hdev, channel, attr, val);
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_input:
+			cpucp_attr = cpucp_in_input;
+			break;
 		case hwmon_in_min:
+			cpucp_attr = cpucp_in_min;
+			break;
 		case hwmon_in_max:
+			cpucp_attr = cpucp_in_max;
+			break;
 		case hwmon_in_highest:
+			cpucp_attr = cpucp_in_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_voltage(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_voltage(hdev, channel, attr, val);
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_input:
+			cpucp_attr = cpucp_curr_input;
+			break;
 		case hwmon_curr_min:
+			cpucp_attr = cpucp_curr_min;
+			break;
 		case hwmon_curr_max:
+			cpucp_attr = cpucp_curr_max;
+			break;
 		case hwmon_curr_highest:
+			cpucp_attr = cpucp_curr_highest;
 			break;
 		default:
 			return -EINVAL;
 		}
 
-		rc = hl_get_current(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_current(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_current(hdev, channel, attr, val);
 		break;
 	case hwmon_fan:
 		switch (attr) {
 		case hwmon_fan_input:
+			cpucp_attr = cpucp_fan_input;
+			break;
 		case hwmon_fan_min:
+			cpucp_attr = cpucp_fan_min;
+			break;
 		case hwmon_fan_max:
+			cpucp_attr = cpucp_fan_max;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_fan_speed(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_fan_speed(hdev, channel, attr, val);
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_pwm_info(hdev, channel, attr, val);
+		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_input:
+			cpucp_attr = CPUCP_POWER_INPUT;
+			break;
+		case hwmon_power_input_highest:
+			cpucp_attr = CPUCP_POWER_INPUT_HIGHEST;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (use_cpucp_enum)
+			rc = hl_get_power(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_power(hdev, channel, attr, val);
 		break;
 	default:
 		return -EINVAL;
@@ -191,6 +263,9 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 			u32 attr, int channel, long val)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
 
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
@@ -199,40 +274,78 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_reset_history:
+			cpucp_attr = cpucp_temp_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_temperature(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_temperature(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_temperature(hdev, channel, attr, val);
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_pwm_info(hdev, channel, attr, val);
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_reset_history:
+			cpucp_attr = cpucp_in_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_voltage(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_voltage(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_voltage(hdev, channel, attr, val);
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_reset_history:
+			cpucp_attr = cpucp_curr_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_current(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_current(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_current(hdev, channel, attr, val);
+		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_reset_history:
+			cpucp_attr = CPUCP_POWER_RESET_INPUT_HISTORY;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (use_cpucp_enum)
+			hl_set_power(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_power(hdev, channel, attr, val);
 		break;
 	default:
 		return -EINVAL;
@@ -296,6 +409,15 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 			return 0644;
 		}
 		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_input:
+		case hwmon_power_input_highest:
+			return 0444;
+		case hwmon_power_reset_history:
+			return 0200;
+		}
+		break;
 	default:
 		break;
 	}
@@ -551,6 +673,60 @@ int hl_set_current(struct hl_device *hdev,
 	return rc;
 }
 
+int hl_set_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+	pkt.value = __cpu_to_le64(value);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						0, NULL);
+
+	if (rc)
+		dev_err(hdev->dev,
+			"Failed to set power of sensor %d, error %d\n",
+			sensor_index, rc);
+
+	return rc;
+}
+
+int hl_get_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
+{
+	struct cpucp_packet pkt;
+	u64 result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						0, &result);
+
+	*value = (long) result;
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to get power of sensor %d, error %d\n",
+			sensor_index, rc);
+		*value = 0;
+	}
+
+	return rc;
+}
+
 int hl_hwmon_init(struct hl_device *hdev)
 {
 	struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
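Since the new power readings are plumbed through the standard hwmon core, they should surface under the usual hwmon sysfs names. A minimal sketch of reading them, assuming hwmon's convention of exposing hwmon_power_input as power1_input in microwatts; the hwmon0 path is a placeholder and must be matched to the habanalabs device on a given system:

	#include <stdio.h>

	int main(void)
	{
		/* hwmon_power_input is conventionally published as power1_input,
		 * in microwatts; hwmon0 stands in for the right instance.
		 */
		FILE *f = fopen("/sys/class/hwmon/hwmon0/power1_input", "r");
		long long microwatts;

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fscanf(f, "%lld", &microwatts) == 1)
			printf("power: %lld uW\n", microwatts);
		fclose(f);
		return 0;
	}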
diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c
@@ -141,10 +141,13 @@ static void handle_user_cq(struct hl_device *hdev,
 				struct hl_user_interrupt *user_cq)
 {
 	struct hl_user_pending_interrupt *pend;
+	ktime_t now = ktime_get();
 
 	spin_lock(&user_cq->wait_list_lock);
-	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node)
+	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
+		pend->fence.timestamp = now;
 		complete_all(&pend->fence.completion);
+	}
 	spin_unlock(&user_cq->wait_list_lock);
 }
 
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Copyright 2016-2019 HabanaLabs, Ltd.
|
||||
* Copyright 2016-2021 HabanaLabs, Ltd.
|
||||
* All Rights Reserved.
|
||||
*/
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
|
||||
#define HL_MMU_DEBUG 0
|
||||
|
||||
|
@ -347,6 +348,12 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (phys_pg_pack->exporting_cnt) {
|
||||
dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
|
||||
spin_unlock(&vm->idr_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* must remove from idr before the freeing of the physical
|
||||
* pages as the refcount of the pool is also the trigger of the
|
||||
|
@ -1487,13 +1494,487 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
|
||||
struct device *dev, enum dma_data_direction dir)
|
||||
{
|
||||
dma_addr_t addr;
|
||||
int rc;
|
||||
|
||||
addr = dma_map_resource(dev, bar_address, chunk_size, dir,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
rc = dma_mapping_error(dev, addr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
sg_set_page(sg, NULL, chunk_size, 0);
|
||||
sg_dma_address(sg) = addr;
|
||||
sg_dma_len(sg) = chunk_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
|
||||
u64 page_size, struct device *dev,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
u64 chunk_size, bar_address, dma_max_seg_size;
|
||||
struct asic_fixed_properties *prop;
|
||||
int rc, i, j, nents, cur_page;
|
||||
struct scatterlist *sg;
|
||||
struct sg_table *sgt;
|
||||
|
||||
prop = &hdev->asic_prop;
|
||||
|
||||
dma_max_seg_size = dma_get_max_seg_size(dev);
|
||||
|
||||
/* We would like to align the max segment size to PAGE_SIZE, so the
|
||||
* SGL will contain aligned addresses that can be easily mapped to
|
||||
* an MMU
|
||||
*/
|
||||
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
|
||||
if (dma_max_seg_size < PAGE_SIZE) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
|
||||
dma_max_seg_size);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
|
||||
if (!sgt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* If the size of each page is larger than the dma max segment size,
|
||||
* then we can't combine pages and the number of entries in the SGL
|
||||
* will just be the
|
||||
* <number of pages> * <chunks of max segment size in each page>
|
||||
*/
|
||||
if (page_size > dma_max_seg_size)
|
||||
nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
|
||||
else
|
||||
/* Get number of non-contiguous chunks */
|
||||
for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
|
||||
if (pages[i - 1] + page_size != pages[i] ||
|
||||
chunk_size + page_size > dma_max_seg_size) {
|
||||
nents++;
|
||||
chunk_size = page_size;
|
||||
continue;
|
||||
}
|
||||
|
||||
chunk_size += page_size;
|
||||
}
|
||||
|
||||
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
|
||||
if (rc)
|
||||
goto error_free;
|
||||
|
||||
cur_page = 0;
|
||||
|
||||
if (page_size > dma_max_seg_size) {
|
||||
u64 size_left, cur_device_address = 0;
|
||||
|
||||
size_left = page_size;
|
||||
|
||||
/* Need to split each page into the number of chunks of
|
||||
* dma_max_seg_size
|
||||
*/
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
if (size_left == page_size)
|
||||
cur_device_address =
|
||||
pages[cur_page] - prop->dram_base_address;
|
||||
else
|
||||
cur_device_address += dma_max_seg_size;
|
||||
|
||||
chunk_size = min(size_left, dma_max_seg_size);
|
||||
|
||||
bar_address = hdev->dram_pci_bar_start + cur_device_address;
|
||||
|
||||
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
|
||||
if (rc)
|
||||
goto error_unmap;
|
||||
|
||||
if (size_left > dma_max_seg_size) {
|
||||
size_left -= dma_max_seg_size;
|
||||
} else {
|
||||
cur_page++;
|
||||
size_left = page_size;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Merge pages and put them into the scatterlist */
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
chunk_size = page_size;
|
||||
for (j = cur_page + 1 ; j < npages ; j++) {
|
||||
if (pages[j - 1] + page_size != pages[j] ||
|
||||
chunk_size + page_size > dma_max_seg_size)
|
||||
break;
|
||||
|
||||
chunk_size += page_size;
|
||||
}
|
||||
|
||||
bar_address = hdev->dram_pci_bar_start +
|
||||
(pages[cur_page] - prop->dram_base_address);
|
||||
|
||||
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
|
||||
if (rc)
|
||||
goto error_unmap;
|
||||
|
||||
cur_page = j;
|
||||
}
|
||||
}
|
||||
|
||||
/* Because we are not going to include a CPU list we want to have some
|
||||
* chance that other users will detect this by setting the orig_nents
|
||||
* to 0 and using only nents (length of DMA list) when going over the
|
||||
* sgl
|
||||
*/
|
||||
sgt->orig_nents = 0;
|
||||
|
||||
return sgt;
|
||||
|
||||
error_unmap:
|
||||
for_each_sgtable_dma_sg(sgt, sg, i) {
|
||||
if (!sg_dma_len(sg))
|
||||
continue;
|
||||
|
||||
dma_unmap_resource(dev, sg_dma_address(sg),
|
||||
sg_dma_len(sg), dir,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
}
|
||||
|
||||
sg_free_table(sgt);
|
||||
|
||||
error_free:
|
||||
kfree(sgt);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
static int hl_dmabuf_attach(struct dma_buf *dmabuf,
|
||||
struct dma_buf_attachment *attachment)
|
||||
{
|
||||
struct hl_dmabuf_priv *hl_dmabuf;
|
||||
struct hl_device *hdev;
|
||||
int rc;
|
||||
|
||||
hl_dmabuf = dmabuf->priv;
|
||||
hdev = hl_dmabuf->ctx->hdev;
|
||||
|
||||
rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
|
||||
|
||||
if (rc < 0)
|
||||
attachment->peer2peer = false;
|
||||
return 0;
|
||||
}

+static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
+					enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attachment->dmabuf;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev;
+	struct sg_table *sgt;
+
+	hl_dmabuf = dma_buf->priv;
+	hdev = hl_dmabuf->ctx->hdev;
+	phys_pg_pack = hl_dmabuf->phys_pg_pack;
+
+	if (!attachment->peer2peer) {
+		dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
+		return ERR_PTR(-EPERM);
+	}
+
+	if (phys_pg_pack)
+		sgt = alloc_sgt_from_device_pages(hdev,
+						phys_pg_pack->pages,
+						phys_pg_pack->npages,
+						phys_pg_pack->page_size,
+						attachment->dev,
+						dir);
+	else
+		sgt = alloc_sgt_from_device_pages(hdev,
+						&hl_dmabuf->device_address,
+						1,
+						hl_dmabuf->dmabuf->size,
+						attachment->dev,
+						dir);
+
+	if (IS_ERR(sgt))
+		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
+
+	return sgt;
+}

+static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
+				struct sg_table *sgt,
+				enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
+	 * only in the 'device' domain (after all, it maps a PCI BAR address which points to the
+	 * device memory).
+	 *
+	 * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
+	 * a sync of the memory to the CPU's cache, as it never resided inside that cache.
+	 */
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		dma_unmap_resource(attachment->dev, sg_dma_address(sg),
+					sg_dma_len(sg), dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+
+	/* Need to restore orig_nents because sg_free_table uses that field */
+	sgt->orig_nents = sgt->nents;
+	sg_free_table(sgt);
+	kfree(sgt);
+}

+static void hl_release_dmabuf(struct dma_buf *dmabuf)
+{
+	struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
+	struct hl_ctx *ctx = hl_dmabuf->ctx;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+
+	if (hl_dmabuf->phys_pg_pack) {
+		spin_lock(&vm->idr_lock);
+		hl_dmabuf->phys_pg_pack->exporting_cnt--;
+		spin_unlock(&vm->idr_lock);
+	}
+
+	hl_ctx_put(hl_dmabuf->ctx);
+
+	kfree(hl_dmabuf);
+}

+static const struct dma_buf_ops habanalabs_dmabuf_ops = {
+	.attach = hl_dmabuf_attach,
+	.map_dma_buf = hl_map_dmabuf,
+	.unmap_dma_buf = hl_unmap_dmabuf,
+	.release = hl_release_dmabuf,
+};

+static int export_dmabuf_common(struct hl_ctx *ctx,
+				struct hl_dmabuf_priv *hl_dmabuf,
+				u64 total_size, int flags, int *dmabuf_fd)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct hl_device *hdev = ctx->hdev;
+	int rc, fd;
+
+	exp_info.ops = &habanalabs_dmabuf_ops;
+	exp_info.size = total_size;
+	exp_info.flags = flags;
+	exp_info.priv = hl_dmabuf;
+
+	hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(hl_dmabuf->dmabuf)) {
+		dev_err(hdev->dev, "failed to export dma-buf\n");
+		return PTR_ERR(hl_dmabuf->dmabuf);
+	}
+
+	fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
+	if (fd < 0) {
+		dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n");
+		rc = fd;
+		goto err_dma_buf_put;
+	}
+
+	hl_dmabuf->ctx = ctx;
+	hl_ctx_get(hdev, hl_dmabuf->ctx);
+
+	*dmabuf_fd = fd;
+
+	return 0;
+
+err_dma_buf_put:
+	dma_buf_put(hl_dmabuf->dmabuf);
+	return rc;
+}

+/**
+ * export_dmabuf_from_addr() - export a dma-buf object for the given memory
+ *                             address and size.
+ * @ctx: pointer to the context structure.
+ * @device_addr: device memory physical address.
+ * @size: size of device memory.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr,
+					u64 size, int flags, int *dmabuf_fd)
+{
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop;
+	u64 bar_address;
+	int rc;
+
+	prop = &hdev->asic_prop;
+
+	if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+		dev_dbg(hdev->dev,
+			"exported device memory address 0x%llx should be aligned to 0x%lx\n",
+			device_addr, PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (size < PAGE_SIZE) {
+		dev_dbg(hdev->dev,
+			"exported device memory size %llu should be equal to or greater than %lu\n",
+			size, PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (device_addr < prop->dram_user_base_address ||
+			device_addr + size > prop->dram_end_address ||
+			device_addr + size < device_addr) {
+		dev_dbg(hdev->dev,
+			"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
+			device_addr, size);
+		return -EINVAL;
+	}
+
+	bar_address = hdev->dram_pci_bar_start +
+			(device_addr - prop->dram_base_address);
+
+	if (bar_address + size >
+			hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+			bar_address + size < bar_address) {
+		dev_dbg(hdev->dev,
+			"DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
+			device_addr, size);
+		return -EINVAL;
+	}
+
+	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+	if (!hl_dmabuf)
+		return -ENOMEM;
+
+	hl_dmabuf->device_address = device_addr;
+
+	rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd);
+	if (rc)
+		goto err_free_dmabuf_wrapper;
+
+	return 0;
+
+err_free_dmabuf_wrapper:
+	kfree(hl_dmabuf);
+	return rc;
+}
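
Note that the `device_addr + size < device_addr` and `bar_address + size < bar_address` comparisons above double as unsigned wrap-around guards. A standalone illustration with hypothetical values:

	/* Hypothetical values chosen so that start + size wraps past 2^64. */
	u64 start = 0xFFFFFFFFFFFFF000ULL;
	u64 size = 0x2000;

	if (start + size < start)
		pr_err("range wraps around the 64-bit address space\n");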

+/**
+ * export_dmabuf_from_handle() - export a dma-buf object for the given memory
+ *                               handle.
+ * @ctx: pointer to the context structure.
+ * @handle: device memory allocation handle.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags,
+					int *dmabuf_fd)
+{
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop;
+	struct hl_vm *vm = &hdev->vm;
+	u64 bar_address;
+	int rc, i;
+
+	prop = &hdev->asic_prop;
+
+	if (upper_32_bits(handle)) {
+		dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle);
+		return -EINVAL;
+	}
+
+	spin_lock(&vm->idr_lock);
+
+	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle);
+	if (!phys_pg_pack) {
+		spin_unlock(&vm->idr_lock);
+		dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle);
+		return -EINVAL;
+	}
+
+	/* increment now to avoid freeing device memory while exporting */
+	phys_pg_pack->exporting_cnt++;
+
+	spin_unlock(&vm->idr_lock);
+
+	if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
+		dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle);
+		rc = -EINVAL;
+		goto err_dec_exporting_cnt;
+	}
+
+	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+		bar_address = hdev->dram_pci_bar_start +
+				(phys_pg_pack->pages[i] -
+					prop->dram_base_address);
+
+		if (bar_address + phys_pg_pack->page_size >
+				hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+				bar_address + phys_pg_pack->page_size < bar_address) {
+			dev_dbg(hdev->dev,
+				"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
+				phys_pg_pack->pages[i],
+				phys_pg_pack->page_size);
+			rc = -EINVAL;
+			goto err_dec_exporting_cnt;
+		}
+	}
+
+	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+	if (!hl_dmabuf) {
+		rc = -ENOMEM;
+		goto err_dec_exporting_cnt;
+	}
+
+	hl_dmabuf->phys_pg_pack = phys_pg_pack;
+
+	rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size,
+				flags, dmabuf_fd);
+	if (rc)
+		goto err_free_dmabuf_wrapper;
+
+	return 0;
+
+err_free_dmabuf_wrapper:
+	kfree(hl_dmabuf);
+
+err_dec_exporting_cnt:
+	spin_lock(&vm->idr_lock);
+	phys_pg_pack->exporting_cnt--;
+	spin_unlock(&vm->idr_lock);
+
+	return rc;
+}
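
The exporting_cnt taken above is what the memory-free path is assumed to check before releasing a DRAM allocation. A sketch of that guard (the real check lives in the free ioctl path and may differ):

	/* Sketch: refuse to free an allocation while a dma-buf still
	 * exports it; taken under the same idr_lock as above.
	 */
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (phys_pg_pack && phys_pg_pack->exporting_cnt) {
		spin_unlock(&vm->idr_lock);
		dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
		return -EINVAL;
	}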

 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
 	u64 block_handle, device_addr = 0;
 	u32 handle = 0, block_size;
-	int rc;
+	int rc, dmabuf_fd = -EBADF;
 
 	switch (args->in.op) {
 	case HL_MEM_OP_ALLOC:

@@ -1542,6 +2023,16 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		args->out.block_size = block_size;
 		break;
 
+	case HL_MEM_OP_EXPORT_DMABUF_FD:
+		rc = export_dmabuf_from_addr(ctx,
+				args->in.export_dmabuf_fd.handle,
+				args->in.export_dmabuf_fd.mem_size,
+				args->in.flags,
+				&dmabuf_fd);
+		memset(args, 0, sizeof(*args));
+		args->out.fd = dmabuf_fd;
+		break;
+
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -ENOTTY;

@@ -1560,7 +2051,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 	struct hl_ctx *ctx = hpriv->ctx;
 	u64 block_handle, device_addr = 0;
 	u32 handle = 0, block_size;
-	int rc;
+	int rc, dmabuf_fd = -EBADF;
 
 	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,

@@ -1651,6 +2142,22 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		args->out.block_size = block_size;
 		break;
 
+	case HL_MEM_OP_EXPORT_DMABUF_FD:
+		if (hdev->asic_prop.dram_supports_virtual_memory)
+			rc = export_dmabuf_from_handle(ctx,
+					args->in.export_dmabuf_fd.handle,
+					args->in.flags,
+					&dmabuf_fd);
+		else
+			rc = export_dmabuf_from_addr(ctx,
+					args->in.export_dmabuf_fd.handle,
+					args->in.export_dmabuf_fd.mem_size,
+					args->in.flags,
+					&dmabuf_fd);
+		memset(args, 0, sizeof(*args));
+		args->out.fd = dmabuf_fd;
+		break;
+
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -ENOTTY;

@@ -501,23 +501,25 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
 
 	if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
 			!is_power_of_2(prop->dram_page_size)) {
-		unsigned long dram_page_size = prop->dram_page_size;
-		u64 page_offset_mask;
-		u64 phys_addr_mask;
-		u32 bit;
+		u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr,
+			page_id, page_start;
+		u32 page_off;
 
 		/*
-		 * find last set bit in page_size to cover all bits of page
-		 * offset. note that 1 has to be added to bit index.
-		 * note that the internal ulong variable is used to avoid
-		 * alignment issue.
+		 * Bit arithmetic cannot be used for non power of two page
+		 * sizes. In addition, since bit arithmetic is not used,
+		 * we cannot ignore the DRAM base. All of that must be
+		 * considered.
 		 */
-		bit = find_last_bit(&dram_page_size,
-					sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
-		page_offset_mask = (BIT_ULL(bit) - 1);
-		phys_addr_mask = ~page_offset_mask;
-		*phys_addr = (tmp_phys_addr & phys_addr_mask) |
-				(virt_addr & page_offset_mask);
+		dram_page_size = prop->dram_page_size;
+		dram_base = prop->dram_base_address;
+		abs_phys_addr = tmp_phys_addr - dram_base;
+		abs_virt_addr = virt_addr - dram_base;
+		page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size);
+		page_start = page_id * dram_page_size;
+		div_u64_rem(abs_virt_addr, dram_page_size, &page_off);
+
+		*phys_addr = page_start + page_off + dram_base;
 	} else {
 		/*
 		 * find the correct hop shift field in hl_mmu_properties
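
As a quick sanity check of the arithmetic above, with hypothetical numbers (not taken from any real ASIC):

	/* Hypothetical example: a 3 MB (non power of two) DRAM page size,
	 * a virtual address 7 MB into DRAM, and an MMU-returned physical
	 * address 12 MB into DRAM.
	 */
	u64 dram_base = 0x20000000ULL;
	u64 dram_page_size = 3ULL * SZ_1M;
	u64 abs_phys_addr = 12ULL * SZ_1M;	/* tmp_phys_addr - dram_base */
	u64 abs_virt_addr = 7ULL * SZ_1M;	/* virt_addr - dram_base */

	u64 page_id = abs_phys_addr / dram_page_size;	/* 12 MB / 3 MB = 4 */
	u64 page_start = page_id * dram_page_size;	/* 12 MB */
	u64 page_off = abs_virt_addr % dram_page_size;	/* 7 MB % 3 MB = 1 MB */

	/* *phys_addr = 12 MB + 1 MB + dram_base */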

@@ -206,12 +206,12 @@ static ssize_t soft_reset_store(struct device *dev,
 		goto out;
 	}
 
-	if (!hdev->allow_external_soft_reset) {
-		dev_err(hdev->dev, "Device does not support soft-reset\n");
+	if (!hdev->allow_inference_soft_reset) {
+		dev_err(hdev->dev, "Device does not support inference soft-reset\n");
 		goto out;
 	}
 
-	dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
+	dev_warn(hdev->dev, "Inference Soft-Reset requested through sysfs\n");
 
 	hl_device_reset(hdev, 0);

@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
-HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
+HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_security.o \
 	gaudi/gaudi_coresight.o

@@ -661,6 +661,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 
 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
 
+	prop->clk_pll_index = HL_GAUDI_MME_PLL;
+	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
+
 	return 0;
 }

@@ -795,6 +798,7 @@ static int gaudi_early_init(struct hl_device *hdev)
 	}
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
+	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
 
 	/* If FW security is enabled at this point it means no access to ELBI */
 	if (hdev->asic_prop.fw_security_enabled) {

@@ -1837,8 +1841,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
 
 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
 
-	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
-
 	hdev->asic_specific = gaudi;
 
 	/* Create DMA pool for small allocations */

@@ -2616,7 +2618,7 @@ static void gaudi_init_e2e(struct hl_device *hdev)
 
 static void gaudi_init_hbm_cred(struct hl_device *hdev)
 {
-	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
+	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
 
 	if (hdev->asic_prop.fw_security_enabled)
 		return;

@@ -7932,6 +7934,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
+	u32 fw_fatal_err_flag = 0;
 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
 			>> EQ_CTL_EVENT_TYPE_SHIFT);
 	bool reset_required;

@@ -7972,6 +7975,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
 		gaudi_print_irq_info(hdev, event_type, true);
 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_GIC500:

@@ -7979,6 +7983,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 	case GAUDI_EVENT_L2_RAM_ECC:
 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
 		gaudi_print_irq_info(hdev, event_type, false);
+		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_HBM0_SPI_0:

@@ -7989,6 +7994,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		gaudi_hbm_read_interrupts(hdev,
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
+		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_HBM0_SPI_1:

@@ -8171,9 +8177,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 
 reset_device:
 	if (hdev->asic_prop.fw_security_enabled)
-		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
+		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
 	else if (hdev->hard_reset_on_fw_events)
-		hl_device_reset(hdev, HL_RESET_HARD);
+		hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
 	else
 		hl_fw_unmask_irq(hdev, event_type);
 }

@@ -9439,9 +9445,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.debugfs_read64 = gaudi_debugfs_read64,
 	.debugfs_write64 = gaudi_debugfs_write64,
 	.debugfs_read_dma = gaudi_debugfs_read_dma,
-	.add_device_attr = gaudi_add_device_attr,
+	.add_device_attr = hl_add_device_attr,
 	.handle_eqe = gaudi_handle_eqe,
-	.set_pll_profile = gaudi_set_pll_profile,
+	.set_pll_profile = hl_set_pll_profile,
 	.get_events_stat = gaudi_get_events_stat,
 	.read_pte = gaudi_read_pte,
 	.write_pte = gaudi_write_pte,

@@ -9465,7 +9471,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.halt_coresight = gaudi_halt_coresight,
 	.ctx_init = gaudi_ctx_init,
 	.ctx_fini = gaudi_ctx_fini,
-	.get_clk_rate = gaudi_get_clk_rate,
+	.get_clk_rate = hl_get_clk_rate,
 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
 	.load_firmware_to_device = gaudi_load_firmware_to_device,
 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,

@@ -319,7 +319,6 @@ struct gaudi_internal_qman_info {
  *	the actual number of internal queues because they are not in
  *	consecutive order.
  * @hbm_bar_cur_addr: current address of HBM PCI bar.
- * @max_freq_value: current max clk frequency.
  * @events: array that holds all event id's
  * @events_stat: array that holds histogram of all received events.
  * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset

@@ -345,7 +344,6 @@ struct gaudi_device {
 	struct gaudi_collective_properties collective_props;
 
 	u64 hbm_bar_cur_addr;
-	u64 max_freq_value;
 
 	u32 events[GAUDI_EVENT_SIZE];
 	u32 events_stat[GAUDI_EVENT_SIZE];

@@ -359,10 +357,8 @@ void gaudi_init_security(struct hl_device *hdev);
 void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
 void gaudi_add_device_attr(struct hl_device *hdev,
 				struct attribute_group *dev_attr_grp);
-void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 int gaudi_debug_coresight(struct hl_device *hdev, void *data);
 void gaudi_halt_coresight(struct hl_device *hdev);
-int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
 
 #endif /* GAUDIP_H_ */

@@ -471,6 +471,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 
 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
 
+	prop->clk_pll_index = HL_GOYA_MME_PLL;
+
 	return 0;
 }

@@ -622,6 +624,7 @@ static int goya_early_init(struct hl_device *hdev)
 	}
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
+	hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);
 
 	/* If FW security is enabled at this point it means no access to ELBI */
 	if (hdev->asic_prop.fw_security_enabled) {

@@ -959,7 +962,7 @@ static int goya_sw_init(struct hl_device *hdev)
 	spin_lock_init(&goya->hw_queues_lock);
 	hdev->supports_coresight = true;
 	hdev->supports_soft_reset = true;
-	hdev->allow_external_soft_reset = true;
+	hdev->allow_inference_soft_reset = true;
 	hdev->supports_wait_for_multi_cs = false;
 
 	hdev->asic_funcs->set_pci_memory_regions(hdev);

@@ -4829,6 +4832,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
 	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
 	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
+		goya_print_irq_info(hdev, event_type, false);
+		if (hdev->hard_reset_on_fw_events)
+			hl_device_reset(hdev, (HL_RESET_HARD |
+						HL_RESET_FW_FATAL_ERR));
+		break;
+
 	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
 		goya_print_irq_info(hdev, event_type, false);
 		if (hdev->hard_reset_on_fw_events)

@@ -5649,7 +5658,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.halt_coresight = goya_halt_coresight,
 	.ctx_init = goya_ctx_init,
 	.ctx_fini = goya_ctx_fini,
-	.get_clk_rate = goya_get_clk_rate,
+	.get_clk_rate = hl_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.load_firmware_to_device = goya_load_firmware_to_device,
 	.load_boot_fit_to_device = goya_load_boot_fit_to_device,

@@ -235,7 +235,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
 
-int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
 u64 goya_get_device_time(struct hl_device *hdev);

@@ -32,37 +32,6 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 	}
 }
 
-int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
-{
-	long value;
-
-	if (!hl_device_operational(hdev, NULL))
-		return -ENODEV;
-
-	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
-
-	if (value < 0) {
-		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
-			value);
-		return value;
-	}
-
-	*max_clk = (value / 1000 / 1000);
-
-	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
-
-	if (value < 0) {
-		dev_err(hdev->dev,
-			"Failed to retrieve device current clock %ld\n",
-			value);
-		return value;
-	}
-
-	*cur_clk = (value / 1000 / 1000);
-
-	return 0;
-}
-
 static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {

@@ -542,11 +542,14 @@ enum cpucp_packet_rc {
  */
 enum cpucp_temp_type {
 	cpucp_temp_input,
+	cpucp_temp_min = 4,
+	cpucp_temp_min_hyst,
 	cpucp_temp_max = 6,
 	cpucp_temp_max_hyst,
 	cpucp_temp_crit,
 	cpucp_temp_crit_hyst,
 	cpucp_temp_offset = 19,
+	cpucp_temp_lowest = 21,
 	cpucp_temp_highest = 22,
 	cpucp_temp_reset_history = 23
 };

@@ -555,6 +558,7 @@ enum cpucp_in_attributes {
 	cpucp_in_input,
 	cpucp_in_min,
 	cpucp_in_max,
+	cpucp_in_lowest = 6,
 	cpucp_in_highest = 7,
 	cpucp_in_reset_history
 };

@@ -563,6 +567,7 @@ enum cpucp_curr_attributes {
 	cpucp_curr_input,
 	cpucp_curr_min,
 	cpucp_curr_max,
+	cpucp_curr_lowest = 6,
 	cpucp_curr_highest = 7,
 	cpucp_curr_reset_history
 };

@@ -598,6 +603,16 @@ enum cpucp_pll_type_attributes {
 	cpucp_pll_pci,
 };
 
+/*
+ * cpucp_power_type aligns with hwmon_power_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum cpucp_power_type {
+	CPUCP_POWER_INPUT = 8,
+	CPUCP_POWER_INPUT_HIGHEST = 9,
+	CPUCP_POWER_RESET_INPUT_HISTORY = 11
+};
+
 /*
  * MSI type enumeration table for all ASICs and future SW versions.
  * For future ASIC-LKD compatibility, we can only add new enumerations.

@@ -731,6 +746,9 @@ struct cpucp_security_info {
  * @pll_map: Bit map of supported PLLs for current ASIC version.
  * @mme_binning_mask: MME binning mask,
  *                    (0 = functional, 1 = binned)
+ * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
+ *                     (0 = functional 1 = binned)
+ * @memory_repair_flag: eFuse flag indicating memory repair
  */
 struct cpucp_info {
 	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];

@@ -749,7 +767,9 @@ struct cpucp_info {
 	__le64 reserved3;
 	__le64 reserved4;
 	__u8 reserved5;
-	__u8 pad[7];
+	__u8 dram_binning_mask;
+	__u8 memory_repair_flag;
+	__u8 pad[5];
 	struct cpucp_security_info sec_info;
 	__le32 reserved6;
 	__u8 pll_map[PLL_MAP_LEN];

@@ -15,6 +15,28 @@
 
 #define VERSION_MAX_LEN			128
 
+enum cpu_boot_err {
+	CPU_BOOT_ERR_DRAM_INIT_FAIL = 0,
+	CPU_BOOT_ERR_FIT_CORRUPTED = 1,
+	CPU_BOOT_ERR_TS_INIT_FAIL = 2,
+	CPU_BOOT_ERR_DRAM_SKIPPED = 3,
+	CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4,
+	CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5,
+	CPU_BOOT_ERR_NIC_FW_FAIL = 6,
+	CPU_BOOT_ERR_SECURITY_NOT_RDY = 7,
+	CPU_BOOT_ERR_SECURITY_FAIL = 8,
+	CPU_BOOT_ERR_EFUSE_FAIL = 9,
+	CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10,
+	CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11,
+	CPU_BOOT_ERR_PLL_FAIL = 12,
+	CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
+	CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
+	CPU_BOOT_ERR_BINNING_FAIL = 19,
+	CPU_BOOT_ERR_ENABLED = 31,
+	CPU_BOOT_ERR_SCND_EN = 63,
+	CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
+};
+
 /*
  * CPU error bits in BOOT_ERROR registers
  *

@@ -78,25 +100,13 @@
  * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	Device is unusable and customer support
  *					should be contacted.
  *
- * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	HALT ACK from ARC0 is not received
- *					within specified retries after issuing
- *					HALT request. ARC0 appears to be in bad
- *					reset.
- *
- * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	HALT ACK from ARC1 is not received
- *					within specified retries after issuing
- *					HALT request. ARC1 appears to be in bad
- *					reset.
- *
- * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	RUN ACK from ARC0 is not received
- *					within specified timeout after issuing
- *					RUN request. ARC0 appears to be in bad
- *					reset.
- *
- * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	RUN ACK from ARC1 is not received
- *					within specified timeout after issuing
- *					RUN request. ARC1 appears to be in bad
- *					reset.
+ * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR	Critical error was detected during
+ *					the execution of ppboot or preboot.
+ *					for example: stack overflow.
+ *
+ * CPU_BOOT_ERR0_BINNING_FAIL		Binning settings failed, meaning
+ *					malfunctioning components might still be
+ *					in use.
  *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the

@@ -104,26 +114,57 @@
  *					registers. Meaning the error bits are
  *					not garbage, but actual error statuses.
  */
-#define CPU_BOOT_ERR0_DRAM_INIT_FAIL		(1 << 0)
-#define CPU_BOOT_ERR0_FIT_CORRUPTED		(1 << 1)
-#define CPU_BOOT_ERR0_TS_INIT_FAIL		(1 << 2)
-#define CPU_BOOT_ERR0_DRAM_SKIPPED		(1 << 3)
-#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << 4)
-#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << 5)
-#define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << 6)
-#define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << 7)
-#define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << 8)
-#define CPU_BOOT_ERR0_EFUSE_FAIL		(1 << 9)
-#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL		(1 << 10)
-#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << 11)
-#define CPU_BOOT_ERR0_PLL_FAIL			(1 << 12)
-#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << 13)
-#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD	(1 << 14)
-#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD	(1 << 15)
-#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD	(1 << 16)
-#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD	(1 << 17)
-#define CPU_BOOT_ERR0_ENABLED			(1 << 31)
-#define CPU_BOOT_ERR1_ENABLED			(1 << 31)
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL		(1 << CPU_BOOT_ERR_DRAM_INIT_FAIL)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED		(1 << CPU_BOOT_ERR_FIT_CORRUPTED)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL		(1 << CPU_BOOT_ERR_TS_INIT_FAIL)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED		(1 << CPU_BOOT_ERR_DRAM_SKIPPED)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << CPU_BOOT_ERR_NIC_FW_FAIL)
+#define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << CPU_BOOT_ERR_SECURITY_NOT_RDY)
+#define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << CPU_BOOT_ERR_SECURITY_FAIL)
+#define CPU_BOOT_ERR0_EFUSE_FAIL		(1 << CPU_BOOT_ERR_EFUSE_FAIL)
+#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL		(1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL)
+#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL)
+#define CPU_BOOT_ERR0_PLL_FAIL			(1 << CPU_BOOT_ERR_PLL_FAIL)
+#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
+#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR		(1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
+#define CPU_BOOT_ERR0_BINNING_FAIL		(1 << CPU_BOOT_ERR_BINNING_FAIL)
+#define CPU_BOOT_ERR0_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
+#define CPU_BOOT_ERR1_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
+
+enum cpu_boot_dev_sts {
+	CPU_BOOT_DEV_STS_SECURITY_EN = 0,
+	CPU_BOOT_DEV_STS_DEBUG_EN = 1,
+	CPU_BOOT_DEV_STS_WATCHDOG_EN = 2,
+	CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3,
+	CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4,
+	CPU_BOOT_DEV_STS_E2E_CRED_EN = 5,
+	CPU_BOOT_DEV_STS_HBM_CRED_EN = 6,
+	CPU_BOOT_DEV_STS_RL_EN = 7,
+	CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8,
+	CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9,
+	CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10,
+	CPU_BOOT_DEV_STS_PLL_INFO_EN = 11,
+	CPU_BOOT_DEV_STS_SP_SRAM_EN = 12,
+	CPU_BOOT_DEV_STS_CLK_GATE_EN = 13,
+	CPU_BOOT_DEV_STS_HBM_ECC_EN = 14,
+	CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15,
+	CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16,
+	CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17,
+	CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18,
+	CPU_BOOT_DEV_STS_DYN_PLL_EN = 19,
+	CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20,
+	CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21,
+	CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22,
+	CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23,
+	CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24,
+	CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25,
+	CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26,
+	CPU_BOOT_DEV_STS_ENABLED = 31,
+	CPU_BOOT_DEV_STS_SCND_EN = 63,
+	CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */
+};
 
 /*
  * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers

@@ -233,7 +274,7 @@
  *					was not served before.
  *					Initialized in: linux
  *
- * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	Use multiple scratchpad interfaces to
+ * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	Use multiple scratchpad interfaces to
  *					prevent IRQs overriding each other.
  *					Initialized in: linux
  *

@@ -252,6 +293,11 @@
  *					where a bit is set if the engine is not idle.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_MAP_HWMON_EN
+ *					If set, means f/w supports proprietary
+ *					HWMON enum mapping to cpucp enums.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status

@@ -261,34 +307,35 @@
  *					Initialized in: preboot
  *
  */
-#define CPU_BOOT_DEV_STS0_SECURITY_EN		(1 << 0)
-#define CPU_BOOT_DEV_STS0_DEBUG_EN		(1 << 1)
-#define CPU_BOOT_DEV_STS0_WATCHDOG_EN		(1 << 2)
-#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN		(1 << 3)
-#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN		(1 << 4)
-#define CPU_BOOT_DEV_STS0_E2E_CRED_EN		(1 << 5)
-#define CPU_BOOT_DEV_STS0_HBM_CRED_EN		(1 << 6)
-#define CPU_BOOT_DEV_STS0_RL_EN			(1 << 7)
-#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN		(1 << 8)
-#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN		(1 << 9)
-#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN	(1 << 10)
-#define CPU_BOOT_DEV_STS0_PLL_INFO_EN		(1 << 11)
-#define CPU_BOOT_DEV_STS0_SP_SRAM_EN		(1 << 12)
-#define CPU_BOOT_DEV_STS0_CLK_GATE_EN		(1 << 13)
-#define CPU_BOOT_DEV_STS0_HBM_ECC_EN		(1 << 14)
-#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN		(1 << 15)
-#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN		(1 << 16)
-#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN	(1 << 17)
-#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN		(1 << 18)
-#define CPU_BOOT_DEV_STS0_DYN_PLL_EN		(1 << 19)
-#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN	(1 << 20)
-#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN		(1 << 21)
-#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	(1 << 22)
-#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN	(1 << 23)
-#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN	(1 << 24)
-#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN	(1 << 25)
-#define CPU_BOOT_DEV_STS0_ENABLED		(1 << 31)
-#define CPU_BOOT_DEV_STS1_ENABLED		(1 << 31)
+#define CPU_BOOT_DEV_STS0_SECURITY_EN		(1 << CPU_BOOT_DEV_STS_SECURITY_EN)
+#define CPU_BOOT_DEV_STS0_DEBUG_EN		(1 << CPU_BOOT_DEV_STS_DEBUG_EN)
+#define CPU_BOOT_DEV_STS0_WATCHDOG_EN		(1 << CPU_BOOT_DEV_STS_WATCHDOG_EN)
+#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN		(1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN)
+#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN		(1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN)
+#define CPU_BOOT_DEV_STS0_E2E_CRED_EN		(1 << CPU_BOOT_DEV_STS_E2E_CRED_EN)
+#define CPU_BOOT_DEV_STS0_HBM_CRED_EN		(1 << CPU_BOOT_DEV_STS_HBM_CRED_EN)
+#define CPU_BOOT_DEV_STS0_RL_EN			(1 << CPU_BOOT_DEV_STS_RL_EN)
+#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN		(1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN)
+#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN		(1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN)
+#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN	(1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN)
+#define CPU_BOOT_DEV_STS0_PLL_INFO_EN		(1 << CPU_BOOT_DEV_STS_PLL_INFO_EN)
+#define CPU_BOOT_DEV_STS0_SP_SRAM_EN		(1 << CPU_BOOT_DEV_STS_SP_SRAM_EN)
+#define CPU_BOOT_DEV_STS0_CLK_GATE_EN		(1 << CPU_BOOT_DEV_STS_CLK_GATE_EN)
+#define CPU_BOOT_DEV_STS0_HBM_ECC_EN		(1 << CPU_BOOT_DEV_STS_HBM_ECC_EN)
+#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN		(1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN)
+#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN		(1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN)
+#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN	(1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN		(1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN)
+#define CPU_BOOT_DEV_STS0_DYN_PLL_EN		(1 << CPU_BOOT_DEV_STS_DYN_PLL_EN)
+#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN	(1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN)
+#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN		(1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN)
+#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	(1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN	(1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN	(1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN)
+#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN	(1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN)
+#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN		(1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN)
+#define CPU_BOOT_DEV_STS0_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
+#define CPU_BOOT_DEV_STS1_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
 
 enum cpu_boot_status {
 	CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */

@@ -405,6 +452,8 @@ struct cpu_dyn_regs {
 enum comms_msg_type {
 	HL_COMMS_DESC_TYPE = 0,
 	HL_COMMS_RESET_CAUSE_TYPE = 1,
+	HL_COMMS_FW_CFG_SKIP_TYPE = 2,
+	HL_COMMS_BINNING_CONF_TYPE = 3,
 };
 
 /* TODO: remove this struct after the code is updated to use message */

@@ -464,6 +513,9 @@ struct lkd_fw_comms_msg {
 		struct {
 			__u8 reset_cause;
 		};
+		struct {
+			__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
+		};
 	};
 };

@@ -507,8 +559,6 @@ struct lkd_fw_comms_msg {
  * COMMS_SKIP_BMC		Perform actions required for BMC-less servers.
  *				Do not wait for BMC response.
  *
- * COMMS_LOW_PLL_OPP		Initialize PLLs for low OPP.
- *
  * COMMS_PREP_DESC_ELBI		Same as COMMS_PREP_DESC only that the memory
  *				space is allocated in a ELBI access only
  *				address range.

@@ -524,7 +574,6 @@ enum comms_cmd {
 	COMMS_RST_DEV = 6,
 	COMMS_GOTO_WFE = 7,
 	COMMS_SKIP_BMC = 8,
-	COMMS_LOW_PLL_OPP = 9,
 	COMMS_PREP_DESC_ELBI = 10,
 	COMMS_INVLD_LAST
 };
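
With the bit positions now named by enum cpu_boot_err, error-register decoding can be table-driven. A hedged sketch (the actual reporting loop in the driver may differ):

	/* Sketch: treat the two 32-bit BOOT_ERROR registers as one 64-bit
	 * value and report every set bit by its enum cpu_boot_err position.
	 * err_reg_lo/err_reg_hi are assumed to be prior register reads.
	 */
	u64 err_val = ((u64) err_reg_hi << 32) | err_reg_lo;
	int i;

	for (i = 0 ; i < CPU_BOOT_ERR_LAST ; i++)
		if (err_val & BIT_ULL(i))
			dev_err(hdev->dev, "firmware boot error bit %d is set\n", i);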

@@ -8,8 +8,6 @@
 #ifndef GAUDI_FW_IF_H
 #define GAUDI_FW_IF_H
 
-#include <linux/types.h>
-
 #define GAUDI_EVENT_QUEUE_MSI_IDX	8
 #define GAUDI_NIC_PORT1_MSI_IDX		10
 #define GAUDI_NIC_PORT3_MSI_IDX		12

@@ -78,13 +76,13 @@ struct gaudi_nic_status {
 	__u32 high_ber_cnt;
 };
 
-struct gaudi_flops_2_data {
+struct gaudi_cold_rst_data {
 	union {
 		struct {
-			__u32 spsram_init_done : 1;
-			__u32 reserved : 31;
+			u32 spsram_init_done : 1;
+			u32 reserved : 31;
 		};
-		__u32 data;
+		__le32 data;
 	};
 };

@@ -33,6 +33,7 @@
 #define mmRDWR_TEST		mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
 #define mmBTL_ID		mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
 #define mmPREBOOT_PCIE_EN	mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_1
+#define mmCOLD_RST_DATA		mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_2
 #define mmUPD_PENDING_STS	mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_3
 
 #endif /* GAUDI_REG_MAP_H_ */

@@ -272,6 +272,16 @@ enum hl_gaudi_pll_index {
 	HL_GAUDI_PLL_MAX
 };
 
+/**
+ * enum hl_device_status - Device status information.
+ * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational.
+ * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset.
+ * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable.
+ * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
+ * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
+ *                                       progress.
+ * @HL_DEVICE_STATUS_LAST: Last status.
+ */
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,

@@ -556,33 +566,30 @@ enum gaudi_dcores {
 	HL_GAUDI_ES_DCORE
 };
 
+/**
+ * struct hl_info_args - Main structure to retrieve device related information.
+ * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation
+ *                  mentioned in @op.
+ * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it
+ *               limits how many bytes the kernel can write. For hw_events array, the size should be
+ *               hl_info_hw_ip_info.num_of_events * sizeof(__u32).
+ * @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details.
+ * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores).
+ * @ctx_id: Context ID of the user. Currently not in use.
+ * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
+ *             resolution. Currently not in use.
+ * @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
+ * @pad: Padding to 64 bit.
+ */
 struct hl_info_args {
-	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
-	/*
-	 * The size of the return value. Just like "size" in "snprintf",
-	 * it limits how many bytes the kernel can write
-	 *
-	 * For hw_events array, the size should be
-	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
-	 */
 	__u32 return_size;
 
-	/* HL_INFO_* */
 	__u32 op;
 
 	union {
-		/* Dcore id for which the information is relevant.
-		 * For Gaudi refer to 'enum gaudi_dcores'
-		 */
 		__u32 dcore_id;
-		/* Context ID - Currently not in use */
 		__u32 ctx_id;
-		/* Period value for utilization rate (100ms - 1000ms, in 100ms
-		 * resolution.
-		 */
 		__u32 period_ms;
-		/* PLL frequency retrieval */
 		__u32 pll_index;
 	};

@@ -890,11 +897,7 @@ struct hl_wait_cs_in {
 		 */
 		__u64 addr;
 		/* Target value for completion comparison */
-		__u32 target;
-		/* Absolute timeout to wait for interrupt
-		 * in microseconds
-		 */
-		__u32 interrupt_timeout_us;
+		__u64 target;
 	};
 };

@@ -910,7 +913,12 @@ struct hl_wait_cs_in {
 
 	/* Multi CS API info - valid entries in multi-CS array */
 	__u8 seq_arr_len;
-	__u8 pad[7];
+	__u8 pad[3];
+
+	/* Absolute timeout to wait for an interrupt in microseconds.
+	 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+	 */
+	__u32 interrupt_timeout_us;
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0

@@ -952,6 +960,10 @@ union hl_wait_cs_args {
 #define HL_MEM_OP_UNMAP			3
 /* Opcode to map a hw block */
 #define HL_MEM_OP_MAP_BLOCK		4
+/* Opcode to create DMA-BUF object for an existing device memory allocation
+ * and to export an FD of that DMA-BUF back to the caller
+ */
+#define HL_MEM_OP_EXPORT_DMABUF_FD	5
 
 /* Memory flags */
 #define HL_MEM_CONTIGUOUS	0x1

@@ -1023,11 +1035,26 @@ struct hl_mem_in {
 			/* Virtual address returned from HL_MEM_OP_MAP */
 			__u64 device_virt_addr;
 		} unmap;
+
+		/* HL_MEM_OP_EXPORT_DMABUF_FD */
+		struct {
+			/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
+			 * where we don't have MMU for the device memory, the
+			 * driver expects a physical address (instead of
+			 * a handle) in the device memory space.
+			 */
+			__u64 handle;
+			/* Size of memory allocation. Relevant only for GAUDI */
+			__u64 mem_size;
+		} export_dmabuf_fd;
 	};
 
 	/* HL_MEM_OP_* */
 	__u32 op;
-	/* HL_MEM_* flags */
+	/* HL_MEM_* flags.
+	 * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
+	 * DMA-BUF file/FD flags.
+	 */
 	__u32 flags;
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;

@@ -1064,6 +1091,13 @@ struct hl_mem_out {
 
 			__u32 pad;
 		};
+
+		/* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the
+		 * DMA-BUF object that was created to describe a memory
+		 * allocation on the device's memory space. The FD should be
+		 * passed to the importer driver
+		 */
+		__s32 fd;
 	};
 };
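
To tie the uAPI together, a hypothetical user-space flow for exporting a DRAM allocation as a dma-buf FD might look like this (illustrative only; error handling is simplified and 'hl_fd'/'handle'/'mem_size' are assumed to come from earlier steps):

	/* User-space sketch of HL_MEM_OP_EXPORT_DMABUF_FD; 'hl_fd' is an
	 * open habanalabs device file, 'handle' was returned by a prior
	 * HL_MEM_OP_ALLOC (or is a DRAM physical address on Gaudi).
	 */
	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>

	union hl_mem_args args;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.export_dmabuf_fd.handle = handle;
	args.in.export_dmabuf_fd.mem_size = mem_size;	/* Gaudi only */
	args.in.flags = O_RDWR | O_CLOEXEC;		/* DMA-BUF FD flags */

	if (ioctl(hl_fd, HL_IOCTL_MEMORY, &args))
		return -1;

	/* args.out.fd can now be handed to an RDMA driver for p2p access */
	int dmabuf_fd = args.out.fd;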