Merge tag 'misc-habanalabs-next-2019-07-04' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:

This tag contains the following changes for kernel 5.3:

- Change the way the device's CPU accesses the host memory. This allows
  the driver to use the kernel API of setting the DMA mask in a standard
  way (call it once).
- Add a new debugfs entry to show the status of the internal DMA and
  compute engines. This is very helpful for debugging in case a command
  submission gets stuck.
- Return to the user a mask of the internal engines indicating their
  busy state.
- Make sure to restore registers that can be modified by the user to
  their default values. Only applies to registers that are initialized
  by the driver.
- Eliminate redundant and dead code.
- Support memset of the device's memory with size larger than 4GB.
- Force the user to set the device to debug mode before configuring the
  device's coresight infrastructure.
- Improve error printing in case of interrupts from the device.

* tag 'misc-habanalabs-next-2019-07-04' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
  habanalabs: Add busy engines bitmask to HW idle IOCTL
  habanalabs: Add debugfs node for engines status
  habanalabs: Update the device idle check
  habanalabs: Allow accessing host mapped addresses via debugfs
  habanalabs: add WARN in case of bad MMU mapping
  habanalabs: remove DMA mask hack for Goya
  habanalabs: set Goya CPU to use ASIC MMU
  habanalabs: add MMU mappings for Goya CPU
  habanalabs: initialize MMU context for driver
  habanalabs: de-couple MMU and VM module initialization
  habanalabs: initialize device CPU queues after MMU init
  docs/habanalabs: update text for some entries in sysfs
  habanalabs: add rate-limit to an error message
  habanalabs: remove simulator dedicated code
  habanalabs: restore unsecured registers default values
  habanalabs: clear sobs and monitors in context switch
  habanalabs: make tpc registers secured
  habanalabs: don't limit packet size for device CPU
  habanalabs: support device memory memset > 4GB
  habanalabs: print event name for fatal and non-RAZWI events
  ...
@@ -3,7 +3,10 @@ Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Sets the device address to be used for read or write through
PCI bar. The acceptable value is a string that starts with "0x"
PCI bar, or the device VA of a host mapped memory to be read or
written directly from the host. The latter option is allowed
only when the IOMMU is disabled.
The acceptable value is a string that starts with "0x"

What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date: Jan 2019

@@ -33,10 +36,12 @@ Contact: oded.gabbay@gmail.com
Description: Allows the root user to read or write directly through the
device's PCI bar. Writing to this file generates a write
transaction while reading from the file generates a read
transcation. This custom interface is needed (instead of using
transaction. This custom interface is needed (instead of using
the generic Linux user-space PCI mapping) because the DDR bar
is very small compared to the DDR memory and only the driver can
move the bar before and after the transaction
move the bar before and after the transaction.
If the IOMMU is disabled, it also allows the root user to read
or write from the host a device VA of a host mapped memory

What: /sys/kernel/debug/habanalabs/hl<n>/device
Date: Jan 2019

@@ -46,6 +51,13 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values

What: /sys/kernel/debug/habanalabs/hl<n>/engines
Date: Jul 2019
KernelVersion: 5.3
Contact: oded.gabbay@gmail.com
Description: Displays the status registers values of the device engines and
their derived idle status

What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1

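Taken together with the sibling data32 entry (documented elsewhere in this
ABI file), the addr entry above forms a simple peek interface. A sketch of
user-space usage; the hl0 instance and the address value are made-up
examples:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char val[16] = {0};
	int fd;

	/* select the device address (or host-mapped VA) to access */
	fd = open("/sys/kernel/debug/habanalabs/hl0/addr", O_WRONLY);
	if (fd < 0)
		return 1;
	write(fd, "0x20000000", strlen("0x20000000"));
	close(fd);

	/* read back 32 bits from the selected address */
	fd = open("/sys/kernel/debug/habanalabs/hl0/data32", O_RDONLY);
	if (fd < 0)
		return 1;
	read(fd, val, sizeof(val) - 1);
	close(fd);

	printf("value: %s\n", val);
	return 0;
}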
@@ -62,18 +62,20 @@ What: /sys/class/habanalabs/hl<n>/ic_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Allows the user to set the maximum clock frequency of the
Interconnect fabric. Writes to this parameter affect the device
only when the power management profile is set to "manual" mode.
The device IC clock might be set to lower value then the
Description: Allows the user to set the maximum clock frequency, in Hz, of
the Interconnect fabric. Writes to this parameter affect the
device only when the power management profile is set to "manual"
mode. The device IC clock might be set to lower value than the
maximum. The user should read the ic_clk_curr to see the actual
frequency value of the IC
frequency value of the IC. This property is valid only for the
Goya ASIC family

What: /sys/class/habanalabs/hl<n>/ic_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays the current clock frequency of the Interconnect fabric
Description: Displays the current clock frequency, in Hz, of the Interconnect
fabric. This property is valid only for the Goya ASIC family

What: /sys/class/habanalabs/hl<n>/infineon_ver
Date: Jan 2019

@@ -92,18 +94,20 @@ What: /sys/class/habanalabs/hl<n>/mme_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Allows the user to set the maximum clock frequency of the
MME compute engine. Writes to this parameter affect the device
only when the power management profile is set to "manual" mode.
The device MME clock might be set to lower value then the
Description: Allows the user to set the maximum clock frequency, in Hz, of
the MME compute engine. Writes to this parameter affect the
device only when the power management profile is set to "manual"
mode. The device MME clock might be set to lower value than the
maximum. The user should read the mme_clk_curr to see the actual
frequency value of the MME
frequency value of the MME. This property is valid only for the
Goya ASIC family

What: /sys/class/habanalabs/hl<n>/mme_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays the current clock frequency of the MME compute engine
Description: Displays the current clock frequency, in Hz, of the MME compute
engine. This property is valid only for the Goya ASIC family

What: /sys/class/habanalabs/hl<n>/pci_addr
Date: Jan 2019

@@ -163,18 +167,20 @@ What: /sys/class/habanalabs/hl<n>/tpc_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Allows the user to set the maximum clock frequency of the
TPC compute engines. Writes to this parameter affect the device
only when the power management profile is set to "manual" mode.
The device TPC clock might be set to lower value then the
Description: Allows the user to set the maximum clock frequency, in Hz, of
the TPC compute engines. Writes to this parameter affect the
device only when the power management profile is set to "manual"
mode. The device TPC clock might be set to lower value than the
maximum. The user should read the tpc_clk_curr to see the actual
frequency value of the TPC
frequency value of the TPC. This property is valid only for
Goya ASIC family

What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays the current clock frequency of the TPC compute engines
Description: Displays the current clock frequency, in Hz, of the TPC compute
engines. This property is valid only for the Goya ASIC family

What: /sys/class/habanalabs/hl<n>/uboot_ver
Date: Jan 2019

@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)

mutex_init(&hdev->asid_mutex);

/* ASID 0 is reserved for KMD */
/* ASID 0 is reserved for KMD and device CPU */
set_bit(0, hdev->asid_bitmap);

return 0;

@@ -682,14 +682,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
u32 tmp;

rc = hl_poll_timeout_memory(hdev,
(u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
jiffies_to_usecs(hdev->timeout_jiffies),
&tmp);
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
100, jiffies_to_usecs(hdev->timeout_jiffies));

if (rc || !tmp) {
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
"context switch phase didn't finish in time\n");
rc = -ETIMEDOUT;
"context switch phase timeout (%d)\n", tmp);
goto out;
}
}

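The new call site above uses the macro form
hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us), which
replaces the exported helper functions removed further down in this series.
A minimal sketch of what such a statement-expression macro could look like,
modeled on the kernel's readx_poll_timeout and assuming linux/ktime.h and
linux/delay.h; the actual driver header may differ, and hdev is kept only
for API parity:

#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
({ \
	ktime_t __timeout = ktime_add_us(ktime_get(), (timeout_us)); \
	might_sleep(); \
	for (;;) { \
		/* flush CPU buffers so updates by the device are seen */ \
		mb(); \
		(val) = *(addr); \
		if (cond) \
			break; \
		if (ktime_compare(ktime_get(), __timeout) > 0) { \
			(val) = *(addr); \
			break; \
		} \
		usleep_range(((sleep_us) >> 2) + 1, (sleep_us)); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})

Folding the condition into the macro lets each caller poll for its own fence
value instead of the old "non-zero means done" convention.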
@@ -31,9 +31,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
*/
hdev->asic_funcs->halt_coresight(hdev);
if (hdev->in_debug)
hl_device_set_debug_mode(hdev, false);

hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
} else {
hl_mmu_ctx_fini(ctx);
}
}

@@ -117,6 +121,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)

if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mmu ctx module\n");
goto mem_ctx_err;
}
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {

@@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct hl_ctx *ctx = hdev->user_ctx;
struct hl_ctx *ctx;

u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data)
if (!hdev->mmu_enable)
return 0;

if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
ctx = hdev->kernel_ctx;
else
ctx = hdev->user_ctx;

if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return 0;
@@ -495,6 +500,36 @@ err:
return -EINVAL;
}

static int engines_show(struct seq_file *s, void *data)
{
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;

hdev->asic_funcs->is_device_idle(hdev, NULL, s);

return 0;
}

static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;

if (!hdev->mmu_enable)
goto out;

if (hdev->dram_supports_virtual_memory &&
addr >= prop->va_space_dram_start_address &&
addr < prop->va_space_dram_end_address)
return true;

if (addr >= prop->va_space_host_start_address &&
addr < prop->va_space_host_end_address)
return true;
out:
return false;
}

static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
@@ -568,7 +603,6 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
char tmp_buf[32];
u64 addr = entry->addr;
u32 val;
@@ -577,11 +611,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
if (*ppos)
return 0;

if (addr >= prop->va_space_dram_start_address &&
addr < prop->va_space_dram_end_address &&
hdev->mmu_enable &&
hdev->dram_supports_virtual_memory) {
rc = device_va_to_pa(hdev, entry->addr, &addr);
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -602,7 +633,6 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = entry->addr;
u32 value;
ssize_t rc;
@@ -611,11 +641,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;

if (addr >= prop->va_space_dram_start_address &&
addr < prop->va_space_dram_end_address &&
hdev->mmu_enable &&
hdev->dram_supports_virtual_memory) {
rc = device_va_to_pa(hdev, entry->addr, &addr);
if (hl_is_device_va(hdev, addr)) {
rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -877,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"userptr", userptr_show, NULL},
{"vm", vm_show, NULL},
{"mmu", mmu_show, mmu_write},
{"engines", engines_show, NULL}
};

static int hl_debugfs_open(struct inode *inode, struct file *file)

@@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)

mutex_init(&hdev->fd_open_cnt_lock);
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
spin_lock_init(&hdev->hw_queues_mirror_lock);
@@ -262,6 +263,7 @@ early_fini:
static void device_early_fini(struct hl_device *hdev)
{
mutex_destroy(&hdev->mmu_cache_lock);
mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);

hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -324,7 +326,15 @@ static int device_late_init(struct hl_device *hdev)
{
int rc;

INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
if (hdev->asic_funcs->late_init) {
rc = hdev->asic_funcs->late_init(hdev);
if (rc) {
dev_err(hdev->dev,
"failed late initialization for the H/W\n");
return rc;
}
}

hdev->high_pll = hdev->asic_prop.high_pll;

/* force setting to low frequency */
@@ -335,15 +345,7 @@ static int device_late_init(struct hl_device *hdev)
else
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

if (hdev->asic_funcs->late_init) {
rc = hdev->asic_funcs->late_init(hdev);
if (rc) {
dev_err(hdev->dev,
"failed late initialization for the H/W\n");
return rc;
}
}

INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
schedule_delayed_work(&hdev->work_freq,
usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

@@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
return 1;
}

int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
int rc = 0;

mutex_lock(&hdev->debug_lock);

if (!enable) {
if (!hdev->in_debug) {
dev_err(hdev->dev,
"Failed to disable debug mode because device was not in debug mode\n");
rc = -EFAULT;
goto out;
}

hdev->asic_funcs->halt_coresight(hdev);
hdev->in_debug = 0;

goto out;
}

if (hdev->in_debug) {
dev_err(hdev->dev,
"Failed to enable debug mode because device is already in debug mode\n");
rc = -EFAULT;
goto out;
}

mutex_lock(&hdev->fd_open_cnt_lock);

if (atomic_read(&hdev->fd_open_cnt) > 1) {
dev_err(hdev->dev,
"Failed to enable debug mode. More then a single user is using the device\n");
rc = -EPERM;
goto unlock_fd_open_lock;
}

hdev->in_debug = 1;

unlock_fd_open_lock:
mutex_unlock(&hdev->fd_open_cnt_lock);
out:
mutex_unlock(&hdev->debug_lock);

return rc;
}

/*
* hl_device_suspend - initiate device suspend
*
@@ -647,13 +695,6 @@ again:

hdev->hard_reset_pending = true;

if (!hdev->pdev) {
dev_err(hdev->dev,
"Reset action is NOT supported in simulator\n");
rc = -EINVAL;
goto out_err;
}

device_reset_work = kzalloc(sizeof(*device_reset_work),
GFP_ATOMIC);
if (!device_reset_work) {
@@ -704,6 +745,7 @@ again:

if (hard_reset) {
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue);
}

@@ -731,6 +773,13 @@ again:
goto out_err;
}

rc = hl_mmu_init(hdev);
if (rc) {
dev_err(hdev->dev,
"Failed to initialize MMU S/W after hard reset\n");
goto out_err;
}

/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
GFP_KERNEL);
@@ -902,11 +951,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto cq_fini;
}

/* MMU S/W must be initialized before kernel context is created */
rc = hl_mmu_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
goto eq_fini;
}

/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
goto eq_fini;
goto mmu_fini;
}

hdev->user_ctx = NULL;
@@ -954,8 +1010,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto out_disabled;
}

/* After test_queues, KMD can start sending messages to device CPU */

rc = device_late_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed late initialization\n");
@@ -1001,6 +1055,8 @@ release_ctx:
"kernel ctx is still alive on initialization failure\n");
free_ctx:
kfree(hdev->kernel_ctx);
mmu_fini:
hl_mmu_fini(hdev);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
@@ -1105,6 +1161,8 @@ void hl_device_fini(struct hl_device *hdev)

hl_vm_fini(hdev);

hl_mmu_fini(hdev);

hl_eq_fini(hdev, &hdev->event_queue);

for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
@@ -1125,95 +1183,6 @@ void hl_device_fini(struct hl_device *hdev)
pr_info("removed device successfully\n");
}

/*
* hl_poll_timeout_memory - Periodically poll a host memory address
* until it is not zero or a timeout occurs
* @hdev: pointer to habanalabs device structure
* @addr: Address to poll
* @timeout_us: timeout in us
* @val: Variable to read the value into
*
* Returns 0 on success and -ETIMEDOUT upon a timeout. In either
* case, the last read value at @addr is stored in @val. Must not
* be called from atomic context if sleep_us or timeout_us are used.
*
* The function sleeps for 100us with timeout value of
* timeout_us
*/
int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
u32 timeout_us, u32 *val)
{
/*
* address in this function points always to a memory location in the
* host's (server's) memory. That location is updated asynchronously
* either by the direct access of the device or by another core
*/
u32 *paddr = (u32 *) (uintptr_t) addr;
ktime_t timeout;

/* timeout should be longer when working with simulator */
if (!hdev->pdev)
timeout_us *= 10;

timeout = ktime_add_us(ktime_get(), timeout_us);

might_sleep();

for (;;) {
/*
* Flush CPU read/write buffers to make sure we read updates
* done by other cores or by the device
*/
mb();
*val = *paddr;
if (*val)
break;
if (ktime_compare(ktime_get(), timeout) > 0) {
*val = *paddr;
break;
}
usleep_range((100 >> 2) + 1, 100);
}

return *val ? 0 : -ETIMEDOUT;
}

/*
* hl_poll_timeout_devicememory - Periodically poll a device memory address
* until it is not zero or a timeout occurs
* @hdev: pointer to habanalabs device structure
* @addr: Device address to poll
* @timeout_us: timeout in us
* @val: Variable to read the value into
*
* Returns 0 on success and -ETIMEDOUT upon a timeout. In either
* case, the last read value at @addr is stored in @val. Must not
* be called from atomic context if sleep_us or timeout_us are used.
*
* The function sleeps for 100us with timeout value of
* timeout_us
*/
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
u32 timeout_us, u32 *val)
{
ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);

might_sleep();

for (;;) {
*val = readl(addr);
if (*val)
break;
if (ktime_compare(ktime_get(), timeout) > 0) {
*val = readl(addr);
break;
}
usleep_range((100 >> 2) + 1, 100);
}

return *val ? 0 : -ETIMEDOUT;
}

/*
* MMIO register access helper functions.
*/

@@ -29,13 +29,13 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,

rc = request_firmware(&fw, fw_name, hdev->dev);
if (rc) {
dev_err(hdev->dev, "Failed to request %s\n", fw_name);
dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
goto out;
}

fw_size = fw->size;
if ((fw_size % 4) != 0) {
dev_err(hdev->dev, "illegal %s firmware size %zu\n",
dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
fw_name, fw_size);
rc = -EINVAL;
goto out;
@@ -85,12 +85,6 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u32 tmp;
int rc = 0;

if (len > HL_CPU_CB_SIZE) {
dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
len);
return -ENOMEM;
}

pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
&pkt_dma_addr);
if (!pkt) {
@@ -117,34 +111,29 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}

rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
timeout, &tmp);
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
(tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);

hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

if (rc == -ETIMEDOUT) {
dev_err(hdev->dev, "Timeout while waiting for device CPU\n");
dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
hdev->device_cpu_disabled = true;
goto out;
}

if (tmp == ARMCP_PACKET_FENCE_VAL) {
u32 ctl = le32_to_cpu(pkt->ctl);
tmp = le32_to_cpu(pkt->ctl);

rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
if (rc) {
dev_err(hdev->dev,
"F/W ERROR %d for CPU packet %d\n",
rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK)
dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
rc,
(tmp & ARMCP_PKT_CTL_OPCODE_MASK)
>> ARMCP_PKT_CTL_OPCODE_SHIFT);
rc = -EINVAL;
rc = -EIO;
} else if (result) {
*result = (long) le64_to_cpu(pkt->result);
}
} else {
dev_err(hdev->dev, "CPU packet wrong fence value\n");
rc = -EINVAL;
}

out:
mutex_unlock(&hdev->send_cpu_message_lock);
@@ -186,9 +175,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
{
u64 kernel_addr;

/* roundup to HL_CPU_PKT_SIZE */
size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;

kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);

*dma_handle = hdev->cpu_accessible_dma_address +
@@ -200,9 +186,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr)
{
/* roundup to HL_CPU_PKT_SIZE */
size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;

gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
size);
}
@@ -256,7 +239,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
HL_ARMCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
dev_err(hdev->dev,
"Failed to send armcp info pkt, error %d\n", rc);
"Failed to send ArmCP info pkt, error %d\n", rc);
goto out;
}

@@ -291,7 +274,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
max_size, &eeprom_info_dma_addr);
if (!eeprom_info_cpu_addr) {
dev_err(hdev->dev,
"Failed to allocate DMA memory for EEPROM info packet\n");
"Failed to allocate DMA memory for ArmCP EEPROM packet\n");
return -ENOMEM;
}

@@ -307,7 +290,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)

if (rc) {
dev_err(hdev->dev,
"Failed to send armcp EEPROM pkt, error %d\n", rc);
"Failed to send ArmCP EEPROM packet, error %d\n", rc);
goto out;
}

@@ -14,6 +14,8 @@
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
* GOYA security scheme:
@@ -89,6 +91,30 @@
#define GOYA_CB_POOL_CB_CNT 512
#define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)


static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
"goya cq 4", "goya cpu eq"
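For readers unfamiliar with ## token pasting, the per-engine wrappers above
expand as follows (illustrative expansion, not new code):

/*
 * IS_TPC_QM_IDLE(sts) expands to:
 *
 *     (((sts) & TPC_QM_IDLE_MASK) == TPC_QM_IDLE_MASK)
 *
 * i.e. a queue manager is reported idle only when every bit of the
 * per-engine idle mask is set in its GLBL_STS0 register value.
 */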
@@ -297,6 +323,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev)

prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

rc = hl_pci_init(hdev, 39);
rc = hl_pci_init(hdev, 48);
if (rc)
return rc;

@@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;

goya_fetch_psoc_frequency(hdev);

rc = goya_mmu_clear_pgt_range(hdev);
if (rc) {
dev_err(hdev->dev,
"Failed to clear MMU page tables range %d\n", rc);
return rc;
}

rc = goya_mmu_set_dram_default_page(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
return rc;
}

rc = goya_mmu_add_mappings_for_device_cpu(hdev);
if (rc)
return rc;

rc = goya_init_cpu_queues(hdev);
if (rc)
return rc;

rc = goya_test_cpu_queue(hdev);
if (rc)
return rc;

rc = goya_armcp_info_get(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to get armcp info\n");
dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
return rc;
}

@@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev)

rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
dev_err(hdev->dev,
"Failed to enable PCI access from CPU %d\n", rc);
return rc;
}

WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

goya_fetch_psoc_frequency(hdev);

rc = goya_mmu_clear_pgt_range(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
goto disable_pci_access;
}

rc = goya_mmu_set_dram_default_page(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to set DRAM default page\n");
goto disable_pci_access;
}

return 0;

disable_pci_access:
hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);

return rc;
}

/*
@@ -655,7 +695,10 @@ static int goya_sw_init(struct hl_device *hdev)
goto free_dma_pool;
}

hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1);
dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n",
hdev->cpu_accessible_dma_address);

hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
if (!hdev->cpu_accessible_dma_pool) {
dev_err(hdev->dev,
"Failed to create CPU accessible DMA pool\n");
@@ -786,7 +829,6 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
else
sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
@@ -973,9 +1015,9 @@ int goya_init_cpu_queues(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));

WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
lower_32_bits(hdev->cpu_accessible_dma_address));
lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
upper_32_bits(hdev->cpu_accessible_dma_address));
upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
@@ -1001,7 +1043,7 @@ int goya_init_cpu_queues(struct hl_device *hdev)

if (err) {
dev_err(hdev->dev,
"Failed to communicate with ARM CPU (ArmCP timeout)\n");
"Failed to setup communication with device CPU\n");
return -EIO;
}

@@ -2061,10 +2103,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);

if (hard_reset)
if (hard_reset) {
goya_disable_msix(hdev);
else
goya_mmu_remove_device_cpu_mappings(hdev);
} else {
goya_sync_irqs(hdev);
}
}

/*
@@ -2277,14 +2321,14 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);

if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
goto out;

if (!hdev->fw_loading) {
dev_info(hdev->dev, "Skip loading FW\n");
goto out;
}

if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
goto out;

rc = goya_push_linux_to_device(hdev);
if (rc)
return rc;
@@ -2466,34 +2510,11 @@ static int goya_hw_init(struct hl_device *hdev)
if (rc)
goto disable_queues;

rc = goya_init_cpu_queues(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
rc);
goto disable_msix;
}

/*
* Check if we managed to set the DMA mask to more then 32 bits. If so,
* let's try to increase it again because in Goya we set the initial
* dma mask to less then 39 bits so that the allocation of the memory
* area for the device's cpu will be under 39 bits
*/
if (hdev->dma_mask > 32) {
rc = hl_pci_set_dma_mask(hdev, 48);
if (rc)
goto disable_pci_access;
}

/* Perform read from the device to flush all MSI-X configuration */
val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

return 0;

disable_pci_access:
hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
disable_msix:
goya_disable_msix(hdev);
disable_queues:
goya_disable_internal_queues(hdev);
goya_disable_external_queues(hdev);
@@ -2629,7 +2650,6 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
u32 db_reg_offset, db_value;
bool invalid_queue = false;

switch (hw_queue_id) {
case GOYA_QUEUE_ID_DMA_0:
@@ -2653,10 +2673,7 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;

case GOYA_QUEUE_ID_CPU_PQ:
if (hdev->cpu_queues_enable)
db_reg_offset = mmCPU_IF_PF_PQ_PI;
else
invalid_queue = true;
break;

case GOYA_QUEUE_ID_MME:
@@ -2696,12 +2713,8 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;

default:
invalid_queue = true;
}

if (invalid_queue) {
/* Should never get here */
dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
hw_queue_id);
return;
}
@@ -2808,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr;
struct hl_cb *cb;
u32 tmp, timeout;
char buf[16] = {};
int rc;

if (hdev->pldm)
@@ -2816,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;

if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
"Can't send KMD job on QMAN0 because %s is busy\n",
buf);
"Can't send KMD job on QMAN0 because the device is not idle\n");
return -EBUSY;
}

@@ -2831,16 +2842,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
return -ENOMEM;
}

*fence_ptr = 0;

goya_qman0_set_security(hdev, true);

/*
* goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
* synchronized kernel jobs we only need space for 1 packet_msg_prot
*/
job->job_cb_size -= sizeof(struct packet_msg_prot);

cb = job->patched_cb;

fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
@@ -2860,14 +2863,14 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
goto free_fence_ptr;
}

rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr, timeout,
&tmp);
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
(tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);

hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
rc = -ETIMEDOUT;
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
goto free_fence_ptr;
}

free_fence_ptr:
@@ -2941,20 +2944,19 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
goto free_pkt;
}

rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
1000, GOYA_TEST_QUEUE_WAIT_USEC);

hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

if ((!rc) && (tmp == fence_val)) {
dev_info(hdev->dev,
"queue test on H/W queue %d succeeded\n",
hw_queue_id);
} else {
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
rc = -EINVAL;
rc = -EIO;
} else {
dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
hw_queue_id);
}

free_pkt:
@@ -2990,12 +2992,6 @@ int goya_test_queues(struct hl_device *hdev)
ret_val = -EINVAL;
}

if (hdev->cpu_queues_enable) {
rc = goya_test_cpu_queue(hdev);
if (rc)
ret_val = -EINVAL;
}

return ret_val;
}

@@ -3028,7 +3024,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle)
{
return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
void *vaddr;

vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
VA_CPU_ACCESSIBLE_MEM_ADDR;

return vaddr;
}

void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
@@ -3907,8 +3909,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
return goya_parse_cb_no_mmu(hdev, parser);
}

void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
u32 cq_val, u32 msix_vec)
void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
{
struct packet_msg_prot *cq_pkt;
u32 tmp;
@@ -3938,6 +3940,11 @@ void goya_update_eq_ci(struct hl_device *hdev, u32 val)
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
int i, num_of_sob_in_longs, num_of_mon_in_longs;

@@ -3958,10 +3965,11 @@ void goya_restore_phase_topology(struct hl_device *hdev)
}

/*
* goya_debugfs_read32 - read a 32bit value from a given device address
* goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
* address.
*
* @hdev: pointer to hl_device structure
* @addr: address in device
* @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4002,6 +4010,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

} else {
rc = -EFAULT;
}
@@ -4010,10 +4022,11 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}

/*
* goya_debugfs_write32 - write a 32bit value to a given device address
* goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
* address.
*
* @hdev: pointer to hl_device structure
* @addr: address in device
* @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4054,6 +4067,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;

} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

} else {
rc = -EFAULT;
}
@@ -4086,6 +4103,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
static const char *_goya_get_event_desc(u16 event_type)
{
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
return "PCIe_if";
case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
return "TPC%d_ecc";
case GOYA_ASYNC_EVENT_ID_MME_ECC:
return "MME_ecc";
case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
return "MME_ecc_ext";
case GOYA_ASYNC_EVENT_ID_MMU_ECC:
return "MMU_ecc";
case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
return "DMA_macro";
case GOYA_ASYNC_EVENT_ID_DMA_ECC:
return "DMA_ecc";
case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
return "CPU_if_ecc";
case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
return "PSOC_mem";
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
return "PSOC_coresight";
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
return "SRAM%d";
case GOYA_ASYNC_EVENT_ID_GIC500:
return "GIC500";
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
return "PLL%d";
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
return "AXI_ecc";
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
return "L2_ram_ecc";
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
return "PSOC_gpio_05_sw_reset";
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
return "PSOC_gpio_10_vrhot_icrit";
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
return "PCIe_dec";
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
@@ -4128,6 +4186,17 @@ static const char *_goya_get_event_desc(u16 event_type)
return "DMA%d_qm";
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
return "DMA%d_ch";
case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
return "TPC%d_bmon_spmu";
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
return "DMA_bm_ch%d";
default:
return "N/A";
}
@@ -4138,6 +4207,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
u8 index;

switch (event_type) {
case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
@@ -4176,6 +4264,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
default:
snprintf(desc, size, _goya_get_event_desc(event_type));
break;
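The index arithmetic above pairs with the "%d" placeholders returned by
_goya_get_event_desc(). A worked example; the event-ID strides of 3 and 10
are inferred from the divisors, not stated explicitly in this diff:

/*
 * For event_type == GOYA_ASYNC_EVENT_ID_TPC2_ECC:
 *
 *     index = (GOYA_ASYNC_EVENT_ID_TPC2_ECC -
 *              GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3 == 2
 *
 * so snprintf(desc, size, "TPC%d_ecc", 2) produces "TPC2_ecc".
 */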
@@ -4226,7 +4329,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
}
}

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi)
{
char desc[20] = "";

@@ -4234,8 +4338,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);

if (razwi) {
goya_print_razwi_info(hdev);
goya_print_mmu_error_info(hdev);
}
}

static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
@@ -4339,19 +4445,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
case GOYA_ASYNC_EVENT_ID_GIC500:
case GOYA_ASYNC_EVENT_ID_PLL0:
case GOYA_ASYNC_EVENT_ID_PLL1:
case GOYA_ASYNC_EVENT_ID_PLL3:
case GOYA_ASYNC_EVENT_ID_PLL4:
case GOYA_ASYNC_EVENT_ID_PLL5:
case GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
dev_err(hdev->dev,
"Received H/W interrupt %d, reset the chip\n",
event_type);
goya_print_irq_info(hdev, event_type, false);
hl_device_reset(hdev, true, false);
break;

@@ -4382,7 +4481,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
goya_print_irq_info(hdev, event_type);
goya_print_irq_info(hdev, event_type, true);
goya_unmask_irq(hdev, event_type);
break;

@@ -4394,12 +4493,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
goya_print_irq_info(hdev, event_type, false);
goya_unmask_irq(hdev, event_type);
break;

default:
@@ -4418,23 +4514,26 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
return goya->events_stat;
}

static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
u64 val, bool is_dram)
{
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_job *job;
u32 cb_size, ctl;
struct hl_cb *cb;
int rc;
int rc, lin_dma_pkts_cnt;

cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
sizeof(struct packet_msg_prot);
cb = hl_cb_kernel_create(hdev, cb_size);
if (!cb)
return -EFAULT;
return -ENOMEM;

lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;

do {
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
cb_size = sizeof(*lin_dma_pkt);

ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
@@ -4447,8 +4546,16 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,

lin_dma_pkt->src_addr = cpu_to_le64(val);
lin_dma_pkt->dst_addr = cpu_to_le64(addr);
if (lin_dma_pkts_cnt > 1)
lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
else
lin_dma_pkt->tsize = cpu_to_le32(size);

size -= SZ_2G;
addr += SZ_2G;
lin_dma_pkt++;
} while (--lin_dma_pkts_cnt);

job = hl_cs_allocate_job(hdev, true);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
@@ -4462,8 +4569,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
job->user_cb_size = cb_size;
job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
job->patched_cb = job->user_cb;
job->job_cb_size = job->user_cb_size +
sizeof(struct packet_msg_prot) * 2;
job->job_cb_size = job->user_cb_size;

hl_debugfs_add_job(hdev, job);

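The chunking above is what lifts the old 32-bit size limit. A worked example
of the packet split:

/*
 * memset of 5 GB: lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(5 GB, 2 GB) = 3,
 * so the kernel CB carries three LIN_DMA packets transferring 2 GB,
 * 2 GB and 1 GB (only the final iteration, when lin_dma_pkts_cnt == 1,
 * uses the remaining size), followed by a single packet_msg_prot for
 * completion.
 */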
@@ -4485,10 +4591,12 @@ release_cb:
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = prop->sram_base_address;
u64 addr = prop->sram_base_address, sob_addr;
u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
u64 val = 0x7777777777777777ull;
int rc;
int rc, dma_id;
u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
mmDMA_CH_0_WR_COMP_ADDR_LO;

rc = goya_memset_device_memory(hdev, addr, size, val, false);
if (rc) {
@@ -4496,13 +4604,27 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
return rc;
}

/* we need to reset registers that the user is allowed to change */
sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
(dma_id - 1) * 4;
WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
lower_32_bits(sob_addr));
}

WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

goya_mmu_prepare(hdev, asid);

goya_clear_sm_regs(hdev);

return 0;
}

int goya_mmu_clear_pgt_range(struct hl_device *hdev)
static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -4516,7 +4638,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, 0, true);
}

int goya_mmu_set_dram_default_page(struct hl_device *hdev)
static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
@ -4529,7 +4651,123 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev)
|
|||
return goya_memset_device_memory(hdev, addr, size, val, true);
|
||||
}
|
||||
|
||||
void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
|
||||
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
|
||||
{
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
struct goya_device *goya = hdev->asic_specific;
|
||||
s64 off, cpu_off;
|
||||
int rc;
|
||||
|
||||
if (!(goya->hw_cap_initialized & HW_CAP_MMU))
|
||||
return 0;
|
||||
|
||||
for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Map failed for address 0x%llx\n",
|
||||
prop->dram_base_address + off);
|
||||
goto unmap;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
|
||||
hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
|
||||
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Map failed for CPU accessible memory\n");
|
||||
off -= PAGE_SIZE_2MB;
|
||||
goto unmap;
|
||||
}
|
||||
} else {
|
||||
for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
|
||||
rc = hl_mmu_map(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
|
||||
hdev->cpu_accessible_dma_address + cpu_off,
|
||||
PAGE_SIZE_4KB);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev,
|
||||
"Map failed for CPU accessible memory\n");
|
||||
cpu_off -= PAGE_SIZE_4KB;
|
||||
goto unmap_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
|
||||
goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
|
||||
WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
|
||||
WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
|
||||
|
||||
/* Make sure configuration is flushed to device */
|
||||
RREG32(mmCPU_IF_AWUSER_OVR_EN);
|
||||
|
||||
goya->device_cpu_mmu_mappings_done = true;
|
||||
|
||||
return 0;
|
||||
|
||||
unmap_cpu:
|
||||
for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
|
||||
PAGE_SIZE_4KB))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap address 0x%llx\n",
|
||||
VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
|
||||
unmap:
|
||||
for (; off >= 0 ; off -= PAGE_SIZE_2MB)
|
||||
if (hl_mmu_unmap(hdev->kernel_ctx,
|
||||
prop->dram_base_address + off, PAGE_SIZE_2MB))
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"failed to unmap address 0x%llx\n",
|
||||
prop->dram_base_address + off);
|
||||
|
||||
return rc;
|
||||
}
|
||||
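
The mapping above picks its MMU page size purely from the alignment of the CPU-accessible DMA address: a 2MB-aligned buffer gets a single 2MB mapping, anything else falls back to 512 pages of 4KB. A minimal sketch of that decision, assuming the driver's PAGE_SIZE_2MB/PAGE_SIZE_4KB definitions:

/* Sketch only -- mirrors the alignment test in
 * goya_mmu_add_mappings_for_device_cpu() above. Assumes PAGE_SIZE_2MB
 * and PAGE_SIZE_4KB are power-of-two byte sizes, as in the driver.
 */
static inline u32 pick_mmu_page_size(u64 dma_addr)
{
    if (!(dma_addr & (PAGE_SIZE_2MB - 1)))
        return PAGE_SIZE_2MB;    /* one mapping covers the whole region */
    return PAGE_SIZE_4KB;        /* unaligned: map 4KB at a time */
}
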
+
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
+{
+    struct asic_fixed_properties *prop = &hdev->asic_prop;
+    struct goya_device *goya = hdev->asic_specific;
+    u32 off, cpu_off;
+
+    if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+        return;
+
+    if (!goya->device_cpu_mmu_mappings_done)
+        return;
+
+    WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
+    WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
+
+    if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+        if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+                PAGE_SIZE_2MB))
+            dev_warn(hdev->dev,
+                "Failed to unmap CPU accessible memory\n");
+    } else {
+        for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
+            if (hl_mmu_unmap(hdev->kernel_ctx,
+                    VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+                    PAGE_SIZE_4KB))
+                dev_warn_ratelimited(hdev->dev,
+                    "failed to unmap address 0x%llx\n",
+                    VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+    }
+
+    for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
+        if (hl_mmu_unmap(hdev->kernel_ctx,
+                prop->dram_base_address + off, PAGE_SIZE_2MB))
+            dev_warn_ratelimited(hdev->dev,
+                "Failed to unmap address 0x%llx\n",
+                prop->dram_base_address + off);
+
+    goya->device_cpu_mmu_mappings_done = false;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
 {
     struct goya_device *goya = hdev->asic_specific;
     int i;

@@ -4676,57 +4914,82 @@ int goya_armcp_info_get(struct hl_device *hdev)
     return 0;
 }

-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
+                struct seq_file *s)
 {
-    u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+    const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+    const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+    u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+        mme_arch_sts;
+    bool is_idle = true, is_eng_idle;
+    u64 offset;
     int i;

+    if (s)
+        seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
+                "---  -------  ------------  -------------\n");
+
     offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

     for (i = 0 ; i < DMA_MAX_NUM ; i++) {
-        dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+        qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+        dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+        is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+                IS_DMA_IDLE(dma_core_sts0);
+        is_idle &= is_eng_idle;

-        if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
-                DMA_QM_IDLE_MASK)
-            return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+        if (mask)
+            *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
+        if (s)
+            seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+                    qm_glbl_sts0, dma_core_sts0);
     }

+    if (s)
+        seq_puts(s,
+            "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
+            "---  -------  ------------  --------------  ----------\n");
+
     offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

     for (i = 0 ; i < TPC_MAX_NUM ; i++) {
-        tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
-        tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
-        tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
+        qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+        cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+        tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+        is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+                IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+                IS_TPC_IDLE(tpc_cfg_sts);
+        is_idle &= is_eng_idle;

-        if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
-                TPC_QM_IDLE_MASK)
-            return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
-        if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
-                TPC_CMDQ_IDLE_MASK)
-            return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
-        if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
-                TPC_CFG_IDLE_MASK)
-            return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
+        if (mask)
+            *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
+        if (s)
+            seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+                qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
     }

-    if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
-            MME_QM_IDLE_MASK)
-        return HL_ENG_BUSY(buf, size, "MME_QM");
+    if (s)
+        seq_puts(s,
+            "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
+            "---  -------  ------------  --------------  -----------\n");

-    if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
-            MME_CMDQ_IDLE_MASK)
-        return HL_ENG_BUSY(buf, size, "MME_CMDQ");
+    qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+    cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+    mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+    is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+            IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+            IS_MME_IDLE(mme_arch_sts);
+    is_idle &= is_eng_idle;

-    if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
-            MME_ARCH_IDLE_MASK)
-        return HL_ENG_BUSY(buf, size, "MME_ARCH");
+    if (mask)
+        *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
+    if (s) {
+        seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+                cmdq_glbl_sts0, mme_arch_sts);
+        seq_puts(s, "\n");
+    }

-    if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
-        return HL_ENG_BUSY(buf, size, "MME");
-
-    return true;
+    return is_idle;
 }

 static void goya_hw_queues_lock(struct hl_device *hdev)
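
The IS_DMA_QM_IDLE()/IS_TPC_IDLE()/IS_MME_IDLE() helpers referenced above are defined elsewhere in the driver and are not part of this hunk. A plausible shape for such a predicate, shown only to illustrate the test that the removed inline HL_ENG_BUSY checks used to perform:

/* Illustration only -- the real macros may differ. The test is
 * "all idle bits are set", the same comparison the removed code wrote
 * out as (RREG32(reg) & MASK) != MASK.
 */
#define IS_ENG_IDLE(sts, idle_mask) (((sts) & (idle_mask)) == (idle_mask))
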
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -126,6 +126,12 @@
 #define VA_DDR_SPACE_SIZE    (VA_DDR_SPACE_END - \
                     VA_DDR_SPACE_START)    /* 128GB */

+#if (HL_CPU_ACCESSIBLE_MEM_SIZE != SZ_2M)
+#error "HL_CPU_ACCESSIBLE_MEM_SIZE must be exactly 2MB to enable MMU mapping"
+#endif
+
+#define VA_CPU_ACCESSIBLE_MEM_ADDR    0x8000000000ull
+
 #define DMA_MAX_TRANSFER_SIZE    U32_MAX

 #define HW_CAP_PLL    0x00000001
@@ -157,6 +163,7 @@ struct goya_device {
     u64 ddr_bar_cur_addr;
     u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
     u32 hw_cap_initialized;
+    u8 device_cpu_mmu_mappings_done;
 };

 void goya_get_fixed_properties(struct hl_device *hdev);
@@ -204,18 +211,14 @@ int goya_armcp_info_get(struct hl_device *hdev);
 int goya_debug_coresight(struct hl_device *hdev, void *data);
 void goya_halt_coresight(struct hl_device *hdev);

-void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-int goya_mmu_clear_pgt_range(struct hl_device *hdev);
-int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-
 int goya_suspend(struct hl_device *hdev);
 int goya_resume(struct hl_device *hdev);

 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
 void *goya_get_events_stat(struct hl_device *hdev, u32 *size);

-void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
-                u32 cq_val, u32 msix_vec);
+void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
+                u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
                 dma_addr_t *dma_handle, u16 *queue_len);
@@ -225,5 +228,6 @@ void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
                     dma_addr_t *dma_handle);
 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
                     void *vaddr);
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);

 #endif /* GOYAP_H_ */
--- a/drivers/misc/habanalabs/goya/goya_security.c
+++ b/drivers/misc/habanalabs/goya/goya_security.c
@@ -677,6 +677,17 @@ static void goya_init_tpc_protection_bits(struct hl_device *hdev)
     goya_pb_set_block(hdev, mmTPC0_RD_REGULATOR_BASE);
     goya_pb_set_block(hdev, mmTPC0_WR_REGULATOR_BASE);

+    pb_addr = (mmTPC0_CFG_SEMAPHORE & ~0xFFF) + PROT_BITS_OFFS;
+    word_offset = ((mmTPC0_CFG_SEMAPHORE & PROT_BITS_OFFS) >> 7) << 2;
+
+    mask = 1 << ((mmTPC0_CFG_SEMAPHORE & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_LFSR_POLYNOM & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_STATUS & 0x7F) >> 2);
+
+    WREG32(pb_addr + word_offset, ~mask);
+
     pb_addr = (mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & ~0xFFF) + PROT_BITS_OFFS;
     word_offset = ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH &
             PROT_BITS_OFFS) >> 7) << 2;
@@ -684,6 +695,11 @@ static void goya_init_tpc_protection_bits(struct hl_device *hdev)
     mask |= 1 << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
     mask |= 1 << ((mmTPC0_CFG_SM_BASE_ADDRESS_LOW & 0x7F) >> 2);
     mask |= 1 << ((mmTPC0_CFG_SM_BASE_ADDRESS_HIGH & 0x7F) >> 2);
-    mask |= 1 << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_TPC_INTR_CAUSE & 0x7F) >> 2);
+    mask |= 1 << ((mmTPC0_CFG_TPC_INTR_MASK & 0x7F) >> 2);

     WREG32(pb_addr + word_offset, ~mask);
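
The address arithmetic here is the same for every secured block: each 32-bit protection word covers 0x80 bytes of register space (32 registers of 4 bytes), bits 6:2 of a register's offset select its bit within that word, and writing ~mask secures everything in the word except the listed registers. A worked instance for a hypothetical register offset (not a real Goya register -- illustration only):

/* Sketch of the protection-bit math for an illustrative offset 0x0E64 */
u32 reg_off = 0x0E64;
u32 pb_addr = (reg_off & ~0xFFF) + PROT_BITS_OFFS;        /* protection area of the 4KB page */
u32 word_offset = ((reg_off & PROT_BITS_OFFS) >> 7) << 2; /* byte offset of the 32-bit word */
u32 bit = (reg_off & 0x7F) >> 2;                          /* 0x64 >> 2 = 25 */
u32 mask = 1 << bit;            /* register 25 of 32 in its word stays unsecured */
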
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -34,6 +34,8 @@
 #define HL_ARMCP_INFO_TIMEOUT_USEC    10000000 /* 10s */
 #define HL_ARMCP_EEPROM_TIMEOUT_USEC    10000000 /* 10s */

+#define HL_PCI_ELBI_TIMEOUT_MSEC    10 /* 10ms */
+
 #define HL_MAX_QUEUES    128

 #define HL_MAX_JOBS_PER_CS    64
@@ -123,7 +125,7 @@ enum hl_device_hw_state {
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
- * @armcp_info: received various information from ArmCP regarding the H/W. e.g.
+ * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
  *        available sensors.
  * @uboot_ver: F/W U-boot version.
  * @preboot_ver: F/W Preboot version.
@@ -318,18 +320,8 @@ struct hl_cs_job;
 #define HL_EQ_LENGTH            64
 #define HL_EQ_SIZE_IN_BYTES        (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)

-#define HL_CPU_PKT_SHIFT        5
-#define HL_CPU_PKT_SIZE            (1 << HL_CPU_PKT_SHIFT)
-#define HL_CPU_PKT_MASK            (~((1 << HL_CPU_PKT_SHIFT) - 1))
-#define HL_CPU_MAX_PKTS_IN_CB        32
-#define HL_CPU_CB_SIZE            (HL_CPU_PKT_SIZE * \
-                     HL_CPU_MAX_PKTS_IN_CB)
-#define HL_CPU_CB_QUEUE_SIZE        (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
-
-/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */
-#define HL_CPU_ACCESSIBLE_MEM_SIZE    (HL_EQ_SIZE_IN_BYTES + \
-                     HL_QUEUE_SIZE_IN_BYTES + \
-                     HL_CPU_CB_QUEUE_SIZE)
+/* KMD <-> ArmCP shared memory size */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE    SZ_2M

 /**
  * struct hl_hw_queue - describes a H/W transport queue.
@@ -543,8 +535,9 @@ struct hl_asic_funcs {
                 enum dma_data_direction dir);
     u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
                     struct sg_table *sgt);
-    void (*add_end_of_cb_packets)(u64 kernel_address, u32 len, u64 cq_addr,
-                    u32 cq_val, u32 msix_num);
+    void (*add_end_of_cb_packets)(struct hl_device *hdev,
+                    u64 kernel_address, u32 len,
+                    u64 cq_addr, u32 cq_val, u32 msix_num);
     void (*update_eq_ci)(struct hl_device *hdev, u32 val);
     int (*context_switch)(struct hl_device *hdev, u32 asid);
     void (*restore_phase_topology)(struct hl_device *hdev);
@@ -564,7 +557,8 @@ struct hl_asic_funcs {
                 u32 asid, u64 va, u64 size);
     int (*send_heartbeat)(struct hl_device *hdev);
     int (*debug_coresight)(struct hl_device *hdev, void *data);
-    bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
+    bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+                struct seq_file *s);
     int (*soft_reset_late_init)(struct hl_device *hdev);
     void (*hw_queues_lock)(struct hl_device *hdev);
     void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1065,12 +1059,59 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
     (cond) ? 0 : -ETIMEDOUT; \
 })

+/*
+ * address in this macro points always to a memory location in the
+ * host's (server's) memory. That location is updated asynchronously
+ * either by the direct access of the device or by another core
+ */
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
+({ \
+    ktime_t __timeout; \
+    /* timeout should be longer when working with simulator */ \
+    if (hdev->pdev) \
+        __timeout = ktime_add_us(ktime_get(), timeout_us); \
+    else \
+        __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
+    might_sleep_if(sleep_us); \
+    for (;;) { \
+        /* Verify we read updates done by other cores or by device */ \
+        mb(); \
+        (val) = *((u32 *) (uintptr_t) (addr)); \
+        if (cond) \
+            break; \
+        if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+            (val) = *((u32 *) (uintptr_t) (addr)); \
+            break; \
+        } \
+        if (sleep_us) \
+            usleep_range((sleep_us >> 2) + 1, sleep_us); \
+    } \
+    (cond) ? 0 : -ETIMEDOUT; \
+})
+
-#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
-    if (buf) \
-        snprintf(buf, size, fmt, ##__VA_ARGS__); \
-    false; \
-})
+#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
+                timeout_us) \
+({ \
+    ktime_t __timeout; \
+    /* timeout should be longer when working with simulator */ \
+    if (hdev->pdev) \
+        __timeout = ktime_add_us(ktime_get(), timeout_us); \
+    else \
+        __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
+    might_sleep_if(sleep_us); \
+    for (;;) { \
+        (val) = readl(addr); \
+        if (cond) \
+            break; \
+        if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+            (val) = readl(addr); \
+            break; \
+        } \
+        if (sleep_us) \
+            usleep_range((sleep_us >> 2) + 1, sleep_us); \
+    } \
+    (cond) ? 0 : -ETIMEDOUT; \
+})
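
The exported hl_poll_timeout_memory()/hl_poll_timeout_device_memory() functions are gone (see the prototype removals further down); both are macros now, and the condition is an arbitrary expression re-evaluated on every poll. A hedged usage sketch, assuming a caller holding a kernel VA of a host-resident status word (status_va is hypothetical):

/* Sketch only: wait up to 100ms (1ms polling interval) for the device
 * or another core to post a nonzero status into host memory.
 */
u32 status;
int rc;

rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) status_va,
                status, (status != 0), 1000, 100000);
if (rc == -ETIMEDOUT)
    dev_err(hdev->dev, "status was never posted\n");
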

 struct hwmon_chip_info;

@@ -1117,6 +1158,7 @@ struct hl_device_reset_work {
  *                lock here so we can flush user processes which are opening
  *                the device while we are trying to hard reset it
  * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -1159,6 +1201,8 @@ struct hl_device_reset_work {
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
  * @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fd_open_cnt, enforces
+ *            that only a single user is configuring the debug infrastructure.
  */
 struct hl_device {
     struct pci_dev *pdev;
@@ -1188,6 +1232,7 @@ struct hl_device {
     /* TODO: remove fd_open_cnt_lock for multiple process support */
     struct mutex fd_open_cnt_lock;
     struct mutex send_cpu_message_lock;
+    struct mutex debug_lock;
     struct asic_fixed_properties asic_prop;
     const struct hl_asic_funcs *asic_funcs;
     void *asic_specific;
@@ -1230,6 +1275,7 @@ struct hl_device {
     u8 init_done;
     u8 device_cpu_disabled;
     u8 dma_mask;
+    u8 in_debug;

     /* Parameters for bring-up */
     u8 mmu_enable;
@@ -1325,13 +1371,10 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 int hl_device_open(struct inode *inode, struct file *filp);
 bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
 enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
         enum hl_asic_type asic_type, int minor);
 void destroy_hdev(struct hl_device *hdev);
-int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
-                u32 *val);
-int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
-                u32 timeout_us, u32 *val);
 int hl_hw_queues_create(struct hl_device *hdev);
 void hl_hw_queues_destroy(struct hl_device *hdev);
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -105,9 +105,17 @@ int hl_device_open(struct inode *inode, struct file *filp)
         return -EPERM;
     }

+    if (hdev->in_debug) {
+        dev_err_ratelimited(hdev->dev,
+            "Can't open %s because it is being debugged by another user\n",
+            dev_name(hdev->dev));
+        mutex_unlock(&hdev->fd_open_cnt_lock);
+        return -EPERM;
+    }
+
     if (atomic_read(&hdev->fd_open_cnt)) {
         dev_info_ratelimited(hdev->dev,
-            "Device %s is already attached to application\n",
+            "Can't open %s because another user is working on it\n",
             dev_name(hdev->dev));
         mutex_unlock(&hdev->fd_open_cnt_lock);
         return -EBUSY;
@@ -164,6 +172,17 @@ close_device:
     return rc;
 }

+static void set_driver_behavior_per_device(struct hl_device *hdev)
+{
+    hdev->mmu_enable = 1;
+    hdev->cpu_enable = 1;
+    hdev->fw_loading = 1;
+    hdev->cpu_queues_enable = 1;
+    hdev->heartbeat = 1;
+
+    hdev->reset_pcilink = 0;
+}
+
 /*
  * create_hdev - create habanalabs device instance
  *
@@ -188,38 +207,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
     if (!hdev)
         return -ENOMEM;

-    hdev->major = hl_major;
-    hdev->reset_on_lockup = reset_on_lockup;
-
-    /* Parameters for bring-up - set them to defaults */
-    hdev->mmu_enable = 1;
-    hdev->cpu_enable = 1;
-    hdev->reset_pcilink = 0;
-    hdev->cpu_queues_enable = 1;
-    hdev->fw_loading = 1;
-    hdev->pldm = 0;
-    hdev->heartbeat = 1;
-
-    /* If CPU is disabled, no point in loading FW */
-    if (!hdev->cpu_enable)
-        hdev->fw_loading = 0;
-
-    /* If we don't load FW, no need to initialize CPU queues */
-    if (!hdev->fw_loading)
-        hdev->cpu_queues_enable = 0;
-
-    /* If CPU queues not enabled, no way to do heartbeat */
-    if (!hdev->cpu_queues_enable)
-        hdev->heartbeat = 0;
-
-    if (timeout_locked)
-        hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
-    else
-        hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
-
-    hdev->disabled = true;
-    hdev->pdev = pdev; /* can be NULL in case of simulator device */
-
     /* First, we must find out which ASIC are we handling. This is needed
      * to configure the behavior of the driver (kernel parameters)
      */
     if (pdev) {
         hdev->asic_type = get_asic_type(pdev->device);
         if (hdev->asic_type == ASIC_INVALID) {
@@ -231,6 +221,20 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
         hdev->asic_type = asic_type;
     }

+    hdev->major = hl_major;
+    hdev->reset_on_lockup = reset_on_lockup;
+    hdev->pldm = 0;
+
+    set_driver_behavior_per_device(hdev);
+
+    if (timeout_locked)
+        hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+    else
+        hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+    hdev->disabled = true;
+    hdev->pdev = pdev; /* can be NULL in case of simulator device */
+
     /* Set default DMA mask to 32 bits */
     hdev->dma_mask = 32;
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
     if ((!max_size) || (!out))
         return -EINVAL;

-    hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
+    hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
+                    &hw_idle.busy_engines_mask, NULL);

     return copy_to_user(out, &hw_idle,
         min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -254,10 +255,18 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
     case HL_DEBUG_OP_BMON:
     case HL_DEBUG_OP_SPMU:
     case HL_DEBUG_OP_TIMESTAMP:
+        if (!hdev->in_debug) {
+            dev_err_ratelimited(hdev->dev,
+                "Rejecting debug configuration request because device not in debug mode\n");
+            return -EFAULT;
+        }
         args->input_size =
             min(args->input_size, hl_debug_struct_size[args->op]);
         rc = debug_coresight(hdev, args);
         break;
+    case HL_DEBUG_OP_SET_MODE:
+        rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+        break;
     default:
         dev_err(hdev->dev, "Invalid request %d\n", args->op);
         rc = -ENOTTY;
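
User space is now expected to flip the device into debug mode before any coresight configuration. A sketch of the resulting call order, assuming the HL_IOCTL_DEBUG entry point and struct hl_debug_args from the uapi header (includes, descriptor setup and error handling omitted):

/* Sketch: enable debug mode, then configure a bus monitor. */
static void example_configure_bmon(int fd)
{
    struct hl_debug_args args = {0};

    args.op = HL_DEBUG_OP_SET_MODE;    /* must come first now */
    args.enable = 1;
    ioctl(fd, HL_IOCTL_DEBUG, &args);

    args.op = HL_DEBUG_OP_BMON;        /* accepted only in debug mode */
    /* args.input_ptr / args.input_size would describe the BMON here */
    ioctl(fd, HL_IOCTL_DEBUG, &args);
}
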
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -265,7 +265,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
     cq = &hdev->completion_queue[q->hw_queue_id];
     cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

-    hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
+    hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
                         cq_addr,
                         __le32_to_cpu(cq_pkt.data),
                         q->hw_queue_id);
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_DMA_CH_0_MASKS_H_
+#define ASIC_REG_DMA_CH_0_MASKS_H_
+
+/*
+ *****************************************
+ *   DMA_CH_0 (Prototype: DMA_CH)
+ *****************************************
+ */
+
+/* DMA_CH_0_CFG0 */
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT 0
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK 0x3FF
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_SHIFT 16
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_MASK 0xFFF0000
+
+/* DMA_CH_0_CFG1 */
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_SHIFT 0
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_MASK 0x3FF
+
+/* DMA_CH_0_ERRMSG_ADDR_LO */
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_ADDR_HI */
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_WDATA */
+#define DMA_CH_0_ERRMSG_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_LO */
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_HI */
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_WDATA */
+#define DMA_CH_0_RD_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_LO */
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_HI */
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_WDATA */
+#define DMA_CH_0_WR_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_LO */
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_HI */
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_LO */
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_HI */
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_TSIZE */
+#define DMA_CH_0_LDMA_TSIZE_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_TSIZE_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_COMIT_TRANSFER */
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_SHIFT 0
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_MASK 0x1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_SHIFT 1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_MASK 0x2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_SHIFT 2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_MASK 0x4
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_SHIFT 3
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_MASK 0x8
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_SHIFT 4
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_MASK 0x10
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_SHIFT 5
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_MASK 0x20
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_SHIFT 6
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_MASK 0x40
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_SHIFT 15
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_MASK 0x8000
+#define DMA_CH_0_COMIT_TRANSFER_CTL_SHIFT 16
+#define DMA_CH_0_COMIT_TRANSFER_CTL_MASK 0xFFFF0000
+
+/* DMA_CH_0_STS0 */
+#define DMA_CH_0_STS0_DMA_BUSY_SHIFT 0
+#define DMA_CH_0_STS0_DMA_BUSY_MASK 0x1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_SHIFT 1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_MASK 0x2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_SHIFT 2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_MASK 0x4
+
+/* DMA_CH_0_STS1 */
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS2 */
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS3 */
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS4 */
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_LO_STS */
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_HI_STS */
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_TSIZE_STS */
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_LO_STS */
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_HI_STS */
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_TSIZE_STS */
+#define DMA_CH_0_DST_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_EN */
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_RD_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_SAT */
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_TOUT */
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_EN */
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_WR_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_SAT */
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_TOUT */
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_CFG2 */
+#define DMA_CH_0_CFG2_FORCE_WORD_SHIFT 0
+#define DMA_CH_0_CFG2_FORCE_WORD_MASK 0x1
+
+/* DMA_CH_0_TDMA_CTL */
+#define DMA_CH_0_TDMA_CTL_DTYPE_SHIFT 0
+#define DMA_CH_0_TDMA_CTL_DTYPE_MASK 0x7
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_0 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_1 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_2 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_3 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_4 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_0 */
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_1 */
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_2 */
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_3 */
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_4 */
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_MEM_INIT_BUSY */
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_SHIFT 0
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_MASK 0xFF
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_SHIFT 8
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_MASK 0x100
+
+#endif /* ASIC_REG_DMA_CH_0_MASKS_H_ */
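
Every register in this generated header follows the same SHIFT/MASK convention: the MASK is the field already in position, the SHIFT moves a value into it. A small hedged helper pair showing the typical consumption (these helpers are illustrative, not part of the driver):

/* Illustration: field extract/insert for DMA_CH_0_CFG0.RD_MAX_OUTSTAND */
static inline u32 dma_ch_get_rd_max_outstand(u32 cfg0)
{
    return (cfg0 & DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK) >>
            DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT;
}

static inline u32 dma_ch_set_rd_max_outstand(u32 cfg0, u32 val)
{
    cfg0 &= ~DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK;
    return cfg0 | ((val << DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT) &
            DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK);
}
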
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -88,6 +88,7 @@
 #include "psoc_global_conf_masks.h"
 #include "dma_macro_masks.h"
 #include "dma_qm_0_masks.h"
+#include "dma_ch_0_masks.h"
 #include "tpc0_qm_masks.h"
 #include "tpc0_cmdq_masks.h"
 #include "mme_qm_masks.h"
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -1657,17 +1657,10 @@ int hl_vm_init(struct hl_device *hdev)
     struct hl_vm *vm = &hdev->vm;
     int rc;

-    rc = hl_mmu_init(hdev);
-    if (rc) {
-        dev_err(hdev->dev, "Failed to init MMU\n");
-        return rc;
-    }
-
     vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
     if (!vm->dram_pg_pool) {
         dev_err(hdev->dev, "Failed to create dram page pool\n");
-        rc = -ENOMEM;
-        goto pool_create_err;
+        return -ENOMEM;
     }

     kref_init(&vm->dram_pg_pool_refcount);
@@ -1693,8 +1686,6 @@ int hl_vm_init(struct hl_device *hdev)

 pool_add_err:
     gen_pool_destroy(vm->dram_pg_pool);
-pool_create_err:
-    hl_mmu_fini(hdev);

     return rc;
 }
@@ -1724,7 +1715,5 @@ void hl_vm_fini(struct hl_device *hdev)
         dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
                 __func__);

-    hl_mmu_fini(hdev);
-
     vm->init_done = false;
 }
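
With hl_mmu_init()/hl_mmu_fini() pulled out of the VM code, the caller owns the ordering between the two modules. A hedged sketch of what the decoupled bring-up could look like (the real call site is in the device-init path, which this diff does not show):

/* Sketch only: MMU first, VM later, teardown in reverse. */
static int example_init_mmu_then_vm(struct hl_device *hdev)
{
    int rc;

    rc = hl_mmu_init(hdev);        /* page-table machinery */
    if (rc)
        return rc;

    /* ...kernel context creation, device CPU queues, etc... */

    rc = hl_vm_init(hdev);         /* no longer touches the MMU */
    if (rc)
        hl_mmu_fini(hdev);

    return rc;
}
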
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -241,8 +241,9 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
         hop2_pte_addr, hop3_pte_addr, pte_val;
     int rc, i, j, hop3_allocated = 0;

-    if (!hdev->dram_supports_virtual_memory ||
-            !hdev->dram_default_page_mapping)
+    if ((!hdev->dram_supports_virtual_memory) ||
+            (!hdev->dram_default_page_mapping) ||
+            (ctx->asid == HL_KERNEL_ASID_ID))
         return 0;

     num_of_hop3 = prop->dram_size_for_default_page_mapping;
@@ -340,8 +341,9 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
         hop2_pte_addr, hop3_pte_addr;
     int i, j;

-    if (!hdev->dram_supports_virtual_memory ||
-            !hdev->dram_default_page_mapping)
+    if ((!hdev->dram_supports_virtual_memory) ||
+            (!hdev->dram_default_page_mapping) ||
+            (ctx->asid == HL_KERNEL_ASID_ID))
         return;

     num_of_hop3 = prop->dram_size_for_default_page_mapping;
@@ -385,12 +387,8 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
  * @hdev: habanalabs device structure.
  *
  * This function does the following:
- * - Allocate max_asid zeroed hop0 pgts so no mapping is available.
- * - Enable MMU in H/W.
- * - Invalidate the MMU cache.
  * - Create a pool of pages for pgt_infos.
- *
- * This function depends on DMA QMAN to be working!
+ * - Create a shadow table for pgt
  *
  * Return: 0 for success, non-zero for failure.
  */
@@ -915,6 +913,10 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
         return -EFAULT;
     }

+    WARN_ONCE((phys_addr & (real_page_size - 1)),
+        "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
+        phys_addr, real_page_size);
+
     npages = page_size / real_page_size;
     real_virt_addr = virt_addr;
     real_phys_addr = phys_addr;
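
The new WARN_ONCE is a plain power-of-two alignment test. A quick worked instance, assuming a 2MB real page size:

/* With real_page_size = 0x200000 (2MB):
 *   0x10200000 & 0x1FFFFF == 0       -> fine, divisible by page size
 *   0x10201000 & 0x1FFFFF == 0x1000  -> trips the warning
 */
bool misaligned = (phys_addr & ((u64) real_page_size - 1)) != 0;
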
--- a/drivers/misc/habanalabs/pci.c
+++ b/drivers/misc/habanalabs/pci.c
@@ -10,6 +10,8 @@

 #include <linux/pci.h>

+#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC    (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
+
 /**
  * hl_pci_bars_map() - Map PCI BARs.
  * @hdev: Pointer to hl_device structure.
@@ -88,8 +90,14 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
 {
     struct pci_dev *pdev = hdev->pdev;
     ktime_t timeout;
+    u64 msec;
     u32 val;

+    if (hdev->pldm)
+        msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+    else
+        msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
     /* Clear previous status */
     pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);

@@ -98,7 +106,7 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
     pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
                 PCI_CONFIG_ELBI_CTRL_WRITE);

-    timeout = ktime_add_ms(ktime_get(), 10);
+    timeout = ktime_add_ms(ktime_get(), msec);
     for (;;) {
         pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
         if (val & PCI_CONFIG_ELBI_STS_MASK)
--- a/drivers/misc/habanalabs/sysfs.c
+++ b/drivers/misc/habanalabs/sysfs.c
@@ -328,10 +328,6 @@ static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
 {
     struct hl_device *hdev = dev_get_drvdata(dev);

-    /* Use dummy, fixed address for simulator */
-    if (!hdev->pdev)
-        return sprintf(buf, "0000:%02d:00.0\n", hdev->id);
-
     return sprintf(buf, "%04x:%02x:%02x.%x\n",
             pci_domain_nr(hdev->pdev->bus),
             hdev->pdev->bus->number,
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -45,6 +45,30 @@ enum goya_queue_id {
     GOYA_QUEUE_ID_SIZE
 };

+/*
+ * Engine Numbering
+ *
+ * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
+ */
+
+enum goya_engine_id {
+    GOYA_ENGINE_ID_DMA_0 = 0,
+    GOYA_ENGINE_ID_DMA_1,
+    GOYA_ENGINE_ID_DMA_2,
+    GOYA_ENGINE_ID_DMA_3,
+    GOYA_ENGINE_ID_DMA_4,
+    GOYA_ENGINE_ID_MME_0,
+    GOYA_ENGINE_ID_TPC_0,
+    GOYA_ENGINE_ID_TPC_1,
+    GOYA_ENGINE_ID_TPC_2,
+    GOYA_ENGINE_ID_TPC_3,
+    GOYA_ENGINE_ID_TPC_4,
+    GOYA_ENGINE_ID_TPC_5,
+    GOYA_ENGINE_ID_TPC_6,
+    GOYA_ENGINE_ID_TPC_7,
+    GOYA_ENGINE_ID_SIZE
+};
+
 enum hl_device_status {
     HL_DEVICE_STATUS_OPERATIONAL,
     HL_DEVICE_STATUS_IN_RESET,
@@ -86,7 +110,11 @@ struct hl_info_dram_usage {

 struct hl_info_hw_idle {
     __u32 is_idle;
-    __u32 pad;
+    /*
+     * Bitmask of busy engines.
+     * Bits definition is according to `enum <chip>_engine_id'.
+     */
+    __u32 busy_engines_mask;
 };

 struct hl_info_device_status {
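
A user-space sketch of consuming the new field: query HL_INFO_HW_IDLE through the info ioctl and walk the bits with the engine enum (assumes <stdio.h>, <stdint.h>, <sys/ioctl.h> and the installed <misc/habanalabs.h> are included, fd is an open device file, and error handling is omitted):

/* Sketch: print which Goya engines are busy. */
static void example_print_busy_engines(int fd)
{
    struct hl_info_hw_idle hw_idle = {0};
    struct hl_info_args info = {0};
    int i;

    info.op = HL_INFO_HW_IDLE;
    info.return_pointer = (__u64) (uintptr_t) &hw_idle;
    info.return_size = sizeof(hw_idle);
    ioctl(fd, HL_IOCTL_INFO, &info);

    if (!hw_idle.is_idle)
        for (i = 0 ; i < GOYA_ENGINE_ID_SIZE ; i++)
            if (hw_idle.busy_engines_mask & (1U << i))
                printf("engine %d is busy\n", i);
}
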