This tag contains the following fixes for 5.15-rc3:
- Fix potential race when user waiting for interrupt ioctl - Prevent possible kernel oops in staged CS ioctl - Use direct MSI mechanism in Gaudi as a WA for a H/W issue regarding FLR - Don't support collective wait ioctl operation when it is not supported. e.g. when the NIC ports are disabled - Fix configuration of one of the security mechanism. - Change error print to be rate-limited as it can be initiated by the user and spam the kernel log - Fix return value of CS ioctl when doing staged CS - Fix CS ioctl code when user doesn't supply an offset for the memory area that we use as fence. - Spelling mistake fix -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmFG7bAACgkQZR1NuKta 54D4vQf/T477hf2MKKIHdEyCUs601smNqK9VcEm5SNRUiqRo3cEyjMPY/OUqINga 6jcvWGsnxv1COL0EF3TxHfTN12Lzdli2emCk7h2ErADVB5UqZmS7uYEh/20M67nX KPDa5fm9Ah0SjKmocq114xqjJvUkOryB8+x2T8BCQXmuS5jBGoe0vuLKtZKcPtCp y+ErR6X8srBv+U1Lb2b1u3JAAsgOg2L7BixaBPnWp/hjqNe+RW+WAEqfWUj2AUfl yZNKuI9DRcKOjqoSqwCG8CxpwsZthiYs3a1wYTE8GpNXGeXxDnS02sbvPcudEQAc seOkIAbsBNgNSYc110BQSVzZErcdtw== =Rzaf -----END PGP SIGNATURE----- Merge tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus Oded writes: This tag contains the following fixes for 5.15-rc3: - Fix potential race when user waiting for interrupt ioctl - Prevent possible kernel oops in staged CS ioctl - Use direct MSI mechanism in Gaudi as a WA for a H/W issue regarding FLR - Don't support collective wait ioctl operation when it is not supported. e.g. when the NIC ports are disabled - Fix configuration of one of the security mechanism. - Change error print to be rate-limited as it can be initiated by the user and spam the kernel log - Fix return value of CS ioctl when doing staged CS - Fix CS ioctl code when user doesn't supply an offset for the memory area that we use as fence. 
- Spelling mistake fix * tag 'misc-habanalabs-fixes-2021-09-19' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: habanalabs: expose a single cs seq in staged submissions habanalabs: fix wait offset handling habanalabs: rate limit multi CS completion errors habanalabs/gaudi: fix LBW RR configuration habanalabs: Fix spelling mistake "FEADBACK" -> "FEEDBACK" habanalabs: fail collective wait when not supported habanalabs/gaudi: use direct MSI in single mode habanalabs: fix kernel OOPs related to staged cs habanalabs: fix potential race in interrupt wait ioctl
This commit is contained in:
commit
3e1d5b0f58
|
@ -405,7 +405,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
|
|||
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
|
||||
{
|
||||
bool next_entry_found = false;
|
||||
struct hl_cs *next;
|
||||
struct hl_cs *next, *first_cs;
|
||||
|
||||
if (!cs_needs_timeout(cs))
|
||||
return;
|
||||
|
@ -415,9 +415,16 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
|
|||
/* We need to handle tdr only once for the complete staged submission.
|
||||
* Hence, we choose the CS that reaches this function first which is
|
||||
* the CS marked as 'staged_last'.
|
||||
* In case single staged cs was submitted which has both first and last
|
||||
* indications, then "cs_find_first" below will return NULL, since we
|
||||
* removed the cs node from the list before getting here,
|
||||
* in such cases just continue with the cs to cancel its TDR work.
|
||||
*/
|
||||
if (cs->staged_cs && cs->staged_last)
|
||||
cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
|
||||
if (cs->staged_cs && cs->staged_last) {
|
||||
first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
|
||||
if (first_cs)
|
||||
cs = first_cs;
|
||||
}
|
||||
|
||||
spin_unlock(&hdev->cs_mirror_lock);
|
||||
|
||||
|
@ -1288,6 +1295,12 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
|
|||
if (rc)
|
||||
goto free_cs_object;
|
||||
|
||||
/* If this is a staged submission we must return the staged sequence
|
||||
* rather than the internal CS sequence
|
||||
*/
|
||||
if (cs->staged_cs)
|
||||
*cs_seq = cs->staged_sequence;
|
||||
|
||||
/* Validate ALL the CS chunks before submitting the CS */
|
||||
for (i = 0 ; i < num_chunks ; i++) {
|
||||
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
|
||||
|
@ -1988,6 +2001,15 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
|||
goto free_cs_chunk_array;
|
||||
}
|
||||
|
||||
if (!hdev->nic_ports_mask) {
|
||||
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
|
||||
atomic64_inc(&cntr->validation_drop_cnt);
|
||||
dev_err(hdev->dev,
|
||||
"Collective operations not supported when NIC ports are disabled");
|
||||
rc = -EINVAL;
|
||||
goto free_cs_chunk_array;
|
||||
}
|
||||
|
||||
collective_engine_id = chunk->collective_engine_id;
|
||||
}
|
||||
|
||||
|
@ -2026,9 +2048,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
|
|||
spin_unlock(&ctx->sig_mgr.lock);
|
||||
|
||||
if (!handle_found) {
|
||||
dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
|
||||
/* treat as signal CS already finished */
|
||||
dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
|
||||
signal_seq);
|
||||
rc = -EINVAL;
|
||||
rc = 0;
|
||||
goto free_cs_chunk_array;
|
||||
}
|
||||
|
||||
|
@ -2613,7 +2636,8 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
|
|||
* completed after the poll function.
|
||||
*/
|
||||
if (!mcs_data.completion_bitmap) {
|
||||
dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
|
||||
dev_warn_ratelimited(hdev->dev,
|
||||
"Multi-CS got completion on wait but no CS completed\n");
|
||||
rc = -EFAULT;
|
||||
}
|
||||
}
|
||||
|
@ -2740,10 +2764,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
|||
else
|
||||
interrupt = &hdev->user_interrupt[interrupt_offset];
|
||||
|
||||
/* Add pending user interrupt to relevant list for the interrupt
|
||||
* handler to monitor
|
||||
*/
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
/* We check for completion value as interrupt could have been received
|
||||
* before we added the node to the wait list
|
||||
*/
|
||||
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
|
||||
dev_err(hdev->dev, "Failed to copy completion value from user\n");
|
||||
rc = -EFAULT;
|
||||
goto free_fence;
|
||||
goto remove_pending_user_interrupt;
|
||||
}
|
||||
|
||||
if (completion_value >= target_value)
|
||||
|
@ -2752,14 +2786,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
|
|||
*status = CS_WAIT_STATUS_BUSY;
|
||||
|
||||
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
|
||||
goto free_fence;
|
||||
|
||||
/* Add pending user interrupt to relevant list for the interrupt
|
||||
* handler to monitor
|
||||
*/
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
goto remove_pending_user_interrupt;
|
||||
|
||||
wait_again:
|
||||
/* Wait for interrupt handler to signal completion */
|
||||
|
@ -2770,6 +2797,15 @@ wait_again:
|
|||
* If comparison fails, keep waiting until timeout expires
|
||||
*/
|
||||
if (completion_rc > 0) {
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
/* reinit_completion must be called before we check for user
|
||||
* completion value, otherwise, if interrupt is received after
|
||||
* the comparison and before the next wait_for_completion,
|
||||
* we will reach timeout and fail
|
||||
*/
|
||||
reinit_completion(&pend->fence.completion);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
|
||||
dev_err(hdev->dev, "Failed to copy completion value from user\n");
|
||||
rc = -EFAULT;
|
||||
|
@ -2780,11 +2816,7 @@ wait_again:
|
|||
if (completion_value >= target_value) {
|
||||
*status = CS_WAIT_STATUS_COMPLETED;
|
||||
} else {
|
||||
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
|
||||
reinit_completion(&pend->fence.completion);
|
||||
timeout = completion_rc;
|
||||
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
goto wait_again;
|
||||
}
|
||||
} else if (completion_rc == -ERESTARTSYS) {
|
||||
|
@ -2802,7 +2834,6 @@ remove_pending_user_interrupt:
|
|||
list_del(&pend->wait_list_node);
|
||||
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
|
||||
|
||||
free_fence:
|
||||
kfree(pend);
|
||||
hl_ctx_put(ctx);
|
||||
|
||||
|
|
|
@ -437,6 +437,7 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
|||
struct hl_cs_compl *cs_cmpl)
|
||||
{
|
||||
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
|
||||
u32 offset = 0;
|
||||
|
||||
cs_cmpl->hw_sob = handle->hw_sob;
|
||||
|
||||
|
@ -446,9 +447,13 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
|||
* set offset 1, for example, he means to wait for the first
|
||||
* signal only, which will be pre_sob_val, and if he set offset 2
|
||||
* then the value required is (pre_sob_val + 1) and so on...
|
||||
* If the user sets the wait offset to 0, treat it as a legacy wait CS:
|
||||
* wait for the next signal.
|
||||
*/
|
||||
cs_cmpl->sob_val = handle->pre_sob_val +
|
||||
(job->encaps_sig_wait_offset - 1);
|
||||
if (job->encaps_sig_wait_offset)
|
||||
offset = job->encaps_sig_wait_offset - 1;
|
||||
|
||||
cs_cmpl->sob_val = handle->pre_sob_val + offset;
|
||||
}
|
||||
|
||||
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
|
||||
|
|
|
@ -395,7 +395,7 @@ static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
|
|||
|
||||
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
|
||||
{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
|
||||
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
|
||||
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
|
||||
{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
|
||||
{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
|
||||
{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
|
||||
|
@ -5802,6 +5802,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
|
|||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
struct packet_msg_prot *cq_pkt;
|
||||
u64 msi_addr;
|
||||
u32 tmp;
|
||||
|
||||
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
|
||||
|
@ -5823,10 +5824,12 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
|
|||
cq_pkt->ctl = cpu_to_le32(tmp);
|
||||
cq_pkt->value = cpu_to_le32(1);
|
||||
|
||||
if (!gaudi->multi_msi_mode)
|
||||
msi_vec = 0;
|
||||
if (gaudi->multi_msi_mode)
|
||||
msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
|
||||
else
|
||||
msi_addr = mmPCIE_CORE_MSI_REQ;
|
||||
|
||||
cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
|
||||
cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
|
||||
}
|
||||
|
||||
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
|
||||
|
|
|
@ -8,16 +8,21 @@
|
|||
#include "gaudiP.h"
|
||||
#include "../include/gaudi/asic_reg/gaudi_regs.h"
|
||||
|
||||
#define GAUDI_NUMBER_OF_RR_REGS 24
|
||||
#define GAUDI_NUMBER_OF_LBW_RANGES 12
|
||||
#define GAUDI_NUMBER_OF_LBW_RR_REGS 28
|
||||
#define GAUDI_NUMBER_OF_HBW_RR_REGS 24
|
||||
#define GAUDI_NUMBER_OF_LBW_RANGES 10
|
||||
|
||||
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_HIT_WPROT,
|
||||
mmDMA_IF_W_S_DMA0_HIT_WPROT,
|
||||
mmDMA_IF_W_S_DMA1_HIT_WPROT,
|
||||
mmDMA_IF_E_S_SOB_HIT_WPROT,
|
||||
mmDMA_IF_E_S_DMA0_HIT_WPROT,
|
||||
mmDMA_IF_E_S_DMA1_HIT_WPROT,
|
||||
mmDMA_IF_W_N_SOB_HIT_WPROT,
|
||||
mmDMA_IF_W_N_DMA0_HIT_WPROT,
|
||||
mmDMA_IF_W_N_DMA1_HIT_WPROT,
|
||||
mmDMA_IF_E_N_SOB_HIT_WPROT,
|
||||
mmDMA_IF_E_N_DMA0_HIT_WPROT,
|
||||
mmDMA_IF_E_N_DMA1_HIT_WPROT,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW,
|
||||
|
@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_HIT_RPROT,
|
||||
mmDMA_IF_W_S_DMA0_HIT_RPROT,
|
||||
mmDMA_IF_W_S_DMA1_HIT_RPROT,
|
||||
mmDMA_IF_E_S_SOB_HIT_RPROT,
|
||||
mmDMA_IF_E_S_DMA0_HIT_RPROT,
|
||||
mmDMA_IF_E_S_DMA1_HIT_RPROT,
|
||||
mmDMA_IF_W_N_SOB_HIT_RPROT,
|
||||
mmDMA_IF_W_N_DMA0_HIT_RPROT,
|
||||
mmDMA_IF_W_N_DMA1_HIT_RPROT,
|
||||
mmDMA_IF_E_N_SOB_HIT_RPROT,
|
||||
mmDMA_IF_E_N_DMA0_HIT_RPROT,
|
||||
mmDMA_IF_E_N_DMA1_HIT_RPROT,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR,
|
||||
|
@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_MIN_WPROT_0,
|
||||
mmDMA_IF_W_S_DMA0_MIN_WPROT_0,
|
||||
mmDMA_IF_W_S_DMA1_MIN_WPROT_0,
|
||||
mmDMA_IF_E_S_SOB_MIN_WPROT_0,
|
||||
mmDMA_IF_E_S_DMA0_MIN_WPROT_0,
|
||||
mmDMA_IF_E_S_DMA1_MIN_WPROT_0,
|
||||
mmDMA_IF_W_N_SOB_MIN_WPROT_0,
|
||||
mmDMA_IF_W_N_DMA0_MIN_WPROT_0,
|
||||
mmDMA_IF_W_N_DMA1_MIN_WPROT_0,
|
||||
mmDMA_IF_E_N_SOB_MIN_WPROT_0,
|
||||
mmDMA_IF_E_N_DMA0_MIN_WPROT_0,
|
||||
mmDMA_IF_E_N_DMA1_MIN_WPROT_0,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0,
|
||||
|
@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_MAX_WPROT_0,
|
||||
mmDMA_IF_W_S_DMA0_MAX_WPROT_0,
|
||||
mmDMA_IF_W_S_DMA1_MAX_WPROT_0,
|
||||
mmDMA_IF_E_S_SOB_MAX_WPROT_0,
|
||||
mmDMA_IF_E_S_DMA0_MAX_WPROT_0,
|
||||
mmDMA_IF_E_S_DMA1_MAX_WPROT_0,
|
||||
mmDMA_IF_W_N_SOB_MAX_WPROT_0,
|
||||
mmDMA_IF_W_N_DMA0_MAX_WPROT_0,
|
||||
mmDMA_IF_W_N_DMA1_MAX_WPROT_0,
|
||||
mmDMA_IF_E_N_SOB_MAX_WPROT_0,
|
||||
mmDMA_IF_E_N_DMA0_MAX_WPROT_0,
|
||||
mmDMA_IF_E_N_DMA1_MAX_WPROT_0,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0,
|
||||
|
@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_MIN_RPROT_0,
|
||||
mmDMA_IF_W_S_DMA0_MIN_RPROT_0,
|
||||
mmDMA_IF_W_S_DMA1_MIN_RPROT_0,
|
||||
mmDMA_IF_E_S_SOB_MIN_RPROT_0,
|
||||
mmDMA_IF_E_S_DMA0_MIN_RPROT_0,
|
||||
mmDMA_IF_E_S_DMA1_MIN_RPROT_0,
|
||||
mmDMA_IF_W_N_SOB_MIN_RPROT_0,
|
||||
mmDMA_IF_W_N_DMA0_MIN_RPROT_0,
|
||||
mmDMA_IF_W_N_DMA1_MIN_RPROT_0,
|
||||
mmDMA_IF_E_N_SOB_MIN_RPROT_0,
|
||||
mmDMA_IF_E_N_DMA0_MIN_RPROT_0,
|
||||
mmDMA_IF_E_N_DMA1_MIN_RPROT_0,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0,
|
||||
|
@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_SOB_MAX_RPROT_0,
|
||||
mmDMA_IF_W_S_DMA0_MAX_RPROT_0,
|
||||
mmDMA_IF_W_S_DMA1_MAX_RPROT_0,
|
||||
mmDMA_IF_E_S_SOB_MAX_RPROT_0,
|
||||
mmDMA_IF_E_S_DMA0_MAX_RPROT_0,
|
||||
mmDMA_IF_E_S_DMA1_MAX_RPROT_0,
|
||||
mmDMA_IF_W_N_SOB_MAX_RPROT_0,
|
||||
mmDMA_IF_W_N_DMA0_MAX_RPROT_0,
|
||||
mmDMA_IF_W_N_DMA1_MAX_RPROT_0,
|
||||
mmDMA_IF_E_N_SOB_MAX_RPROT_0,
|
||||
mmDMA_IF_E_N_DMA0_MAX_RPROT_0,
|
||||
mmDMA_IF_E_N_DMA1_MAX_RPROT_0,
|
||||
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0,
|
||||
|
@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0,
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW,
|
||||
|
@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR,
|
||||
|
@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
|
||||
|
@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
|
||||
|
@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
|
||||
|
@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
|
||||
|
@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
|
||||
|
@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
|
||||
|
@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
|
||||
|
@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
|||
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0
|
||||
};
|
||||
|
||||
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
|
||||
static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
|
||||
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
|
||||
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0,
|
||||
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
|
||||
|
@ -12849,50 +12874,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev)
|
|||
u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES];
|
||||
int i, j;
|
||||
|
||||
lbw_rng_start[0] = (0xFBFE0000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[0] = (0xFBFFF000 & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[0] = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */
|
||||
lbw_rng_end[0] = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */
|
||||
|
||||
lbw_rng_start[1] = (0xFC0E8000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[1] = (0xFC120000 & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[1] = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */
|
||||
lbw_rng_end[1] = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */
|
||||
|
||||
lbw_rng_start[2] = (0xFC1E8000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[2] = (0xFC48FFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[2] = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */
|
||||
lbw_rng_end[2] = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */
|
||||
|
||||
lbw_rng_start[3] = (0xFC600000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[3] = (0xFCC48FFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[3] = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */
|
||||
lbw_rng_end[3] = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */
|
||||
|
||||
lbw_rng_start[4] = (0xFCC4A000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[4] = (0xFCCDFFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[4] = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */
|
||||
lbw_rng_end[4] = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */
|
||||
|
||||
lbw_rng_start[5] = (0xFCCE4000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[5] = (0xFCD1FFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[5] = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */
|
||||
lbw_rng_end[5] = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */
|
||||
|
||||
lbw_rng_start[6] = (0xFCD24000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[6] = (0xFCD5FFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[6] = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */
|
||||
lbw_rng_end[6] = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */
|
||||
|
||||
lbw_rng_start[7] = (0xFCD64000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[7] = (0xFCD9FFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[7] = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */
|
||||
lbw_rng_end[7] = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */
|
||||
|
||||
lbw_rng_start[8] = (0xFCDA4000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[8] = (0xFCDDFFFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[8] = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */
|
||||
lbw_rng_end[8] = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */
|
||||
|
||||
lbw_rng_start[9] = (0xFCDE4000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[9] = (0xFCE05FFF & 0x3FFFFFF) + 1;
|
||||
lbw_rng_start[9] = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */
|
||||
lbw_rng_end[9] = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */
|
||||
|
||||
lbw_rng_start[10] = (0xFEC43000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[10] = (0xFEC43FFF & 0x3FFFFFF) + 1;
|
||||
|
||||
lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1;
|
||||
lbw_rng_end[11] = (0xFE484FFF & 0x3FFFFFF) + 1;
|
||||
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) {
|
||||
WREG32(gaudi_rr_lbw_hit_aw_regs[i],
|
||||
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
|
||||
WREG32(gaudi_rr_lbw_hit_ar_regs[i],
|
||||
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
|
||||
}
|
||||
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++)
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++)
|
||||
for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) {
|
||||
WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2),
|
||||
lbw_rng_start[j]);
|
||||
|
@ -12939,12 +12958,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev)
|
|||
* 6th range is the host
|
||||
*/
|
||||
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
|
||||
WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F);
|
||||
WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D);
|
||||
}
|
||||
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
|
||||
for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
|
||||
WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo);
|
||||
WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo);
|
||||
|
||||
|
|
|
@ -308,6 +308,8 @@
|
|||
#define mmPCIE_AUX_FLR_CTRL 0xC07394
|
||||
#define mmPCIE_AUX_DBI 0xC07490
|
||||
|
||||
#define mmPCIE_CORE_MSI_REQ 0xC04100
|
||||
|
||||
#define mmPSOC_PCI_PLL_NR 0xC72100
|
||||
#define mmSRAM_W_PLL_NR 0x4C8100
|
||||
#define mmPSOC_HBM_PLL_NR 0xC74100
|
||||
|
|
Loading…
Reference in New Issue