habanalabs: halt debug engines on user process close
This patch fixes a potential bug where a user's process has closed unexpectedly without disabling the debug engines. In that case, the debug engines might continue running but because the user's MMU mappings are going away, we will get page fault errors. This behavior also violates the general rule that nothing runs on the device after the user process closes. The patch stops the debug H/W engines upon process termination and thus makes sure nothing runs on the device after the process goes away. Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
a188339ca5
commit
89225ce4fc
|
@ -26,6 +26,12 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
|||
dma_fence_put(ctx->cs_pending[i]);
|
||||
|
||||
if (ctx->asid != HL_KERNEL_ASID_ID) {
|
||||
/*
|
||||
* The engines are stopped as there is no executing CS, but the
|
||||
* Coresight might be still working by accessing addresses
|
||||
* related to the stopped engines. Hence stop it explicitly.
|
||||
*/
|
||||
hdev->asic_funcs->halt_coresight(hdev);
|
||||
hl_vm_ctx_fini(ctx);
|
||||
hl_asid_free(hdev, ctx->asid);
|
||||
}
|
||||
|
|
|
@ -4819,7 +4819,8 @@ static const struct hl_asic_funcs goya_funcs = {
|
|||
.set_dram_bar_base = goya_set_ddr_bar_base,
|
||||
.init_iatu = goya_init_iatu,
|
||||
.rreg = hl_rreg,
|
||||
.wreg = hl_wreg
|
||||
.wreg = hl_wreg,
|
||||
.halt_coresight = goya_halt_coresight
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -202,6 +202,7 @@ void goya_add_device_attr(struct hl_device *hdev,
|
|||
struct attribute_group *dev_attr_grp);
|
||||
int goya_armcp_info_get(struct hl_device *hdev);
|
||||
int goya_debug_coresight(struct hl_device *hdev, void *data);
|
||||
void goya_halt_coresight(struct hl_device *hdev);
|
||||
|
||||
void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
|
||||
int goya_mmu_clear_pgt_range(struct hl_device *hdev);
|
||||
|
|
|
@ -626,3 +626,20 @@ int goya_debug_coresight(struct hl_device *hdev, void *data)
|
|||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void goya_halt_coresight(struct hl_device *hdev)
|
||||
{
|
||||
struct hl_debug_params params = {};
|
||||
int i, rc;
|
||||
|
||||
for (i = GOYA_ETF_FIRST ; i <= GOYA_ETF_LAST ; i++) {
|
||||
params.reg_idx = i;
|
||||
rc = goya_config_etf(hdev, ¶ms);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "halt ETF failed, %d/%d\n", rc, i);
|
||||
}
|
||||
|
||||
rc = goya_config_etr(hdev, ¶ms);
|
||||
if (rc)
|
||||
dev_err(hdev->dev, "halt ETR failed, %d\n", rc);
|
||||
}
|
||||
|
|
|
@ -501,6 +501,7 @@ enum hl_pll_frequency {
|
|||
* @init_iatu: Initialize the iATU unit inside the PCI controller.
|
||||
* @rreg: Read a register. Needed for simulator support.
|
||||
* @wreg: Write a register. Needed for simulator support.
|
||||
* @halt_coresight: stop the ETF and ETR traces.
|
||||
*/
|
||||
struct hl_asic_funcs {
|
||||
int (*early_init)(struct hl_device *hdev);
|
||||
|
@ -578,6 +579,7 @@ struct hl_asic_funcs {
|
|||
int (*init_iatu)(struct hl_device *hdev);
|
||||
u32 (*rreg)(struct hl_device *hdev, u32 reg);
|
||||
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
|
||||
void (*halt_coresight)(struct hl_device *hdev);
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue