drm/amdgpu: add and implement the GPU reset status query
Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
This commit is contained in:
parent
1f8d962513
commit
d94aed5a6c
|
@ -1040,7 +1040,7 @@ struct amdgpu_vm_manager {
|
||||||
|
|
||||||
struct amdgpu_ctx_state {
|
struct amdgpu_ctx_state {
|
||||||
uint64_t flags;
|
uint64_t flags;
|
||||||
uint64_t hangs;
|
uint32_t hangs;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_ctx {
|
struct amdgpu_ctx {
|
||||||
|
@ -1049,6 +1049,7 @@ struct amdgpu_ctx {
|
||||||
struct amdgpu_fpriv *fpriv;
|
struct amdgpu_fpriv *fpriv;
|
||||||
struct amdgpu_ctx_state state;
|
struct amdgpu_ctx_state state;
|
||||||
uint32_t id;
|
uint32_t id;
|
||||||
|
unsigned reset_counter;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_ctx_mgr {
|
struct amdgpu_ctx_mgr {
|
||||||
|
@ -1897,8 +1898,6 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev,struct amdgpu_fpriv *fpriv,
|
||||||
uint32_t *id,uint32_t flags);
|
uint32_t *id,uint32_t flags);
|
||||||
int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
|
int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
|
||||||
uint32_t id);
|
uint32_t id);
|
||||||
int amdgpu_ctx_query(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
|
|
||||||
uint32_t id,struct amdgpu_ctx_state *state);
|
|
||||||
|
|
||||||
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
|
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
|
||||||
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
|
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
|
||||||
|
@ -2006,6 +2005,7 @@ struct amdgpu_device {
|
||||||
atomic64_t vram_vis_usage;
|
atomic64_t vram_vis_usage;
|
||||||
atomic64_t gtt_usage;
|
atomic64_t gtt_usage;
|
||||||
atomic64_t num_bytes_moved;
|
atomic64_t num_bytes_moved;
|
||||||
|
atomic_t gpu_reset_counter;
|
||||||
|
|
||||||
/* display */
|
/* display */
|
||||||
struct amdgpu_mode_info mode_info;
|
struct amdgpu_mode_info mode_info;
|
||||||
|
|
|
@ -81,21 +81,36 @@ int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int amdgpu_ctx_query(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id, struct amdgpu_ctx_state *state)
|
static int amdgpu_ctx_query(struct amdgpu_device *adev,
|
||||||
|
struct amdgpu_fpriv *fpriv, uint32_t id,
|
||||||
|
union drm_amdgpu_ctx_out *out)
|
||||||
{
|
{
|
||||||
struct amdgpu_ctx *ctx;
|
struct amdgpu_ctx *ctx;
|
||||||
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
|
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
|
||||||
|
unsigned reset_counter;
|
||||||
|
|
||||||
mutex_lock(&mgr->lock);
|
mutex_lock(&mgr->lock);
|
||||||
ctx = idr_find(&mgr->ctx_handles, id);
|
ctx = idr_find(&mgr->ctx_handles, id);
|
||||||
if (ctx) {
|
if (!ctx) {
|
||||||
/* state should alter with CS activity */
|
|
||||||
*state = ctx->state;
|
|
||||||
mutex_unlock(&mgr->lock);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
mutex_unlock(&mgr->lock);
|
mutex_unlock(&mgr->lock);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: these two are always zero */
|
||||||
|
out->state.flags = ctx->state.flags;
|
||||||
|
out->state.hangs = ctx->state.hangs;
|
||||||
|
|
||||||
|
/* determine if a GPU reset has occurred since the last call */
|
||||||
|
reset_counter = atomic_read(&adev->gpu_reset_counter);
|
||||||
|
/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
|
||||||
|
if (ctx->reset_counter == reset_counter)
|
||||||
|
out->state.reset_status = AMDGPU_CTX_NO_RESET;
|
||||||
|
else
|
||||||
|
out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
|
||||||
|
ctx->reset_counter = reset_counter;
|
||||||
|
|
||||||
|
mutex_unlock(&mgr->lock);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv)
|
void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv)
|
||||||
|
@ -120,7 +135,6 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
|
||||||
int r;
|
int r;
|
||||||
uint32_t id;
|
uint32_t id;
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
struct amdgpu_ctx_state state;
|
|
||||||
|
|
||||||
union drm_amdgpu_ctx *args = data;
|
union drm_amdgpu_ctx *args = data;
|
||||||
struct amdgpu_device *adev = dev->dev_private;
|
struct amdgpu_device *adev = dev->dev_private;
|
||||||
|
@ -139,11 +153,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
|
||||||
r = amdgpu_ctx_free(adev, fpriv, id);
|
r = amdgpu_ctx_free(adev, fpriv, id);
|
||||||
break;
|
break;
|
||||||
case AMDGPU_CTX_OP_QUERY_STATE:
|
case AMDGPU_CTX_OP_QUERY_STATE:
|
||||||
r = amdgpu_ctx_query(adev, fpriv, id, &state);
|
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
|
||||||
if (r == 0) {
|
|
||||||
args->out.state.flags = state.flags;
|
|
||||||
args->out.state.hangs = state.hangs;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
|
@ -1781,6 +1781,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
|
||||||
}
|
}
|
||||||
|
|
||||||
adev->needs_reset = false;
|
adev->needs_reset = false;
|
||||||
|
atomic_inc(&adev->gpu_reset_counter);
|
||||||
|
|
||||||
/* block TTM */
|
/* block TTM */
|
||||||
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
|
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
|
||||||
|
|
|
@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
|
||||||
|
|
||||||
#define AMDGPU_CTX_OP_STATE_RUNNING 1
|
#define AMDGPU_CTX_OP_STATE_RUNNING 1
|
||||||
|
|
||||||
|
/* GPU reset status */
|
||||||
|
#define AMDGPU_CTX_NO_RESET 0
|
||||||
|
#define AMDGPU_CTX_GUILTY_RESET 1 /* this context caused it */
|
||||||
|
#define AMDGPU_CTX_INNOCENT_RESET 2 /* some other context caused it */
|
||||||
|
#define AMDGPU_CTX_UNKNOWN_RESET 3 /* unknown cause */
|
||||||
|
|
||||||
struct drm_amdgpu_ctx_in {
|
struct drm_amdgpu_ctx_in {
|
||||||
uint32_t op;
|
uint32_t op;
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
|
@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
uint64_t flags;
|
uint64_t flags;
|
||||||
uint64_t hangs;
|
/** Number of resets caused by this context so far. */
|
||||||
|
uint32_t hangs;
|
||||||
|
/** Reset status since the last call of the ioctl. */
|
||||||
|
uint32_t reset_status;
|
||||||
} state;
|
} state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue