diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 666fce66f9dd..b01bd7fa9c2b 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -108,7 +108,8 @@ struct msm_drm_private { struct drm_fb_helper *fbdev; - struct msm_rd_state *rd; + struct msm_rd_state *rd; /* debugfs to dump all submits */ + struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf; /* list of GEM objects: */ @@ -298,7 +299,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); -void msm_rd_dump_submit(struct msm_gem_submit *submit); +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 8481014192d1..7fc6f68d6aec 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -557,7 +557,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, list_add_tail(&submit->node, &ring->submits); - msm_rd_dump_submit(submit); + msm_rd_dump_submit(priv->rd, submit); update_sw_cntrs(gpu); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 47c0c78f2076..0fac6ea5f906 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -19,11 +19,17 @@ * * tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd * - * To log the cmdstream in a format that is understood by freedreno/cffdump + * to log the cmdstream in a format that is understood by freedreno/cffdump * utility. By comparing the last successfully completed fence #, to the * cmdstream for the next fence, you can narrow down which process and submit * caused the gpu crash/lockup. 
* + * Additionally: + * + * tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd + * + * will capture just the cmdstream from submits which triggered a GPU hang. + * * This bypasses drm_debugfs_create_files() mainly because we need to use * our own fops for a bit more control. In particular, we don't want to * do anything if userspace doesn't have the debugfs file open. @@ -212,53 +218,89 @@ static const struct file_operations rd_debugfs_fops = { .release = rd_release, }; -int msm_rd_debugfs_init(struct drm_minor *minor) + +static void rd_cleanup(struct msm_rd_state *rd) +{ + if (!rd) + return; + + mutex_destroy(&rd->read_lock); + kfree(rd); +} + +static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name) { - struct msm_drm_private *priv = minor->dev->dev_private; struct msm_rd_state *rd; struct dentry *ent; - - /* only create on first minor: */ - if (priv->rd) - return 0; + int ret = 0; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rd->dev = minor->dev; rd->fifo.buf = rd->buf; mutex_init(&rd->read_lock); - priv->rd = rd; init_waitqueue_head(&rd->fifo_event); - ent = debugfs_create_file("rd", S_IFREG | S_IRUGO, + ent = debugfs_create_file(name, S_IFREG | S_IRUGO, minor->debugfs_root, rd, &rd_debugfs_fops); if (!ent) { - DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n", - minor->debugfs_root); + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n", + minor->debugfs_root, name); + ret = -ENOMEM; goto fail; } + return rd; + +fail: + rd_cleanup(rd); + return ERR_PTR(ret); +} + +int msm_rd_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd; + int ret; + + /* only create on first minor: */ + if (priv->rd) + return 0; + + rd = rd_init(minor, "rd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + priv->rd = rd; + + rd = rd_init(minor, "hangrd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + 
priv->hangrd = rd; + return 0; fail: msm_rd_debugfs_cleanup(priv); - return -1; + return ret; } void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) { - struct msm_rd_state *rd = priv->rd; - - if (!rd) - return; - + rd_cleanup(priv->rd); priv->rd = NULL; - mutex_destroy(&rd->read_lock); - kfree(rd); + + rd_cleanup(priv->hangrd); + priv->hangrd = NULL; } static void snapshot_buf(struct msm_rd_state *rd, @@ -296,11 +338,10 @@ static void snapshot_buf(struct msm_rd_state *rd, } /* called under struct_mutex */ -void msm_rd_dump_submit(struct msm_gem_submit *submit) +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit) { struct drm_device *dev = submit->dev; - struct msm_drm_private *priv = dev->dev_private; - struct msm_rd_state *rd = priv->rd; + struct task_struct *task; char msg[128]; int i, n; @@ -312,9 +353,17 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) */ WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - submit->fence->seqno); + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", + TASK_COMM_LEN, task->comm, + pid_nr(submit->pid), submit->seqno); + } else { + n = snprintf(msg, sizeof(msg), "???/%d: fence=%u", + pid_nr(submit->pid), submit->seqno); + } + rcu_read_unlock(); rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));