OpenCloudOS-Kernel/drivers/gpu/drm/i915/i915_debugfs.c

/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Keith Packard <keithp@keithp.com>
*
*/
#include <linux/debugfs.h>
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include "intel_drv.h"
#include "intel_guc_submission.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
{
return to_i915(node->minor->dev);
}
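
/*
 * i915_capabilities: one-stop summary of the device: gen, platform, PCH
 * type, device-info flags and runtime state, driver caps, and the module
 * parameters (dumped under kernel_param_lock() for a consistent snapshot).
 * Typically read via /sys/kernel/debug/dri/<minor>/i915_capabilities.
 */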
static int i915_capabilities(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const struct intel_device_info *info = INTEL_INFO(dev_priv);
struct drm_printer p = drm_seq_file_printer(m);
seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv));
seq_printf(m, "platform: %s\n", intel_platform_name(info->platform));
seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv));
intel_device_info_dump_flags(info, &p);
intel_device_info_dump_runtime(info, &p);
intel_driver_caps_print(&dev_priv->caps, &p);
kernel_param_lock(THIS_MODULE);
i915_params_dump(&i915_modparams, &p);
kernel_param_unlock(THIS_MODULE);
return 0;
}
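
/*
 * Single-character status flags used by the object dumps below:
 * '*' active, 'p' pinned for display, 'X'/'Y' tiling mode, 'g' has
 * outstanding GGTT mmap faults, 'M' has a kernel mapping of its pages.
 */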
static char get_active_flag(struct drm_i915_gem_object *obj)
{
return i915_gem_object_is_active(obj) ? '*' : ' ';
}
static char get_pin_flag(struct drm_i915_gem_object *obj)
{
return obj->pin_global ? 'p' : ' ';
}
static char get_tiling_flag(struct drm_i915_gem_object *obj)
{
switch (i915_gem_object_get_tiling(obj)) {
default:
case I915_TILING_NONE: return ' ';
case I915_TILING_X: return 'X';
case I915_TILING_Y: return 'Y';
}
}
static char get_global_flag(struct drm_i915_gem_object *obj)
{
return obj->userfault_count ? 'g' : ' ';
}
static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
{
return obj->mm.mapping ? 'M' : ' ';
}
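
/* Sum the sizes of all GGTT nodes currently allocated to this object. */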
static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
{
u64 size = 0;
struct i915_vma *vma;
for_each_ggtt_vma(vma, obj) {
if (drm_mm_node_allocated(&vma->node))
size += vma->node.size;
}
return size;
}
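
/*
 * Pretty-print a mask of GTT page sizes. A single size is returned as a
 * static string; a combination is assembled in the caller's buffer, so
 * callers that can see mixed masks must supply a real buf/len.
 */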
static const char *
stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len)
{
size_t x = 0;
switch (page_sizes) {
case 0:
return "";
case I915_GTT_PAGE_SIZE_4K:
return "4K";
case I915_GTT_PAGE_SIZE_64K:
return "64K";
case I915_GTT_PAGE_SIZE_2M:
return "2M";
default:
if (!buf)
return "M";
if (page_sizes & I915_GTT_PAGE_SIZE_2M)
x += snprintf(buf + x, len - x, "2M, ");
if (page_sizes & I915_GTT_PAGE_SIZE_64K)
x += snprintf(buf + x, len - x, "64K, ");
if (page_sizes & I915_GTT_PAGE_SIZE_4K)
x += snprintf(buf + x, len - x, "4K, ");
buf[x-2] = '\0';
return buf;
}
}
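
/*
 * Emit a one-line description of a GEM object: status flags, size,
 * read/write domains, cache level, every allocated VMA (with GGTT view
 * and fence details), stolen offset, last write engine and frontbuffer
 * bits. Requires struct_mutex, as asserted below.
 */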
static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct intel_engine_cs *engine;
struct i915_vma *vma;
unsigned int frontbuffer_bits;
int pin_count = 0;
lockdep_assert_held(&obj->base.dev->struct_mutex);
seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
&obj->base,
get_active_flag(obj),
get_pin_flag(obj),
get_tiling_flag(obj),
get_global_flag(obj),
get_pin_mapped_flag(obj),
obj->base.size / 1024,
obj->read_domains,
obj->write_domain,
i915_cache_level_str(dev_priv, obj->cache_level),
obj->mm.dirty ? " dirty" : "",
obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (i915_vma_is_pinned(vma))
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_global)
seq_printf(m, " (global)");
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s",
i915_vma_is_ggtt(vma) ? "g" : "pp",
vma->node.start, vma->node.size,
stringify_page_sizes(vma->page_sizes.gtt, NULL, 0));
if (i915_vma_is_ggtt(vma)) {
switch (vma->ggtt_view.type) {
case I915_GGTT_VIEW_NORMAL:
seq_puts(m, ", normal");
break;
case I915_GGTT_VIEW_PARTIAL:
seq_printf(m, ", partial [%08llx+%x]",
vma->ggtt_view.partial.offset << PAGE_SHIFT,
vma->ggtt_view.partial.size << PAGE_SHIFT);
break;
case I915_GGTT_VIEW_ROTATED:
seq_printf(m, ", rotated [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]",
vma->ggtt_view.rotated.plane[0].width,
vma->ggtt_view.rotated.plane[0].height,
vma->ggtt_view.rotated.plane[0].stride,
vma->ggtt_view.rotated.plane[0].offset,
vma->ggtt_view.rotated.plane[1].width,
vma->ggtt_view.rotated.plane[1].height,
vma->ggtt_view.rotated.plane[1].stride,
vma->ggtt_view.rotated.plane[1].offset);
break;
default:
MISSING_CASE(vma->ggtt_view.type);
break;
}
}
if (vma->fence)
seq_printf(m, " , fence: %d%s",
vma->fence->id,
i915_gem_active_isset(&vma->last_fence) ? "*" : "");
seq_puts(m, ")");
}
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
engine = i915_gem_object_last_write_engine(obj);
if (engine)
seq_printf(m, " (%s)", engine->name);
frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
if (frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
}
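
/* sort() comparator: order objects by their offset within stolen memory. */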
static int obj_rank_by_stolen(const void *A, const void *B)
{
const struct drm_i915_gem_object *a =
*(const struct drm_i915_gem_object **)A;
const struct drm_i915_gem_object *b =
*(const struct drm_i915_gem_object **)B;
if (a->stolen->start < b->stolen->start)
return -1;
if (a->stolen->start > b->stolen->start)
return 1;
return 0;
}
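
/*
 * i915_gem_stolen: dump all objects backed by stolen memory, sorted by
 * stolen offset. The bound/unbound lists are snapshotted into an array
 * under mm.obj_lock first, since describe_obj() must not be called with
 * that spinlock held.
 */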
static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct drm_i915_gem_object **objects;
struct drm_i915_gem_object *obj;
u64 total_obj_size, total_gtt_size;
unsigned long total, count, n;
int ret;
total = READ_ONCE(dev_priv->mm.object_count);
objects = kvmalloc_array(total, sizeof(*objects), GFP_KERNEL);
if (!objects)
return -ENOMEM;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out;
total_obj_size = total_gtt_size = count = 0;
spin_lock(&dev_priv->mm.obj_lock);
list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
if (count == total)
break;
if (obj->stolen == NULL)
continue;
objects[count++] = obj;
total_obj_size += obj->base.size;
total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
}
list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
if (count == total)
break;
if (obj->stolen == NULL)
continue;
objects[count++] = obj;
total_obj_size += obj->base.size;
}
spin_unlock(&dev_priv->mm.obj_lock);
sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL);
seq_puts(m, "Stolen:\n");
for (n = 0; n < count; n++) {
seq_puts(m, " ");
describe_obj(m, objects[n]);
seq_putc(m, '\n');
}
seq_printf(m, "Total %lu objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
mutex_unlock(&dev->struct_mutex);
out:
kvfree(objects);
return ret;
}
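
/*
 * Per-client memory accounting, accumulated by per_file_stats() over each
 * object in a client's handle table: object count and total size, plus
 * active/inactive, global (GGTT), shared (named or dma-buf exported) and
 * unbound breakdowns.
 */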
struct file_stats {
struct drm_i915_file_private *file_priv;
unsigned long count;
u64 total, unbound;
u64 global, shared;
u64 active, inactive;
};
static int per_file_stats(int id, void *ptr, void *data)
{
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
lockdep_assert_held(&obj->base.dev->struct_mutex);
stats->count++;
stats->total += obj->base.size;
if (!obj->bind_count)
stats->unbound += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
if (i915_vma_is_ggtt(vma)) {
stats->global += vma->node.size;
} else {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
if (ppgtt->vm.file != stats->file_priv)
continue;
}
if (i915_vma_is_active(vma))
stats->active += vma->node.size;
else
stats->inactive += vma->node.size;
}
return 0;
}
#define print_file_stats(m, name, stats) do { \
if (stats.count) \
seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
name, \
stats.count, \
stats.total, \
stats.active, \
stats.inactive, \
stats.global, \
stats.shared, \
stats.unbound); \
} while (0)
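
/*
 * Fold every object still cached in the engines' batch-buffer pools into
 * one file_stats record and print it as the "[k]batch pool" row.
 */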
static void print_batch_pool_stats(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
struct file_stats stats;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int j;
memset(&stats, 0, sizeof(stats));
for_each_engine(engine, dev_priv, id) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
per_file_stats(0, obj, &stats);
}
}
print_file_stats(m, "[k]batch pool", stats);
}
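
/* Accumulate the context image and ringbuffer objects of one context. */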
static int per_file_ctx_stats(int idx, void *ptr, void *data)
{
struct i915_gem_context *ctx = ptr;
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, ctx->i915, id) {
struct intel_context *ce = to_intel_context(ctx, engine);
if (ce->state)
per_file_stats(0, ce->state->obj, data);
if (ce->ring)
per_file_stats(0, ce->ring->vma->obj, data);
}
return 0;
}
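
/*
 * Fold the kernel context plus every open file's contexts into a single
 * "[k]contexts" row. Takes struct_mutex; the caller already holds
 * filelist_mutex when walking dev->filelist.
 */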
static void print_context_stats(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct file_stats stats;
struct drm_file *file;
memset(&stats, 0, sizeof(stats));
mutex_lock(&dev->struct_mutex);
if (dev_priv->kernel_context)
per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
list_for_each_entry(file, &dev->filelist, lhead) {
struct drm_i915_file_private *fpriv = file->driver_priv;
idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
}
mutex_unlock(&dev->struct_mutex);
print_file_stats(m, "[k]contexts", stats);
}
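
/*
 * i915_gem_objects: global object-memory overview (unbound/bound/purgeable/
 * mapped/huge/display totals, GGTT size, supported page sizes) followed by
 * per-client statistics resolved to the owning task where possible.
 */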
static int i915_gem_object_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
u32 count, mapped_count, purgeable_count, dpy_count, huge_count;
u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
struct drm_i915_gem_object *obj;
unsigned int page_sizes = 0;
struct drm_file *file;
char buf[80];
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "%u objects, %llu bytes\n",
dev_priv->mm.object_count,
dev_priv->mm.object_memory);
size = count = 0;
mapped_size = mapped_count = 0;
purgeable_size = purgeable_count = 0;
huge_size = huge_count = 0;
spin_lock(&dev_priv->mm.obj_lock);
list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
size += obj->base.size;
++count;
if (obj->mm.madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
if (obj->mm.mapping) {
mapped_count++;
mapped_size += obj->base.size;
}
if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
huge_count++;
huge_size += obj->base.size;
page_sizes |= obj->mm.page_sizes.sg;
}
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
size = count = dpy_size = dpy_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
size += obj->base.size;
++count;
if (obj->pin_global) {
dpy_size += obj->base.size;
++dpy_count;
}
if (obj->mm.madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
if (obj->mm.mapping) {
mapped_count++;
mapped_size += obj->base.size;
}
if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
huge_count++;
huge_size += obj->base.size;
page_sizes |= obj->mm.page_sizes.sg;
}
}
spin_unlock(&dev_priv->mm.obj_lock);
seq_printf(m, "%u bound objects, %llu bytes\n",
count, size);
seq_printf(m, "%u purgeable objects, %llu bytes\n",
purgeable_count, purgeable_size);
seq_printf(m, "%u mapped objects, %llu bytes\n",
mapped_count, mapped_size);
seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n",
huge_count,
stringify_page_sizes(page_sizes, buf, sizeof(buf)),
huge_size);
seq_printf(m, "%u display objects (globally pinned), %llu bytes\n",
dpy_count, dpy_size);
seq_printf(m, "%llu [%pa] gtt total\n",
ggtt->vm.total, &ggtt->mappable_end);
seq_printf(m, "Supported page sizes: %s\n",
stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes,
buf, sizeof(buf)));
seq_putc(m, '\n');
print_batch_pool_stats(m, dev_priv);
mutex_unlock(&dev->struct_mutex);
mutex_lock(&dev->filelist_mutex);
print_context_stats(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct i915_request *request;
struct task_struct *task;
mutex_lock(&dev->struct_mutex);
memset(&stats, 0, sizeof(stats));
stats.file_priv = file->driver_priv;
spin_lock(&file->table_lock);
idr_for_each(&file->object_idr, per_file_stats, &stats);
spin_unlock(&file->table_lock);
/*
* Although we have a valid reference on file->pid, that does
* not guarantee that the task_struct who called get_pid() is
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
request = list_first_entry_or_null(&file_priv->mm.request_list,
struct i915_request,
client_link);
rcu_read_lock();
task = pid_task(request && request->gem_context->pid ?
request->gem_context->pid : file->pid,
PIDTYPE_PID);
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
mutex_unlock(&dev->struct_mutex);
}
mutex_unlock(&dev->filelist_mutex);
return 0;
}
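
/*
 * i915_gem_gtt: describe every object on the bound list. As above, the
 * list is snapshotted under mm.obj_lock before the objects are printed.
 */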
static int i915_gem_gtt_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_i915_private *dev_priv = node_to_i915(node);
struct drm_device *dev = &dev_priv->drm;
struct drm_i915_gem_object **objects;
struct drm_i915_gem_object *obj;
u64 total_obj_size, total_gtt_size;
unsigned long nobject, n;
int count, ret;
nobject = READ_ONCE(dev_priv->mm.object_count);
objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL);
if (!objects)
return -ENOMEM;
	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret) {
		kvfree(objects);	/* don't leak the snapshot array on error */
		return ret;
	}
count = 0;
spin_lock(&dev_priv->mm.obj_lock);
list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
objects[count++] = obj;
if (count == nobject)
break;
}
spin_unlock(&dev_priv->mm.obj_lock);
total_obj_size = total_gtt_size = 0;
for (n = 0; n < count; n++) {
obj = objects[n];
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
total_obj_size += obj->base.size;
total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
kvfree(objects);
return 0;
}
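
/*
 * i915_gem_batch_pool: per-engine dump of the batch-buffer pool caches,
 * listing each cached object and a running total.
 */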
static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int total = 0;
int ret, j;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
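/*
 * Walk each engine's batch pool cache lists, counting the buffers and
 * printing a short description of every object still held in the pool.
 */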
for_each_engine(engine, dev_priv, id) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
int count = 0;
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
count++;
seq_printf(m, "%s cache[%d]: %d objects\n",
engine->name, j, count);
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link) {
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
}
total += count;
}
}
seq_printf(m, "total: %d\n", total);
mutex_unlock(&dev->struct_mutex);
return 0;
}
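/*
 * Dump the gen8+ display engine interrupt registers: per-pipe
 * IMR/IIR/IER (skipping pipes whose power well is down), followed by
 * the port, misc and PCU interrupt registers.
 */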
static void gen8_display_interrupt_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
int pipe;
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c IMR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IMR(pipe)));
seq_printf(m, "Pipe %c IIR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IIR(pipe)));
seq_printf(m, "Pipe %c IER:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IER(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_PORT_IMR));
seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_PORT_IIR));
seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_PORT_IER));
seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_MISC_IMR));
seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_MISC_IIR));
seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_MISC_IER));
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
}
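/*
 * Top-level dump of the interrupt registers, with separate branches for
 * Cherryview, gen11+, gen8+, Valleyview, pre-PCH-split and PCH-split
 * platforms. A runtime PM reference is held across all register reads.
 */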
static int i915_interrupt_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_engine_cs *engine;
enum intel_engine_id id;
int i, pipe;
intel_runtime_pm_get(dev_priv);
if (IS_CHERRYVIEW(dev_priv)) {
seq_printf(m, "Master Interrupt Control:\t%08x\n",
I915_READ(GEN8_MASTER_IRQ));
seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER));
seq_printf(m, "Display IIR:\t%08x\n",
I915_READ(VLV_IIR));
seq_printf(m, "Display IIR_RW:\t%08x\n",
I915_READ(VLV_IIR_RW));
seq_printf(m, "Display IMR:\t%08x\n",
I915_READ(VLV_IMR));
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c stat:\t%08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
seq_printf(m, "Port hotplug:\t%08x\n",
I915_READ(PORT_HOTPLUG_EN));
seq_printf(m, "DPFLIPSTAT:\t%08x\n",
I915_READ(VLV_DPFLIPSTAT));
seq_printf(m, "DPINVGTT:\t%08x\n",
I915_READ(DPINVGTT));
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
for (i = 0; i < 4; i++) {
seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IMR(i)));
seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IIR(i)));
seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
i, I915_READ(GEN8_GT_IER(i)));
}
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
} else if (INTEL_GEN(dev_priv) >= 11) {
seq_printf(m, "Master Interrupt Control: %08x\n",
I915_READ(GEN11_GFX_MSTR_IRQ));
seq_printf(m, "Render/Copy Intr Enable: %08x\n",
I915_READ(GEN11_RENDER_COPY_INTR_ENABLE));
seq_printf(m, "VCS/VECS Intr Enable: %08x\n",
I915_READ(GEN11_VCS_VECS_INTR_ENABLE));
seq_printf(m, "GUC/SG Intr Enable:\t %08x\n",
I915_READ(GEN11_GUC_SG_INTR_ENABLE));
seq_printf(m, "GPM/WGBOXPERF Intr Enable: %08x\n",
I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE));
seq_printf(m, "Crypto Intr Enable:\t %08x\n",
I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE));
seq_printf(m, "GUnit/CSME Intr Enable:\t %08x\n",
I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE));
seq_printf(m, "Display Interrupt Control:\t%08x\n",
I915_READ(GEN11_DISPLAY_INT_CTL));
gen8_display_interrupt_info(m);
} else if (INTEL_GEN(dev_priv) >= 8) {
seq_printf(m, "Master Interrupt Control:\t%08x\n",
I915_READ(GEN8_MASTER_IRQ));
for (i = 0; i < 4; i++) {
seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IMR(i)));
seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IIR(i)));
seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
i, I915_READ(GEN8_GT_IER(i)));
}
gen8_display_interrupt_info(m);
} else if (IS_VALLEYVIEW(dev_priv)) {
seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER));
seq_printf(m, "Display IIR:\t%08x\n",
I915_READ(VLV_IIR));
seq_printf(m, "Display IIR_RW:\t%08x\n",
I915_READ(VLV_IIR_RW));
seq_printf(m, "Display IMR:\t%08x\n",
I915_READ(VLV_IMR));
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c stat:\t%08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Master IER:\t%08x\n",
I915_READ(VLV_MASTER_IER));
seq_printf(m, "Render IER:\t%08x\n",
I915_READ(GTIER));
seq_printf(m, "Render IIR:\t%08x\n",
I915_READ(GTIIR));
seq_printf(m, "Render IMR:\t%08x\n",
I915_READ(GTIMR));
seq_printf(m, "PM IER:\t\t%08x\n",
I915_READ(GEN6_PMIER));
seq_printf(m, "PM IIR:\t\t%08x\n",
I915_READ(GEN6_PMIIR));
seq_printf(m, "PM IMR:\t\t%08x\n",
I915_READ(GEN6_PMIMR));
seq_printf(m, "Port hotplug:\t%08x\n",
I915_READ(PORT_HOTPLUG_EN));
seq_printf(m, "DPFLIPSTAT:\t%08x\n",
I915_READ(VLV_DPFLIPSTAT));
seq_printf(m, "DPINVGTT:\t%08x\n",
I915_READ(DPINVGTT));
} else if (!HAS_PCH_SPLIT(dev_priv)) {
seq_printf(m, "Interrupt enable: %08x\n",
I915_READ(IER));
seq_printf(m, "Interrupt identity: %08x\n",
I915_READ(IIR));
seq_printf(m, "Interrupt mask: %08x\n",
I915_READ(IMR));
for_each_pipe(dev_priv, pipe)
seq_printf(m, "Pipe %c stat: %08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
} else {
seq_printf(m, "North Display Interrupt enable: %08x\n",
I915_READ(DEIER));
seq_printf(m, "North Display Interrupt identity: %08x\n",
I915_READ(DEIIR));
seq_printf(m, "North Display Interrupt mask: %08x\n",
I915_READ(DEIMR));
seq_printf(m, "South Display Interrupt enable: %08x\n",
I915_READ(SDEIER));
seq_printf(m, "South Display Interrupt identity: %08x\n",
I915_READ(SDEIIR));
seq_printf(m, "South Display Interrupt mask: %08x\n",
I915_READ(SDEIMR));
seq_printf(m, "Graphics Interrupt enable: %08x\n",
I915_READ(GTIER));
seq_printf(m, "Graphics Interrupt identity: %08x\n",
I915_READ(GTIIR));
seq_printf(m, "Graphics Interrupt mask: %08x\n",
I915_READ(GTIMR));
}
if (INTEL_GEN(dev_priv) >= 11) {
seq_printf(m, "RCS Intr Mask:\t %08x\n",
I915_READ(GEN11_RCS0_RSVD_INTR_MASK));
seq_printf(m, "BCS Intr Mask:\t %08x\n",
I915_READ(GEN11_BCS_RSVD_INTR_MASK));
seq_printf(m, "VCS0/VCS1 Intr Mask:\t %08x\n",
I915_READ(GEN11_VCS0_VCS1_INTR_MASK));
seq_printf(m, "VCS2/VCS3 Intr Mask:\t %08x\n",
I915_READ(GEN11_VCS2_VCS3_INTR_MASK));
seq_printf(m, "VECS0/VECS1 Intr Mask:\t %08x\n",
I915_READ(GEN11_VECS0_VECS1_INTR_MASK));
seq_printf(m, "GUC/SG Intr Mask:\t %08x\n",
I915_READ(GEN11_GUC_SG_INTR_MASK));
seq_printf(m, "GPM/WGBOXPERF Intr Mask: %08x\n",
I915_READ(GEN11_GPM_WGBOXPERF_INTR_MASK));
seq_printf(m, "Crypto Intr Mask:\t %08x\n",
I915_READ(GEN11_CRYPTO_RSVD_INTR_MASK));
seq_printf(m, "Gunit/CSME Intr Mask:\t %08x\n",
I915_READ(GEN11_GUNIT_CSME_INTR_MASK));
} else if (INTEL_GEN(dev_priv) >= 6) {
for_each_engine(engine, dev_priv, id) {
seq_printf(m,
"Graphics Interrupt mask (%s): %08x\n",
engine->name, I915_READ_IMR(engine));
}
}
intel_runtime_pm_put(dev_priv);
return 0;
}
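/*
 * Report the total number of fence registers, and for each one its pin
 * count and the object (if any) currently bound to it.
 */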
static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
int i, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct i915_vma *vma = dev_priv->fence_regs[i].vma;
seq_printf(m, "Fence %d, pin count = %d, object = ",
i, dev_priv->fence_regs[i].pin_count);
if (!vma)
seq_puts(m, "unused");
else
describe_obj(m, vma->obj);
seq_putc(m, '\n');
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
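/*
 * Stream a previously captured GPU error state to userspace, formatting
 * it into a temporary buffer sized to the requested read window.
 */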
static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
size_t count, loff_t *pos)
{
struct i915_gpu_state *error = file->private_data;
struct drm_i915_error_state_buf str;
ssize_t ret;
loff_t tmp;
if (!error)
return 0;
ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
if (ret)
return ret;
ret = i915_error_state_to_str(&str, error);
if (ret)
goto out;
tmp = 0;
ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
if (ret < 0)
goto out;
*pos = str.start + ret;
out:
i915_error_state_buf_release(&str);
return ret;
}
static int gpu_state_release(struct inode *inode, struct file *file)
{
i915_gpu_state_put(file->private_data);
return 0;
}
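/*
 * Opening i915_gpu_info captures a fresh snapshot of the GPU state
 * (under a runtime PM reference) and stashes it as the file's private
 * data for subsequent reads.
 */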
static int i915_gpu_info_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
struct i915_gpu_state *gpu;
intel_runtime_pm_get(i915);
gpu = i915_capture_gpu_state(i915);
intel_runtime_pm_put(i915);
if (!gpu)
return -ENOMEM;
file->private_data = gpu;
return 0;
}
static const struct file_operations i915_gpu_info_fops = {
.owner = THIS_MODULE,
.open = i915_gpu_info_open,
.read = gpu_state_read,
.llseek = default_llseek,
.release = gpu_state_release,
};
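/* Any write to i915_error_state clears the currently recorded error. */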
static ssize_t
i915_error_state_write(struct file *filp,
const char __user *ubuf,
size_t cnt,
loff_t *ppos)
{
struct i915_gpu_state *error = filp->private_data;
if (!error)
return 0;
DRM_DEBUG_DRIVER("Resetting error state\n");
i915_reset_error_state(error->i915);
return cnt;
}
static int i915_error_state_open(struct inode *inode, struct file *file)
{
file->private_data = i915_first_error_state(inode->i_private);
return 0;
}
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
.read = gpu_state_read,
.write = i915_error_state_write,
.llseek = default_llseek,
.release = gpu_state_release,
};
#endif
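/*
 * Set the next global seqno. Write-only: note the NULL get method in
 * the attribute definition below.
 */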
static int
i915_next_seqno_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_device *dev = &dev_priv->drm;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
ret = i915_gem_set_global_seqno(dev, val);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
NULL, i915_next_seqno_set,
"0x%llx\n");
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
int ret = 0;
intel_runtime_pm_get(dev_priv);
if (IS_GEN5(dev_priv)) {
u16 rgvswctl = I915_READ16(MEMSWCTL);
u16 rgvstat = I915_READ16(MEMSTAT_ILK);
seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
MEMSTAT_VID_SHIFT);
seq_printf(m, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
u32 rpmodectl, freq_sts;
mutex_lock(&dev_priv->pcu_lock);
rpmodectl = I915_READ(GEN6_RP_CONTROL);
seq_printf(m, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
seq_printf(m, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
seq_printf(m, "actual GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
seq_printf(m, "current GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->cur_freq));
seq_printf(m, "max GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, "min GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->min_freq));
seq_printf(m, "idle GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->idle_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, rps->efficient_freq));
mutex_unlock(&dev_priv->pcu_lock);
} else if (INTEL_GEN(dev_priv) >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpupei, rpcurup, rpprevup;
u32 rpdownei, rpcurdown, rpprevdown;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
if (IS_GEN9_LP(dev_priv)) {
rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
gt_perf_status = I915_READ(BXT_GT_PERF_STATUS);
} else {
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
}
/* RPSTAT1 is in the GT power well */
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
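/* The requested frequency field moved within RPNSWREQ across gens. */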
reqf = I915_READ(GEN6_RPNSWREQ);
if (INTEL_GEN(dev_priv) >= 9)
reqf >>= 23;
else {
reqf &= ~GEN6_TURBO_DISABLE;
if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
reqf >>= 24;
else
reqf >>= 25;
}
reqf = intel_gpu_freq(dev_priv, reqf);
rpmodectl = I915_READ(GEN6_RP_CONTROL);
rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
rpdeclimit = I915_READ(GEN6_RP_DOWN_THRESHOLD);
rpstat = I915_READ(GEN6_RPSTAT1);
rpupei = I915_READ(GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = I915_READ(GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = I915_READ(GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
cagf = intel_gpu_freq(dev_priv,
intel_get_cagf(dev_priv, rpstat));
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
if (INTEL_GEN(dev_priv) >= 11) {
pm_ier = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE);
pm_imr = I915_READ(GEN11_GPM_WGBOXPERF_INTR_MASK);
/*
* The equivalent to the PM ISR & IIR cannot be read
* without affecting the current state of the system
*/
pm_isr = 0;
pm_iir = 0;
} else if (INTEL_GEN(dev_priv) >= 8) {
pm_ier = I915_READ(GEN8_GT_IER(2));
pm_imr = I915_READ(GEN8_GT_IMR(2));
pm_isr = I915_READ(GEN8_GT_ISR(2));
pm_iir = I915_READ(GEN8_GT_IIR(2));
} else {
pm_ier = I915_READ(GEN6_PMIER);
pm_imr = I915_READ(GEN6_PMIMR);
pm_isr = I915_READ(GEN6_PMISR);
pm_iir = I915_READ(GEN6_PMIIR);
}
pm_mask = I915_READ(GEN6_PMINTRMSK);
seq_printf(m, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
seq_printf(m, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
if (INTEL_GEN(dev_priv) <= 10)
seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
pm_isr, pm_iir);
seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
seq_printf(m, "Render p-state ratio: %d\n",
(gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
seq_printf(m, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
seq_printf(m, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
rpupei, GT_PM_INTERVAL_TO_US(dev_priv, rpupei));
seq_printf(m, "RP CUR UP: %d (%dus)\n",
rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
seq_printf(m, "RP PREV UP: %d (%dus)\n",
rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold);
seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
rps->power.down_threshold);
max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
max_freq *= (IS_GEN9_BC(dev_priv) ||
INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(dev_priv, max_freq));
max_freq = (rp_state_cap & 0xff00) >> 8;
max_freq *= (IS_GEN9_BC(dev_priv) ||
INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(dev_priv, max_freq));
max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 :
rp_state_cap >> 0) & 0xff;
max_freq *= (IS_GEN9_BC(dev_priv) ||
INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(dev_priv, max_freq));
seq_printf(m, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, "Current freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->cur_freq));
seq_printf(m, "Actual freq: %d MHz\n", cagf);
seq_printf(m, "Idle freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->idle_freq));
seq_printf(m, "Min freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->min_freq));
seq_printf(m, "Boost freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->boost_freq));
seq_printf(m, "Max freq: %d MHz\n",
intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, rps->efficient_freq));
} else {
seq_puts(m, "no P-state info available\n");
}
seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk);
seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
intel_runtime_pm_put(dev_priv);
return ret;
}
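/*
 * Print the INSTDONE registers, including the per-slice/subslice
 * sampler and row instdone available on gen7+.
 */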
static void i915_instdone_info(struct drm_i915_private *dev_priv,
struct seq_file *m,
struct intel_instdone *instdone)
{
int slice;
int subslice;
seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
instdone->instdone);
if (INTEL_GEN(dev_priv) <= 3)
return;
seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
instdone->slice_common);
if (INTEL_GEN(dev_priv) <= 6)
return;
for_each_instdone_slice_subslice(dev_priv, slice, subslice)
seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice, instdone->sampler[slice][subslice]);
for_each_instdone_slice_subslice(dev_priv, slice, subslice)
seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice, instdone->row[slice][subslice]);
}
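/*
 * Summarise hangcheck state: reset/wedged flags, per-engine ACTHD and
 * seqno progress relative to the last submission, and an INSTDONE
 * snapshot of the render engine.
 */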
static int i915_hangcheck_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_engine_cs *engine;
u64 acthd[I915_NUM_ENGINES];
u32 seqno[I915_NUM_ENGINES];
struct intel_instdone instdone;
enum intel_engine_id id;
if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
seq_puts(m, "Wedged\n");
if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
seq_puts(m, "Reset in progress: struct_mutex backoff\n");
if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
seq_puts(m, "Reset in progress: reset handoff to waiter\n");
if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
seq_puts(m, "Waiter holding struct mutex\n");
if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
seq_puts(m, "struct_mutex blocked for reset\n");
if (!i915_modparams.enable_hangcheck) {
seq_puts(m, "Hangcheck disabled\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
for_each_engine(engine, dev_priv, id) {
acthd[id] = intel_engine_get_active_head(engine);
seqno[id] = intel_engine_get_seqno(engine);
}
intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
intel_runtime_pm_put(dev_priv);
if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer))
seq_printf(m, "Hangcheck active, timer fires in %dms\n",
jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
jiffies));
else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work))
seq_puts(m, "Hangcheck active, work pending\n");
else
seq_puts(m, "Hangcheck inactive\n");
seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
for_each_engine(engine, dev_priv, id) {
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *rb;
seq_printf(m, "%s:\n", engine->name);
seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
engine->hangcheck.seqno, seqno[id],
intel_engine_last_submit(engine));
seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
yesno(intel_engine_has_waiter(engine)),
yesno(test_bit(engine->id,
&dev_priv->gpu_error.missed_irq_rings)),
yesno(engine->hangcheck.stalled),
yesno(engine->hangcheck.wedged));
spin_lock_irq(&b->rb_lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
seq_printf(m, "\t%s [%d] waiting for %x\n",
w->tsk->comm, w->tsk->pid, w->seqno);
}
spin_unlock_irq(&b->rb_lock);
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
seq_printf(m, "\taction = %s(%d) %d ms ago\n",
hangcheck_action_to_str(engine->hangcheck.action),
engine->hangcheck.action,
jiffies_to_msecs(jiffies -
engine->hangcheck.action_timestamp));
if (engine->id == RCS) {
seq_puts(m, "\tinstdone read =\n");
i915_instdone_info(dev_priv, m, &instdone);
seq_puts(m, "\tinstdone accu =\n");
i915_instdone_info(dev_priv, m,
&engine->hangcheck.instdone);
}
}
return 0;
}
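/*
 * debugfs: i915_reset_info
 *
 * Reports the number of full-GPU resets and per-engine resets recorded
 * in i915_gpu_error. Example output (engine names and values
 * illustrative only):
 *
 *	full gpu reset = 1
 *	rcs0 = 0
 */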
static int i915_reset_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct i915_gpu_error *error = &dev_priv->gpu_error;
struct intel_engine_cs *engine;
enum intel_engine_id id;
seq_printf(m, "full gpu reset = %u\n", i915_reset_count(error));
for_each_engine(engine, dev_priv, id) {
seq_printf(m, "%s = %u\n", engine->name,
i915_reset_engine_count(error, engine));
}
return 0;
}
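/*
 * Ironlake render-standby (DRPC) decode: the boost/P-state fields of
 * MEMMODECTL, the RS1/RS2 VIDs from CRSTANDVID, and the current render
 * standby state from RSTDBYCTL.
 */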
static int ironlake_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
u32 rgvmodectl, rstdbyctl;
u16 crstandvid;
rgvmodectl = I915_READ(MEMMODECTL);
rstdbyctl = I915_READ(RSTDBYCTL);
crstandvid = I915_READ16(CRSTANDVID);
seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
seq_printf(m, "Boost freq: %d\n",
(rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
MEMMODE_BOOST_FREQ_SHIFT);
seq_printf(m, "HW control enabled: %s\n",
yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
seq_printf(m, "SW control enabled: %s\n",
yesno(rgvmodectl & MEMMODE_SWMODE_EN));
seq_printf(m, "Gated voltage change: %s\n",
yesno(rgvmodectl & MEMMODE_RCLK_GATE));
seq_printf(m, "Starting frequency: P%d\n",
(rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
seq_printf(m, "Max P-state: P%d\n",
(rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
seq_printf(m, "Render standby enabled: %s\n",
yesno(!(rstdbyctl & RCX_SW_EXIT)));
seq_puts(m, "Current RS state: ");
switch (rstdbyctl & RSX_STATUS_MASK) {
case RSX_STATUS_ON:
seq_puts(m, "on\n");
break;
case RSX_STATUS_RC1:
seq_puts(m, "RC1\n");
break;
case RSX_STATUS_RC1E:
seq_puts(m, "RC1E\n");
break;
case RSX_STATUS_RS1:
seq_puts(m, "RS1\n");
break;
case RSX_STATUS_RS2:
seq_puts(m, "RS2 (RC6)\n");
break;
case RSX_STATUS_RS3:
seq_puts(m, "RC3 (RC6+)\n");
break;
default:
seq_puts(m, "unknown\n");
break;
}
return 0;
}
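/*
 * Forcewake domain dump: the userspace bypass count followed by a
 * point-in-time sample (READ_ONCE, no locking) of every domain's
 * wake_count.
 */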
static int i915_forcewake_domains(struct seq_file *m, void *data)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_uncore_forcewake_domain *fw_domain;
unsigned int tmp;
seq_printf(m, "user.bypass_count = %u\n",
i915->uncore.user_forcewake.count);
for_each_fw_domain(fw_domain, i915, tmp)
seq_printf(m, "%s.wake_count = %u\n",
intel_uncore_forcewake_domain_to_str(fw_domain->id),
READ_ONCE(fw_domain->wake_count));
return 0;
}
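/*
 * Helper: print one RC6 residency counter as its raw register value
 * and as microseconds via intel_rc6_residency_us().
 */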
static void print_rc6_res(struct seq_file *m,
const char *title,
const i915_reg_t reg)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
seq_printf(m, "%s %u (%llu us)\n",
title, I915_READ(reg),
intel_rc6_residency_us(dev_priv, reg));
}
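/*
 * Valleyview/Cherryview DRPC: RC6 enablement from GEN6_RC_CONTROL,
 * render/media power well state from VLV_GTLC_PW_STATUS and the RC6
 * residency counters, then chains into the forcewake domain dump.
 */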
static int vlv_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
u32 rcctl1, pw_status;
pw_status = I915_READ(VLV_GTLC_PW_STATUS);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
GEN6_RC_CTL_EI_MODE(1))));
seq_printf(m, "Render Power Well: %s\n",
(pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Media Power Well: %s\n",
(pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6);
print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
return i915_forcewake_domains(m, NULL);
}
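/*
 * Gen6+ DRPC: decodes the RC state machine from GEN6_GT_CORE_STATUS,
 * power-gating status on gen9+, RC6/RC6p/RC6pp enablement and
 * residencies, and (gen6/7 only) the RC6 VIDs fetched from the PCU
 * via GEN6_PCODE_READ_RC6VIDS.
 */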
static int gen6_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
u32 gt_core_status, rcctl1, rc6vids = 0;
u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
if (INTEL_GEN(dev_priv) >= 9) {
gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE);
gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
}
if (INTEL_GEN(dev_priv) <= 7) {
mutex_lock(&dev_priv->pcu_lock);
sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
&rc6vids);
mutex_unlock(&dev_priv->pcu_lock);
}
seq_printf(m, "RC1e Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
if (INTEL_GEN(dev_priv) >= 9) {
seq_printf(m, "Render Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
seq_printf(m, "Media Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
}
seq_printf(m, "Deep RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
seq_printf(m, "Deepest RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
seq_puts(m, "Current RC state: ");
switch (gt_core_status & GEN6_RCn_MASK) {
case GEN6_RC0:
if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
seq_puts(m, "Core Power Down\n");
else
seq_puts(m, "on\n");
break;
case GEN6_RC3:
seq_puts(m, "RC3\n");
break;
case GEN6_RC6:
seq_puts(m, "RC6\n");
break;
case GEN6_RC7:
seq_puts(m, "RC7\n");
break;
default:
seq_puts(m, "Unknown\n");
break;
}
seq_printf(m, "Core Power Down: %s\n",
yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
if (INTEL_GEN(dev_priv) >= 9) {
seq_printf(m, "Render Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Media Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
}
/* Not exactly sure what this is */
print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
GEN6_GT_GFX_RC6_LOCKED);
print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
if (INTEL_GEN(dev_priv) <= 7) {
seq_printf(m, "RC6 voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
seq_printf(m, "RC6+ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
seq_printf(m, "RC6++ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
}
return i915_forcewake_domains(m, NULL);
}
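/*
 * Top-level dispatcher for debugfs/i915_drpc_info: a single runtime-PM
 * wakeref is held around the whole dump so that none of the
 * per-platform register reads trip the "RPM wakelock ref not held"
 * assertion.
 */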
static int i915_drpc_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
int err;
intel_runtime_pm_get(dev_priv);
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
err = vlv_drpc_info(m);
else if (INTEL_GEN(dev_priv) >= 6)
err = gen6_drpc_info(m);
else
err = ironlake_drpc_info(m);
intel_runtime_pm_put(dev_priv);
return err;
}
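/* Snapshot of the frontbuffer tracking busy and flip bitmasks. */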
static int i915_frontbuffer_tracking(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
seq_printf(m, "FB tracking busy bits: 0x%08x\n",
dev_priv->fb_tracking.busy_bits);
seq_printf(m, "FB tracking flip bits: 0x%08x\n",
dev_priv->fb_tracking.flip_bits);
return 0;
}
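/*
 * FBC status: whether framebuffer compression is currently active (or
 * the reason it is not), plus a per-generation status-register read
 * reporting whether any compression has actually occurred.
 */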
static int i915_fbc_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_fbc *fbc = &dev_priv->fbc;
if (!HAS_FBC(dev_priv))
return -ENODEV;
intel_runtime_pm_get(dev_priv);
mutex_lock(&fbc->lock);
if (intel_fbc_is_active(dev_priv))
seq_puts(m, "FBC enabled\n");
else
seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason);
if (intel_fbc_is_active(dev_priv)) {
u32 mask;
if (INTEL_GEN(dev_priv) >= 8)
mask = I915_READ(IVB_FBC_STATUS2) & BDW_FBC_COMP_SEG_MASK;
else if (INTEL_GEN(dev_priv) >= 7)
mask = I915_READ(IVB_FBC_STATUS2) & IVB_FBC_COMP_SEG_MASK;
else if (INTEL_GEN(dev_priv) >= 5)
mask = I915_READ(ILK_DPFC_STATUS) & ILK_DPFC_COMP_SEG_MASK;
else if (IS_G4X(dev_priv))
mask = I915_READ(DPFC_STATUS) & DPFC_COMP_SEG_MASK;
else
mask = I915_READ(FBC_STATUS) & (FBC_STAT_COMPRESSING |
FBC_STAT_COMPRESSED);
seq_printf(m, "Compressing: %s\n", yesno(mask));
}
mutex_unlock(&fbc->lock);
intel_runtime_pm_put(dev_priv);
return 0;
}
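/*
 * FBC false color (gen7+): toggles FBC_CTL_FALSE_COLOR in
 * ILK_DPFC_CONTROL so that compressed regions are drawn in a false
 * color, making FBC activity visible for debugging.
 */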
static int i915_fbc_false_color_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
if (INTEL_GEN(dev_priv) < 7 || !HAS_FBC(dev_priv))
return -ENODEV;
*val = dev_priv->fbc.false_color;
return 0;
}
static int i915_fbc_false_color_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
u32 reg;
if (INTEL_GEN(dev_priv) < 7 || !HAS_FBC(dev_priv))
return -ENODEV;
mutex_lock(&dev_priv->fbc.lock);
reg = I915_READ(ILK_DPFC_CONTROL);
dev_priv->fbc.false_color = val;
I915_WRITE(ILK_DPFC_CONTROL, val ?
(reg | FBC_CTL_FALSE_COLOR) :
(reg & ~FBC_CTL_FALSE_COLOR));
mutex_unlock(&dev_priv->fbc.lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops,
i915_fbc_false_color_get, i915_fbc_false_color_set,
"%llu\n");
static int i915_ips_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
if (!HAS_IPS(dev_priv))
return -ENODEV;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "Enabled by kernel parameter: %s\n",
yesno(i915_modparams.enable_ips));
if (INTEL_GEN(dev_priv) >= 8) {
seq_puts(m, "Currently: unknown\n");
} else {
if (I915_READ(IPS_CTL) & IPS_ENABLE)
seq_puts(m, "Currently: enabled\n");
else
seq_puts(m, "Currently: disabled\n");
}
intel_runtime_pm_put(dev_priv);
return 0;
}
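/*
 * Self-refresh status: each platform family keeps its SR enable bit in
 * a different register, so probe the appropriate one and report a
 * single enabled/disabled line. Gen9+ has no global SR bit; per-plane
 * watermarks must be inspected instead.
 */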
static int i915_sr_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
bool sr_enabled = false;
intel_runtime_pm_get(dev_priv);
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
if (INTEL_GEN(dev_priv) >= 9)
/* no global SR status; inspect per-plane WM */;
else if (HAS_PCH_SPLIT(dev_priv))
sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
else if (IS_I965GM(dev_priv) || IS_G4X(dev_priv) ||
IS_I945G(dev_priv) || IS_I945GM(dev_priv))
sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
else if (IS_I915GM(dev_priv))
sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
else if (IS_PINEVIEW(dev_priv))
sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
intel_runtime_pm_put(dev_priv);
seq_printf(m, "self-refresh: %s\n", enableddisabled(sr_enabled));
return 0;
}
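/*
 * Ironlake (gen5) energy monitor: GMCH temperature and the derived
 * chipset/graphics power estimates.
 */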
static int i915_emon_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
unsigned long temp, chipset, gfx;
int ret;
if (!IS_GEN5(dev_priv))
return -ENODEV;
intel_runtime_pm_get(dev_priv);
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret) {
/* don't leak the runtime-PM wakeref taken above on the error path */
intel_runtime_pm_put(dev_priv);
return ret;
}
temp = i915_mch_val(dev_priv);
chipset = i915_chipset_val(dev_priv);
gfx = i915_gfx_val(dev_priv);
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "GMCH temp: %ld\n", temp);
seq_printf(m, "Chipset power: %ld\n", chipset);
seq_printf(m, "GFX power: %ld\n", gfx);
seq_printf(m, "Total power: %ld\n", chipset + gfx);
intel_runtime_pm_put(dev_priv);
return 0;
}
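/*
 * Ring/IA frequency table (LLC platforms only): for each GPU frequency
 * step, ask the PCU (GEN6_PCODE_READ_MIN_FREQ_TABLE) for the matching
 * effective CPU and ring frequencies. Example output (values
 * illustrative only):
 *
 *	GPU freq (MHz)	Effective CPU freq (MHz)	Effective Ring freq (MHz)
 *	350		1300				1300
 */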
static int i915_ring_freq_table(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
unsigned int max_gpu_freq, min_gpu_freq;
int gpu_freq, ia_freq;
int ret;
if (!HAS_LLC(dev_priv))
return -ENODEV;
intel_runtime_pm_get(dev_priv);
ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
if (ret)
goto out;
min_gpu_freq = rps->min_freq;
max_gpu_freq = rps->max_freq;
if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
/* Convert GT frequency to 50 HZ units */
min_gpu_freq /= GEN9_FREQ_SCALER;
max_gpu_freq /= GEN9_FREQ_SCALER;
}
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
ia_freq = gpu_freq;
sandybridge_pcode_read(dev_priv,
GEN6_PCODE_READ_MIN_FREQ_TABLE,
&ia_freq);
seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
intel_gpu_freq(dev_priv, (gpu_freq *
(IS_GEN9_BC(dev_priv) ||
INTEL_GEN(dev_priv) >= 10 ?
GEN9_FREQ_SCALER : 1))),
((ia_freq >> 0) & 0xff) * 100,
((ia_freq >> 8) & 0xff) * 100);
}
mutex_unlock(&dev_priv->pcu_lock);
out:
intel_runtime_pm_put(dev_priv);
return ret;
}
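/*
 * Dump the raw ACPI OpRegion header (OPREGION_SIZE bytes). Note that
 * an interrupted lock attempt results in an empty read rather than an
 * error.
 */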
static int i915_opregion(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_opregion *opregion = &dev_priv->opregion;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out;
if (opregion->header)
seq_write(m, opregion->header, OPREGION_SIZE);
mutex_unlock(&dev->struct_mutex);
out:
return 0;
}
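/* Dump the raw Video BIOS Table (VBT) blob cached in intel_opregion. */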
static int i915_vbt(struct seq_file *m, void *unused)
{
struct intel_opregion *opregion = &node_to_i915(m->private)->opregion;
if (opregion->vbt)
seq_write(m, opregion->vbt, opregion->vbt_size);
return 0;
}
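/*
 * List every framebuffer: the fbdev/fbcon framebuffer first (when
 * CONFIG_DRM_FBDEV_EMULATION is set), then all user-created ones, each
 * with its geometry, format, modifier, refcount and backing object.
 */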
static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_framebuffer *fbdev_fb = NULL;
struct drm_framebuffer *drm_fb;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
#ifdef CONFIG_DRM_FBDEV_EMULATION
if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) {
fbdev_fb = to_intel_framebuffer(dev_priv->fbdev->helper.fb);
seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fbdev_fb->base.width,
fbdev_fb->base.height,
fbdev_fb->base.format->depth,
fbdev_fb->base.format->cpp[0] * 8,
fbdev_fb->base.modifier,
drm_framebuffer_read_refcount(&fbdev_fb->base));
describe_obj(m, intel_fb_obj(&fbdev_fb->base));
seq_putc(m, '\n');
}
#endif
mutex_lock(&dev->mode_config.fb_lock);
drm_for_each_fb(drm_fb, dev) {
struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb);
if (fb == fbdev_fb)
continue;
seq_printf(m, "user size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fb->base.width,
fb->base.height,
fb->base.format->depth,
fb->base.format->cpp[0] * 8,
fb->base.modifier,
drm_framebuffer_read_refcount(&fb->base));
describe_obj(m, intel_fb_obj(&fb->base));
seq_putc(m, '\n');
}
mutex_unlock(&dev->mode_config.fb_lock);
mutex_unlock(&dev->struct_mutex);
return 0;
}
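/*
 * describe_ctx_ring - print a context ringbuffer's geometry: the free
 * space remaining plus its head, tail and emit offsets.
 */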
static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
{
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, emit: %u)",
ring->space, ring->head, ring->tail, ring->emit);
}
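/*
 * i915_context_status - dump every GEM context on the device: its HW ID
 * and pin count (when one is assigned), the owning process (or
 * "(kernel)"/"(deleted)"), the remap-slice flag, and the per-engine
 * context state object and ringbuffer. Walks dev_priv->contexts.list
 * under struct_mutex.
 *
 * Typically read through debugfs, e.g. (node name assumed from the
 * usual i915 debugfs registration, which is outside this excerpt):
 *   cat /sys/kernel/debug/dri/0/i915_context_status
 */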
static int i915_context_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
enum intel_engine_id id;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
seq_puts(m, "HW context ");
if (!list_empty(&ctx->hw_id_link))
seq_printf(m, "%x [pin %u]", ctx->hw_id,
atomic_read(&ctx->hw_id_pin_count));
if (ctx->pid) {
struct task_struct *task;
task = get_pid_task(ctx->pid, PIDTYPE_PID);
if (task) {
seq_printf(m, "(%s [%d]) ",
task->comm, task->pid);
put_task_struct(task);
}
} else if (IS_ERR(ctx->file_priv)) {
seq_puts(m, "(deleted) ");
} else {
seq_puts(m, "(kernel) ");
}
seq_putc(m, ctx->remap_slice ? 'R' : 'r');
seq_putc(m, '\n');
for_each_engine(engine, dev_priv, id) {
struct intel_context *ce =
to_intel_context(ctx, engine);
seq_printf(m, "%s: ", engine->name);
if (ce->state)
describe_obj(m, ce->state->obj);
if (ce->ring)
describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
}
seq_putc(m, '\n');
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
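/*
 * swizzle_string - translate an I915_BIT_6_SWIZZLE_* mode into a
 * human-readable list of the address bits XORed into bit 6, or "bug"
 * for a value outside the known set.
 */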
static const char *swizzle_string(unsigned swizzle)
{
switch (swizzle) {
case I915_BIT_6_SWIZZLE_NONE:
return "none";
case I915_BIT_6_SWIZZLE_9:
return "bit9";
case I915_BIT_6_SWIZZLE_9_10:
return "bit9/bit10";
case I915_BIT_6_SWIZZLE_9_11:
return "bit9/bit11";
case I915_BIT_6_SWIZZLE_9_10_11:
return "bit9/bit10/bit11";
case I915_BIT_6_SWIZZLE_9_17:
return "bit9/bit17";
case I915_BIT_6_SWIZZLE_9_10_17:
return "bit9/bit10/bit17";
case I915_BIT_6_SWIZZLE_UNKNOWN:
return "unknown";
}
return "bug";
}
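/*
 * i915_swizzle_info - report the detected bit-6 swizzle pattern for X-
 * and Y-tiling, the raw memory-controller registers the detection is
 * based on (DCC/C0DRB3/C1DRB3 on gen3/4; MAD_DIMM_*, TILECTL and the
 * arbiter registers on gen6+), and whether the L-shaped memory quirk
 * is active. A runtime-PM wakeref is held around the register reads.
 */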
static int i915_swizzle_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
intel_runtime_pm_get(dev_priv);
seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
swizzle_string(dev_priv->mm.bit_6_swizzle_x));
seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
swizzle_string(dev_priv->mm.bit_6_swizzle_y));
if (IS_GEN3(dev_priv) || IS_GEN4(dev_priv)) {
seq_printf(m, "DDC = 0x%08x\n",
I915_READ(DCC));
seq_printf(m, "DDC2 = 0x%08x\n",
I915_READ(DCC2));
seq_printf(m, "C0DRB3 = 0x%04x\n",
I915_READ16(C0DRB3));
seq_printf(m, "C1DRB3 = 0x%04x\n",
I915_READ16(C1DRB3));
} else if (INTEL_GEN(dev_priv) >= 6) {
seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
I915_READ(MAD_DIMM_C0));
seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
I915_READ(MAD_DIMM_C1));
seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
I915_READ(MAD_DIMM_C2));
seq_printf(m, "TILECTL = 0x%08x\n",
I915_READ(TILECTL));
if (INTEL_GEN(dev_priv) >= 8)
seq_printf(m, "GAMTARBMODE = 0x%08x\n",
I915_READ(GAMTARBMODE));
else
seq_printf(m, "ARB_MODE = 0x%08x\n",
I915_READ(ARB_MODE));
seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
I915_READ(DISP_ARB_CTL));
}
if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
seq_puts(m, "L-shaped memory detected\n");
intel_runtime_pm_put(dev_priv);
return 0;
}
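/*
 * per_file_ctx - idr_for_each() callback run for every context in a
 * client's context_idr: dumps the context's PPGTT through its
 * debug_dump() hook, or notes that the context has none.
 */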
static int per_file_ctx(int id, void *ptr, void *data)
{
struct i915_gem_context *ctx = ptr;
struct seq_file *m = data;
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
if (!ppgtt) {
seq_printf(m, " no ppgtt for context %d\n",
ctx->user_handle);
return 0;
}
if (i915_gem_context_is_default(ctx))
seq_puts(m, " default context:\n");
else
seq_printf(m, " context %d:\n", ctx->user_handle);
ppgtt->debug_dump(ppgtt, m);
return 0;
}
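/*
 * gen8_ppgtt_info - bail out unless an aliasing PPGTT is in use, then,
 * for each enabled engine, combine the GEN8_RING_PDP_UDW/LDW register
 * pairs into 64-bit values and print the four page-directory pointers.
 */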
static void gen8_ppgtt_info(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int i;
if (!ppgtt)
return;
for_each_engine(engine, dev_priv, id) {
seq_printf(m, "%s\n", engine->name);
for (i = 0; i < 4; i++) {
u64 pdp = I915_READ(GEN8_RING_PDP_UDW(engine, i));
pdp <<= 32;
pdp |= I915_READ(GEN8_RING_PDP_LDW(engine, i));
seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
}
}
}
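/*
 * gen6_ppgtt_info - the gen6/7 counterpart: prints GFX_MODE, the
 * per-engine PP_DIR_BASE/PP_DIR_BASE_READ/PP_DIR_DCLV page-directory
 * registers, the aliasing PPGTT (if any) via its debug_dump() hook,
 * and ECOCHK.
 */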
static void gen6_ppgtt_info(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
if (IS_GEN6(dev_priv))
seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
for_each_engine(engine, dev_priv, id) {
seq_printf(m, "%s\n", engine->name);
if (IS_GEN7(dev_priv))
seq_printf(m, "GFX_MODE: 0x%08x\n",
I915_READ(RING_MODE_GEN7(engine)));
seq_printf(m, "PP_DIR_BASE: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE(engine)));
seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE_READ(engine)));
seq_printf(m, "PP_DIR_DCLV: 0x%08x\n",
I915_READ(RING_PP_DIR_DCLV(engine)));
}
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
seq_puts(m, "aliasing PPGTT:\n");
seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.base.ggtt_offset);
ppgtt->debug_dump(ppgtt, m);
}
seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
}
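/*
 * i915_ppgtt_info - top-level debugfs entry for PPGTT state. Lock
 * ordering matters here: filelist_mutex is taken before struct_mutex,
 * and both are held (along with a runtime-PM wakeref) while dumping the
 * global gen6/gen8 state and then each open file's contexts.
 */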
static int i915_ppgtt_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct drm_file *file;
int ret;
mutex_lock(&dev->filelist_mutex);
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out_unlock;
intel_runtime_pm_get(dev_priv);
if (INTEL_GEN(dev_priv) >= 8)
gen8_ppgtt_info(m, dev_priv);
else if (INTEL_GEN(dev_priv) >= 6)
gen6_ppgtt_info(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
task = get_pid_task(file->pid, PIDTYPE_PID);
if (!task) {
ret = -ESRCH;
goto out_rpm;
}
seq_printf(m, "\nproc: %s\n", task->comm);
put_task_struct(task);
idr_for_each(&file_priv->context_idr, per_file_ctx,
(void *)(unsigned long)m);
}
out_rpm:
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
out_unlock:
mutex_unlock(&dev->filelist_mutex);
return ret;
}
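/* Count how many engines currently have a waiter on their breadcrumbs. */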
static int count_irq_waiters(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
int count = 0;
for_each_engine(engine, i915, id)
count += intel_engine_has_waiter(engine);
return count;
}
static const char *rps_power_to_str(unsigned int power)
{
static const char * const strings[] = {
[LOW_POWER] = "low power",
[BETWEEN] = "mixed",
[HIGH_POWER] = "high power",
};
if (power >= ARRAY_SIZE(strings) || !strings[power])
return "unknown";
return strings[power];
}
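/*
 * Summarise RPS (Render P-state) state: requested vs actual frequency,
 * soft/hard limits, outstanding waitboosts (global and per client via
 * the filelist walk), the interactive flag, and, while the GT is busy,
 * the up/down autotuning counters sampled under forcewake. On VLV/CHV
 * the actual frequency is read back from the Punit rather than from
 * GEN6_RPSTAT1. Typical usage, assuming debugfs is mounted at
 * /sys/kernel/debug and this node is registered as i915_rps_boost_info
 * on DRM minor 0:
 *   cat /sys/kernel/debug/dri/0/i915_rps_boost_info
 */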
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 act_freq = rps->cur_freq;
struct drm_file *file;
if (intel_runtime_pm_get_if_in_use(dev_priv)) {
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
mutex_lock(&dev_priv->pcu_lock);
act_freq = vlv_punit_read(dev_priv,
PUNIT_REG_GPU_FREQ_STS);
act_freq = (act_freq >> 8) & 0xff;
mutex_unlock(&dev_priv->pcu_lock);
} else {
act_freq = intel_get_cagf(dev_priv,
I915_READ(GEN6_RPSTAT1));
}
intel_runtime_pm_put(dev_priv);
}
seq_printf(m, "RPS enabled? %d\n", rps->enabled);
seq_printf(m, "GPU busy? %s [%d requests]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
seq_printf(m, "Frequency requested %d, actual %d\n",
intel_gpu_freq(dev_priv, rps->cur_freq),
intel_gpu_freq(dev_priv, act_freq));
seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
intel_gpu_freq(dev_priv, rps->min_freq),
intel_gpu_freq(dev_priv, rps->min_freq_softlimit),
intel_gpu_freq(dev_priv, rps->max_freq_softlimit),
intel_gpu_freq(dev_priv, rps->max_freq));
seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
intel_gpu_freq(dev_priv, rps->idle_freq),
intel_gpu_freq(dev_priv, rps->efficient_freq),
intel_gpu_freq(dev_priv, rps->boost_freq));
mutex_lock(&dev->filelist_mutex);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
seq_printf(m, "%s [%d]: %d boosts\n",
task ? task->comm : "<unknown>",
task ? task->pid : -1,
atomic_read(&file_priv->rps_client.boosts));
rcu_read_unlock();
}
seq_printf(m, "Kernel (anonymous) boosts: %d\n",
atomic_read(&rps->boosts));
mutex_unlock(&dev->filelist_mutex);
if (INTEL_GEN(dev_priv) >= 6 &&
rps->enabled &&
dev_priv->gt.active_requests) {
u32 rpup, rpupei;
u32 rpdown, rpdownei;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
rpup = I915_READ_FW(GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
rpupei = I915_READ_FW(GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
rpdown = I915_READ_FW(GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
rpdownei = I915_READ_FW(GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
rps_power_to_str(rps->power.mode));
seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
rpup && rpupei ? 100 * rpup / rpupei : 0,
rps->power.up_threshold);
seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
rps->power.down_threshold);
} else {
seq_puts(m, "\nRPS Autotuning inactive\n");
}
return 0;
}
static int i915_llc(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const bool edram = INTEL_GEN(dev_priv) > 8;
seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev_priv)));
seq_printf(m, "%s: %lluMB\n", edram ? "eDRAM" : "eLLC",
intel_uncore_edram_size(dev_priv)/1024/1024);
return 0;
}
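/*
 * Dump HuC firmware state: the cached intel_uc_fw descriptor plus a
 * live read of HUC_STATUS2. The register read requires the device to
 * be awake, hence the runtime-pm get/put around it.
 */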
static int i915_huc_load_status_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_printer p;
if (!HAS_HUC(dev_priv))
return -ENODEV;
p = drm_seq_file_printer(m);
intel_uc_fw_dump(&dev_priv->huc.fw, &p);
intel_runtime_pm_get(dev_priv);
seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_guc_load_status_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_printer p;
u32 tmp, i;
if (!HAS_GUC(dev_priv))
return -ENODEV;
p = drm_seq_file_printer(m);
intel_uc_fw_dump(&dev_priv->guc.fw, &p);
intel_runtime_pm_get(dev_priv);
tmp = I915_READ(GUC_STATUS);
seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
seq_printf(m, "\tBootrom status = 0x%x\n",
(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
seq_printf(m, "\tuKernel status = 0x%x\n",
(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
seq_printf(m, "\tMIA Core status = 0x%x\n",
(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
seq_puts(m, "\nScratch registers:\n");
for (i = 0; i < 16; i++)
seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
intel_runtime_pm_put(dev_priv);
return 0;
}
static const char *
stringify_guc_log_type(enum guc_log_buffer_type type)
{
switch (type) {
case GUC_ISR_LOG_BUFFER:
return "ISR";
case GUC_DPC_LOG_BUFFER:
return "DPC";
case GUC_CRASH_DUMP_LOG_BUFFER:
return "CRASH";
default:
MISSING_CASE(type);
}
return "";
}
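/*
 * Print per-buffer GuC log relay statistics (flush and overflow counts
 * for the ISR, DPC and crash-dump buffers). Only a short notice is
 * printed when the relay has not been enabled.
 */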
static void i915_guc_log_info(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct intel_guc_log *log = &dev_priv->guc.log;
enum guc_log_buffer_type type;
if (!intel_guc_log_relay_enabled(log)) {
seq_puts(m, "GuC log relay disabled\n");
return;
}
seq_puts(m, "GuC logging stats:\n");
seq_printf(m, "\tRelay full count: %u\n",
log->relay.full_count);
for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
seq_printf(m, "\t%s:\tflush count %10u, overflow count %10u\n",
stringify_guc_log_type(type),
log->stats[type].flush,
log->stats[type].sampled_overflow);
}
}
static void i915_guc_client_info(struct seq_file *m,
struct drm_i915_private *dev_priv,
struct intel_guc_client *client)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
u64 tot = 0;
seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
client->priority, client->stage_id, client->proc_desc_offset);
seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n",
client->doorbell_id, client->doorbell_offset);
for_each_engine(engine, dev_priv, id) {
u64 submissions = client->submissions[id];
tot += submissions;
seq_printf(m, "\tSubmissions: %llu %s\n",
submissions, engine->name);
}
seq_printf(m, "\tTotal: %llu\n", tot);
}
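/*
 * Top-level GuC debugfs node: returns -ENODEV unless the GuC is in
 * use, always shows the log-relay stats, and additionally dumps the
 * doorbell map plus the execbuf (and, if present, preempt) client
 * state when GuC submission is enabled.
 */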
static int i915_guc_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const struct intel_guc *guc = &dev_priv->guc;
if (!USES_GUC(dev_priv))
return -ENODEV;
i915_guc_log_info(m, dev_priv);
if (!USES_GUC_SUBMISSION(dev_priv))
return 0;
GEM_BUG_ON(!guc->execbuf_client);
seq_printf(m, "\nDoorbell map:\n");
seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline);
seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client);
i915_guc_client_info(m, dev_priv, guc->execbuf_client);
if (guc->preempt_client) {
seq_printf(m, "\nGuC preempt client @ %p:\n",
guc->preempt_client);
i915_guc_client_info(m, dev_priv, guc->preempt_client);
}
/* Add more as required ... */
return 0;
}
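/*
 * Walk the GuC stage-descriptor pool and decode every descriptor with
 * the ACTIVE attribute set, including the per-engine execlist context
 * (LRC) entries for the engines used by the execbuf client.
 */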
static int i915_guc_stage_pool(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const struct intel_guc *guc = &dev_priv->guc;
struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr;
struct intel_guc_client *client = guc->execbuf_client;
unsigned int tmp;
int index;
if (!USES_GUC_SUBMISSION(dev_priv))
return -ENODEV;
for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
struct intel_engine_cs *engine;
if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE))
continue;
seq_printf(m, "GuC stage descriptor %u:\n", index);
seq_printf(m, "\tIndex: %u\n", desc->stage_id);
seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
seq_printf(m, "\tPriority: %d\n", desc->priority);
seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
seq_printf(m, "\tEngines used: 0x%x\n",
desc->engines_used);
seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
desc->db_trigger_phy,
desc->db_trigger_cpu,
desc->db_trigger_uk);
seq_printf(m, "\tProcess descriptor: 0x%x\n",
desc->process_desc);
seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n",
desc->wq_addr, desc->wq_size);
seq_putc(m, '\n');
for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
u32 guc_engine_id = engine->guc_id;
struct guc_execlist_context *lrc =
&desc->lrc[guc_engine_id];
seq_printf(m, "\t%s LRC:\n", engine->name);
seq_printf(m, "\t\tContext desc: 0x%x\n",
lrc->context_desc);
seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id);
seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca);
seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin);
seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end);
seq_putc(m, '\n');
}
}
return 0;
}
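/*
 * Raw hexdump of a GuC log buffer, four u32s per line. The same
 * handler serves two nodes: with ->info_ent->data set it dumps the
 * load-error log captured at firmware load time, otherwise the regular
 * log vma. The object is pinned write-combined for the duration.
 */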
static int i915_guc_log_dump(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_i915_private *dev_priv = node_to_i915(node);
bool dump_load_err = !!node->info_ent->data;
struct drm_i915_gem_object *obj = NULL;
u32 *log;
int i = 0;
if (!HAS_GUC(dev_priv))
return -ENODEV;
if (dump_load_err)
obj = dev_priv->guc.load_err_log;
else if (dev_priv->guc.log.vma)
obj = dev_priv->guc.log.vma->obj;
if (!obj)
return 0;
log = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(log)) {
DRM_DEBUG("Failed to pin object\n");
seq_puts(m, "(log data inaccessible)\n");
return PTR_ERR(log);
}
for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
*(log + i), *(log + i + 1),
*(log + i + 2), *(log + i + 3));
seq_putc(m, '\n');
i915_gem_object_unpin_map(obj);
return 0;
}
static int i915_guc_log_level_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
if (!USES_GUC(dev_priv))
return -ENODEV;
*val = intel_guc_log_get_level(&dev_priv->guc.log);
return 0;
}
static int i915_guc_log_level_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
if (!USES_GUC(dev_priv))
return -ENODEV;
return intel_guc_log_set_level(&dev_priv->guc.log, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops,
i915_guc_log_level_get, i915_guc_log_level_set,
"%lld\n");
static int i915_guc_log_relay_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
if (!USES_GUC(dev_priv))
return -ENODEV;
file->private_data = &dev_priv->guc.log;
return intel_guc_log_relay_open(&dev_priv->guc.log);
}
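/*
 * The relay file ignores the written payload: any write() simply
 * forces a flush of the GuC log buffers, and the final close tears the
 * relay down via intel_guc_log_relay_close(). A rough usage sketch,
 * assuming the node is registered as i915_guc_log_relay on DRM minor 0:
 *
 *   exec 3>/sys/kernel/debug/dri/0/i915_guc_log_relay  # open: start relay
 *   echo flush >&3                                     # write: flush buffers
 *   exec 3>&-                                          # close: stop relay
 */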
static ssize_t
i915_guc_log_relay_write(struct file *filp,
const char __user *ubuf,
size_t cnt,
loff_t *ppos)
{
struct intel_guc_log *log = filp->private_data;
intel_guc_log_relay_flush(log);
return cnt;
}
static int i915_guc_log_relay_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
intel_guc_log_relay_close(&dev_priv->guc.log);
return 0;
}
static const struct file_operations i915_guc_log_relay_fops = {
.owner = THIS_MODULE,
.open = i915_guc_log_relay_open,
.write = i915_guc_log_relay_write,
.release = i915_guc_log_relay_release,
};
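/*
 * Decode the sink-side PSR state machine by reading DP_PSR_STATUS from
 * the panel's DPCD over AUX and mapping the masked state bits onto the
 * names from the DP spec, with an "unknown" fallback. Returns -ENODEV
 * if PSR is unsupported or the connector is not connected.
 */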
static int i915_psr_sink_status_show(struct seq_file *m, void *data)
{
u8 val;
static const char * const sink_status[] = {
"inactive",
"transition to active, capture and display",
"active, display from RFB",
"active, capture and display on sink device timings",
"transition to inactive, capture and display, timing re-sync",
"reserved",
"reserved",
"sink internal error",
};
struct drm_connector *connector = m->private;
struct drm_i915_private *dev_priv = to_i915(connector->dev);
struct intel_dp *intel_dp =
enc_to_intel_dp(&intel_attached_encoder(connector)->base);
int ret;
if (!CAN_PSR(dev_priv)) {
seq_puts(m, "PSR Unsupported\n");
return -ENODEV;
}
if (connector->status != connector_status_connected)
return -ENODEV;
ret = drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_STATUS, &val);
if (ret == 1) {
const char *str = "unknown";
val &= DP_PSR_SINK_STATE_MASK;
if (val < ARRAY_SIZE(sink_status))
str = sink_status[val];
seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, str);
} else {
return ret;
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_psr_sink_status);
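/*
 * Decode the source-side (host) PSR hardware state. PSR2 and PSR1 use
 * different status registers with different state encodings, so each
 * has its own lookup table; out-of-range values print "unknown".
 */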
static void
psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m)
{
u32 val, psr_status;
if (dev_priv->psr.psr2_enabled) {
static const char * const live_status[] = {
"IDLE",
"CAPTURE",
"CAPTURE_FS",
"SLEEP",
"BUFON_FW",
"ML_UP",
"SU_STANDBY",
"FAST_SLEEP",
"DEEP_SLEEP",
"BUF_ON",
"TG_ON"
};
psr_status = I915_READ(EDP_PSR2_STATUS);
val = (psr_status & EDP_PSR2_STATUS_STATE_MASK) >>
EDP_PSR2_STATUS_STATE_SHIFT;
if (val < ARRAY_SIZE(live_status)) {
seq_printf(m, "Source PSR status: 0x%x [%s]\n",
psr_status, live_status[val]);
return;
}
} else {
static const char * const live_status[] = {
"IDLE",
"SRDONACK",
"SRDENT",
"BUFOFF",
"BUFON",
"AUXACK",
"SRDOFFACK",
"SRDENT_ON",
};
psr_status = I915_READ(EDP_PSR_STATUS);
val = (psr_status & EDP_PSR_STATUS_STATE_MASK) >>
EDP_PSR_STATUS_STATE_SHIFT;
if (val < ARRAY_SIZE(live_status)) {
seq_printf(m, "Source PSR status: 0x%x [%s]\n",
psr_status, live_status[val]);
return;
}
}
seq_printf(m, "Source PSR status: 0x%x [%s]\n", psr_status, "unknown");
}
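/*
 * Overall eDP PSR status: sink support, software enable state, busy
 * frontbuffer bits, the HW enable bit of whichever of PSR1/PSR2 is in
 * use, the HSW/BDW performance counter, and the source status decoded
 * above. The IRQ timestamps are shown only when the I915_PSR_DEBUG_IRQ
 * debug flag is set.
 */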
static int i915_edp_psr_status(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
u32 psrperf = 0;
bool enabled = false;
bool sink_support;
if (!HAS_PSR(dev_priv))
return -ENODEV;
sink_support = dev_priv->psr.sink_support;
seq_printf(m, "Sink_Support: %s\n", yesno(sink_support));
if (!sink_support)
return 0;
intel_runtime_pm_get(dev_priv);
mutex_lock(&dev_priv->psr.lock);
seq_printf(m, "PSR mode: %s\n",
dev_priv->psr.psr2_enabled ? "PSR2" : "PSR1");
seq_printf(m, "Enabled: %s\n", yesno(dev_priv->psr.enabled));
seq_printf(m, "Busy frontbuffer bits: 0x%03x\n",
dev_priv->psr.busy_frontbuffer_bits);
if (dev_priv->psr.psr2_enabled)
enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE;
else
enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
seq_printf(m, "Main link in standby mode: %s\n",
yesno(dev_priv->psr.link_standby));
seq_printf(m, "HW Enabled & Active bit: %s\n", yesno(enabled));
/*
 * SKL+ Perf counter is reset to 0 every time DC state is entered
*/
if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
psrperf = I915_READ(EDP_PSR_PERF_CNT) &
EDP_PSR_PERF_CNT_MASK;
seq_printf(m, "Performance_Counter: %u\n", psrperf);
}
psr_source_status(dev_priv, m);
mutex_unlock(&dev_priv->psr.lock);
if (READ_ONCE(dev_priv->psr.debug) & I915_PSR_DEBUG_IRQ) {
seq_printf(m, "Last attempted entry at: %lld\n",
dev_priv->psr.last_entry_attempt);
seq_printf(m, "Last exit at: %lld\n",
dev_priv->psr.last_exit);
}
intel_runtime_pm_put(dev_priv);
return 0;
}
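/*
 * Writing the PSR debug value may force a modeset-style commit, so the
 * update runs under a drm_modeset_acquire_ctx and retries on -EDEADLK
 * via drm_modeset_backoff(), the standard deadlock-avoidance dance for
 * atomic lock acquisition. The shape of the pattern used below:
 *
 *   drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 * retry:
 *   ret = do_work(&ctx);
 *   if (ret == -EDEADLK && !drm_modeset_backoff(&ctx))
 *           goto retry;
 *   drm_modeset_drop_locks(&ctx);
 *   drm_modeset_acquire_fini(&ctx);
 */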
static int
i915_edp_psr_debug_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_modeset_acquire_ctx ctx;
int ret;
if (!CAN_PSR(dev_priv))
return -ENODEV;
DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val);
intel_runtime_pm_get(dev_priv);
drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
retry:
ret = intel_psr_set_debugfs_mode(dev_priv, &ctx, val);
if (ret == -EDEADLK) {
ret = drm_modeset_backoff(&ctx);
if (!ret)
goto retry;
}
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
intel_runtime_pm_put(dev_priv);
return ret;
}
static int
i915_edp_psr_debug_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
if (!CAN_PSR(dev_priv))
return -ENODEV;
*val = READ_ONCE(dev_priv->psr.debug);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops,
i915_edp_psr_debug_get, i915_edp_psr_debug_set,
"%llu\n");
static int i915_energy_uJ(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
unsigned long long power;
u32 units;
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
intel_runtime_pm_get(dev_priv);
if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &power)) {
intel_runtime_pm_put(dev_priv);
return -ENODEV;
}
units = (power & 0x1f00) >> 8;
power = I915_READ(MCH_SECP_NRG_STTS);
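/*
 * Bits 12:8 of MSR_RAPL_POWER_UNIT give the RAPL energy-status unit as
 * a power of two (energy LSB = 1/2^units J); rdmsrl_safe() above lets
 * the read fail cleanly (e.g. in a VM) instead of faulting. The raw
 * counter is converted as: uJ = (counter * 1000000) >> units.
 */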
power = (1000000 * power) >> units; /* convert to uJ */
intel_runtime_pm_put(dev_priv);
seq_printf(m, "%llu", power);
return 0;
}
static int i915_runtime_pm_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct pci_dev *pdev = dev_priv->drm.pdev;
if (!HAS_RUNTIME_PM(dev_priv))
seq_puts(m, "Runtime power management not supported\n");
seq_printf(m, "GPU idle: %s (epoch %u)\n",
yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
seq_printf(m, "Usage count: %d\n",
atomic_read(&dev_priv->drm.dev->power.usage_count));
#else
seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
#endif
seq_printf(m, "PCI device power state: %s [%d]\n",
pci_power_name(pdev->current_state),
pdev->current_state);
return 0;
}
static int i915_power_domain_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct i915_power_domains *power_domains = &dev_priv->power_domains;
int i;
mutex_lock(&power_domains->lock);
seq_printf(m, "%-25s %s\n", "Power well/domain", "Use count");
for (i = 0; i < power_domains->power_well_count; i++) {
struct i915_power_well *power_well;
enum intel_display_power_domain power_domain;
power_well = &power_domains->power_wells[i];
seq_printf(m, "%-25s %d\n", power_well->desc->name,
power_well->count);
for_each_power_domain(power_domain, power_well->desc->domains)
seq_printf(m, " %-23s %d\n",
intel_display_power_domain_str(power_domain),
power_domains->domain_use_count[power_domain]);
}
mutex_unlock(&power_domains->lock);
return 0;
}
static int i915_dmc_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_csr *csr;
if (!HAS_CSR(dev_priv))
return -ENODEV;
csr = &dev_priv->csr;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
seq_printf(m, "path: %s\n", csr->fw_path);
if (!csr->dmc_payload)
goto out;
seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
CSR_VERSION_MINOR(csr->version));
if (WARN_ON(INTEL_GEN(dev_priv) > 11))
goto out;
seq_printf(m, "DC3 -> DC5 count: %d\n",
I915_READ(IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT :
SKL_CSR_DC3_DC5_COUNT));
if (!IS_GEN9_LP(dev_priv))
seq_printf(m, "DC5 -> DC6 count: %d\n",
I915_READ(SKL_CSR_DC5_DC6_COUNT));
out:
seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0)));
seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
intel_runtime_pm_put(dev_priv);
return 0;
}
static void intel_seq_print_mode(struct seq_file *m, int tabs,
struct drm_display_mode *mode)
{
int i;
for (i = 0; i < tabs; i++)
seq_putc(m, '\t');
seq_printf(m, "id %d:\"%s\" freq %d clock %d hdisp %d hss %d hse %d htot %d vdisp %d vss %d vse %d vtot %d type 0x%x flags 0x%x\n",
mode->base.id, mode->name,
mode->vrefresh, mode->clock,
mode->hdisplay, mode->hsync_start,
mode->hsync_end, mode->htotal,
mode->vdisplay, mode->vsync_start,
mode->vsync_end, mode->vtotal,
mode->type, mode->flags);
}
static void intel_encoder_info(struct seq_file *m,
struct intel_crtc *intel_crtc,
struct intel_encoder *intel_encoder)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct drm_crtc *crtc = &intel_crtc->base;
struct intel_connector *intel_connector;
struct drm_encoder *encoder;
encoder = &intel_encoder->base;
seq_printf(m, "\tencoder %d: type: %s, connectors:\n",
encoder->base.id, encoder->name);
for_each_connector_on_encoder(dev, encoder, intel_connector) {
struct drm_connector *connector = &intel_connector->base;
seq_printf(m, "\t\tconnector %d: type: %s, status: %s",
connector->base.id,
connector->name,
drm_get_connector_status_name(connector->status));
if (connector->status == connector_status_connected) {
struct drm_display_mode *mode = &crtc->mode;
seq_printf(m, ", mode:\n");
intel_seq_print_mode(m, 2, mode);
} else {
seq_putc(m, '\n');
}
}
}
static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct drm_crtc *crtc = &intel_crtc->base;
struct intel_encoder *intel_encoder;
struct drm_plane_state *plane_state = crtc->primary->state;
struct drm_framebuffer *fb = plane_state->fb;
if (fb)
seq_printf(m, "\tfb: %d, pos: %dx%d, size: %dx%d\n",
fb->base.id, plane_state->src_x >> 16,
plane_state->src_y >> 16, fb->width, fb->height);
else
seq_puts(m, "\tprimary plane disabled\n");
for_each_encoder_on_crtc(dev, crtc, intel_encoder)
intel_encoder_info(m, intel_crtc, intel_encoder);
}
static void intel_panel_info(struct seq_file *m, struct intel_panel *panel)
{
struct drm_display_mode *mode = panel->fixed_mode;
seq_printf(m, "\tfixed mode:\n");
intel_seq_print_mode(m, 2, mode);
}
static void intel_dp_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]);
seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio));
if (intel_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
intel_panel_info(m, &intel_connector->panel);
drm_dp_downstream_debug(m, intel_dp->dpcd, intel_dp->downstream_ports,
&intel_dp->aux);
}
static void intel_dp_mst_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct intel_dp_mst_encoder *intel_mst =
enc_to_mst(&intel_encoder->base);
struct intel_digital_port *intel_dig_port = intel_mst->primary;
struct intel_dp *intel_dp = &intel_dig_port->dp;
bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr,
intel_connector->port);
seq_printf(m, "\taudio support: %s\n", yesno(has_audio));
}
static void intel_hdmi_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base);
seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio));
}
static void intel_lvds_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
intel_panel_info(m, &intel_connector->panel);
}
static void intel_connector_info(struct seq_file *m,
struct drm_connector *connector)
{
struct intel_connector *intel_connector = to_intel_connector(connector);
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct drm_display_mode *mode;
seq_printf(m, "connector %d: type %s, status: %s\n",
connector->base.id, connector->name,
drm_get_connector_status_name(connector->status));
if (connector->status == connector_status_disconnected)
return;
seq_printf(m, "\tname: %s\n", connector->display_info.name);
seq_printf(m, "\tphysical dimensions: %dx%dmm\n",
connector->display_info.width_mm,
connector->display_info.height_mm);
seq_printf(m, "\tsubpixel order: %s\n",
drm_get_subpixel_order_name(connector->display_info.subpixel_order));
seq_printf(m, "\tCEA rev: %d\n", connector->display_info.cea_rev);
if (!intel_encoder)
return;
switch (connector->connector_type) {
case DRM_MODE_CONNECTOR_DisplayPort:
case DRM_MODE_CONNECTOR_eDP:
if (intel_encoder->type == INTEL_OUTPUT_DP_MST)
intel_dp_mst_info(m, intel_connector);
else
intel_dp_info(m, intel_connector);
break;
case DRM_MODE_CONNECTOR_LVDS:
if (intel_encoder->type == INTEL_OUTPUT_LVDS)
intel_lvds_info(m, intel_connector);
break;
case DRM_MODE_CONNECTOR_HDMIA:
if (intel_encoder->type == INTEL_OUTPUT_HDMI ||
intel_encoder->type == INTEL_OUTPUT_DDI)
intel_hdmi_info(m, intel_connector);
break;
default:
break;
}
seq_printf(m, "\tmodes:\n");
list_for_each_entry(mode, &connector->modes, head)
intel_seq_print_mode(m, 2, mode);
}
static const char *plane_type(enum drm_plane_type type)
{
switch (type) {
case DRM_PLANE_TYPE_OVERLAY:
return "OVL";
case DRM_PLANE_TYPE_PRIMARY:
return "PRI";
case DRM_PLANE_TYPE_CURSOR:
return "CUR";
/*
* Deliberately omitting default: to generate compiler warnings
* when a new drm_plane_type gets added.
*/
}
return "unknown";
}
static const char *plane_rotation(unsigned int rotation)
{
static char buf[48];
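/*
 * N.B. a single shared static buffer: the returned string is only
 * valid until the next call, which is sufficient for seq_file output.
 */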
/*
 * According to the docs only one DRM_MODE_ROTATE_ value is allowed,
 * but print them all so that any misuse of the bits is visible.
 */
snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s(0x%08x)",
(rotation & DRM_MODE_ROTATE_0) ? "0 " : "",
(rotation & DRM_MODE_ROTATE_90) ? "90 " : "",
(rotation & DRM_MODE_ROTATE_180) ? "180 " : "",
(rotation & DRM_MODE_ROTATE_270) ? "270 " : "",
(rotation & DRM_MODE_REFLECT_X) ? "FLIPX " : "",
(rotation & DRM_MODE_REFLECT_Y) ? "FLIPY " : "",
rotation);
return buf;
}
static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_plane *intel_plane;
for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
struct drm_plane_state *state;
struct drm_plane *plane = &intel_plane->base;
struct drm_format_name_buf format_name;
if (!plane->state) {
seq_puts(m, "plane->state is NULL!\n");
continue;
}
state = plane->state;
if (state->fb) {
drm_get_format_name(state->fb->format->format,
&format_name);
} else {
sprintf(format_name.str, "N/A");
}
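/*
 * The src_* coordinates below are 16.16 fixed point; multiplying the
 * low 16 bits by 15625 and shifting right by 10 equals
 * frac * 1000000 / 65536, i.e. the fractional part in millionths.
 */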
seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n",
plane->base.id,
plane_type(intel_plane->base.type),
state->crtc_x, state->crtc_y,
state->crtc_w, state->crtc_h,
(state->src_x >> 16),
((state->src_x & 0xffff) * 15625) >> 10,
(state->src_y >> 16),
((state->src_y & 0xffff) * 15625) >> 10,
(state->src_w >> 16),
((state->src_w & 0xffff) * 15625) >> 10,
(state->src_h >> 16),
((state->src_h & 0xffff) * 15625) >> 10,
format_name.str,
plane_rotation(state->rotation));
}
}
static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct intel_crtc_state *pipe_config;
int num_scalers = intel_crtc->num_scalers;
int i;
pipe_config = to_intel_crtc_state(intel_crtc->base.state);
/* Not all platforms have a scaler */
if (num_scalers) {
seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
num_scalers,
pipe_config->scaler_state.scaler_users,
pipe_config->scaler_state.scaler_id);
for (i = 0; i < num_scalers; i++) {
struct intel_scaler *sc =
&pipe_config->scaler_state.scalers[i];
seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
i, yesno(sc->in_use), sc->mode);
}
seq_puts(m, "\n");
} else {
seq_puts(m, "\tNo scalers available on this platform\n");
}
}
static int i915_display_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_crtc *crtc;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "CRTC info\n");
seq_printf(m, "---------\n");
for_each_intel_crtc(dev, crtc) {
struct intel_crtc_state *pipe_config;
drm_modeset_lock(&crtc->base.mutex, NULL);
pipe_config = to_intel_crtc_state(crtc->base.state);
seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
crtc->base.base.id, pipe_name(crtc->pipe),
yesno(pipe_config->base.active),
pipe_config->pipe_src_w, pipe_config->pipe_src_h,
yesno(pipe_config->dither), pipe_config->pipe_bpp);
if (pipe_config->base.active) {
struct intel_plane *cursor =
to_intel_plane(crtc->base.cursor);
intel_crtc_info(m, crtc);
seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n",
yesno(cursor->base.state->visible),
cursor->base.state->crtc_x,
cursor->base.state->crtc_y,
cursor->base.state->crtc_w,
cursor->base.state->crtc_h,
cursor->cursor.base);
intel_scaler_info(m, crtc);
intel_plane_info(m, crtc);
}
seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
yesno(!crtc->cpu_fifo_underrun_disabled),
yesno(!crtc->pch_fifo_underrun_disabled));
drm_modeset_unlock(&crtc->base.mutex);
}
seq_printf(m, "\n");
seq_printf(m, "Connector info\n");
seq_printf(m, "--------------\n");
mutex_lock(&dev->mode_config.mutex);
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter)
intel_connector_info(m, connector);
drm_connector_list_iter_end(&conn_iter);
mutex_unlock(&dev->mode_config.mutex);
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_engine_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct drm_printer p;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "GT awake? %s (epoch %u)\n",
yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "Global active requests: %d\n",
dev_priv->gt.active_requests);
seq_printf(m, "CS timestamp frequency: %u kHz\n",
dev_priv->info.cs_timestamp_frequency_khz);
p = drm_seq_file_printer(m);
for_each_engine(engine, dev_priv, id)
intel_engine_dump(engine, &p, "%s\n", engine->name);
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_rcs_topology(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_printer p = drm_seq_file_printer(m);
intel_device_info_dump_topology(&INTEL_INFO(dev_priv)->sseu, &p);
return 0;
}
static int i915_shrinker_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
seq_printf(m, "seeks = %d\n", i915->mm.shrinker.seeks);
seq_printf(m, "batch = %lu\n", i915->mm.shrinker.batch);
return 0;
}
static int i915_shared_dplls_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
int i;
drm_modeset_lock_all(dev);
for (i = 0; i < dev_priv->num_shared_dpll; i++) {
struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->info->name,
pll->info->id);
seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n",
pll->state.crtc_mask, pll->active_mask, yesno(pll->on));
seq_printf(m, " tracked hardware state:\n");
seq_printf(m, " dpll: 0x%08x\n", pll->state.hw_state.dpll);
seq_printf(m, " dpll_md: 0x%08x\n",
pll->state.hw_state.dpll_md);
seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0);
seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1);
seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll);
seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0);
seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1);
seq_printf(m, " mg_refclkin_ctl: 0x%08x\n",
pll->state.hw_state.mg_refclkin_ctl);
seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n",
pll->state.hw_state.mg_clktop2_coreclkctl1);
seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n",
pll->state.hw_state.mg_clktop2_hsclkctl);
seq_printf(m, " mg_pll_div0: 0x%08x\n",
pll->state.hw_state.mg_pll_div0);
seq_printf(m, " mg_pll_div1: 0x%08x\n",
pll->state.hw_state.mg_pll_div1);
seq_printf(m, " mg_pll_lf: 0x%08x\n",
pll->state.hw_state.mg_pll_lf);
seq_printf(m, " mg_pll_frac_lock: 0x%08x\n",
pll->state.hw_state.mg_pll_frac_lock);
seq_printf(m, " mg_pll_ssc: 0x%08x\n",
pll->state.hw_state.mg_pll_ssc);
seq_printf(m, " mg_pll_bias: 0x%08x\n",
pll->state.hw_state.mg_pll_bias);
seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n",
pll->state.hw_state.mg_pll_tdc_coldst_bias);
}
drm_modeset_unlock_all(dev);
return 0;
}
static int i915_wa_registers(struct seq_file *m, void *unused)
{
struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
int i;
seq_printf(m, "Workarounds applied: %d\n", wa->count);
for (i = 0; i < wa->count; ++i)
seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
return 0;
}
static int i915_ipc_status_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
seq_printf(m, "Isochronous Priority Control: %s\n",
yesno(dev_priv->ipc_enabled));
return 0;
}
static int i915_ipc_status_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
if (!HAS_IPC(dev_priv))
return -ENODEV;
return single_open(file, i915_ipc_status_show, dev_priv);
}
static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
int ret;
bool enable;
ret = kstrtobool_from_user(ubuf, len, &enable);
if (ret < 0)
return ret;
intel_runtime_pm_get(dev_priv);
if (!dev_priv->ipc_enabled && enable)
DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
dev_priv->wm.distrust_bios_wm = true;
dev_priv->ipc_enabled = enable;
intel_enable_ipc(dev_priv);
intel_runtime_pm_put(dev_priv);
return len;
}
static const struct file_operations i915_ipc_status_fops = {
.owner = THIS_MODULE,
.open = i915_ipc_status_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = i915_ipc_status_write
};
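/*
 * Example usage, as a sketch (assuming debugfs is mounted at the
 * conventional /sys/kernel/debug and this node is registered for DRM
 * minor 0 as "i915_ipc_status"):
 *
 *   cat /sys/kernel/debug/dri/0/i915_ipc_status
 *   echo 1 > /sys/kernel/debug/dri/0/i915_ipc_status
 *
 * Writes go through kstrtobool_from_user(), so "0"/"1", "y"/"n" and
 * "on"/"off" style strings are all accepted.
 */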
static int i915_ddb_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct skl_ddb_allocation *ddb;
struct skl_ddb_entry *entry;
enum pipe pipe;
int plane;
if (INTEL_GEN(dev_priv) < 9)
return -ENODEV;
drm_modeset_lock_all(dev);
ddb = &dev_priv->wm.skl_hw.ddb;
seq_printf(m, "%-15s%8s%8s%8s\n", "", "Start", "End", "Size");
for_each_pipe(dev_priv, pipe) {
seq_printf(m, "Pipe %c\n", pipe_name(pipe));
for_each_universal_plane(dev_priv, pipe, plane) {
entry = &ddb->plane[pipe][plane];
seq_printf(m, " Plane%-8d%8u%8u%8u\n", plane + 1,
entry->start, entry->end,
skl_ddb_entry_size(entry));
}
entry = &ddb->plane[pipe][PLANE_CURSOR];
seq_printf(m, " %-13s%8u%8u%8u\n", "Cursor", entry->start,
entry->end, skl_ddb_entry_size(entry));
}
drm_modeset_unlock_all(dev);
return 0;
}
static void drrs_status_per_crtc(struct seq_file *m,
struct drm_device *dev,
struct intel_crtc *intel_crtc)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_drrs *drrs = &dev_priv->drrs;
int vrefresh = 0;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
if (connector->state->crtc != &intel_crtc->base)
continue;
seq_printf(m, "%s:\n", connector->name);
}
drm_connector_list_iter_end(&conn_iter);
if (dev_priv->vbt.drrs_type == STATIC_DRRS_SUPPORT)
seq_puts(m, "\tVBT: DRRS_type: Static");
else if (dev_priv->vbt.drrs_type == SEAMLESS_DRRS_SUPPORT)
seq_puts(m, "\tVBT: DRRS_type: Seamless");
else if (dev_priv->vbt.drrs_type == DRRS_NOT_SUPPORTED)
seq_puts(m, "\tVBT: DRRS_type: None");
else
seq_puts(m, "\tVBT: DRRS_type: FIXME: Unrecognized Value");
seq_puts(m, "\n\n");
if (to_intel_crtc_state(intel_crtc->base.state)->has_drrs) {
struct intel_panel *panel;
mutex_lock(&drrs->mutex);
/* DRRS Supported */
seq_puts(m, "\tDRRS Supported: Yes\n");
/* disable_drrs() will make drrs->dp NULL */
if (!drrs->dp) {
seq_puts(m, "Idleness DRRS: Disabled\n");
if (dev_priv->psr.enabled)
seq_puts(m,
"\tAs PSR is enabled, DRRS is not enabled\n");
mutex_unlock(&drrs->mutex);
return;
}
panel = &drrs->dp->attached_connector->panel;
seq_printf(m, "\t\tBusy_frontbuffer_bits: 0x%X",
drrs->busy_frontbuffer_bits);
seq_puts(m, "\n\t\t");
if (drrs->refresh_rate_type == DRRS_HIGH_RR) {
seq_puts(m, "DRRS_State: DRRS_HIGH_RR\n");
vrefresh = panel->fixed_mode->vrefresh;
} else if (drrs->refresh_rate_type == DRRS_LOW_RR) {
seq_puts(m, "DRRS_State: DRRS_LOW_RR\n");
vrefresh = panel->downclock_mode->vrefresh;
} else {
seq_printf(m, "DRRS_State: Unknown(%d)\n",
drrs->refresh_rate_type);
mutex_unlock(&drrs->mutex);
return;
}
seq_printf(m, "\t\tVrefresh: %d", vrefresh);
seq_puts(m, "\n\t\t");
mutex_unlock(&drrs->mutex);
} else {
/* DRRS not supported. Print the VBT parameter */
seq_puts(m, "\tDRRS Supported: No");
}
seq_puts(m, "\n");
}
static int i915_drrs_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_crtc *intel_crtc;
int active_crtc_cnt = 0;
drm_modeset_lock_all(dev);
for_each_intel_crtc(dev, intel_crtc) {
if (intel_crtc->base.state->active) {
active_crtc_cnt++;
seq_printf(m, "\nCRTC %d: ", active_crtc_cnt);
drrs_status_per_crtc(m, dev, intel_crtc);
}
}
drm_modeset_unlock_all(dev);
if (!active_crtc_cnt)
seq_puts(m, "No active crtc found\n");
return 0;
}
static int i915_dp_mst_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
struct intel_encoder *intel_encoder;
struct intel_digital_port *intel_dig_port;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
continue;
intel_encoder = intel_attached_encoder(connector);
if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
continue;
intel_dig_port = enc_to_dig_port(&intel_encoder->base);
if (!intel_dig_port->dp.can_mst)
continue;
seq_printf(m, "MST Source Port %c\n",
port_name(intel_dig_port->base.port));
drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr);
}
drm_connector_list_iter_end(&conn_iter);
return 0;
}
static ssize_t i915_displayport_test_active_write(struct file *file,
const char __user *ubuf,
size_t len, loff_t *offp)
{
char *input_buffer;
int status = 0;
struct drm_device *dev;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
struct intel_dp *intel_dp;
int val = 0;
dev = ((struct seq_file *)file->private_data)->private;
if (len == 0)
return 0;
input_buffer = memdup_user_nul(ubuf, len);
if (IS_ERR(input_buffer))
return PTR_ERR(input_buffer);
DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct intel_encoder *encoder;
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
encoder = to_intel_encoder(connector->encoder);
if (encoder && encoder->type == INTEL_OUTPUT_DP_MST)
continue;
if (encoder && connector->status == connector_status_connected) {
intel_dp = enc_to_intel_dp(&encoder->base);
status = kstrtoint(input_buffer, 10, &val);
if (status < 0)
break;
DRM_DEBUG_DRIVER("Got %d for test active\n", val);
/* To prevent erroneous activation of the compliance
* testing code, only accept an actual value of 1 here
*/
if (val == 1)
intel_dp->compliance.test_active = 1;
else
intel_dp->compliance.test_active = 0;
}
}
drm_connector_list_iter_end(&conn_iter);
kfree(input_buffer);
if (status < 0)
return status;
*offp += len;
return len;
}
static int i915_displayport_test_active_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
struct drm_device *dev = &dev_priv->drm;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
struct intel_dp *intel_dp;
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct intel_encoder *encoder;
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
encoder = to_intel_encoder(connector->encoder);
if (encoder && encoder->type == INTEL_OUTPUT_DP_MST)
continue;
if (encoder && connector->status == connector_status_connected) {
intel_dp = enc_to_intel_dp(&encoder->base);
if (intel_dp->compliance.test_active)
seq_puts(m, "1");
else
seq_puts(m, "0");
} else
seq_puts(m, "0");
}
drm_connector_list_iter_end(&conn_iter);
return 0;
}
static int i915_displayport_test_active_open(struct inode *inode,
struct file *file)
{
return single_open(file, i915_displayport_test_active_show,
inode->i_private);
}
static const struct file_operations i915_displayport_test_active_fops = {
.owner = THIS_MODULE,
.open = i915_displayport_test_active_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = i915_displayport_test_active_write
};
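/*
 * Example usage, as a sketch (same path assumptions as above): only a
 * literal 1 arms the DP compliance test code, any other value disarms
 * it:
 *
 *   echo 1 > /sys/kernel/debug/dri/0/i915_displayport_test_active
 */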
static int i915_displayport_test_data_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
struct drm_device *dev = &dev_priv->drm;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
struct intel_dp *intel_dp;
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct intel_encoder *encoder;
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
encoder = to_intel_encoder(connector->encoder);
if (encoder && encoder->type == INTEL_OUTPUT_DP_MST)
continue;
if (encoder && connector->status == connector_status_connected) {
intel_dp = enc_to_intel_dp(&encoder->base);
if (intel_dp->compliance.test_type ==
DP_TEST_LINK_EDID_READ)
seq_printf(m, "%lx",
intel_dp->compliance.test_data.edid);
else if (intel_dp->compliance.test_type ==
DP_TEST_LINK_VIDEO_PATTERN) {
seq_printf(m, "hdisplay: %d\n",
intel_dp->compliance.test_data.hdisplay);
seq_printf(m, "vdisplay: %d\n",
intel_dp->compliance.test_data.vdisplay);
seq_printf(m, "bpc: %u\n",
intel_dp->compliance.test_data.bpc);
}
} else
seq_puts(m, "0");
}
drm_connector_list_iter_end(&conn_iter);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_data);
static int i915_displayport_test_type_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
struct drm_device *dev = &dev_priv->drm;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
struct intel_dp *intel_dp;
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct intel_encoder *encoder;
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
encoder = to_intel_encoder(connector->encoder);
if (encoder && encoder->type == INTEL_OUTPUT_DP_MST)
continue;
if (encoder && connector->status == connector_status_connected) {
intel_dp = enc_to_intel_dp(&encoder->base);
seq_printf(m, "%02lx", intel_dp->compliance.test_type);
} else
seq_puts(m, "0");
}
drm_connector_list_iter_end(&conn_iter);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type);
static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
{
struct drm_i915_private *dev_priv = m->private;
struct drm_device *dev = &dev_priv->drm;
int level;
int num_levels;
if (IS_CHERRYVIEW(dev_priv))
num_levels = 3;
else if (IS_VALLEYVIEW(dev_priv))
num_levels = 1;
else if (IS_G4X(dev_priv))
num_levels = 3;
else
num_levels = ilk_wm_max_level(dev_priv) + 1;
drm_modeset_lock_all(dev);
for (level = 0; level < num_levels; level++) {
unsigned int latency = wm[level];
/*
 * - WM1+ latency values in 0.5us units
 * - latencies are in us on gen9/vlv/chv/g4x
 */
if (INTEL_GEN(dev_priv) >= 9 ||
IS_VALLEYVIEW(dev_priv) ||
IS_CHERRYVIEW(dev_priv) ||
IS_G4X(dev_priv))
latency *= 10;
else if (level > 0)
latency *= 5;
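/* latency is now in 0.1 us units for the printout below */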
seq_printf(m, "WM%d %u (%u.%u usec)\n",
level, wm[level], latency / 10, latency % 10);
}
drm_modeset_unlock_all(dev);
}
static int pri_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
const uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.pri_latency;
wm_latency_show(m, latencies);
return 0;
}
static int spr_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
const uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.spr_latency;
wm_latency_show(m, latencies);
return 0;
}
static int cur_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
const uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.cur_latency;
wm_latency_show(m, latencies);
return 0;
}
static int pri_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
return -ENODEV;
return single_open(file, pri_wm_latency_show, dev_priv);
}
static int spr_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
if (HAS_GMCH_DISPLAY(dev_priv))
return -ENODEV;
return single_open(file, spr_wm_latency_show, dev_priv);
}
static int cur_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
if (HAS_GMCH_DISPLAY(dev_priv))
return -ENODEV;
return single_open(file, cur_wm_latency_show, dev_priv);
}
static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp, uint16_t wm[8])
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
struct drm_device *dev = &dev_priv->drm;
uint16_t new[8] = { 0 };
int num_levels;
int level;
int ret;
char tmp[32];
if (IS_CHERRYVIEW(dev_priv))
num_levels = 3;
else if (IS_VALLEYVIEW(dev_priv))
num_levels = 1;
else if (IS_G4X(dev_priv))
num_levels = 3;
else
num_levels = ilk_wm_max_level(dev_priv) + 1;
if (len >= sizeof(tmp))
return -EINVAL;
if (copy_from_user(tmp, ubuf, len))
return -EFAULT;
tmp[len] = '\0';
ret = sscanf(tmp, "%hu %hu %hu %hu %hu %hu %hu %hu",
&new[0], &new[1], &new[2], &new[3],
&new[4], &new[5], &new[6], &new[7]);
if (ret != num_levels)
return -EINVAL;
drm_modeset_lock_all(dev);
for (level = 0; level < num_levels; level++)
wm[level] = new[level];
drm_modeset_unlock_all(dev);
return len;
}
static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.pri_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.spr_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
uint16_t *latencies;
if (INTEL_GEN(dev_priv) >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = dev_priv->wm.cur_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static const struct file_operations i915_pri_wm_latency_fops = {
.owner = THIS_MODULE,
.open = pri_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = pri_wm_latency_write
};
static const struct file_operations i915_spr_wm_latency_fops = {
.owner = THIS_MODULE,
.open = spr_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = spr_wm_latency_write
};
static const struct file_operations i915_cur_wm_latency_fops = {
.owner = THIS_MODULE,
.open = cur_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = cur_wm_latency_write
};
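/*
 * Example usage, as a sketch (same path assumptions as above): reads
 * dump one "WM<n> <raw> (<usec>)" line per level, and a write must
 * supply exactly num_levels values, e.g. on a three-level platform:
 *
 *   echo "4 12 24" > /sys/kernel/debug/dri/0/i915_pri_wm_latency
 */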
static int
i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
*val = i915_terminally_wedged(&dev_priv->gpu_error);
return 0;
}
static int
i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
unsigned int tmp;
/*
* There is no safeguard against this debugfs entry colliding
* with the hangcheck calling same i915_handle_error() in
* parallel, causing an explosion. For now we assume that the
* test harness is responsible enough not to inject gpu hangs
* while it is writing to 'i915_wedged'
*/
if (i915_reset_backoff(&i915->gpu_error))
return -EAGAIN;
for_each_engine_masked(engine, i915, val, tmp) {
engine->hangcheck.seqno = intel_engine_get_seqno(engine);
engine->hangcheck.stalled = true;
}
i915_handle_error(i915, val, I915_ERROR_CAPTURE,
"Manually set wedged engine mask = %llx", val);
wait_on_bit(&i915->gpu_error.flags,
I915_RESET_HANDOFF,
TASK_UNINTERRUPTIBLE);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
static int
fault_irq_set(struct drm_i915_private *i915,
unsigned long *irq,
unsigned long val)
{
int err;
err = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (err)
return err;
err = i915_gem_wait_for_idle(i915,
I915_WAIT_LOCKED |
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (err)
goto err_unlock;
*irq = val;
mutex_unlock(&i915->drm.struct_mutex);
/* Flush idle worker to disarm irq */
drain_delayed_work(&i915->gt.idle_work);
return 0;
err_unlock:
mutex_unlock(&i915->drm.struct_mutex);
return err;
}
static int
i915_ring_missed_irq_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
*val = dev_priv->gpu_error.missed_irq_rings;
return 0;
}
static int
i915_ring_missed_irq_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
i915_ring_missed_irq_get, i915_ring_missed_irq_set,
"0x%08llx\n");
static int
i915_ring_test_irq_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
*val = dev_priv->gpu_error.test_irq_rings;
return 0;
}
static int
i915_ring_test_irq_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
/* GuC keeps the user interrupt permanently enabled for submission */
if (USES_GUC_SUBMISSION(i915))
return -ENODEV;
/*
* From icl, we can no longer individually mask interrupt generation
* from each engine.
*/
if (INTEL_GEN(i915) >= 11)
return -ENODEV;
val &= INTEL_INFO(i915)->ring_mask;
DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);
return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
i915_ring_test_irq_get, i915_ring_test_irq_set,
"0x%08llx\n");
#define DROP_UNBOUND BIT(0)
#define DROP_BOUND BIT(1)
#define DROP_RETIRE BIT(2)
#define DROP_ACTIVE BIT(3)
#define DROP_FREED BIT(4)
#define DROP_SHRINK_ALL BIT(5)
#define DROP_IDLE BIT(6)
#define DROP_RESET_ACTIVE BIT(7)
#define DROP_RESET_SEQNO BIT(8)
#define DROP_ALL (DROP_UNBOUND | \
DROP_BOUND | \
DROP_RETIRE | \
DROP_ACTIVE | \
DROP_FREED | \
DROP_SHRINK_ALL |\
DROP_IDLE | \
DROP_RESET_ACTIVE | \
DROP_RESET_SEQNO)
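/*
 * Example: drop everything at once (DROP_ALL, 0x1ff, is also what
 * reading the file reports back); path assumes debugfs is mounted at
 * /sys/kernel/debug and this is card 0:
 *   echo 0x1ff > /sys/kernel/debug/dri/0/i915_gem_drop_caches
 */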
static int
i915_drop_caches_get(void *data, u64 *val)
{
*val = DROP_ALL;
return 0;
}
static int
i915_drop_caches_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
int ret = 0;
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
val, val & DROP_ALL);
intel_runtime_pm_get(i915);
if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915))
i915_gem_set_wedged(i915);
/* No need to check and wait for gpu resets; only libdrm auto-restarts
 * ioctls that return -EAGAIN. */
if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
if (ret)
goto out;
if (val & DROP_ACTIVE)
ret = i915_gem_wait_for_idle(i915,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret == 0 && val & DROP_RESET_SEQNO)
ret = i915_gem_set_global_seqno(&i915->drm, 1);
if (val & DROP_RETIRE)
i915_retire_requests(i915);
mutex_unlock(&i915->drm.struct_mutex);
}
if (val & DROP_RESET_ACTIVE &&
i915_terminally_wedged(&i915->gpu_error)) {
i915_handle_error(i915, ALL_ENGINES, 0, NULL);
wait_on_bit(&i915->gpu_error.flags,
I915_RESET_HANDOFF,
TASK_UNINTERRUPTIBLE);
}
fs_reclaim_acquire(GFP_KERNEL);
if (val & DROP_BOUND)
i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND);
if (val & DROP_UNBOUND)
i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_UNBOUND);
if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(i915);
fs_reclaim_release(GFP_KERNEL);
if (val & DROP_IDLE) {
do {
if (READ_ONCE(i915->gt.active_requests))
flush_delayed_work(&i915->gt.retire_work);
drain_delayed_work(&i915->gt.idle_work);
} while (READ_ONCE(i915->gt.awake));
}
if (val & DROP_FREED)
i915_gem_drain_freed_objects(i915);
out:
intel_runtime_pm_put(i915);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
i915_drop_caches_get, i915_drop_caches_set,
"0x%08llx\n");
static int
i915_cache_sharing_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
u32 snpcr;
if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
return -ENODEV;
intel_runtime_pm_get(dev_priv);
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
intel_runtime_pm_put(dev_priv);
*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
return 0;
}
static int
i915_cache_sharing_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
u32 snpcr;
if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
return -ENODEV;
if (val > 3)
return -EINVAL;
intel_runtime_pm_get(dev_priv);
DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
/* Update the cache sharing policy here as well */
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
snpcr &= ~GEN6_MBC_SNPCR_MASK;
snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
intel_runtime_pm_put(dev_priv);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
i915_cache_sharing_get, i915_cache_sharing_set,
"%llu\n");
static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
#define SS_MAX 2
const int ss_max = SS_MAX;
u32 sig1[SS_MAX], sig2[SS_MAX];
int ss;
sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
if (sig1[ss] & CHV_SS_PG_ENABLE)
/* skip disabled subslice */
continue;
sseu->slice_mask = BIT(0);
sseu->subslice_mask[0] |= BIT(ss);
eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
sseu->eu_total += eu_cnt;
sseu->eu_per_subslice = max_t(unsigned int,
sseu->eu_per_subslice, eu_cnt);
}
#undef SS_MAX
}
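/*
 * In the gen9/gen10 status readers below, each PGCTL EU ack bit covers
 * a pair of EUs, hence the "2 * hweight32()" when counting the enabled
 * EUs of a subslice.
 */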
static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
#define SS_MAX 6
const struct intel_device_info *info = INTEL_INFO(dev_priv);
u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
int s, ss;
for (s = 0; s < info->sseu.max_slices; s++) {
/*
 * FIXME: Valid SS Mask respects the spec and reads only valid bits
 * for those registers, excluding reserved, although this seems wrong
 * because it would leave many subslices without ACK.
 */
s_reg[s] = I915_READ(GEN10_SLICE_PGCTL_ACK(s)) &
GEN10_PGCTL_VALID_SS_MASK(s);
eu_reg[2 * s] = I915_READ(GEN10_SS01_EU_PGCTL_ACK(s));
eu_reg[2 * s + 1] = I915_READ(GEN10_SS23_EU_PGCTL_ACK(s));
}
eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
GEN9_PGCTL_SSA_EU19_ACK |
GEN9_PGCTL_SSA_EU210_ACK |
GEN9_PGCTL_SSA_EU311_ACK;
eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
GEN9_PGCTL_SSB_EU19_ACK |
GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK;
for (s = 0; s < info->sseu.max_slices; s++) {
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */
continue;
sseu->slice_mask |= BIT(s);
sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
/* skip disabled subslice */
continue;
eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] &
eu_mask[ss % 2]);
sseu->eu_total += eu_cnt;
sseu->eu_per_subslice = max_t(unsigned int,
sseu->eu_per_subslice,
eu_cnt);
}
}
#undef SS_MAX
}
static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
#define SS_MAX 3
const struct intel_device_info *info = INTEL_INFO(dev_priv);
u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
int s, ss;
for (s = 0; s < info->sseu.max_slices; s++) {
s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
}
eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
GEN9_PGCTL_SSA_EU19_ACK |
GEN9_PGCTL_SSA_EU210_ACK |
GEN9_PGCTL_SSA_EU311_ACK;
eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
GEN9_PGCTL_SSB_EU19_ACK |
GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK;
for (s = 0; s < info->sseu.max_slices; s++) {
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */
continue;
sseu->slice_mask |= BIT(s);
if (IS_GEN9_BC(dev_priv))
sseu->subslice_mask[s] =
INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
if (IS_GEN9_LP(dev_priv)) {
if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
/* skip disabled subslice */
continue;
sseu->subslice_mask[s] |= BIT(ss);
}
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
eu_mask[ss%2]);
sseu->eu_total += eu_cnt;
sseu->eu_per_subslice = max_t(unsigned int,
sseu->eu_per_subslice,
eu_cnt);
}
}
#undef SS_MAX
}
static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
int s;
sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
if (sseu->slice_mask) {
sseu->eu_per_subslice =
INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
for (s = 0; s < fls(sseu->slice_mask); s++) {
sseu->subslice_mask[s] =
INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
}
sseu->eu_total = sseu->eu_per_subslice *
sseu_subslice_total(sseu);
/* subtract fused off EU(s) from enabled slice(s) */
for (s = 0; s < fls(sseu->slice_mask); s++) {
u8 subslice_7eu =
INTEL_INFO(dev_priv)->sseu.subslice_7eu[s];
sseu->eu_total -= hweight8(subslice_7eu);
}
}
}
static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
const struct sseu_dev_info *sseu)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const char *type = is_available_info ? "Available" : "Enabled";
int s;
seq_printf(m, " %s Slice Mask: %04x\n", type,
sseu->slice_mask);
seq_printf(m, " %s Slice Total: %u\n", type,
hweight8(sseu->slice_mask));
seq_printf(m, " %s Subslice Total: %u\n", type,
sseu_subslice_total(sseu));
for (s = 0; s < fls(sseu->slice_mask); s++) {
seq_printf(m, " %s Slice%i subslices: %u\n", type,
s, hweight8(sseu->subslice_mask[s]));
}
seq_printf(m, " %s EU Total: %u\n", type,
sseu->eu_total);
seq_printf(m, " %s EU Per Subslice: %u\n", type,
sseu->eu_per_subslice);
if (!is_available_info)
return;
seq_printf(m, " Has Pooled EU: %s\n", yesno(HAS_POOLED_EU(dev_priv)));
if (HAS_POOLED_EU(dev_priv))
seq_printf(m, " Min EU in pool: %u\n", sseu->min_eu_in_pool);
seq_printf(m, " Has Slice Power Gating: %s\n",
yesno(sseu->has_slice_pg));
seq_printf(m, " Has Subslice Power Gating: %s\n",
yesno(sseu->has_subslice_pg));
seq_printf(m, " Has EU Power Gating: %s\n",
yesno(sseu->has_eu_pg));
}
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct sseu_dev_info sseu;
if (INTEL_GEN(dev_priv) < 8)
return -ENODEV;
seq_puts(m, "SSEU Device Info\n");
i915_print_sseu_info(m, true, &INTEL_INFO(dev_priv)->sseu);
seq_puts(m, "SSEU Device Status\n");
memset(&sseu, 0, sizeof(sseu));
sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices;
sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices;
sseu.max_eus_per_subslice =
INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
intel_runtime_pm_get(dev_priv);
if (IS_CHERRYVIEW(dev_priv)) {
cherryview_sseu_device_status(dev_priv, &sseu);
} else if (IS_BROADWELL(dev_priv)) {
broadwell_sseu_device_status(dev_priv, &sseu);
} else if (IS_GEN9(dev_priv)) {
gen9_sseu_device_status(dev_priv, &sseu);
} else if (INTEL_GEN(dev_priv) >= 10) {
gen10_sseu_device_status(dev_priv, &sseu);
}
intel_runtime_pm_put(dev_priv);
i915_print_sseu_info(m, false, &sseu);
return 0;
}
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
if (INTEL_GEN(i915) < 6)
return 0;
intel_runtime_pm_get(i915);
intel_uncore_forcewake_user_get(i915);
return 0;
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
if (INTEL_GEN(i915) < 6)
return 0;
intel_uncore_forcewake_user_put(i915);
intel_runtime_pm_put(i915);
return 0;
}
static const struct file_operations i915_forcewake_fops = {
.owner = THIS_MODULE,
.open = i915_forcewake_open,
.release = i915_forcewake_release,
};
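/*
 * i915_forcewake_user holds all forcewake domains (plus a runtime-pm
 * reference) for as long as the file is kept open, and releases both on
 * close. On gen5 and earlier open/release are no-ops.
 */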
static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
struct i915_hotplug *hotplug = &dev_priv->hotplug;
/* Synchronize with everything first in case there's been an HPD
* storm, but we haven't finished handling it in the kernel yet
*/
synchronize_irq(dev_priv->drm.irq);
flush_work(&dev_priv->hotplug.dig_port_work);
flush_work(&dev_priv->hotplug.hotplug_work);
seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold);
seq_printf(m, "Detected: %s\n",
yesno(delayed_work_pending(&hotplug->reenable_work)));
return 0;
}
static ssize_t i915_hpd_storm_ctl_write(struct file *file,
const char __user *ubuf, size_t len,
loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
struct i915_hotplug *hotplug = &dev_priv->hotplug;
unsigned int new_threshold;
int i;
char *newline;
char tmp[16];
if (len >= sizeof(tmp))
return -EINVAL;
if (copy_from_user(tmp, ubuf, len))
return -EFAULT;
tmp[len] = '\0';
/* Strip newline, if any */
newline = strchr(tmp, '\n');
if (newline)
*newline = '\0';
if (strcmp(tmp, "reset") == 0)
new_threshold = HPD_STORM_DEFAULT_THRESHOLD;
else if (kstrtouint(tmp, 10, &new_threshold) != 0)
return -EINVAL;
if (new_threshold > 0)
DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n",
new_threshold);
else
DRM_DEBUG_KMS("Disabling HPD storm detection\n");
spin_lock_irq(&dev_priv->irq_lock);
hotplug->hpd_storm_threshold = new_threshold;
/* Reset the HPD storm stats so we don't accidentally trigger a storm */
for_each_hpd_pin(i)
hotplug->stats[i].count = 0;
spin_unlock_irq(&dev_priv->irq_lock);
/* Re-enable hpd immediately if we were in an irq storm */
flush_delayed_work(&dev_priv->hotplug.reenable_work);
return len;
}
static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
{
return single_open(file, i915_hpd_storm_ctl_show, inode->i_private);
}
static const struct file_operations i915_hpd_storm_ctl_fops = {
.owner = THIS_MODULE,
.open = i915_hpd_storm_ctl_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = i915_hpd_storm_ctl_write
};
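/*
 * Example: lower the HPD storm threshold to 8 IRQs, or restore the
 * default threshold (paths as above):
 *   echo 8 > /sys/kernel/debug/dri/0/i915_hpd_storm_ctl
 *   echo reset > /sys/kernel/debug/dri/0/i915_hpd_storm_ctl
 */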
static int i915_hpd_short_storm_ctl_show(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = m->private;
seq_printf(m, "Enabled: %s\n",
yesno(dev_priv->hotplug.hpd_short_storm_enabled));
return 0;
}
static int
i915_hpd_short_storm_ctl_open(struct inode *inode, struct file *file)
{
return single_open(file, i915_hpd_short_storm_ctl_show,
inode->i_private);
}
static ssize_t i915_hpd_short_storm_ctl_write(struct file *file,
const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_i915_private *dev_priv = m->private;
struct i915_hotplug *hotplug = &dev_priv->hotplug;
char *newline;
char tmp[16];
int i;
bool new_state;
if (len >= sizeof(tmp))
return -EINVAL;
if (copy_from_user(tmp, ubuf, len))
return -EFAULT;
tmp[len] = '\0';
/* Strip newline, if any */
newline = strchr(tmp, '\n');
if (newline)
*newline = '\0';
/* Reset to the "default" state for this system */
if (strcmp(tmp, "reset") == 0)
new_state = !HAS_DP_MST(dev_priv);
else if (kstrtobool(tmp, &new_state) != 0)
return -EINVAL;
DRM_DEBUG_KMS("%sabling HPD short storm detection\n",
new_state ? "En" : "Dis");
spin_lock_irq(&dev_priv->irq_lock);
hotplug->hpd_short_storm_enabled = new_state;
/* Reset the HPD storm stats so we don't accidentally trigger a storm */
for_each_hpd_pin(i)
hotplug->stats[i].count = 0;
spin_unlock_irq(&dev_priv->irq_lock);
/* Re-enable hpd immediately if we were in an irq storm */
flush_delayed_work(&dev_priv->hotplug.reenable_work);
return len;
}
static const struct file_operations i915_hpd_short_storm_ctl_fops = {
.owner = THIS_MODULE,
.open = i915_hpd_short_storm_ctl_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = i915_hpd_short_storm_ctl_write,
};
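/*
 * Example: force short-IRQ storm detection on, or return to the
 * per-system default (enabled only when the platform lacks DP MST):
 *   echo 1 > /sys/kernel/debug/dri/0/i915_hpd_short_storm_ctl
 *   echo reset > /sys/kernel/debug/dri/0/i915_hpd_short_storm_ctl
 */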
static int i915_drrs_ctl_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_device *dev = &dev_priv->drm;
struct intel_crtc *crtc;
if (INTEL_GEN(dev_priv) < 7)
return -ENODEV;
for_each_intel_crtc(dev, crtc) {
struct drm_connector_list_iter conn_iter;
struct intel_crtc_state *crtc_state;
struct drm_connector *connector;
struct drm_crtc_commit *commit;
int ret;
ret = drm_modeset_lock_single_interruptible(&crtc->base.mutex);
if (ret)
return ret;
crtc_state = to_intel_crtc_state(crtc->base.state);
if (!crtc_state->base.active ||
!crtc_state->has_drrs)
goto out;
commit = crtc_state->base.commit;
if (commit) {
ret = wait_for_completion_interruptible(&commit->hw_done);
if (ret)
goto out;
}
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
struct intel_encoder *encoder;
struct intel_dp *intel_dp;
if (!(crtc_state->base.connector_mask &
drm_connector_mask(connector)))
continue;
encoder = intel_attached_encoder(connector);
if (encoder->type != INTEL_OUTPUT_EDP)
continue;
DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n",
val ? "en" : "dis", val);
intel_dp = enc_to_intel_dp(&encoder->base);
if (val)
intel_edp_drrs_enable(intel_dp,
crtc_state);
else
intel_edp_drrs_disable(intel_dp,
crtc_state);
}
drm_connector_list_iter_end(&conn_iter);
out:
drm_modeset_unlock(&crtc->base.mutex);
if (ret)
return ret;
}
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n");
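/*
 * Writing a non-zero value force-enables DRRS on every active eDP
 * output that supports it; writing zero disables it again (gen7+ only):
 *   echo 1 > /sys/kernel/debug/dri/0/i915_drrs_ctl
 */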
static ssize_t
i915_fifo_underrun_reset_write(struct file *filp,
const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct drm_i915_private *dev_priv = filp->private_data;
struct intel_crtc *intel_crtc;
struct drm_device *dev = &dev_priv->drm;
int ret;
bool reset;
ret = kstrtobool_from_user(ubuf, cnt, &reset);
if (ret)
return ret;
if (!reset)
return cnt;
for_each_intel_crtc(dev, intel_crtc) {
struct drm_crtc_commit *commit;
struct intel_crtc_state *crtc_state;
ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex);
if (ret)
return ret;
crtc_state = to_intel_crtc_state(intel_crtc->base.state);
commit = crtc_state->base.commit;
if (commit) {
ret = wait_for_completion_interruptible(&commit->hw_done);
if (!ret)
ret = wait_for_completion_interruptible(&commit->flip_done);
}
if (!ret && crtc_state->base.active) {
DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n",
pipe_name(intel_crtc->pipe));
intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state);
}
drm_modeset_unlock(&intel_crtc->base.mutex);
if (ret)
return ret;
}
ret = intel_fbc_reset_underrun(dev_priv);
if (ret)
return ret;
return cnt;
}
static const struct file_operations i915_fifo_underrun_reset_ops = {
.owner = THIS_MODULE,
.open = simple_open,
.write = i915_fifo_underrun_reset_write,
.llseek = default_llseek,
};
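/*
 * Writing a truthy value ("1", "y", ...) re-arms FIFO underrun
 * reporting on every active pipe and clears any FBC underrun state:
 *   echo 1 > /sys/kernel/debug/dri/0/i915_fifo_underrun_reset
 */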
static const struct drm_info_list i915_debugfs_list[] = {
{"i915_capabilities", i915_capabilities, 0},
{"i915_gem_objects", i915_gem_object_info, 0},
{"i915_gem_gtt", i915_gem_gtt_info, 0},
{"i915_gem_stolen", i915_gem_stolen_list_info },
{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
{"i915_gem_interrupt", i915_interrupt_info, 0},
{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
{"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
{"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_reset_info", i915_reset_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
{"i915_fbc_status", i915_fbc_status, 0},
{"i915_ips_status", i915_ips_status, 0},
{"i915_sr_status", i915_sr_status, 0},
{"i915_opregion", i915_opregion, 0},
{"i915_vbt", i915_vbt, 0},
{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
{"i915_context_status", i915_context_status, 0},
{"i915_forcewake_domains", i915_forcewake_domains, 0},
{"i915_swizzle_info", i915_swizzle_info, 0},
{"i915_ppgtt_info", i915_ppgtt_info, 0},
{"i915_llc", i915_llc, 0},
{"i915_edp_psr_status", i915_edp_psr_status, 0},
{"i915_energy_uJ", i915_energy_uJ, 0},
{"i915_runtime_pm_status", i915_runtime_pm_status, 0},
{"i915_power_domain_info", i915_power_domain_info, 0},
{"i915_dmc_info", i915_dmc_info, 0},
{"i915_display_info", i915_display_info, 0},
{"i915_engine_info", i915_engine_info, 0},
{"i915_rcs_topology", i915_rcs_topology, 0},
{"i915_shrinker_info", i915_shrinker_info, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0},
{"i915_wa_registers", i915_wa_registers, 0},
{"i915_ddb_info", i915_ddb_info, 0},
{"i915_sseu_status", i915_sseu_status, 0},
{"i915_drrs_status", i915_drrs_status, 0},
{"i915_rps_boost_info", i915_rps_boost_info, 0},
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
} i915_debugfs_files[] = {
{"i915_wedged", &i915_wedged_fops},
{"i915_cache_sharing", &i915_cache_sharing_fops},
{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
{"i915_ring_test_irq", &i915_ring_test_irq_fops},
{"i915_gem_drop_caches", &i915_drop_caches_fops},
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
{"i915_error_state", &i915_error_state_fops},
{"i915_gpu_info", &i915_gpu_info_fops},
#endif
{"i915_fifo_underrun_reset", &i915_fifo_underrun_reset_ops},
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
{"i915_spr_wm_latency", &i915_spr_wm_latency_fops},
{"i915_cur_wm_latency", &i915_cur_wm_latency_fops},
{"i915_fbc_false_color", &i915_fbc_false_color_fops},
{"i915_dp_test_data", &i915_displayport_test_data_fops},
{"i915_dp_test_type", &i915_displayport_test_type_fops},
{"i915_dp_test_active", &i915_displayport_test_active_fops},
{"i915_guc_log_level", &i915_guc_log_level_fops},
{"i915_guc_log_relay", &i915_guc_log_relay_fops},
{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
{"i915_hpd_short_storm_ctl", &i915_hpd_short_storm_ctl_fops},
{"i915_ipc_status", &i915_ipc_status_fops},
{"i915_drrs_ctl", &i915_drrs_ctl_fops},
{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
};
int i915_debugfs_register(struct drm_i915_private *dev_priv)
{
struct drm_minor *minor = dev_priv->drm.primary;
struct dentry *ent;
int i;
ent = debugfs_create_file("i915_forcewake_user", S_IRUSR,
minor->debugfs_root, to_i915(minor->dev),
&i915_forcewake_fops);
if (!ent)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
ent = debugfs_create_file(i915_debugfs_files[i].name,
S_IRUGO | S_IWUSR,
minor->debugfs_root,
to_i915(minor->dev),
i915_debugfs_files[i].fops);
if (!ent)
return -ENOMEM;
}
return drm_debugfs_create_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
}
struct dpcd_block {
/* DPCD dump start address. */
unsigned int offset;
/* DPCD dump end address, inclusive. If unset, .size will be used. */
unsigned int end;
/* DPCD dump size. Used if .end is unset. If unset, defaults to 1. */
size_t size;
/* Only valid for eDP. */
bool edp;
};
static const struct dpcd_block i915_dpcd_debug[] = {
{ .offset = DP_DPCD_REV, .size = DP_RECEIVER_CAP_SIZE },
{ .offset = DP_PSR_SUPPORT, .end = DP_PSR_CAPS },
{ .offset = DP_DOWNSTREAM_PORT_0, .size = 16 },
{ .offset = DP_LINK_BW_SET, .end = DP_EDP_CONFIGURATION_SET },
{ .offset = DP_SINK_COUNT, .end = DP_ADJUST_REQUEST_LANE2_3 },
{ .offset = DP_SET_POWER },
{ .offset = DP_EDP_DPCD_REV },
{ .offset = DP_EDP_GENERAL_CAP_1, .end = DP_EDP_GENERAL_CAP_3 },
{ .offset = DP_EDP_DISPLAY_CONTROL_REGISTER, .end = DP_EDP_BACKLIGHT_FREQ_CAP_MAX_LSB },
{ .offset = DP_EDP_DBC_MINIMUM_BRIGHTNESS_SET, .end = DP_EDP_DBC_MAXIMUM_BRIGHTNESS_SET },
};
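/*
 * i915_dpcd dumps each block above as "<offset>: <hex bytes>", reading
 * at most sizeof(buf) bytes per block via drm_dp_dpcd_read(), and is
 * only available while the connector is connected.
 */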
static int i915_dpcd_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct intel_dp *intel_dp =
enc_to_intel_dp(&intel_attached_encoder(connector)->base);
uint8_t buf[16];
ssize_t err;
int i;
if (connector->status != connector_status_connected)
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
const struct dpcd_block *b = &i915_dpcd_debug[i];
size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
if (b->edp &&
connector->connector_type != DRM_MODE_CONNECTOR_eDP)
continue;
/* low tech for now */
if (WARN_ON(size > sizeof(buf)))
continue;
err = drm_dp_dpcd_read(&intel_dp->aux, b->offset, buf, size);
if (err < 0)
seq_printf(m, "%04x: ERROR %d\n", b->offset, (int)err);
else
seq_printf(m, "%04x: %*ph\n", b->offset, (int)err, buf);
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_dpcd);
static int i915_panel_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct intel_dp *intel_dp =
enc_to_intel_dp(&intel_attached_encoder(connector)->base);
if (connector->status != connector_status_connected)
return -ENODEV;
seq_printf(m, "Panel power up delay: %d\n",
intel_dp->panel_power_up_delay);
seq_printf(m, "Panel power down delay: %d\n",
intel_dp->panel_power_down_delay);
seq_printf(m, "Backlight on delay: %d\n",
intel_dp->backlight_on_delay);
seq_printf(m, "Backlight off delay: %d\n",
intel_dp->backlight_off_delay);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_panel);
static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct intel_connector *intel_connector = to_intel_connector(connector);
if (connector->status != connector_status_connected)
return -ENODEV;
/* HDCP is only supported if the connector provides a shim */
if (!intel_connector->hdcp.shim)
return -EINVAL;
seq_printf(m, "%s:%d HDCP version: ", connector->name,
connector->base.id);
seq_printf(m, "%s ", !intel_hdcp_capable(intel_connector) ?
"None" : "HDCP1.4");
seq_puts(m, "\n");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability);
/**
* i915_debugfs_connector_add - add i915 specific connector debugfs files
* @connector: pointer to a registered drm_connector
*
* Cleanup will be done by drm_connector_unregister() through a call to
* drm_debugfs_connector_remove().
*
* Returns 0 on success, negative error codes on error.
*/
int i915_debugfs_connector_add(struct drm_connector *connector)
{
struct dentry *root = connector->debugfs_entry;
/* The connector must have been registered beforehand. */
if (!root)
return -ENODEV;
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
debugfs_create_file("i915_dpcd", S_IRUGO, root,
connector, &i915_dpcd_fops);
if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
debugfs_create_file("i915_panel_timings", S_IRUGO, root,
connector, &i915_panel_fops);
debugfs_create_file("i915_psr_sink_status", S_IRUGO, root,
connector, &i915_psr_sink_status_fops);
}
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) {
debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root,
connector, &i915_hdcp_sink_capability_fops);
}
return 0;
}