drm/i915: swizzling support for snb/ivb
We have to do this manually. Somebody had a Great Idea. I've measured speed-ups just a few percent above the noise level (below 5% for the best case), but no slowdowns. Chris Wilson measured quite a bit more (10-20% above the usual snb variance) on a more recent and better tuned version of sna, but also recorded a few slow-downs on benchmarks known for uglier amounts of snb-induced variance. v2: Incorporate Ben Widawsky's preliminary review comments and elaborate a bit about the performance impact in the changelog. v3: Add a comment as to why we don't need to check the 3rd memory channel. v4: Fixup whitespace. Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Eric Anholt <eric@anholt.net> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
parent
172975aa74
commit
f691e2f4ce
|
@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
|
|||
i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
|
||||
|
||||
mutex_lock(&dev->struct_mutex);
|
||||
ret = i915_gem_init_ringbuffer(dev);
|
||||
ret = i915_gem_init_hw(dev);
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
|
@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
|
|||
mutex_lock(&dev->struct_mutex);
|
||||
dev_priv->mm.suspended = 0;
|
||||
|
||||
error = i915_gem_init_ringbuffer(dev);
|
||||
error = i915_gem_init_hw(dev);
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
|
||||
if (HAS_PCH_SPLIT(dev))
|
||||
|
@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
|
|||
!dev_priv->mm.suspended) {
|
||||
dev_priv->mm.suspended = 0;
|
||||
|
||||
i915_gem_init_swizzling(dev);
|
||||
|
||||
dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
|
||||
if (HAS_BSD(dev))
|
||||
dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
|
||||
|
|
|
@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
|
|||
uint32_t read_domains,
|
||||
uint32_t write_domain);
|
||||
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
|
||||
int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
|
||||
int __must_check i915_gem_init_hw(struct drm_device *dev);
|
||||
void i915_gem_init_swizzling(struct drm_device *dev);
|
||||
void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
|
||||
void i915_gem_do_init(struct drm_device *dev,
|
||||
unsigned long start,
|
||||
|
|
|
@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void i915_gem_init_swizzling(struct drm_device *dev)
|
||||
{
|
||||
drm_i915_private_t *dev_priv = dev->dev_private;
|
||||
|
||||
if (INTEL_INFO(dev)->gen < 6 ||
|
||||
dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
|
||||
return;
|
||||
|
||||
I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
|
||||
DISP_TILE_SURFACE_SWIZZLING);
|
||||
|
||||
I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
|
||||
if (IS_GEN6(dev))
|
||||
I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
|
||||
else
|
||||
I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
|
||||
}
|
||||
int
|
||||
i915_gem_init_ringbuffer(struct drm_device *dev)
|
||||
i915_gem_init_hw(struct drm_device *dev)
|
||||
{
|
||||
drm_i915_private_t *dev_priv = dev->dev_private;
|
||||
int ret;
|
||||
|
||||
i915_gem_init_swizzling(dev);
|
||||
|
||||
ret = intel_init_render_ring_buffer(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
|
|||
mutex_lock(&dev->struct_mutex);
|
||||
dev_priv->mm.suspended = 0;
|
||||
|
||||
ret = i915_gem_init_ringbuffer(dev);
|
||||
ret = i915_gem_init_hw(dev);
|
||||
if (ret != 0) {
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
return ret;
|
||||
|
|
|
@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
|
|||
uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
|
||||
|
||||
if (INTEL_INFO(dev)->gen >= 6) {
|
||||
uint32_t dimm_c0, dimm_c1;
|
||||
dimm_c0 = I915_READ(MAD_DIMM_C0);
|
||||
dimm_c1 = I915_READ(MAD_DIMM_C1);
|
||||
dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
|
||||
dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
|
||||
/* Enable swizzling when the channels are populated with
|
||||
* identically sized dimms. We don't need to check the 3rd
|
||||
* channel because no cpu with gpu attached ships in that
|
||||
* configuration. Also, swizzling only makes sense for 2
|
||||
* channels anyway. */
|
||||
if (dimm_c0 == dimm_c1) {
|
||||
swizzle_x = I915_BIT_6_SWIZZLE_9_10;
|
||||
swizzle_y = I915_BIT_6_SWIZZLE_9;
|
||||
} else {
|
||||
swizzle_x = I915_BIT_6_SWIZZLE_NONE;
|
||||
swizzle_y = I915_BIT_6_SWIZZLE_NONE;
|
||||
}
|
||||
} else if (IS_GEN5(dev)) {
|
||||
/* On Ironlake whatever DRAM config, GPU always do
|
||||
* same swizzling setup.
|
||||
|
|
|
@ -295,6 +295,12 @@
|
|||
#define FENCE_REG_SANDYBRIDGE_0 0x100000
|
||||
#define SANDYBRIDGE_FENCE_PITCH_SHIFT 32
|
||||
|
||||
/* control register for cpu gtt access */
|
||||
#define TILECTL 0x101000
|
||||
#define TILECTL_SWZCTL (1 << 0)
|
||||
#define TILECTL_TLB_PREFETCH_DIS (1 << 2)
|
||||
#define TILECTL_BACKSNOOP_DIS (1 << 3)
|
||||
|
||||
/*
|
||||
* Instruction and interrupt control regs
|
||||
*/
|
||||
|
@ -318,6 +324,11 @@
|
|||
#define RING_MAX_IDLE(base) ((base)+0x54)
|
||||
#define RING_HWS_PGA(base) ((base)+0x80)
|
||||
#define RING_HWS_PGA_GEN6(base) ((base)+0x2080)
|
||||
#define ARB_MODE 0x04030
|
||||
#define ARB_MODE_SWIZZLE_SNB (1<<4)
|
||||
#define ARB_MODE_SWIZZLE_IVB (1<<5)
|
||||
#define ARB_MODE_ENABLE(x) GFX_MODE_ENABLE(x)
|
||||
#define ARB_MODE_DISABLE(x) GFX_MODE_DISABLE(x)
|
||||
#define RENDER_HWS_PGA_GEN7 (0x04080)
|
||||
#define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id)
|
||||
#define DONE_REG 0x40b0
|
||||
|
@ -1037,6 +1048,29 @@
|
|||
#define C0DRB3 0x10206
|
||||
#define C1DRB3 0x10606
|
||||
|
||||
/** snb MCH registers for reading the DRAM channel configuration */
|
||||
#define MAD_DIMM_C0 (MCHBAR_MIRROR_BASE_SNB + 0x5004)
|
||||
#define MAD_DIMM_C1 (MCHBAR_MIRROR_BASE_SNB + 0x5008)
|
||||
#define MAD_DIMM_C2 (MCHBAR_MIRROR_BASE_SNB + 0x500C)
|
||||
#define MAD_DIMM_ECC_MASK (0x3 << 24)
|
||||
#define MAD_DIMM_ECC_OFF (0x0 << 24)
|
||||
#define MAD_DIMM_ECC_IO_ON_LOGIC_OFF (0x1 << 24)
|
||||
#define MAD_DIMM_ECC_IO_OFF_LOGIC_ON (0x2 << 24)
|
||||
#define MAD_DIMM_ECC_ON (0x3 << 24)
|
||||
#define MAD_DIMM_ENH_INTERLEAVE (0x1 << 22)
|
||||
#define MAD_DIMM_RANK_INTERLEAVE (0x1 << 21)
|
||||
#define MAD_DIMM_B_WIDTH_X16 (0x1 << 20) /* X8 chips if unset */
|
||||
#define MAD_DIMM_A_WIDTH_X16 (0x1 << 19) /* X8 chips if unset */
|
||||
#define MAD_DIMM_B_DUAL_RANK (0x1 << 18)
|
||||
#define MAD_DIMM_A_DUAL_RANK (0x1 << 17)
|
||||
#define MAD_DIMM_A_SELECT (0x1 << 16)
|
||||
/* DIMM sizes are in multiples of 256mb. */
|
||||
#define MAD_DIMM_B_SIZE_SHIFT 8
|
||||
#define MAD_DIMM_B_SIZE_MASK (0xff << MAD_DIMM_B_SIZE_SHIFT)
|
||||
#define MAD_DIMM_A_SIZE_SHIFT 0
|
||||
#define MAD_DIMM_A_SIZE_MASK (0xff << MAD_DIMM_A_SIZE_SHIFT)
|
||||
|
||||
|
||||
/* Clocking configuration register */
|
||||
#define CLKCFG 0x10c00
|
||||
#define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */
|
||||
|
|
Loading…
Reference in New Issue