drm/i915: Fallback to using CPU relocations for large batch buffers
If the batch buffer is too large to fit into the aperture and we need a GTT mapping for relocations, we currently fail. This only applies to a subset of machines for a subset of environments, which is quite undesirable. We can simply check after failing to insert the batch into the GTT whether we only need a mappable binding for relocation and, if so, revert to a non-mappable binding and an alternate relocation method. However, using relocate_entry_cpu() is excruciatingly slow for large buffers on non-LLC machines, as the entire buffer requires clflushing before and after the relocation handling. Alternatively, we can implement a third relocation method that only clflushes around the relocation entry. This is still slower than updating through the GTT, so we prefer using the GTT where possible, but it is orders of magnitude faster as we typically do not have to clflush the entire buffer.

An alternative idea of using a temporary WC mapping of the backing store is promising (it should be faster than using the GTT itself), but requires fairly extensive arch/x86 support, along the lines of kmap_atomic_prot_pfn() (which is not universally implemented even for x86).

Testcase: igt/gem_exec_big #pnv,byt
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88392
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: Add a WARN_ONCE for the impossible reloc case and explain in a short comment why we want to avoid ping-pong.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
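As a rough illustration of the third relocation method described above, here is a minimal userspace sketch of the clflush-bracketed dword write that the new relocate_entry_clflush() path in the diff below performs: flush the target cacheline, poke the presumed offset, flush again so a non-coherent reader sees the update. _mm_clflush()/_mm_mfence() merely stand in for the kernel's drm_clflush_virt_range(), and the buffer and offset are made up for the demo.

/* Userspace sketch only: mimics the clflush-before/after pattern of the
 * kernel's clflush_write32(); it is not the i915 code itself. */
#include <stdint.h>
#include <stdio.h>
#include <emmintrin.h>		/* _mm_clflush, _mm_mfence (SSE2) */

static void clflush_write32(void *addr, uint32_t value)
{
	_mm_clflush(addr);		/* drop any stale cached copy */
	*(uint32_t *)addr = value;	/* patch the relocation dword */
	_mm_clflush(addr);		/* push the update out to memory */
	_mm_mfence();			/* order the flushes against later reads */
}

int main(void)
{
	uint32_t batch[1024] = { 0 };	/* stand-in for one batch-buffer page */

	clflush_write32(&batch[16], 0xdeadbeef);	/* fake presumed offset */
	printf("reloc dword now 0x%08x\n", batch[16]);
	return 0;
}

Bracketing each write keeps the flush cost proportional to the number of relocations rather than to the size of the whole object, which is why this path is so much faster than relocate_entry_cpu() on non-LLC parts.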
parent 6fafab76d5
commit edf4427b80
@@ -251,7 +251,6 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
 
@@ -337,6 +336,51 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static void
+clflush_write32(void *addr, uint32_t value)
+{
+	/* This is not a fast path, so KISS. */
+	drm_clflush_virt_range(addr, sizeof(uint32_t));
+	*(uint32_t *)addr = value;
+	drm_clflush_virt_range(addr, sizeof(uint32_t));
+}
+
+static int
+relocate_entry_clflush(struct drm_i915_gem_object *obj,
+		       struct drm_i915_gem_relocation_entry *reloc,
+		       uint64_t target_offset)
+{
+	struct drm_device *dev = obj->base.dev;
+	uint32_t page_offset = offset_in_page(reloc->offset);
+	uint64_t delta = (int)reloc->delta + target_offset;
+	char *vaddr;
+	int ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret)
+		return ret;
+
+	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+				reloc->offset >> PAGE_SHIFT));
+	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+
+	if (INTEL_INFO(dev)->gen >= 8) {
+		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+
+		if (page_offset == 0) {
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+		}
+
+		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+	}
+
+	kunmap_atomic(vaddr);
+
+	return 0;
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
@@ -426,8 +470,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	if (use_cpu_reloc(obj))
 		ret = relocate_entry_cpu(obj, reloc, target_offset);
-	else
+	else if (obj->map_and_fenceable)
 		ret = relocate_entry_gtt(obj, reloc, target_offset);
+	else if (cpu_has_clflush)
+		ret = relocate_entry_clflush(obj, reloc, target_offset);
+	else {
+		WARN_ONCE(1, "Impossible case in relocation handling\n");
+		ret = -ENODEV;
+	}
 
 	if (ret)
 		return ret;
@@ -525,6 +575,12 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 	return ret;
 }
 
+static bool only_mappable_for_reloc(unsigned int flags)
+{
+	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
+		__EXEC_OBJECT_NEEDS_MAP;
+}
+
 static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_engine_cs *ring,
@@ -536,14 +592,21 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 	int ret;
 
 	flags = 0;
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-		flags |= PIN_GLOBAL | PIN_MAPPABLE;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
-		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (!drm_mm_node_allocated(&vma->node)) {
+		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+			flags |= PIN_GLOBAL | PIN_MAPPABLE;
+		if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+			flags |= PIN_GLOBAL;
+		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
+			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	}
 
 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
+	    only_mappable_for_reloc(entry->flags))
+		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->alignment,
+					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
 	if (ret)
 		return ret;
 
@@ -605,13 +668,14 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
-		return true;
-
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
 
+	/* avoid costly ping-pong once a batch bo ended up non-mappable */
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
+		return !only_mappable_for_reloc(entry->flags);
+
 	return false;
 }