Merge tag 'drm-intel-gt-next-2022-03-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Cross-subsystem Changes:

- drm-next backmerge for buddy allocator changes

Driver Changes:

- Skip i915_perf init for DG2 as it is not yet enabled (Ram)
- Add missing workarounds for DG2 (Clint)
- Add 64K page/align support for platforms like DG2 that require it (Matt A, Ram, Bob)
- Add accelerated migration support for DG2 (Matt A)
- Add flat CCS support for XeHP SDV (Abdiel, Ram)
- Add Compute Command Streamer (CCS) engine support for XeHP SDV (Michel, Daniele, Aravind, Matt R)
- Don't support parallel submission on compute / render (Matt B, Matt R)
- Disable i915 build on PREEMPT_RT until RT behaviour fixed (Sebastian)
- Remove RPS interrupt support for TGL+ (Jose)
- Fix S/R with PM_EARLY for non-GTT mappable objects on DG2 (Matt, Lucas)
- Skip stolen memory init if it is fully reserved (Jose)
- Use iosys_map for GuC data structures that may be in LMEM BAR or SMEM (Lucas)
- Do not complain about stale GuC reset notifications for banned contexts (John)
- Move context descriptor fields to intel_lrc.h
- Start adding support for small BAR (Matt A)
- Clarify vma lifetime (Thomas)
- Simplify subplatform detection on TGL (Jose)
- Correct the param count for unset GuC SLPC param (Vinay, Umesh)
- Read RP_STATE_CAP correctly on Gen12 with GuC SLPC (Vinay)
- Initialize GuC submission locks and queues early (Daniele)
- Fix GuC flag query helper function to not modify state (John)
- Drop fake lmem support now we have real hardware available (Lucas)
- Move misplaced W/A to their correct locations (Srinivasan)
- Use get_reset_domain() helper (Tejas)
- Move context descriptor fields to intel_lrc.h (Matt R)
- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YiBzY1dM7bKwMQ3H@jlahtine-mobl.ger.corp.intel.com
commit 2ab82efeee
@@ -3,6 +3,7 @@ config DRM_I915
tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
depends on DRM
depends on X86 && PCI
depends on !PREEMPT_RT
select INTEL_GTT
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
@@ -19,11 +19,3 @@ config DRM_I915_UNSTABLE
Recommended for driver developers _only_.

If in the slightest bit of doubt, say "N".

config DRM_I915_UNSTABLE_FAKE_LMEM
bool "Enable the experimental fake lmem"
depends on DRM_I915_UNSTABLE
default n
help
Convert some system memory into a fake local memory region for
testing.
@@ -1321,7 +1321,7 @@ tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);

if (crtc_state->port_clock > 270000) {
if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) {
if (IS_TGL_UY(dev_priv)) {
return intel_get_buf_trans(&tgl_uy_combo_phy_trans_dp_hbr2,
n_entries);
} else {
@@ -265,7 +265,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
struct intel_memory_region *mem = obj->mm.region;

info->apertures->ranges[0].base = mem->io_start;
info->apertures->ranges[0].size = mem->total;
info->apertures->ranges[0].size = mem->io_size;

/* Use fbdev's framebuffer from lmem for discrete */
info->fix.smem_start =
@@ -670,6 +670,16 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
goto out_err;
}

/*
* We don't support breadcrumb handshake on these
* classes
*/
if (siblings[n]->class == RENDER_CLASS ||
siblings[n]->class == COMPUTE_CLASS) {
err = -EINVAL;
goto out_err;
}

if (n) {
if (prev_engine.engine_class !=
ci.engine_class) {
@@ -272,12 +272,6 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;

/*
* Note that the vma keeps an object reference while
* it is active, so it *should* not sleep while we
* destroy it. Our debug code errs insits it *might*.
* For the moment, play along.
*/
spin_lock(&obj->vma.lock);
while ((vma = list_first_entry_or_null(&obj->vma.list,
struct i915_vma,

@@ -285,13 +279,7 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
GEM_BUG_ON(vma->obj != obj);
spin_unlock(&obj->vma.lock);

/* Verify that the vma is unbound under the vm mutex. */
mutex_lock(&vma->vm->mutex);
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
__i915_vma_unbind(vma);
mutex_unlock(&vma->vm->mutex);

__i915_vma_put(vma);
i915_vma_destroy(vma);

spin_lock(&obj->vma.lock);
}
@@ -319,16 +319,23 @@ struct drm_i915_gem_object {
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY BIT(5)
/*
* Object is likely never accessed by the CPU. This will prioritise the BO to be
* allocated in the non-mappable portion of lmem. This is merely a hint, and if
* dealing with userspace objects the CPU fault handler is free to ignore this.
*/
#define I915_BO_ALLOC_GPU_ONLY BIT(6)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
I915_BO_ALLOC_CPU_CLEAR | \
I915_BO_ALLOC_USER | \
I915_BO_ALLOC_PM_VOLATILE | \
I915_BO_ALLOC_PM_EARLY)
#define I915_BO_READONLY BIT(6)
#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(8)
#define I915_BO_WAS_BOUND_BIT 9
I915_BO_ALLOC_PM_EARLY | \
I915_BO_ALLOC_GPU_ONLY)
#define I915_BO_READONLY BIT(7)
#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(9)
#define I915_BO_WAS_BOUND_BIT 10
/**
* @mem_flags - Mutable placement-related flags
*
@@ -358,6 +358,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
!i915_gem_object_has_iomem(obj))
return ERR_PTR(-ENXIO);

if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_GPU_ONLY))
return ERR_PTR(-EINVAL);

assert_object_held(obj);

pinned = !(type & I915_MAP_OVERRIDE);
@@ -45,6 +45,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,

GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);

if (WARN_ON_ONCE(flags & I915_BO_ALLOC_GPU_ONLY &&
(flags & I915_BO_ALLOC_CPU_CLEAR ||
flags & I915_BO_ALLOC_PM_EARLY)))
return ERR_PTR(-EINVAL);

if (!mem)
return ERR_PTR(-ENODEV);

@@ -67,6 +72,17 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
if (!obj)
return ERR_PTR(-ENOMEM);

/*
* Anything smaller than the min_page_size can't be freely inserted into
* the GTT, due to alignemnt restrictions. For such special objects,
* make sure we force memcpy based suspend-resume. In the future we can
* revisit this, either by allowing special mis-aligned objects in the
* migration path, or by mapping all of LMEM upfront using cheap 1G
* GTT entries.
*/
if (default_page_size < mem->min_page_size)
flags |= I915_BO_ALLOC_PM_EARLY;

err = mem->ops->init_object(mem, obj, size, page_size, flags);
if (err)
goto err_object_free;
@@ -699,7 +699,7 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
{
return intel_memory_region_create(i915, 0,
totalram_pages() << PAGE_SHIFT,
PAGE_SIZE, 0,
PAGE_SIZE, 0, 0,
type, instance,
&shmem_region_ops);
}
@@ -492,18 +492,22 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)

/* Exclude the reserved region from driver use */
mem->region.end = reserved_base - 1;
mem->io_size = resource_size(&mem->region);

/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
reserved_total = stolen_top - reserved_base;

i915->stolen_usable_size =
resource_size(&i915->dsm) - reserved_total;

drm_dbg(&i915->drm,
"Memory reserved for graphics device: %lluK, usable: %lluK\n",
(u64)resource_size(&i915->dsm) >> 10,
((u64)resource_size(&i915->dsm) - reserved_total) >> 10);
(u64)i915->stolen_usable_size >> 10);

i915->stolen_usable_size =
resource_size(&i915->dsm) - reserved_total;
if (i915->stolen_usable_size == 0)
return 0;

/* Basic memrange allocator for stolen space. */
drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);

@@ -748,7 +752,7 @@ static int init_stolen_lmem(struct intel_memory_region *mem)

if (!io_mapping_init_wc(&mem->iomap,
mem->io_start,
resource_size(&mem->region)))
mem->io_size))
return -EIO;

/*

@@ -803,7 +807,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
I915_GTT_PAGE_SIZE_4K;

mem = intel_memory_region_create(i915, lmem_base, lmem_size,
min_page_size, io_start,
min_page_size,
io_start, lmem_size,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))

@@ -834,7 +839,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
mem = intel_memory_region_create(i915,
intel_graphics_stolen_res.start,
resource_size(&intel_graphics_stolen_res),
PAGE_SIZE, 0, type, instance,
PAGE_SIZE, 0, 0, type, instance,
&i915_region_stolen_smem_ops);
if (IS_ERR(mem))
return mem;
@@ -7,8 +7,10 @@

#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/drm_buddy.h>

#include "i915_drv.h"
#include "i915_ttm_buddy_manager.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"

@@ -22,6 +24,7 @@
#define I915_TTM_PRIO_PURGE 0
#define I915_TTM_PRIO_NO_PAGES 1
#define I915_TTM_PRIO_HAS_PAGES 2
#define I915_TTM_PRIO_NEEDS_CPU_ACCESS 3

/*
* Size of struct ttm_place vector in on-stack struct ttm_placement allocs

@@ -129,7 +132,15 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr,
place->mem_type = intel_region_to_ttm_type(mr);

if (flags & I915_BO_ALLOC_CONTIGUOUS)
place->flags = TTM_PL_FLAG_CONTIGUOUS;
place->flags |= TTM_PL_FLAG_CONTIGUOUS;
if (mr->io_size && mr->io_size < mr->total) {
if (flags & I915_BO_ALLOC_GPU_ONLY) {
place->flags |= TTM_PL_FLAG_TOPDOWN;
} else {
place->fpfn = 0;
place->lpfn = mr->io_size >> PAGE_SHIFT;
}
}
}

static void

@@ -331,6 +342,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
struct ttm_resource *res = bo->resource;

if (!obj)
return false;

@@ -344,7 +356,48 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
return false;

/* Will do for now. Our pinned objects are still on TTM's LRU lists */
return i915_gem_object_evictable(obj);
if (!i915_gem_object_evictable(obj))
return false;

switch (res->mem_type) {
case I915_PL_LMEM0: {
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, res->mem_type);
struct i915_ttm_buddy_resource *bman_res =
to_ttm_buddy_resource(res);
struct drm_buddy *mm = bman_res->mm;
struct drm_buddy_block *block;

if (!place->fpfn && !place->lpfn)
return true;

GEM_BUG_ON(!place->lpfn);

/*
* If we just want something mappable then we can quickly check
* if the current victim resource is using any of the CPU
* visible portion.
*/
if (!place->fpfn &&
place->lpfn == i915_ttm_buddy_man_visible_size(man))
return bman_res->used_visible_size > 0;

/* Real range allocation */
list_for_each_entry(block, &bman_res->blocks, link) {
unsigned long fpfn =
drm_buddy_block_offset(block) >> PAGE_SHIFT;
unsigned long lpfn = fpfn +
(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);

if (place->fpfn < lpfn && place->lpfn > fpfn)
return true;
}
return false;
} default:
break;
}

return true;
}

static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,

@@ -585,11 +638,24 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
i915_ttm_purge(obj);
}

static bool i915_ttm_resource_mappable(struct ttm_resource *res)
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);

if (!i915_ttm_cpu_maps_iomem(res))
return true;

return bman_res->used_visible_size == bman_res->base.num_pages;
}

static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
{
if (!i915_ttm_cpu_maps_iomem(mem))
return 0;

if (!i915_ttm_resource_mappable(mem))
return -EINVAL;

mem->bus.caching = ttm_write_combined;
mem->bus.is_iomem = true;

@@ -728,14 +794,15 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
* Gem forced migration using the i915_ttm_migrate() op, is allowed even
* to regions that are not in the object's list of allowable placements.
*/
static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
struct intel_memory_region *mr)
static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
struct intel_memory_region *mr,
unsigned int flags)
{
struct ttm_place requested;
struct ttm_placement placement;
int ret;

i915_ttm_place_from_region(mr, &requested, obj->flags);
i915_ttm_place_from_region(mr, &requested, flags);
placement.num_placement = 1;
placement.num_busy_placement = 1;
placement.placement = &requested;

@@ -758,6 +825,12 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
return 0;
}

static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
struct intel_memory_region *mr)
{
return __i915_ttm_migrate(obj, mr, obj->flags);
}

static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
struct sg_table *st)
{

@@ -844,7 +917,23 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
} else if (!i915_gem_object_has_pages(obj)) {
bo->priority = I915_TTM_PRIO_NO_PAGES;
} else {
bo->priority = I915_TTM_PRIO_HAS_PAGES;
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);

/*
* If we need to place an LMEM resource which doesn't need CPU
* access then we should try not to victimize mappable objects
* first, since we likely end up stealing more of the mappable
* portion. And likewise when we try to find space for a mappble
* object, we know not to ever victimize objects that don't
* occupy any mappable pages.
*/
if (i915_ttm_cpu_maps_iomem(bo->resource) &&
i915_ttm_buddy_man_visible_size(man) < man->size &&
!(obj->flags & I915_BO_ALLOC_GPU_ONLY))
bo->priority = I915_TTM_PRIO_NEEDS_CPU_ACCESS;
else
bo->priority = I915_TTM_PRIO_HAS_PAGES;
}

ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);

@@ -900,6 +989,31 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}

if (!i915_ttm_resource_mappable(bo->resource)) {
int err = -ENODEV;
int i;

for (i = 0; i < obj->mm.n_placements; i++) {
struct intel_memory_region *mr = obj->mm.placements[i];
unsigned int flags;

if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM)
continue;

flags = obj->flags;
flags &= ~I915_BO_ALLOC_GPU_ONLY;
err = __i915_ttm_migrate(obj, mr, flags);
if (!err)
break;
}

if (err) {
drm_dbg(dev, "Unable to make resource CPU accessible\n");
dma_resv_unlock(bo->base.resv);
return VM_FAULT_SIGBUS;
}
}

if (drm_dev_enter(dev, &idx)) {
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);

@@ -1103,7 +1217,7 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,

mr = intel_memory_region_create(i915, 0,
totalram_pages() << PAGE_SHIFT,
PAGE_SIZE, 0,
PAGE_SIZE, 0, 0,
type, instance,
&ttm_system_region_ops);
if (IS_ERR(mr))
@@ -500,7 +500,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
int bit;
int err = 0;

mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
if (IS_ERR(mem)) {
pr_err("%s failed to create memory region\n", __func__);
return PTR_ERR(mem);

@@ -1345,7 +1345,7 @@ try_again:

err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
if (err == -ENXIO || err == -E2BIG) {
if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
i915_gem_object_put(obj);
size >>= 1;
goto try_again;

@@ -1479,6 +1479,65 @@ out:
return err;
}

static int igt_ppgtt_compact(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
int err;

/*
* Simple test to catch issues with compact 64K pages -- since the pt is
* compacted to 256B that gives us 32 entries per pt, however since the
* backing page for the pt is 4K, any extra entries we might incorrectly
* write out should be ignored by the HW. If ever hit such a case this
* test should catch it since some of our writes would land in scratch.
*/

if (!HAS_64K_PAGES(i915)) {
pr_info("device lacks compact 64K page support, skipping\n");
return 0;
}

if (!HAS_LMEM(i915)) {
pr_info("device lacks LMEM support, skipping\n");
return 0;
}

/* We want the range to cover multiple page-table boundaries. */
obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
if (IS_ERR(obj))
return PTR_ERR(obj);

err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
goto out_put;

if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
pr_info("LMEM compact unable to allocate huge-page(s)\n");
goto out_unpin;
}

/*
* Disable 2M GTT pages by forcing the page-size to 64K for the GTT
* insertion.
*/
obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;

err = igt_write_huge(i915, obj);
if (err)
pr_err("LMEM compact write-huge failed\n");

out_unpin:
i915_gem_object_unpin_pages(obj);
out_put:
i915_gem_object_put(obj);

if (err == -ENOMEM)
err = 0;

return err;
}

static int igt_tmpfs_fallback(void *arg)
{
struct drm_i915_private *i915 = arg;

@@ -1736,6 +1795,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_tmpfs_fallback),
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
SUBTEST(igt_ppgtt_compact),
};

if (!HAS_PPGTT(i915)) {
@@ -40,6 +40,7 @@ struct tiled_blits {
struct blit_buffer scratch;
struct i915_vma *batch;
u64 hole;
u64 align;
u32 width;
u32 height;
};

@@ -411,14 +412,19 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
goto err_free;
}

hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
t->align = max(t->align,
i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));

hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
hole_size *= 2; /* room to maneuver */
hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
hole_size += 2 * t->align; /* padding on either side */

mutex_lock(&t->ce->vm->mutex);
memset(&hole, 0, sizeof(hole));
err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
hole_size, 0, I915_COLOR_UNEVICTABLE,
hole_size, t->align,
I915_COLOR_UNEVICTABLE,
0, U64_MAX,
DRM_MM_INSERT_BEST);
if (!err)

@@ -429,7 +435,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
goto err_put;
}

t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
t->hole = hole.start + t->align;
pr_info("Using hole at %llx\n", t->hole);

err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);

@@ -456,7 +462,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
static int tiled_blits_prepare(struct tiled_blits *t,
struct rnd_state *prng)
{
u64 offset = PAGE_ALIGN(t->width * t->height * 4);
u64 offset = round_up(t->width * t->height * 4, t->align);
u32 *map;
int err;
int i;

@@ -487,8 +493,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
u64 offset =
round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
int err;

/* We want to check position invariant tiling across GTT eviction */

@@ -501,7 +506,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)

/* Reposition so that we overlap the old addresses, and slightly off */
err = tiled_blit(t,
&t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
&t->buffers[2], t->hole + t->align,
&t->buffers[1], t->hole + 3 * offset / 2);
if (err)
return err;
@@ -885,7 +885,9 @@ out_file:
return err;
}

static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
struct i915_vma *vma,
struct intel_engine_cs *engine)
{
u32 *cmd;

@@ -896,7 +898,7 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *v
return PTR_ERR(cmd);

*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE));
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
*cmd++ = lower_32_bits(vma->node.start);
*cmd++ = upper_32_bits(vma->node.start);
*cmd = MI_BATCH_BUFFER_END;

@@ -957,7 +959,7 @@ retry:
if (err)
goto err_vma;

err = rpcs_query_batch(rpcs, vma);
err = rpcs_query_batch(rpcs, vma, ce->engine);
if (err)
goto err_batch;
@@ -8,10 +8,13 @@

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_migrate.h"
#include "i915_ttm_buddy_manager.h"

#include "huge_gem_object.h"
#include "i915_selftest.h"

@@ -169,7 +172,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,

out:
i915_gem_object_lock(obj, NULL);
__i915_vma_put(vma);
i915_vma_destroy(vma);
i915_gem_object_unlock(obj);
return err;
}

@@ -266,7 +269,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
return err;

i915_gem_object_lock(obj, NULL);
__i915_vma_put(vma);
i915_vma_destroy(vma);
i915_gem_object_unlock(obj);

if (igt_timeout(end_time,

@@ -1001,6 +1004,331 @@ static int igt_mmap(void *arg)
return 0;
}

static void igt_close_objects(struct drm_i915_private *i915,
struct list_head *objects)
{
struct drm_i915_gem_object *obj, *on;

list_for_each_entry_safe(obj, on, objects, st_link) {
i915_gem_object_lock(obj, NULL);
if (i915_gem_object_has_pinned_pages(obj))
i915_gem_object_unpin_pages(obj);
/* No polluting the memory region between tests */
__i915_gem_object_put_pages(obj);
i915_gem_object_unlock(obj);
list_del(&obj->st_link);
i915_gem_object_put(obj);
}

cond_resched();

i915_gem_drain_freed_objects(i915);
}

static void igt_make_evictable(struct list_head *objects)
{
struct drm_i915_gem_object *obj;

list_for_each_entry(obj, objects, st_link) {
i915_gem_object_lock(obj, NULL);
if (i915_gem_object_has_pinned_pages(obj))
i915_gem_object_unpin_pages(obj);
i915_gem_object_unlock(obj);
}

cond_resched();
}

static int igt_fill_mappable(struct intel_memory_region *mr,
struct list_head *objects)
{
u64 size, total;
int err;

total = 0;
size = mr->io_size;
do {
struct drm_i915_gem_object *obj;

obj = i915_gem_object_create_region(mr, size, 0, 0);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_close;
}

list_add(&obj->st_link, objects);

err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
if (err != -ENXIO && err != -ENOMEM)
goto err_close;

if (size == mr->min_page_size) {
err = 0;
break;
}

size >>= 1;
continue;
}

total += obj->base.size;
} while (1);

pr_info("%s filled=%lluMiB\n", __func__, total >> 20);
return 0;

err_close:
igt_close_objects(mr->i915, objects);
return err;
}

static int ___igt_mmap_migrate(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
unsigned long addr,
bool unfaultable)
{
struct vm_area_struct *area;
int err = 0, i;

pr_info("igt_mmap(%s, %d) @ %lx\n",
obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);

mmap_read_lock(current->mm);
area = vma_lookup(current->mm, addr);
mmap_read_unlock(current->mm);
if (!area) {
pr_err("%s: Did not create a vm_area_struct for the mmap\n",
obj->mm.region->name);
err = -EINVAL;
goto out_unmap;
}

for (i = 0; i < obj->base.size / sizeof(u32); i++) {
u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
u32 x;

if (get_user(x, ux)) {
err = -EFAULT;
if (!unfaultable) {
pr_err("%s: Unable to read from mmap, offset:%zd\n",
obj->mm.region->name, i * sizeof(x));
goto out_unmap;
}

continue;
}

if (unfaultable) {
pr_err("%s: Faulted unmappable memory\n",
obj->mm.region->name);
err = -EINVAL;
goto out_unmap;
}

if (x != expand32(POISON_INUSE)) {
pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
obj->mm.region->name,
i * sizeof(x), x, expand32(POISON_INUSE));
err = -EINVAL;
goto out_unmap;
}

x = expand32(POISON_FREE);
if (put_user(x, ux)) {
pr_err("%s: Unable to write to mmap, offset:%zd\n",
obj->mm.region->name, i * sizeof(x));
err = -EFAULT;
goto out_unmap;
}
}

if (unfaultable) {
if (err == -EFAULT)
err = 0;
} else {
obj->flags &= ~I915_BO_ALLOC_GPU_ONLY;
err = wc_check(obj);
}
out_unmap:
vm_munmap(addr, obj->base.size);
return err;
}

#define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0)
#define IGT_MMAP_MIGRATE_FILL (1 << 1)
#define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2)
#define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3)
static int __igt_mmap_migrate(struct intel_memory_region **placements,
int n_placements,
struct intel_memory_region *expected_mr,
unsigned int flags)
{
struct drm_i915_private *i915 = placements[0]->i915;
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
unsigned long addr;
LIST_HEAD(objects);
u64 offset;
int err;

obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
placements,
n_placements);
if (IS_ERR(obj))
return PTR_ERR(obj);

if (flags & IGT_MMAP_MIGRATE_TOPDOWN)
obj->flags |= I915_BO_ALLOC_GPU_ONLY;

err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset, NULL);
if (err)
goto out_put;

/*
* This will eventually create a GEM context, due to opening dummy drm
* file, which needs a tiny amount of mappable device memory for the top
* level paging structures(and perhaps scratch), so make sure we
* allocate early, to avoid tears.
*/
addr = igt_mmap_offset(i915, offset, obj->base.size,
PROT_WRITE, MAP_SHARED);
if (IS_ERR_VALUE(addr)) {
err = addr;
goto out_put;
}

if (flags & IGT_MMAP_MIGRATE_FILL) {
err = igt_fill_mappable(placements[0], &objects);
if (err)
goto out_put;
}

err = i915_gem_object_lock(obj, NULL);
if (err)
goto out_put;

err = i915_gem_object_pin_pages(obj);
if (err) {
i915_gem_object_unlock(obj);
goto out_put;
}

err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
obj->mm.pages->sgl, obj->cache_level,
i915_gem_object_is_lmem(obj),
expand32(POISON_INUSE), &rq);
i915_gem_object_unpin_pages(obj);
if (rq) {
dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
i915_gem_object_set_moving_fence(obj, &rq->fence);
i915_request_put(rq);
}
i915_gem_object_unlock(obj);
if (err)
goto out_put;

if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
igt_make_evictable(&objects);

err = ___igt_mmap_migrate(i915, obj, addr,
flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
if (!err && obj->mm.region != expected_mr) {
pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
err = -EINVAL;
}

out_put:
i915_gem_object_put(obj);
igt_close_objects(i915, &objects);
return err;
}

static int igt_mmap_migrate(void *arg)
{
struct drm_i915_private *i915 = arg;
struct intel_memory_region *system = i915->mm.regions[INTEL_REGION_SMEM];
struct intel_memory_region *mr;
enum intel_region_id id;

for_each_memory_region(mr, i915, id) {
struct intel_memory_region *mixed[] = { mr, system };
struct intel_memory_region *single[] = { mr };
struct ttm_resource_manager *man = mr->region_private;
resource_size_t saved_io_size;
int err;

if (mr->private)
continue;

if (!mr->io_size)
continue;

/*
* For testing purposes let's force small BAR, if not already
* present.
*/
saved_io_size = mr->io_size;
if (mr->io_size == mr->total) {
resource_size_t io_size = mr->io_size;

io_size = rounddown_pow_of_two(io_size >> 1);
if (io_size < PAGE_SIZE)
continue;

mr->io_size = io_size;
i915_ttm_buddy_man_force_visible_size(man,
io_size >> PAGE_SHIFT);
}

/*
* Allocate in the mappable portion, should be no suprises here.
*/
err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0);
if (err)
goto out_io_size;

/*
* Allocate in the non-mappable portion, but force migrating to
* the mappable portion on fault (LMEM -> LMEM)
*/
err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
IGT_MMAP_MIGRATE_TOPDOWN |
IGT_MMAP_MIGRATE_FILL |
IGT_MMAP_MIGRATE_EVICTABLE);
if (err)
goto out_io_size;

/*
* Allocate in the non-mappable portion, but force spilling into
* system memory on fault (LMEM -> SMEM)
*/
err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system,
IGT_MMAP_MIGRATE_TOPDOWN |
IGT_MMAP_MIGRATE_FILL);
if (err)
goto out_io_size;

/*
* Allocate in the non-mappable portion, but since the mappable
* portion is already full, and we can't spill to system memory,
* then we should expect the fault to fail.
*/
err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
IGT_MMAP_MIGRATE_TOPDOWN |
IGT_MMAP_MIGRATE_FILL |
IGT_MMAP_MIGRATE_UNFAULTABLE);
out_io_size:
mr->io_size = saved_io_size;
i915_ttm_buddy_man_force_visible_size(man,
mr->io_size >> PAGE_SHIFT);
if (err)
return err;
}

return 0;
}

static const char *repr_mmap_type(enum i915_mmap_type type)
{
switch (type) {

@@ -1426,6 +1754,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_smoke_tiling),
SUBTEST(igt_mmap_offset_exhaustion),
SUBTEST(igt_mmap),
SUBTEST(igt_mmap_migrate),
SUBTEST(igt_mmap_access),
SUBTEST(igt_mmap_revoke),
SUBTEST(igt_mmap_gpu),
@@ -201,6 +201,8 @@ static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;

if (mode & EMIT_FLUSH) {
u32 flags = 0;
u32 *cs;

@@ -219,6 +221,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)

flags |= PIPE_CONTROL_CS_STALL;

if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_FLAGS;

cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);

@@ -246,6 +251,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)

flags |= PIPE_CONTROL_CS_STALL;

if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_FLAGS;

cs = intel_ring_begin(rq, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);

@@ -618,19 +626,27 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *i915 = rq->engine->i915;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);

if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
/* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;

if (rq->engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_FLAGS;

cs = gen12_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
/* Wa_1409600907:tgl */
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
flags);

return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
start, end, lvl);
} else {
unsigned int count;
unsigned int pte = gen8_pd_index(start, 0);
unsigned int num_ptes;
u64 *vaddr;

count = gen8_pt_count(start, end);

@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

num_ptes = count;
if (pt->is_compact) {
GEM_BUG_ON(num_ptes % 16);
GEM_BUG_ON(pte % 16);
num_ptes /= 16;
pte /= 16;
}

vaddr = px_vaddr(pt);
memset64(vaddr + gen8_pd_index(start, 0),
memset64(vaddr + pte,
vm->scratch[0]->encode,
count);
num_ptes);

atomic_sub(count, &pt->used);
start += count;

@@ -453,6 +463,95 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
return idx;
}

static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,
enum i915_cache_level cache_level,
u32 flags)
{
const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;

GEM_BUG_ON(!i915_vm_is_4lvl(vm));

do {
struct i915_page_directory * const pdp =
gen8_pdp_for_page_address(vm, start);
struct i915_page_directory * const pd =
i915_pd_entry(pdp, __gen8_pte_index(start, 2));
struct i915_page_table *pt =
i915_pt_entry(pd, __gen8_pte_index(start, 1));
gen8_pte_t encode = pte_encode;
unsigned int page_size;
gen8_pte_t *vaddr;
u16 index, max;

max = I915_PDES;

if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
rem >= I915_GTT_PAGE_SIZE_2M &&
!__gen8_pte_index(start, 0)) {
index = __gen8_pte_index(start, 1);
encode |= GEN8_PDE_PS_2M;
page_size = I915_GTT_PAGE_SIZE_2M;

vaddr = px_vaddr(pd);
} else {
if (encode & GEN12_PPGTT_PTE_LM) {
GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
GEM_BUG_ON(!IS_ALIGNED(iter->dma,
I915_GTT_PAGE_SIZE_64K));

index = __gen8_pte_index(start, 0) / 16;
page_size = I915_GTT_PAGE_SIZE_64K;

max /= 16;

vaddr = px_vaddr(pd);
vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

pt->is_compact = true;
} else {
GEM_BUG_ON(pt->is_compact);
index = __gen8_pte_index(start, 0);
page_size = I915_GTT_PAGE_SIZE;
}

vaddr = px_vaddr(pt);
}

do {
GEM_BUG_ON(rem < page_size);
vaddr[index++] = encode | iter->dma;

start += page_size;
iter->dma += page_size;
rem -= page_size;
if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
if (!iter->sg)
break;

rem = sg_dma_len(iter->sg);
if (!rem)
break;

iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem;

if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
break;
}
} while (rem >= page_size && index < max);

vma_res->page_sizes_gtt |= page_size;
} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
struct sgt_dma *iter,

@@ -586,7 +685,10 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma_res);

if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
if (HAS_64K_PAGES(vm->i915))
xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
else
gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
} else {
u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

@@ -613,13 +715,56 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
gen8_pdp_for_page_index(vm, idx);
struct i915_page_directory *pd =
i915_pd_entry(pdp, gen8_pd_index(idx, 2));
struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
gen8_pte_t *vaddr;

vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
GEM_BUG_ON(pt->is_compact);

vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
u32 flags)
{
u64 idx = offset >> GEN8_PTE_SHIFT;
struct i915_page_directory * const pdp =
gen8_pdp_for_page_index(vm, idx);
struct i915_page_directory *pd =
i915_pd_entry(pdp, gen8_pd_index(idx, 2));
struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
gen8_pte_t *vaddr;

GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

if (!pt->is_compact) {
vaddr = px_vaddr(pd);
vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
pt->is_compact = true;
}

vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
enum i915_cache_level level,
u32 flags)
{
if (flags & PTE_LM)
return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
level, flags);

return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
}

static int gen8_init_scratch(struct i915_address_space *vm)
{
u32 pte_flags;

@@ -819,7 +964,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,

ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
if (HAS_64K_PAGES(gt->i915))
ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
else
ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
ppgtt->vm.clear_range = gen8_ppgtt_clear;
ppgtt->vm.foreach = gen8_ppgtt_foreach;
@@ -265,6 +265,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,

void intel_engine_destroy_pinned_context(struct intel_context *ce);

void xehp_enable_ccs_engines(struct intel_engine_cs *engine);

#define ENGINE_PHYSICAL 0
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
@@ -21,6 +21,7 @@
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_gt_pm.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_reset.h"
#include "intel_ring.h"

@@ -156,6 +157,34 @@ static const struct engine_info intel_engines[] = {
{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
},
},
[CCS0] = {
.class = COMPUTE_CLASS,
.instance = 0,
.mmio_bases = {
{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
}
},
[CCS1] = {
.class = COMPUTE_CLASS,
.instance = 1,
.mmio_bases = {
{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
}
},
[CCS2] = {
.class = COMPUTE_CLASS,
.instance = 2,
.mmio_bases = {
{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
}
},
[CCS3] = {
.class = COMPUTE_CLASS,
.instance = 3,
.mmio_bases = {
{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
}
},
};

/**

@@ -180,6 +209,8 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

switch (class) {
case COMPUTE_CLASS:
fallthrough;
case RENDER_CLASS:
switch (GRAPHICS_VER(gt->i915)) {
default:

@@ -293,6 +324,50 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir);
}

static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
{
u32 reset_domain;

if (ver >= 11) {
static const u32 engine_reset_domains[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
[VCS0] = GEN11_GRDOM_MEDIA,
[VCS1] = GEN11_GRDOM_MEDIA2,
[VCS2] = GEN11_GRDOM_MEDIA3,
[VCS3] = GEN11_GRDOM_MEDIA4,
[VCS4] = GEN11_GRDOM_MEDIA5,
[VCS5] = GEN11_GRDOM_MEDIA6,
[VCS6] = GEN11_GRDOM_MEDIA7,
[VCS7] = GEN11_GRDOM_MEDIA8,
[VECS0] = GEN11_GRDOM_VECS,
[VECS1] = GEN11_GRDOM_VECS2,
[VECS2] = GEN11_GRDOM_VECS3,
[VECS3] = GEN11_GRDOM_VECS4,
[CCS0] = GEN11_GRDOM_RENDER,
[CCS1] = GEN11_GRDOM_RENDER,
[CCS2] = GEN11_GRDOM_RENDER,
[CCS3] = GEN11_GRDOM_RENDER,
};
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
!engine_reset_domains[id]);
reset_domain = engine_reset_domains[id];
} else {
static const u32 engine_reset_domains[] = {
[RCS0] = GEN6_GRDOM_RENDER,
[BCS0] = GEN6_GRDOM_BLT,
[VCS0] = GEN6_GRDOM_MEDIA,
[VCS1] = GEN8_GRDOM_MEDIA2,
[VECS0] = GEN6_GRDOM_VECS,
};
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
!engine_reset_domains[id]);
reset_domain = engine_reset_domains[id];
}

return reset_domain;
}

static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
u8 logical_instance)
{

@@ -328,38 +403,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
if (GRAPHICS_VER(gt->i915) >= 11) {
static const u32 engine_reset_domains[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
[VCS0] = GEN11_GRDOM_MEDIA,
[VCS1] = GEN11_GRDOM_MEDIA2,
[VCS2] = GEN11_GRDOM_MEDIA3,
[VCS3] = GEN11_GRDOM_MEDIA4,
[VCS4] = GEN11_GRDOM_MEDIA5,
[VCS5] = GEN11_GRDOM_MEDIA6,
[VCS6] = GEN11_GRDOM_MEDIA7,
[VCS7] = GEN11_GRDOM_MEDIA8,
[VECS0] = GEN11_GRDOM_VECS,
[VECS1] = GEN11_GRDOM_VECS2,
[VECS2] = GEN11_GRDOM_VECS3,
[VECS3] = GEN11_GRDOM_VECS4,
};
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
!engine_reset_domains[id]);
engine->reset_domain = engine_reset_domains[id];
} else {
static const u32 engine_reset_domains[] = {
[RCS0] = GEN6_GRDOM_RENDER,
[BCS0] = GEN6_GRDOM_BLT,
[VCS0] = GEN6_GRDOM_MEDIA,
[VCS1] = GEN8_GRDOM_MEDIA2,
[VECS0] = GEN6_GRDOM_VECS,
};
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
!engine_reset_domains[id]);
engine->reset_domain = engine_reset_domains[id];
}
engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
id);
engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;

@@ -389,6 +434,12 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;

/* features common between engines sharing EUs */
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
}

engine->defaults = engine->props; /* never to change again */

engine->context_size = intel_engine_context_size(gt, engine->class);

@@ -541,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt,
return false;
}

static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
struct intel_gt_info *info = &gt->info;
int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
unsigned long ccs_mask;
unsigned int i;

if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
return;

ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
ss_per_ccs);
/*
* If all DSS in a quadrant are fused off, the corresponding CCS
* engine is not available for use.
*/
for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
info->engine_mask &= ~BIT(_CCS(i));
drm_dbg(&i915->drm, "ccs%u fused off\n", i);
}
}

/*
* Determine which engines are fused off in our particular hardware.
* Note that we have a catch-22 situation where we need to be able to access

@@ -622,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
vebox_mask, VEBOX_MASK(gt));
GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));

engine_mask_apply_compute_fuses(gt);

return info->engine_mask;
}

@@ -2019,6 +2095,23 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
return active;
}

void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
{
/*
* If there are any non-fused-off CCS engines, we need to enable CCS
* support in the RCU_MODE register. This only needs to be done once,
* so for simplicity we'll take care of this in the RCS engine's
* resume handler; since the RCS and all CCS engines belong to the
* same reset domain and are reset together, this will also take care
* of re-applying the setting after i915-triggered resets.
*/
if (!CCS_MASK(engine->gt))
return;

intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "mock_engine.c"
#include "selftest_engine.c"
@@ -33,7 +33,8 @@
#define VIDEO_ENHANCEMENT_CLASS 2
#define COPY_ENGINE_CLASS 3
#define OTHER_CLASS 4
#define MAX_ENGINE_CLASS 4
#define COMPUTE_CLASS 5
#define MAX_ENGINE_CLASS 5
#define MAX_ENGINE_INSTANCE 7

#define I915_MAX_SLICES 3

@@ -95,6 +96,7 @@ struct i915_ctx_workarounds {

#define I915_MAX_VCS 8
#define I915_MAX_VECS 4
#define I915_MAX_CCS 4

/*
* Engine IDs definitions.

@@ -117,6 +119,11 @@ enum intel_engine_id {
VECS2,
VECS3,
#define _VECS(n) (VECS0 + (n))
CCS0,
CCS1,
CCS2,
CCS3,
#define _CCS(n) (CCS0 + (n))
I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

@@ -517,6 +524,8 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
unsigned int flags;

/*
@@ -47,6 +47,7 @@ static const u8 uabi_classes[] = {
[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
};

static int engine_cmp(void *priv, const struct list_head *A,

@@ -139,6 +140,7 @@ const char *intel_engine_class_repr(u8 class)
[COPY_ENGINE_CLASS] = "bcs",
[VIDEO_DECODE_CLASS] = "vcs",
[VIDEO_ENHANCEMENT_CLASS] = "vecs",
[COMPUTE_CLASS] = "ccs",
};

if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class])

@@ -162,6 +164,7 @@ static int legacy_ring_idx(const struct legacy_ring *ring)
[COPY_ENGINE_CLASS] = { BCS0, 1 },
[VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS },
[VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS },
[COMPUTE_CLASS] = { CCS0, I915_MAX_CCS },
};

if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))

@@ -190,7 +193,7 @@ static void add_legacy_ring(struct legacy_ring *ring,
void intel_engines_driver_register(struct drm_i915_private *i915)
{
struct legacy_ring ring = {};
u8 uabi_instances[4] = {};
u8 uabi_instances[5] = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
@@ -665,9 +665,13 @@ static inline void execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
u64 desc = ce->lrc.desc;
u64 desc;
u32 tail, prev;

desc = ce->lrc.desc;
if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
desc |= lrc_desc_priority(rq_prio(rq));

/*
* WaIdleLiteRestore:bdw,skl
*

@@ -2907,6 +2911,9 @@ static int execlists_resume(struct intel_engine_cs *engine)

enable_execlists(engine);

if (engine->class == RENDER_CLASS)
xehp_enable_ccs_engines(engine);

return 0;
}

@@ -3480,7 +3487,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);

if (engine->class == RENDER_CLASS)
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);

lrc_init_wa_ctx(engine);
@ -228,11 +228,14 @@
|
|||
#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */
|
||||
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */
|
||||
#define PIPE_CONTROL_FLUSH_L3 (1<<27)
|
||||
#define PIPE_CONTROL_AMFS_FLUSH (1<<25) /* gen12+ */
|
||||
#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
|
||||
#define PIPE_CONTROL_MMIO_WRITE (1<<23)
|
||||
#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21)
|
||||
#define PIPE_CONTROL_CS_STALL (1<<20)
|
||||
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19)
|
||||
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
|
||||
#define PIPE_CONTROL_PSD_SYNC (1<<17) /* gen11+ */
|
||||
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
|
||||
#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
|
||||
#define PIPE_CONTROL_QW_WRITE (1<<14)
|
||||
|
@ -254,6 +257,18 @@
|
|||
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
|
||||
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
|
||||
|
||||
/* 3D-related flags can't be set on compute engine */
|
||||
#define PIPE_CONTROL_3D_FLAGS (\
|
||||
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
|
||||
PIPE_CONTROL_TILE_CACHE_FLUSH | \
|
||||
PIPE_CONTROL_DEPTH_STALL | \
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD | \
|
||||
PIPE_CONTROL_PSD_SYNC | \
|
||||
PIPE_CONTROL_AMFS_FLUSH | \
|
||||
PIPE_CONTROL_VF_CACHE_INVALIDATE | \
|
||||
PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
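The PIPE_CONTROL_3D_FLAGS mask collects every PIPE_CONTROL bit that is only legal on the render pipeline. A minimal sketch of how a flush helper can use it on a compute engine (the helper name is illustrative, not part of this patch):

/* Illustrative helper: drop 3D-only PIPE_CONTROL bits before emitting a
 * flush on a compute engine; render engines keep the full set of flags. */
static u32 sanitize_pipe_control_flags(const struct intel_engine_cs *engine,
				       u32 flags)
{
	if (engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_FLAGS;

	return flags;
}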
|
||||
|
||||
#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
|
||||
#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
|
||||
/* Opcodes for MI_MATH_INSTR */
|
@ -65,8 +65,6 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
|
|||
int err;
|
||||
|
||||
mem = intel_gt_setup_lmem(gt);
|
||||
if (mem == ERR_PTR(-ENODEV))
|
||||
mem = intel_gt_setup_fake_lmem(gt);
|
||||
if (IS_ERR(mem)) {
|
||||
err = PTR_ERR(mem);
|
||||
if (err == -ENODEV)
|
||||
|
@ -458,7 +456,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
|
|||
struct i915_vma *vma;
|
||||
int ret;
|
||||
|
||||
obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
|
||||
obj = i915_gem_object_create_lmem(i915, size,
|
||||
I915_BO_ALLOC_VOLATILE |
|
||||
I915_BO_ALLOC_GPU_ONLY);
|
||||
if (IS_ERR(obj))
|
||||
obj = i915_gem_object_create_stolen(i915, size);
|
||||
if (IS_ERR(obj))
|
||||
|
@ -913,6 +913,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
|
|||
return intel_uncore_read_fw(gt->uncore, reg);
|
||||
}
|
||||
|
||||
u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
|
||||
{
|
||||
int type;
|
||||
u8 sliceid, subsliceid;
|
||||
|
||||
for (type = 0; type < NUM_STEERING_TYPES; type++) {
|
||||
if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
|
||||
intel_gt_get_valid_steering(gt, type, &sliceid,
|
||||
&subsliceid);
|
||||
return intel_uncore_read_with_mcr_steering(gt->uncore,
|
||||
reg,
|
||||
sliceid,
|
||||
subsliceid);
|
||||
}
|
||||
}
|
||||
|
||||
return intel_uncore_read(gt->uncore, reg);
|
||||
}
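intel_gt_read_register() walks the known steering types and, when the register needs multicast steering, reads it with a valid slice/subslice pair; otherwise it falls back to a plain uncore read. A hedged sketch of a caller, mirroring how the flat-CCS probe later in this commit uses it (the function name here is illustrative):

/* Illustrative only: decode the flat CCS base the same way setup_lmem()
 * does further down, relying on the helper to pick valid steering. */
static u64 example_flat_ccs_base(struct intel_gt *gt)
{
	u64 base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR);

	return (base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
}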
|
||||
|
||||
void intel_gt_info_print(const struct intel_gt_info *info,
|
||||
struct drm_printer *p)
|
||||
{
|
@ -85,6 +85,7 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt,
|
|||
}
|
||||
|
||||
u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
|
||||
u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
|
||||
|
||||
void intel_gt_info_print(const struct intel_gt_info *info,
|
||||
struct drm_printer *p);
|
@ -100,7 +100,7 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity)
|
|||
if (unlikely(!intr))
|
||||
return;
|
||||
|
||||
if (class <= COPY_ENGINE_CLASS)
|
||||
if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)
|
||||
return gen11_engine_irq_handler(gt, class, instance, intr);
|
||||
|
||||
if (class == OTHER_CLASS)
|
||||
|
@ -182,6 +182,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
|
|||
/* Disable RCS, BCS, VCS and VECS class engines. */
|
||||
intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0);
|
||||
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0);
|
||||
if (CCS_MASK(gt))
|
||||
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
|
||||
|
||||
/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
|
||||
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0);
|
||||
|
@ -195,6 +197,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
|
|||
intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0);
|
||||
if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
|
||||
intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0);
|
||||
if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1))
|
||||
intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
|
||||
if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
|
||||
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
|
||||
|
||||
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
|
||||
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
|
||||
|
@ -225,6 +231,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
|
|||
/* Enable RCS, BCS, VCS and VECS class interrupts. */
|
||||
intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask);
|
||||
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask);
|
||||
if (CCS_MASK(gt))
|
||||
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
|
||||
|
||||
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
|
||||
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
|
||||
|
@ -238,6 +246,11 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
|
|||
intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask);
|
||||
if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
|
||||
intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
|
||||
if (HAS_ENGINE(gt, CCS0) || HAS_ENGINE(gt, CCS1))
|
||||
intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask);
|
||||
if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
|
||||
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);
|
||||
|
||||
/*
|
||||
* RPS interrupts will get enabled/disabled on demand when RPS itself
|
||||
* is enabled/disabled.
|
@ -465,6 +465,9 @@
|
|||
#define GEN9_PGCTL_SSB_EU210_ACK (1 << 12)
|
||||
#define GEN9_PGCTL_SSB_EU311_ACK (1 << 14)
|
||||
|
||||
#define VF_PREEMPTION _MMIO(0x83a4)
|
||||
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
|
||||
|
||||
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
|
||||
|
||||
#define GEN12_SQCM _MMIO(0x8724)
|
||||
|
@ -1057,6 +1060,7 @@
|
|||
#define FLOW_CONTROL_ENABLE REG_BIT(15)
|
||||
#define UGM_BACKUP_MODE REG_BIT(13)
|
||||
#define MDQ_ARBITRATION_MODE REG_BIT(12)
|
||||
#define SYSTOLIC_DOP_CLOCK_GATING_DIS REG_BIT(10)
|
||||
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
|
||||
#define STALL_DOP_GATING_DISABLE REG_BIT(5)
|
||||
#define THROTTLE_12_5 REG_GENMASK(4, 2)
|
||||
|
@ -1324,6 +1328,9 @@
|
|||
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
|
||||
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
|
||||
|
||||
#define GEN12_RCU_MODE _MMIO(0x14800)
|
||||
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
|
||||
|
||||
#define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
|
||||
#define CHV_FGT_DISABLE_SS0 (1 << 10)
|
||||
#define CHV_FGT_DISABLE_SS1 (1 << 11)
|
||||
|
@ -1449,6 +1456,10 @@
|
|||
#define GEN11_KCR (19)
|
||||
#define GEN11_GTPM (16)
|
||||
#define GEN11_BCS (15)
|
||||
#define GEN12_CCS3 (7)
|
||||
#define GEN12_CCS2 (6)
|
||||
#define GEN12_CCS1 (5)
|
||||
#define GEN12_CCS0 (4)
|
||||
#define GEN11_RCS0 (0)
|
||||
#define GEN11_VECS(x) (31 - (x))
|
||||
#define GEN11_VCS(x) (x)
|
||||
|
@ -1461,6 +1472,7 @@
|
|||
#define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c)
|
||||
#define GEN11_CRYPTO_RSVD_INTR_ENABLE _MMIO(0x190040)
|
||||
#define GEN11_GUNIT_CSME_INTR_ENABLE _MMIO(0x190044)
|
||||
#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048)
|
||||
|
||||
#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4))
|
||||
#define GEN11_INTR_DATA_VALID (1 << 31)
|
||||
|
@ -1486,41 +1498,9 @@
|
|||
#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec)
|
||||
#define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0)
|
||||
#define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4)
|
||||
#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100)
|
||||
#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104)
|
||||
|
||||
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
|
||||
|
||||
enum {
|
||||
INTEL_ADVANCED_CONTEXT = 0,
|
||||
INTEL_LEGACY_32B_CONTEXT,
|
||||
INTEL_ADVANCED_AD_CONTEXT,
|
||||
INTEL_LEGACY_64B_CONTEXT
|
||||
};
|
||||
|
||||
enum {
|
||||
FAULT_AND_HANG = 0,
|
||||
FAULT_AND_HALT, /* Debug only */
|
||||
FAULT_AND_STREAM,
|
||||
FAULT_AND_CONTINUE /* Unsupported */
|
||||
};
|
||||
|
||||
#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
|
||||
#define GEN8_CTX_VALID (1 << 0)
|
||||
#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
|
||||
#define GEN8_CTX_FORCE_RESTORE (1 << 2)
|
||||
#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
|
||||
#define GEN8_CTX_PRIVILEGE (1 << 8)
|
||||
#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
|
||||
#define GEN8_CTX_ID_SHIFT 32
|
||||
#define GEN8_CTX_ID_WIDTH 21
|
||||
#define GEN11_SW_CTX_ID_SHIFT 37
|
||||
#define GEN11_SW_CTX_ID_WIDTH 11
|
||||
#define GEN11_ENGINE_CLASS_SHIFT 61
|
||||
#define GEN11_ENGINE_CLASS_WIDTH 3
|
||||
#define GEN11_ENGINE_INSTANCE_SHIFT 48
|
||||
#define GEN11_ENGINE_INSTANCE_WIDTH 6
|
||||
#define XEHP_SW_CTX_ID_SHIFT 39
|
||||
#define XEHP_SW_CTX_ID_WIDTH 16
|
||||
#define XEHP_SW_COUNTER_SHIFT 58
|
||||
#define XEHP_SW_COUNTER_WIDTH 6
|
||||
|
||||
#endif /* __INTEL_GT_REGS__ */
|
||||
|
|
|
@ -107,14 +107,19 @@ void __i915_vm_close(struct i915_address_space *vm)
|
|||
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
|
||||
/* Keep the obj (and hence the vma) alive as _we_ destroy it */
|
||||
if (!kref_get_unless_zero(&obj->base.refcount))
|
||||
if (!kref_get_unless_zero(&obj->base.refcount)) {
|
||||
/*
|
||||
* Unbind the dying vma to ensure the bound_list
|
||||
* is completely drained. We leave the destruction to
|
||||
* the object destructor.
|
||||
*/
|
||||
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
|
||||
WARN_ON(__i915_vma_unbind(vma));
|
||||
continue;
|
||||
}
|
||||
|
||||
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
|
||||
WARN_ON(__i915_vma_unbind(vma));
|
||||
__i915_vma_put(vma);
|
||||
|
||||
/* Keep the obj (and hence the vma) alive as _we_ destroy it */
|
||||
i915_vma_destroy_locked(vma);
|
||||
i915_gem_object_put(obj);
|
||||
}
|
||||
GEM_BUG_ON(!list_empty(&vm->bound_list));
|
||||
|
@ -225,6 +230,18 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
|
|||
|
||||
GEM_BUG_ON(!vm->total);
|
||||
drm_mm_init(&vm->mm, 0, vm->total);
|
||||
|
||||
memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
|
||||
ARRAY_SIZE(vm->min_alignment));
|
||||
|
||||
if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915)) {
|
||||
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
|
||||
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
|
||||
} else if (HAS_64K_PAGES(vm->i915)) {
|
||||
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
|
||||
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
|
||||
}
|
||||
|
||||
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
|
||||
|
||||
INIT_LIST_HEAD(&vm->bound_list);
|
@ -29,6 +29,8 @@
|
|||
#include "i915_selftest.h"
|
||||
#include "i915_vma_resource.h"
|
||||
#include "i915_vma_types.h"
|
||||
#include "i915_params.h"
|
||||
#include "intel_memory_region.h"
|
||||
|
||||
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
|
||||
|
||||
|
@ -90,6 +92,8 @@ typedef u64 gen8_pte_t;
|
|||
|
||||
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
|
||||
|
||||
#define GEN12_PDE_64K BIT(6)
|
||||
|
||||
/*
|
||||
* Cacheability Control is a 4-bit value. The low three bits are stored in bits
|
||||
* 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
|
||||
|
@ -158,6 +162,7 @@ struct i915_page_table {
|
|||
atomic_t used;
|
||||
struct i915_page_table *stash;
|
||||
};
|
||||
bool is_compact;
|
||||
};
|
||||
|
||||
struct i915_page_directory {
|
||||
|
@ -195,6 +200,14 @@ void *__px_vaddr(struct drm_i915_gem_object *p);
|
|||
struct i915_vm_pt_stash {
|
||||
/* preallocated chains of page tables/directories */
|
||||
struct i915_page_table *pt[2];
|
||||
/*
|
||||
* Optionally override the alignment/size of the physical page that
|
||||
* contains each PT. If not set, this defaults back to the usual
|
||||
* I915_GTT_PAGE_SIZE_4K. This does not influence the other paging
|
||||
* structures. MUST be a power-of-two. ONLY applicable on discrete
|
||||
* platforms.
|
||||
*/
|
||||
int pt_sz;
|
||||
};
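The new pt_sz override is how migrate_vm() (below) asks for 64K-backed page tables on HAS_64K_PAGES platforms. A hedged sketch of preparing such a stash (the helper name is illustrative):

/* Illustrative only: request 64K physical backing for the stashed PTs on
 * discrete parts, mirroring the migrate_vm() change later in this commit. */
static void example_init_pt_stash(struct drm_i915_private *i915,
				  struct i915_vm_pt_stash *stash)
{
	memset(stash, 0, sizeof(*stash));

	if (HAS_64K_PAGES(i915))
		stash->pt_sz = I915_GTT_PAGE_SIZE_64K;
}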
|
||||
|
||||
struct i915_vma_ops {
|
||||
|
@ -223,6 +236,7 @@ struct i915_address_space {
|
|||
struct device *dma;
|
||||
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
|
||||
u64 reserved; /* size addr space reserved */
|
||||
u64 min_alignment[INTEL_MEMORY_STOLEN_LOCAL + 1];
|
||||
|
||||
unsigned int bind_async_flags;
|
||||
|
||||
|
@ -384,6 +398,25 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm)
|
|||
return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
|
||||
}
|
||||
|
||||
static inline u64 i915_vm_min_alignment(struct i915_address_space *vm,
|
||||
enum intel_memory_type type)
|
||||
{
|
||||
/* avoid INTEL_MEMORY_MOCK overflow */
|
||||
if ((int)type >= ARRAY_SIZE(vm->min_alignment))
|
||||
type = INTEL_MEMORY_SYSTEM;
|
||||
|
||||
return vm->min_alignment[type];
|
||||
}
|
||||
|
||||
static inline u64 i915_vm_obj_min_alignment(struct i915_address_space *vm,
|
||||
struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
|
||||
enum intel_memory_type type = mr ? mr->type : INTEL_MEMORY_SYSTEM;
|
||||
|
||||
return i915_vm_min_alignment(vm, type);
|
||||
}
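i915_vm_obj_min_alignment() gives callers the per-region minimum GTT alignment, so placement code can round offsets and sizes before binding. A small usage sketch (the helper name is illustrative):

/* Illustrative only: round a proposed GTT offset up to whatever the VM
 * requires for this object's memory region (4K, 64K or 2M). */
static u64 example_aligned_offset(struct i915_address_space *vm,
				  struct drm_i915_gem_object *obj,
				  u64 offset)
{
	return round_up(offset, i915_vm_obj_min_alignment(vm, obj));
}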
|
||||
|
||||
static inline bool
|
||||
i915_vm_has_cache_coloring(struct i915_address_space *vm)
|
||||
{
|
||||
|
@ -570,7 +603,7 @@ void free_scratch(struct i915_address_space *vm);
|
|||
|
||||
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
|
||||
struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz);
|
||||
struct i915_page_table *alloc_pt(struct i915_address_space *vm);
|
||||
struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz);
|
||||
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
|
||||
struct i915_page_directory *__alloc_pd(int npde);
|
@ -623,7 +623,7 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
|
|||
GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
|
||||
!intel_engine_has_relative_mmio(engine));
|
||||
|
||||
if (engine->class == RENDER_CLASS) {
|
||||
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
|
||||
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
|
||||
return dg2_rcs_offsets;
|
||||
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
|
||||
|
@ -1217,6 +1217,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
|
|||
cs = gen12_emit_timestamp_wa(ce, cs);
|
||||
cs = gen12_emit_restore_scratch(ce, cs);
|
||||
|
||||
/* Wa_16013000631:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
|
||||
IS_DG2_G11(ce->engine->i915))
|
||||
if (ce->engine->class == COMPUTE_CLASS)
|
||||
cs = gen8_emit_pipe_control(cs,
|
||||
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
|
||||
0);
|
||||
|
||||
return cs;
|
||||
}
|
||||
|
||||
|
@ -1619,7 +1627,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
|
|||
unsigned int i;
|
||||
int err;
|
||||
|
||||
if (engine->class != RENDER_CLASS)
|
||||
if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
|
||||
return;
|
||||
|
||||
switch (GRAPHICS_VER(engine->i915)) {
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#ifndef __INTEL_LRC_H__
|
||||
#define __INTEL_LRC_H__
|
||||
|
||||
#include "i915_priolist_types.h"
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct drm_i915_gem_object;
|
||||
|
@ -69,4 +72,52 @@ void lrc_check_regs(const struct intel_context *ce,
|
|||
|
||||
void lrc_update_runtime(struct intel_context *ce);
|
||||
|
||||
enum {
|
||||
INTEL_ADVANCED_CONTEXT = 0,
|
||||
INTEL_LEGACY_32B_CONTEXT,
|
||||
INTEL_ADVANCED_AD_CONTEXT,
|
||||
INTEL_LEGACY_64B_CONTEXT
|
||||
};
|
||||
|
||||
enum {
|
||||
FAULT_AND_HANG = 0,
|
||||
FAULT_AND_HALT, /* Debug only */
|
||||
FAULT_AND_STREAM,
|
||||
FAULT_AND_CONTINUE /* Unsupported */
|
||||
};
|
||||
|
||||
#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
|
||||
#define GEN8_CTX_VALID (1 << 0)
|
||||
#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
|
||||
#define GEN8_CTX_FORCE_RESTORE (1 << 2)
|
||||
#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
|
||||
#define GEN8_CTX_PRIVILEGE (1 << 8)
|
||||
#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
|
||||
#define GEN12_CTX_PRIORITY_MASK GENMASK(10, 9)
|
||||
#define GEN12_CTX_PRIORITY_HIGH FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 2)
|
||||
#define GEN12_CTX_PRIORITY_NORMAL FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 1)
|
||||
#define GEN12_CTX_PRIORITY_LOW FIELD_PREP(GEN12_CTX_PRIORITY_MASK, 0)
|
||||
#define GEN8_CTX_ID_SHIFT 32
|
||||
#define GEN8_CTX_ID_WIDTH 21
|
||||
#define GEN11_SW_CTX_ID_SHIFT 37
|
||||
#define GEN11_SW_CTX_ID_WIDTH 11
|
||||
#define GEN11_ENGINE_CLASS_SHIFT 61
|
||||
#define GEN11_ENGINE_CLASS_WIDTH 3
|
||||
#define GEN11_ENGINE_INSTANCE_SHIFT 48
|
||||
#define GEN11_ENGINE_INSTANCE_WIDTH 6
|
||||
#define XEHP_SW_CTX_ID_SHIFT 39
|
||||
#define XEHP_SW_CTX_ID_WIDTH 16
|
||||
#define XEHP_SW_COUNTER_SHIFT 58
|
||||
#define XEHP_SW_COUNTER_WIDTH 6
|
||||
|
||||
static inline u32 lrc_desc_priority(int prio)
|
||||
{
|
||||
if (prio > I915_PRIORITY_NORMAL)
|
||||
return GEN12_CTX_PRIORITY_HIGH;
|
||||
else if (prio < I915_PRIORITY_NORMAL)
|
||||
return GEN12_CTX_PRIORITY_LOW;
|
||||
else
|
||||
return GEN12_CTX_PRIORITY_NORMAL;
|
||||
}
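lrc_desc_priority() maps the scheduler priority onto the GEN12 descriptor priority field; execlists_update_context() (changed earlier in this diff) ORs the result into the context descriptor when the engine advertises I915_ENGINE_HAS_EU_PRIORITY. A condensed, non-authoritative sketch of that combination:

/* Sketch only: how the priority field folds into the submitted descriptor,
 * mirroring the execlists_update_context() hunk above. */
static u64 example_lrc_desc_with_prio(const struct intel_context *ce,
				      const struct intel_engine_cs *engine,
				      int prio)
{
	u64 desc = ce->lrc.desc;

	if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
		desc |= lrc_desc_priority(prio);

	return desc;
}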
|
||||
|
||||
#endif /* __INTEL_LRC_H__ */
|
||||
|
|
|
@ -32,6 +32,38 @@ static bool engine_supports_migration(struct intel_engine_cs *engine)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
|
||||
struct i915_page_table *pt,
|
||||
void *data)
|
||||
{
|
||||
struct insert_pte_data *d = data;
|
||||
|
||||
/*
|
||||
* Insert a dummy PTE into every PT that will map to LMEM to ensure
|
||||
* we have a correctly set up PDE structure for later use.
|
||||
*/
|
||||
vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
|
||||
GEM_BUG_ON(!pt->is_compact);
|
||||
d->offset += SZ_2M;
|
||||
}
|
||||
|
||||
static void xehpsdv_insert_pte(struct i915_address_space *vm,
|
||||
struct i915_page_table *pt,
|
||||
void *data)
|
||||
{
|
||||
struct insert_pte_data *d = data;
|
||||
|
||||
/*
|
||||
* We are playing tricks here, since the actual pt, from the hw
|
||||
* pov, is only 256 bytes with 32 entries, or 4096 bytes with 512
|
||||
* entries, but we are still guaranteed that the physical
|
||||
* alignment is 64K underneath for the pt, and we are careful
|
||||
* not to access the space in the void.
|
||||
*/
|
||||
vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
|
||||
d->offset += SZ_64K;
|
||||
}
|
||||
|
||||
static void insert_pte(struct i915_address_space *vm,
|
||||
struct i915_page_table *pt,
|
||||
void *data)
|
||||
|
@ -74,7 +106,32 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
|
|||
* i.e. within the same non-preemptible window so that we do not switch
|
||||
* to another migration context that overwrites the PTE.
|
||||
*
|
||||
* TODO: Add support for huge LMEM PTEs
|
||||
* This changes quite a bit on platforms with HAS_64K_PAGES support,
|
||||
* where we instead have three windows, each CHUNK_SIZE in size. The
|
||||
* first is reserved for mapping system-memory, and that just uses the
|
||||
* 512 entry layout using 4K GTT pages. The other two windows just map
|
||||
* lmem pages and must use the new compact 32 entry layout using 64K GTT
|
||||
* pages, which ensures we can address any lmem object that the user
|
||||
* throws at us. We then also use the xehpsdv_toggle_pdes as a way of
|
||||
* just toggling the PDE bit (GEN12_PDE_64K) for us, to enable the
|
||||
* compact layout for each of these page-tables, that fall within the
|
||||
* [CHUNK_SIZE, 3 * CHUNK_SIZE) range.
|
||||
*
|
||||
* We lay the ppGTT out as:
|
||||
*
|
||||
* [0, CHUNK_SZ) -> first window/object, maps smem
|
||||
* [CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src
|
||||
* [2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst
|
||||
*
|
||||
* For the PTE window it's also quite different, since each PTE must
|
||||
* point to some 64K page, one for each PT (since it's in lmem), and yet
* each is only <= 4096 bytes, but since the unused space within that PTE
|
||||
* range is never touched, this should be fine.
|
||||
*
|
||||
* So basically each PT now needs 64K of virtual memory, instead of 4K,
|
||||
* which looks like:
|
||||
*
|
||||
* [3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE
|
||||
*/
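A quick sanity check of the sizing described above, assuming the driver's existing 8 MiB CHUNK_SZ: three chunks cover 24 MiB, i.e. twelve 2 MiB PDEs, so the PTE window needs 12 * 64K = 768 KiB of virtual space starting at 3 * CHUNK_SZ. As a sketch:

/* Sketch only: size of the 64K-page PTE window for the layout above. */
static u64 example_pte_window_size(u64 chunk_sz)
{
	return (3 * chunk_sz / SZ_2M) * SZ_64K;	/* 768 KiB for 8 MiB chunks */
}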
|
||||
|
||||
vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
|
||||
|
@ -86,6 +143,9 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
|
|||
goto err_vm;
|
||||
}
|
||||
|
||||
if (HAS_64K_PAGES(gt->i915))
|
||||
stash.pt_sz = I915_GTT_PAGE_SIZE_64K;
|
||||
|
||||
/*
|
||||
* Each engine instance is assigned its own chunk in the VM, so
|
||||
* that we can run multiple instances concurrently
|
||||
|
@ -105,14 +165,20 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
|
|||
* We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
|
||||
* 4x2 page directories for source/destination.
|
||||
*/
|
||||
sz = 2 * CHUNK_SZ;
|
||||
if (HAS_64K_PAGES(gt->i915))
|
||||
sz = 3 * CHUNK_SZ;
|
||||
else
|
||||
sz = 2 * CHUNK_SZ;
|
||||
d.offset = base + sz;
|
||||
|
||||
/*
|
||||
* We need another page directory setup so that we can write
|
||||
* the 8x512 PTE in each chunk.
|
||||
*/
|
||||
sz += (sz >> 12) * sizeof(u64);
|
||||
if (HAS_64K_PAGES(gt->i915))
|
||||
sz += (sz / SZ_2M) * SZ_64K;
|
||||
else
|
||||
sz += (sz >> 12) * sizeof(u64);
|
||||
|
||||
err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
|
||||
if (err)
|
||||
|
@ -133,7 +199,18 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
|
|||
goto err_vm;
|
||||
|
||||
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
|
||||
vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d);
|
||||
if (HAS_64K_PAGES(gt->i915)) {
|
||||
vm->vm.foreach(&vm->vm, base, d.offset - base,
|
||||
xehpsdv_insert_pte, &d);
|
||||
d.offset = base + CHUNK_SZ;
|
||||
vm->vm.foreach(&vm->vm,
|
||||
d.offset,
|
||||
2 * CHUNK_SZ,
|
||||
xehpsdv_toggle_pdes, &d);
|
||||
} else {
|
||||
vm->vm.foreach(&vm->vm, base, d.offset - base,
|
||||
insert_pte, &d);
|
||||
}
|
||||
}
|
||||
|
||||
return &vm->vm;
|
||||
|
@ -269,19 +346,38 @@ static int emit_pte(struct i915_request *rq,
|
|||
u64 offset,
|
||||
int length)
|
||||
{
|
||||
bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
|
||||
const u64 encode = rq->context->vm->pte_encode(0, cache_level,
|
||||
is_lmem ? PTE_LM : 0);
|
||||
struct intel_ring *ring = rq->ring;
|
||||
int total = 0;
|
||||
int pkt, dword_length;
|
||||
u32 total = 0;
|
||||
u32 page_size;
|
||||
u32 *hdr, *cs;
|
||||
int pkt;
|
||||
|
||||
GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
|
||||
|
||||
page_size = I915_GTT_PAGE_SIZE;
|
||||
dword_length = 0x400;
|
||||
|
||||
/* Compute the page directory offset for the target address range */
|
||||
offset >>= 12;
|
||||
offset *= sizeof(u64);
|
||||
offset += 2 * CHUNK_SZ;
|
||||
if (has_64K_pages) {
|
||||
GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M));
|
||||
|
||||
offset /= SZ_2M;
|
||||
offset *= SZ_64K;
|
||||
offset += 3 * CHUNK_SZ;
|
||||
|
||||
if (is_lmem) {
|
||||
page_size = I915_GTT_PAGE_SIZE_64K;
|
||||
dword_length = 0x40;
|
||||
}
|
||||
} else {
|
||||
offset >>= 12;
|
||||
offset *= sizeof(u64);
|
||||
offset += 2 * CHUNK_SZ;
|
||||
}
|
||||
|
||||
offset += (u64)rq->engine->instance << 32;
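To make the new offset arithmetic concrete, here is a hedged helper (not in the patch) computing where emit_pte() begins writing for a given window base under both layouts; the constants are the ones used above:

/* Illustrative only: starting offset of the PTE updates for a window that
 * begins at @base, mirroring the arithmetic in emit_pte() above. */
static u64 example_pte_update_offset(u64 base, bool has_64k_pages)
{
	if (has_64k_pages)
		return 3 * CHUNK_SZ + (base / SZ_2M) * SZ_64K;

	return 2 * CHUNK_SZ + (base >> 12) * sizeof(u64);
}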
|
||||
|
||||
cs = intel_ring_begin(rq, 6);
|
||||
|
@ -289,7 +385,7 @@ static int emit_pte(struct i915_request *rq,
|
|||
return PTR_ERR(cs);
|
||||
|
||||
/* Pack as many PTE updates as possible into a single MI command */
|
||||
pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5);
|
||||
pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5);
|
||||
pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);
|
||||
|
||||
hdr = cs;
|
||||
|
@ -299,6 +395,8 @@ static int emit_pte(struct i915_request *rq,
|
|||
|
||||
do {
|
||||
if (cs - hdr >= pkt) {
|
||||
int dword_rem;
|
||||
|
||||
*hdr += cs - hdr - 2;
|
||||
*cs++ = MI_NOOP;
|
||||
|
||||
|
@ -310,7 +408,18 @@ static int emit_pte(struct i915_request *rq,
|
|||
if (IS_ERR(cs))
|
||||
return PTR_ERR(cs);
|
||||
|
||||
pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5);
|
||||
dword_rem = dword_length;
|
||||
if (has_64K_pages) {
|
||||
if (IS_ALIGNED(total, SZ_2M)) {
|
||||
offset = round_up(offset, SZ_64K);
|
||||
} else {
|
||||
dword_rem = SZ_2M - (total & (SZ_2M - 1));
|
||||
dword_rem /= page_size;
|
||||
dword_rem *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5);
|
||||
pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);
|
||||
|
||||
hdr = cs;
|
||||
|
@ -319,13 +428,15 @@ static int emit_pte(struct i915_request *rq,
|
|||
*cs++ = upper_32_bits(offset);
|
||||
}
|
||||
|
||||
GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size));
|
||||
|
||||
*cs++ = lower_32_bits(encode | it->dma);
|
||||
*cs++ = upper_32_bits(encode | it->dma);
|
||||
|
||||
offset += 8;
|
||||
total += I915_GTT_PAGE_SIZE;
|
||||
total += page_size;
|
||||
|
||||
it->dma += I915_GTT_PAGE_SIZE;
|
||||
it->dma += page_size;
|
||||
if (it->dma >= it->max) {
|
||||
it->sg = __sg_next(it->sg);
|
||||
if (!it->sg || sg_dma_len(it->sg) == 0)
|
||||
|
@ -356,7 +467,8 @@ static bool wa_1209644611_applies(int ver, u32 size)
|
|||
return height % 4 == 3 && height <= 8;
|
||||
}
|
||||
|
||||
static int emit_copy(struct i915_request *rq, int size)
|
||||
static int emit_copy(struct i915_request *rq,
|
||||
u32 dst_offset, u32 src_offset, int size)
|
||||
{
|
||||
const int ver = GRAPHICS_VER(rq->engine->i915);
|
||||
u32 instance = rq->engine->instance;
|
||||
|
@ -371,31 +483,31 @@ static int emit_copy(struct i915_request *rq, int size)
|
|||
*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = CHUNK_SZ; /* dst offset */
|
||||
*cs++ = dst_offset;
|
||||
*cs++ = instance;
|
||||
*cs++ = 0;
|
||||
*cs++ = PAGE_SIZE;
|
||||
*cs++ = 0; /* src offset */
|
||||
*cs++ = src_offset;
|
||||
*cs++ = instance;
|
||||
} else if (ver >= 8) {
|
||||
*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
|
||||
*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = CHUNK_SZ; /* dst offset */
|
||||
*cs++ = dst_offset;
|
||||
*cs++ = instance;
|
||||
*cs++ = 0;
|
||||
*cs++ = PAGE_SIZE;
|
||||
*cs++ = 0; /* src offset */
|
||||
*cs++ = src_offset;
|
||||
*cs++ = instance;
|
||||
} else {
|
||||
GEM_BUG_ON(instance);
|
||||
*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
|
||||
*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
|
||||
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
|
||||
*cs++ = CHUNK_SZ; /* dst offset */
|
||||
*cs++ = dst_offset;
|
||||
*cs++ = PAGE_SIZE;
|
||||
*cs++ = 0; /* src offset */
|
||||
*cs++ = src_offset;
|
||||
}
|
||||
|
||||
intel_ring_advance(rq, cs);
|
||||
|
@ -423,6 +535,7 @@ intel_context_migrate_copy(struct intel_context *ce,
|
|||
GEM_BUG_ON(ce->ring->size < SZ_64K);
|
||||
|
||||
do {
|
||||
u32 src_offset, dst_offset;
|
||||
int len;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
|
@ -450,15 +563,28 @@ intel_context_migrate_copy(struct intel_context *ce,
|
|||
if (err)
|
||||
goto out_rq;
|
||||
|
||||
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0,
|
||||
CHUNK_SZ);
|
||||
src_offset = 0;
|
||||
dst_offset = CHUNK_SZ;
|
||||
if (HAS_64K_PAGES(ce->engine->i915)) {
|
||||
GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
|
||||
|
||||
src_offset = 0;
|
||||
dst_offset = 0;
|
||||
if (src_is_lmem)
|
||||
src_offset = CHUNK_SZ;
|
||||
if (dst_is_lmem)
|
||||
dst_offset = 2 * CHUNK_SZ;
|
||||
}
|
||||
|
||||
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
|
||||
src_offset, CHUNK_SZ);
|
||||
if (len <= 0) {
|
||||
err = len;
|
||||
goto out_rq;
|
||||
}
|
||||
|
||||
err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
|
||||
CHUNK_SZ, len);
|
||||
dst_offset, len);
|
||||
if (err < 0)
|
||||
goto out_rq;
|
||||
if (err < len) {
|
||||
|
@ -470,7 +596,7 @@ intel_context_migrate_copy(struct intel_context *ce,
|
|||
if (err)
|
||||
goto out_rq;
|
||||
|
||||
err = emit_copy(rq, len);
|
||||
err = emit_copy(rq, dst_offset, src_offset, len);
|
||||
|
||||
/* Arbitration is re-enabled between requests. */
|
||||
out_rq:
|
||||
|
@ -488,14 +614,15 @@ out_ce:
|
|||
return err;
|
||||
}
|
||||
|
||||
static int emit_clear(struct i915_request *rq, int size, u32 value)
|
||||
static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
|
||||
{
|
||||
const int ver = GRAPHICS_VER(rq->engine->i915);
|
||||
u32 instance = rq->engine->instance;
|
||||
u32 *cs;
|
||||
|
||||
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
|
||||
|
||||
offset += (u64)rq->engine->instance << 32;
|
||||
|
||||
cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
|
||||
if (IS_ERR(cs))
|
||||
return PTR_ERR(cs);
|
||||
|
@ -505,17 +632,17 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)
|
|||
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = 0; /* offset */
|
||||
*cs++ = instance;
|
||||
*cs++ = lower_32_bits(offset);
|
||||
*cs++ = upper_32_bits(offset);
|
||||
*cs++ = value;
|
||||
*cs++ = MI_NOOP;
|
||||
} else {
|
||||
GEM_BUG_ON(instance);
|
||||
GEM_BUG_ON(upper_32_bits(offset));
|
||||
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
|
||||
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = 0;
|
||||
*cs++ = lower_32_bits(offset);
|
||||
*cs++ = value;
|
||||
}
|
||||
|
||||
|
@ -542,6 +669,7 @@ intel_context_migrate_clear(struct intel_context *ce,
|
|||
GEM_BUG_ON(ce->ring->size < SZ_64K);
|
||||
|
||||
do {
|
||||
u32 offset;
|
||||
int len;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
|
@ -569,7 +697,11 @@ intel_context_migrate_clear(struct intel_context *ce,
|
|||
if (err)
|
||||
goto out_rq;
|
||||
|
||||
len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ);
|
||||
offset = 0;
|
||||
if (HAS_64K_PAGES(ce->engine->i915) && is_lmem)
|
||||
offset = CHUNK_SZ;
|
||||
|
||||
len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
|
||||
if (len <= 0) {
|
||||
err = len;
|
||||
goto out_rq;
|
||||
|
@ -579,7 +711,7 @@ intel_context_migrate_clear(struct intel_context *ce,
|
|||
if (err)
|
||||
goto out_rq;
|
||||
|
||||
err = emit_clear(rq, len, value);
|
||||
err = emit_clear(rq, offset, len, value);
|
||||
|
||||
/* Arbitration is re-enabled between requests. */
|
||||
out_rq:
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "gen6_ppgtt.h"
|
||||
#include "gen8_ppgtt.h"
|
||||
|
||||
struct i915_page_table *alloc_pt(struct i915_address_space *vm)
|
||||
struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz)
|
||||
{
|
||||
struct i915_page_table *pt;
|
||||
|
||||
|
@ -20,12 +20,13 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
|
|||
if (unlikely(!pt))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
|
||||
pt->base = vm->alloc_pt_dma(vm, sz);
|
||||
if (IS_ERR(pt->base)) {
|
||||
kfree(pt);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
pt->is_compact = false;
|
||||
atomic_set(&pt->used, 0);
|
||||
return pt;
|
||||
}
|
||||
|
@ -220,17 +221,25 @@ int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
|
|||
u64 size)
|
||||
{
|
||||
unsigned long count;
|
||||
int shift, n;
|
||||
int shift, n, pt_sz;
|
||||
|
||||
shift = vm->pd_shift;
|
||||
if (!shift)
|
||||
return 0;
|
||||
|
||||
pt_sz = stash->pt_sz;
|
||||
if (!pt_sz)
|
||||
pt_sz = I915_GTT_PAGE_SIZE_4K;
|
||||
else
|
||||
GEM_BUG_ON(!IS_DGFX(vm->i915));
|
||||
|
||||
GEM_BUG_ON(!is_power_of_2(pt_sz));
|
||||
|
||||
count = pd_count(size, shift);
|
||||
while (count--) {
|
||||
struct i915_page_table *pt;
|
||||
|
||||
pt = alloc_pt(vm);
|
||||
pt = alloc_pt(vm, pt_sz);
|
||||
if (IS_ERR(pt)) {
|
||||
i915_vm_free_pt_stash(vm, stash);
|
||||
return PTR_ERR(pt);
|
||||
|
|
|
@ -14,60 +14,6 @@
|
|||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_regs.h"
|
||||
|
||||
static int init_fake_lmem_bar(struct intel_memory_region *mem)
|
||||
{
|
||||
struct drm_i915_private *i915 = mem->i915;
|
||||
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
|
||||
unsigned long n;
|
||||
int ret;
|
||||
|
||||
/* We want to 1:1 map the mappable aperture to our reserved region */
|
||||
|
||||
mem->fake_mappable.start = 0;
|
||||
mem->fake_mappable.size = resource_size(&mem->region);
|
||||
mem->fake_mappable.color = I915_COLOR_UNEVICTABLE;
|
||||
|
||||
ret = drm_mm_reserve_node(&ggtt->vm.mm, &mem->fake_mappable);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mem->remap_addr = dma_map_resource(i915->drm.dev,
|
||||
mem->region.start,
|
||||
mem->fake_mappable.size,
|
||||
DMA_BIDIRECTIONAL,
|
||||
DMA_ATTR_FORCE_CONTIGUOUS);
|
||||
if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) {
|
||||
drm_mm_remove_node(&mem->fake_mappable);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (n = 0; n < mem->fake_mappable.size >> PAGE_SHIFT; ++n) {
|
||||
ggtt->vm.insert_page(&ggtt->vm,
|
||||
mem->remap_addr + (n << PAGE_SHIFT),
|
||||
n << PAGE_SHIFT,
|
||||
I915_CACHE_NONE, 0);
|
||||
}
|
||||
|
||||
mem->region = (struct resource)DEFINE_RES_MEM(mem->remap_addr,
|
||||
mem->fake_mappable.size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void release_fake_lmem_bar(struct intel_memory_region *mem)
|
||||
{
|
||||
if (!drm_mm_node_allocated(&mem->fake_mappable))
|
||||
return;
|
||||
|
||||
drm_mm_remove_node(&mem->fake_mappable);
|
||||
|
||||
dma_unmap_resource(mem->i915->drm.dev,
|
||||
mem->remap_addr,
|
||||
mem->fake_mappable.size,
|
||||
DMA_BIDIRECTIONAL,
|
||||
DMA_ATTR_FORCE_CONTIGUOUS);
|
||||
}
|
||||
|
||||
static int
|
||||
region_lmem_release(struct intel_memory_region *mem)
|
||||
{
|
||||
|
@ -75,7 +21,6 @@ region_lmem_release(struct intel_memory_region *mem)
|
|||
|
||||
ret = intel_region_ttm_fini(mem);
|
||||
io_mapping_fini(&mem->iomap);
|
||||
release_fake_lmem_bar(mem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -85,17 +30,10 @@ region_lmem_init(struct intel_memory_region *mem)
|
|||
{
|
||||
int ret;
|
||||
|
||||
if (mem->i915->params.fake_lmem_start) {
|
||||
ret = init_fake_lmem_bar(mem);
|
||||
GEM_BUG_ON(ret);
|
||||
}
|
||||
|
||||
if (!io_mapping_init_wc(&mem->iomap,
|
||||
mem->io_start,
|
||||
resource_size(&mem->region))) {
|
||||
ret = -EIO;
|
||||
goto out_no_io;
|
||||
}
|
||||
mem->io_size))
|
||||
return -EIO;
|
||||
|
||||
ret = intel_region_ttm_init(mem);
|
||||
if (ret)
|
||||
|
@ -105,8 +43,6 @@ region_lmem_init(struct intel_memory_region *mem)
|
|||
|
||||
out_no_buddy:
|
||||
io_mapping_fini(&mem->iomap);
|
||||
out_no_io:
|
||||
release_fake_lmem_bar(mem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -117,50 +53,6 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = {
|
|||
.init_object = __i915_gem_ttm_object_init,
|
||||
};
|
||||
|
||||
struct intel_memory_region *
|
||||
intel_gt_setup_fake_lmem(struct intel_gt *gt)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
|
||||
struct intel_memory_region *mem;
|
||||
resource_size_t mappable_end;
|
||||
resource_size_t io_start;
|
||||
resource_size_t start;
|
||||
|
||||
if (!HAS_LMEM(i915))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
if (!i915->params.fake_lmem_start)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt));
|
||||
|
||||
/* Your mappable aperture belongs to me now! */
|
||||
mappable_end = pci_resource_len(pdev, 2);
|
||||
io_start = pci_resource_start(pdev, 2);
|
||||
start = i915->params.fake_lmem_start;
|
||||
|
||||
mem = intel_memory_region_create(i915,
|
||||
start,
|
||||
mappable_end,
|
||||
PAGE_SIZE,
|
||||
io_start,
|
||||
INTEL_MEMORY_LOCAL,
|
||||
0,
|
||||
&intel_region_lmem_ops);
|
||||
if (!IS_ERR(mem)) {
|
||||
drm_info(&i915->drm, "Intel graphics fake LMEM: %pR\n",
|
||||
&mem->region);
|
||||
drm_info(&i915->drm,
|
||||
"Intel graphics fake LMEM IO start: %llx\n",
|
||||
(u64)mem->io_start);
|
||||
drm_info(&i915->drm, "Intel graphics fake LMEM size: %llx\n",
|
||||
(u64)resource_size(&mem->region));
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
|
||||
u64 *start, u32 *size)
|
||||
{
|
||||
|
@ -207,8 +99,29 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
|
|||
if (!IS_DGFX(i915))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/* Stolen starts from GSMBASE on DG1 */
|
||||
lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
|
||||
if (HAS_FLAT_CCS(i915)) {
|
||||
u64 tile_stolen, flat_ccs_base;
|
||||
|
||||
lmem_size = pci_resource_len(pdev, 2);
|
||||
flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR);
|
||||
flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
|
||||
|
||||
if (GEM_WARN_ON(lmem_size < flat_ccs_base))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
tile_stolen = lmem_size - flat_ccs_base;
|
||||
|
||||
/* If the FLAT_CCS_BASE_ADDR register is not populated, flag an error */
|
||||
if (tile_stolen == lmem_size)
|
||||
drm_err(&i915->drm,
|
||||
"CCS_BASE_ADDR register did not have expected value\n");
|
||||
|
||||
lmem_size -= tile_stolen;
|
||||
} else {
|
||||
/* Stolen starts from GSMBASE without CCS */
|
||||
lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
|
||||
}
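To make the subtraction above concrete with made-up numbers: if BAR2 exposes 16 GiB and FLAT_CCS_BASE_ADDR decodes to 15.75 GiB, tile_stolen comes out to 256 MiB and the region keeps 15.75 GiB of usable local memory. A hedged sketch:

/* Sketch only: usable lmem once the flat-CCS tile-stolen area is removed. */
static u64 example_usable_lmem(u64 bar_size, u64 flat_ccs_base)
{
	u64 tile_stolen = bar_size - flat_ccs_base;

	return bar_size - tile_stolen;	/* equals flat_ccs_base */
}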
|
||||
|
||||
|
||||
io_start = pci_resource_start(pdev, 2);
|
||||
if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
|
||||
|
@ -221,6 +134,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
|
|||
lmem_size,
|
||||
min_page_size,
|
||||
io_start,
|
||||
lmem_size,
|
||||
INTEL_MEMORY_LOCAL,
|
||||
0,
|
||||
&intel_region_lmem_ops);
|
||||
|
@ -234,6 +148,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
|
|||
drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
|
||||
drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
|
||||
&mem->io_start);
|
||||
drm_info(&i915->drm, "Local memory IO size: %pa\n",
|
||||
&mem->io_size);
|
||||
drm_info(&i915->drm, "Local memory available: %pa\n",
|
||||
&lmem_size);
|
||||
|
||||
|
|
|
@ -10,7 +10,4 @@ struct intel_gt;
|
|||
|
||||
struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt);
|
||||
|
||||
struct intel_memory_region *
|
||||
intel_gt_setup_fake_lmem(struct intel_gt *gt);
|
||||
|
||||
#endif /* !__INTEL_REGION_LMEM_H */
|
||||
|
|
|
@ -1486,7 +1486,7 @@ void intel_rps_enable(struct intel_rps *rps)
|
|||
|
||||
if (has_busy_stats(rps))
|
||||
intel_rps_set_timer(rps);
|
||||
else if (GRAPHICS_VER(i915) >= 6)
|
||||
else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
|
||||
intel_rps_set_interrupts(rps);
|
||||
else
|
||||
/* Ironlake currently uses intel_ips.ko */ {}
|
||||
|
|
|
@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
|
|||
return total;
|
||||
}
|
||||
|
||||
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
|
||||
static u32
|
||||
_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
|
||||
const u8 *subslice_mask, u8 slice)
|
||||
{
|
||||
int i, offset = slice * sseu->ss_stride;
|
||||
u32 mask = 0;
|
||||
|
@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
|
|||
GEM_BUG_ON(slice >= sseu->max_slices);
|
||||
|
||||
for (i = 0; i < sseu->ss_stride; i++)
|
||||
mask |= (u32)sseu->subslice_mask[offset + i] <<
|
||||
i * BITS_PER_BYTE;
|
||||
mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
|
||||
{
|
||||
return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
|
||||
}
|
||||
|
||||
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
|
||||
{
|
||||
return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
|
||||
}
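With the accessor split, the same byte-packing loop now serves both the geometry and the compute DSS masks. A trivial usage sketch (the helper name is illustrative):

/* Illustrative only: number of enabled compute DSS reported by the sseu. */
static unsigned int example_ccs_dss_count(const struct sseu_dev_info *sseu)
{
	return hweight32(intel_sseu_get_compute_subslices(sseu));
}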
|
||||
|
||||
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
|
||||
u8 *subslice_mask, u32 ss_mask)
|
||||
{
|
||||
|
|
|
@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
|
|||
unsigned int
|
||||
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
|
||||
|
||||
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
|
||||
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
|
||||
|
||||
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
|
||||
|
||||
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
|
||||
u8 *subslice_mask, u32 ss_mask);
|
||||
|
|
|
@ -684,11 +684,10 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
|
|||
wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
|
||||
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
|
||||
|
||||
/* Wa_22012532006:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
|
||||
wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
|
||||
DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
|
||||
/* Wa_14014947963:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
|
||||
IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
|
||||
wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
|
||||
}
|
||||
|
||||
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
|
||||
|
@ -1344,12 +1343,6 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
|||
/* Wa_1409757795:xehpsdv */
|
||||
wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
|
||||
|
||||
/* Wa_18011725039:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
|
||||
wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
|
||||
wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
|
||||
}
|
||||
|
||||
/* Wa_16011155590:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
|
||||
|
@ -1386,19 +1379,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
|||
GAMTLBVEBOX0_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
/* Wa_14012362059:xehpsdv */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
|
||||
/* Wa_16012725990:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011060649:xehpsdv */
|
||||
wa_14011060649(gt, wal);
|
||||
|
||||
/* Wa_14014368820:xehpsdv */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1440,10 +1426,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
|||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14010680813:dg2_g10 */
|
||||
wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
|
||||
EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14010948348:dg2_g10 */
|
||||
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
|
||||
|
||||
|
@ -1490,16 +1472,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
|||
wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
|
||||
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14012362059:dg2 */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
}
|
||||
|
||||
/* Wa_1509235366:dg2 */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
|
||||
/* Wa_14014830051:dg2 */
|
||||
wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
|
||||
|
||||
|
@ -1508,14 +1480,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
|||
* recommended tuning settings documented in the bspec's
|
||||
* performance guide section.
|
||||
*/
|
||||
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
|
||||
wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
|
||||
|
||||
/* Wa_18018781329:dg2 */
|
||||
wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1943,6 +1908,11 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
|
|||
RING_FORCE_TO_NONPRIV_RANGE_4);
|
||||
|
||||
break;
|
||||
case COMPUTE_CLASS:
|
||||
/* Wa_16011157294:dg2_g10 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
|
||||
whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -2049,6 +2019,23 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
|||
if (IS_DG2(i915)) {
|
||||
/* Wa_14015227452:dg2 */
|
||||
wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
|
||||
|
||||
/* Wa_1509235366:dg2 */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
|
||||
/*
|
||||
* The following are not actually "workarounds" but rather
|
||||
* recommended tuning settings documented in the bspec's
|
||||
* performance guide section.
|
||||
*/
|
||||
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
|
||||
|
||||
/* Wa_18018781329:dg2 */
|
||||
wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
|
||||
|
@ -2149,6 +2136,24 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
|||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
|
||||
|
||||
/* Wa_22012532006:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
|
||||
wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
|
||||
DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14010680813:dg2_g10 */
|
||||
wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
|
||||
EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14012362059:dg2 */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
}
|
||||
|
||||
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
|
||||
/*
|
||||
|
@ -2568,6 +2573,53 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * The workarounds in this function apply to shared registers in
 * the general render reset domain that aren't tied to a
 * specific engine. Since all render+compute engines get reset
 * together, and the contents of these registers are lost during
 * the shared render domain reset, we'll define such workarounds
 * here and then add them to just a single RCS or CCS engine's
 * workaround list (whichever engine has the XXXX flag).
 */
|
||||
static void
|
||||
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
|
||||
if (IS_XEHPSDV(i915)) {
|
||||
/* Wa_1409954639 */
|
||||
wa_masked_en(wal,
|
||||
GEN8_ROW_CHICKEN,
|
||||
SYSTOLIC_DOP_CLOCK_GATING_DIS);
|
||||
|
||||
/* Wa_1607196519 */
|
||||
wa_masked_en(wal,
|
||||
GEN9_ROW_CHICKEN4,
|
||||
GEN12_DISABLE_GRF_CLEAR);
|
||||
|
||||
/* Wa_14010670810:xehpsdv */
|
||||
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
|
||||
|
||||
/* Wa_14010449647:xehpsdv */
|
||||
wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
|
||||
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
|
||||
|
||||
/* Wa_18011725039:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
|
||||
wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
|
||||
wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
|
||||
}
|
||||
|
||||
/* Wa_14012362059:xehpsdv */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
|
||||
/* Wa_14014368820:xehpsdv */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
{
|
||||
|
@ -2576,6 +2628,14 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
|
|||
|
||||
engine_fake_wa_init(engine, wal);
|
||||
|
||||
/*
 * These are common workarounds that just need to be applied
 * to a single RCS/CCS engine's workaround list since
 * they're reset as part of the general render domain reset.
 */
|
||||
if (engine->class == RENDER_CLASS)
|
||||
general_render_compute_wa_init(engine, wal);
|
||||
|
||||
if (engine->class == RENDER_CLASS)
|
||||
rcs_engine_wa_init(engine, wal);
|
||||
else
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
* Copyright © 2020 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/iosys-map.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
|
@ -123,6 +124,37 @@ static int __shmem_rw(struct file *file, loff_t off,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int shmem_read_to_iosys_map(struct file *file, loff_t off,
|
||||
struct iosys_map *map, size_t map_off, size_t len)
|
||||
{
|
||||
unsigned long pfn;
|
||||
|
||||
for (pfn = off >> PAGE_SHIFT; len; pfn++) {
|
||||
unsigned int this =
|
||||
min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
|
||||
struct page *page;
|
||||
void *vaddr;
|
||||
|
||||
page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
|
||||
GFP_KERNEL);
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
|
||||
vaddr = kmap(page);
|
||||
iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off),
|
||||
this);
|
||||
mark_page_accessed(page);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
|
||||
len -= this;
|
||||
map_off += this;
|
||||
off = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
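shmem_read_to_iosys_map() streams shmem-backed pages straight into an iosys_map destination, so GuC state can be restored into objects that may live in LMEM without an intermediate vmap. A hedged usage sketch:

/* Illustrative only: restore the first 4 KiB of a saved shmem file into a
 * destination described by an iosys_map (system memory or LMEM BAR). */
static int example_restore_first_page(struct file *file, struct iosys_map *map)
{
	return shmem_read_to_iosys_map(file, 0, map, 0, SZ_4K);
}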
|
||||
|
||||
int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
|
||||
{
|
||||
return __shmem_rw(file, off, dst, len, false);
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct iosys_map;
|
||||
struct drm_i915_gem_object;
|
||||
struct file;
|
||||
|
||||
|
@ -17,6 +18,8 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
|
|||
void *shmem_pin_map(struct file *file);
|
||||
void shmem_unpin_map(struct file *file, void *ptr);
|
||||
|
||||
int shmem_read_to_iosys_map(struct file *file, loff_t off,
|
||||
struct iosys_map *map, size_t map_off, size_t len);
|
||||
int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
|
||||
int shmem_write(struct file *file, loff_t off, void *src, size_t len);
|
||||
|
||||
|
|
|
@ -6,8 +6,9 @@
|
|||
#ifndef _INTEL_GUC_H_
|
||||
#define _INTEL_GUC_H_
|
||||
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/iosys-map.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "intel_uncore.h"
|
||||
#include "intel_guc_fw.h"
|
||||
|
@ -146,8 +147,8 @@ struct intel_guc {
|
|||
|
||||
/** @ads_vma: object allocated to hold the GuC ADS */
|
||||
struct i915_vma *ads_vma;
|
||||
/** @ads_blob: contents of the GuC ADS */
|
||||
struct __guc_ads_blob *ads_blob;
|
||||
/** @ads_map: contents of the GuC ADS */
|
||||
struct iosys_map ads_map;
|
||||
/** @ads_regset_size: size of the save/restore regsets in the ADS */
|
||||
u32 ads_regset_size;
|
||||
/**
|
||||
|
|
|
@ -60,6 +60,19 @@ struct __guc_ads_blob {
|
|||
struct guc_mmio_reg regset[0];
|
||||
} __packed;
|
||||
|
||||
#define ads_blob_read(guc_, field_) \
|
||||
iosys_map_rd_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, field_)
|
||||
|
||||
#define ads_blob_write(guc_, field_, val_) \
|
||||
iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, \
|
||||
field_, val_)
|
||||
|
||||
#define info_map_write(map_, field_, val_) \
|
||||
iosys_map_wr_field(map_, 0, struct guc_gt_system_info, field_, val_)
|
||||
|
||||
#define info_map_read(map_, field_) \
|
||||
iosys_map_rd_field(map_, 0, struct guc_gt_system_info, field_)
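The ads_blob_read()/ads_blob_write() and info_map_*() wrappers replace direct dereferences of the ADS pointer with iosys_map field accesses, which work whether the blob sits in system memory or behind the LMEM BAR. A hedged sketch of touching a single field:

/* Illustrative only: read-modify-write one ADS policy field through the
 * iosys_map accessors instead of a CPU pointer dereference. */
static void example_set_dpc_promote_time(struct intel_guc *guc, u32 us)
{
	if (ads_blob_read(guc, policies.dpc_promote_time) != us)
		ads_blob_write(guc, policies.dpc_promote_time, us);
}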
|
||||
|
||||
static u32 guc_ads_regset_size(struct intel_guc *guc)
|
||||
{
|
||||
GEM_BUG_ON(!guc->ads_regset_size);
|
||||
|
@ -123,33 +136,37 @@ static u32 guc_ads_blob_size(struct intel_guc *guc)
|
|||
guc_ads_private_data_size(guc);
|
||||
}
|
||||
|
||||
static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies)
|
||||
static void guc_policies_init(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
u32 global_flags = 0;
|
||||
|
||||
policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
|
||||
policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;
|
||||
ads_blob_write(guc, policies.dpc_promote_time,
|
||||
GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
|
||||
ads_blob_write(guc, policies.max_num_work_items,
|
||||
GLOBAL_POLICY_MAX_NUM_WI);
|
||||
|
||||
policies->global_flags = 0;
|
||||
if (i915->params.reset < 2)
|
||||
policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
|
||||
global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
|
||||
|
||||
policies->is_valid = 1;
|
||||
ads_blob_write(guc, policies.global_flags, global_flags);
|
||||
ads_blob_write(guc, policies.is_valid, 1);
|
||||
}
|
||||
|
||||
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
|
||||
struct drm_printer *dp)
|
||||
{
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
|
||||
if (unlikely(!blob))
|
||||
if (unlikely(iosys_map_is_null(&guc->ads_map)))
|
||||
return;
|
||||
|
||||
drm_printf(dp, "Global scheduling policies:\n");
|
||||
drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time);
|
||||
drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items);
|
||||
drm_printf(dp, " Flags = %u\n", blob->policies.global_flags);
|
||||
drm_printf(dp, " DPC promote time = %u\n",
|
||||
ads_blob_read(guc, policies.dpc_promote_time));
|
||||
drm_printf(dp, " Max num work items = %u\n",
|
||||
ads_blob_read(guc, policies.max_num_work_items));
|
||||
drm_printf(dp, " Flags = %u\n",
|
||||
ads_blob_read(guc, policies.global_flags));
|
||||
}
|
||||
|
||||
static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
|
||||
|
@ -164,29 +181,30 @@ static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
|
|||
|
||||
int intel_guc_global_policies_update(struct intel_guc *guc)
|
||||
{
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
u32 scheduler_policies;
|
||||
intel_wakeref_t wakeref;
|
||||
int ret;
|
||||
|
||||
if (!blob)
|
||||
if (iosys_map_is_null(&guc->ads_map))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
GEM_BUG_ON(!blob->ads.scheduler_policies);
|
||||
scheduler_policies = ads_blob_read(guc, ads.scheduler_policies);
|
||||
GEM_BUG_ON(!scheduler_policies);
|
||||
|
||||
guc_policies_init(guc, &blob->policies);
|
||||
guc_policies_init(guc);
|
||||
|
||||
if (!intel_guc_is_ready(guc))
|
||||
return 0;
|
||||
|
||||
with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
|
||||
ret = guc_action_policies_update(guc, blob->ads.scheduler_policies);
|
||||
ret = guc_action_policies_update(guc, scheduler_policies);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void guc_mapping_table_init(struct intel_gt *gt,
|
||||
struct guc_gt_system_info *system_info)
|
||||
struct iosys_map *info_map)
|
||||
{
|
||||
unsigned int i, j;
|
||||
struct intel_engine_cs *engine;
|
||||
|
@ -195,14 +213,14 @@ static void guc_mapping_table_init(struct intel_gt *gt,
|
|||
/* Table must be set to invalid values for entries not used */
|
||||
for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
|
||||
for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
|
||||
system_info->mapping_table[i][j] =
|
||||
GUC_MAX_INSTANCES_PER_CLASS;
|
||||
info_map_write(info_map, mapping_table[i][j],
|
||||
GUC_MAX_INSTANCES_PER_CLASS);
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
u8 guc_class = engine_class_to_guc_class(engine->class);
|
||||
|
||||
system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
|
||||
engine->instance;
|
||||
info_map_write(info_map, mapping_table[guc_class][ilog2(engine->logical_mask)],
|
||||
engine->instance);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -317,6 +335,10 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
|
|||
ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
|
||||
ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
|
||||
|
||||
if (engine->class == RENDER_CLASS &&
|
||||
CCS_MASK(engine->gt))
|
||||
ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true);
|
||||
|
||||
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
|
||||
ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
|
||||
|
||||
|
@ -365,64 +387,69 @@ fail_regset_init:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void guc_mmio_reg_state_init(struct intel_guc *guc,
|
||||
struct __guc_ads_blob *blob)
|
||||
static void guc_mmio_reg_state_init(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
struct intel_engine_cs *engine;
|
||||
struct guc_mmio_reg *ads_registers;
|
||||
enum intel_engine_id id;
|
||||
u32 addr_ggtt, offset;
|
||||
|
||||
offset = guc_ads_regset_offset(guc);
|
||||
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
|
||||
ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
|
||||
|
||||
memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size);
|
||||
iosys_map_memcpy_to(&guc->ads_map, offset, guc->ads_regset,
|
||||
guc->ads_regset_size);
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
u32 count = guc->ads_regset_count[id];
|
||||
struct guc_mmio_reg_set *ads_reg_set;
|
||||
u8 guc_class;
|
||||
|
||||
/* Class index is checked in class converter */
|
||||
GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
|
||||
|
||||
guc_class = engine_class_to_guc_class(engine->class);
|
||||
ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];
|
||||
|
||||
if (!count) {
|
||||
ads_reg_set->address = 0;
|
||||
ads_reg_set->count = 0;
|
||||
ads_blob_write(guc,
|
||||
ads.reg_state_list[guc_class][engine->instance].address,
|
||||
0);
|
||||
ads_blob_write(guc,
|
||||
ads.reg_state_list[guc_class][engine->instance].count,
|
||||
0);
|
||||
continue;
|
||||
}
|
||||
|
||||
ads_reg_set->address = addr_ggtt;
|
||||
ads_reg_set->count = count;
|
||||
ads_blob_write(guc,
|
||||
ads.reg_state_list[guc_class][engine->instance].address,
|
||||
addr_ggtt);
|
||||
ads_blob_write(guc,
|
||||
ads.reg_state_list[guc_class][engine->instance].count,
|
||||
count);
|
||||
|
||||
addr_ggtt += count * sizeof(struct guc_mmio_reg);
|
||||
}
|
||||
}
|
||||
|
||||
static void fill_engine_enable_masks(struct intel_gt *gt,
|
||||
struct guc_gt_system_info *info)
|
||||
struct iosys_map *info_map)
|
||||
{
|
||||
info->engine_enabled_masks[GUC_RENDER_CLASS] = 1;
|
||||
info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1;
|
||||
info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt);
|
||||
info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt);
|
||||
info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
|
||||
info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
|
||||
info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
|
||||
info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
|
||||
info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
|
||||
}
|
||||
|
||||
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
|
||||
#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
|
||||
static int guc_prep_golden_context(struct intel_guc *guc,
|
||||
struct __guc_ads_blob *blob)
|
||||
static int guc_prep_golden_context(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
u32 addr_ggtt, offset;
|
||||
u32 total_size = 0, alloc_size, real_size;
|
||||
u8 engine_class, guc_class;
|
||||
struct guc_gt_system_info *info, local_info;
|
||||
struct guc_gt_system_info local_info;
|
||||
struct iosys_map info_map;
|
||||
|
||||
/*
|
||||
* Reserve the memory for the golden contexts and point GuC at it but
|
||||
|
@ -436,14 +463,15 @@ static int guc_prep_golden_context(struct intel_guc *guc,
|
|||
* GuC will also validate that the LRC base + size fall within the
|
||||
* allowed GGTT range.
|
||||
*/
|
||||
if (blob) {
|
||||
if (!iosys_map_is_null(&guc->ads_map)) {
|
||||
offset = guc_ads_golden_ctxt_offset(guc);
|
||||
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
|
||||
info = &blob->system_info;
|
||||
info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
|
||||
offsetof(struct __guc_ads_blob, system_info));
|
||||
} else {
|
||||
memset(&local_info, 0, sizeof(local_info));
|
||||
info = &local_info;
|
||||
fill_engine_enable_masks(gt, info);
|
||||
iosys_map_set_vaddr(&info_map, &local_info);
|
||||
fill_engine_enable_masks(gt, &info_map);
|
||||
}
|
||||
|
||||
for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
|
||||
|
@ -452,14 +480,14 @@ static int guc_prep_golden_context(struct intel_guc *guc,
|
|||
|
||||
guc_class = engine_class_to_guc_class(engine_class);
|
||||
|
||||
if (!info->engine_enabled_masks[guc_class])
|
||||
if (!info_map_read(&info_map, engine_enabled_masks[guc_class]))
|
||||
continue;
|
||||
|
||||
real_size = intel_engine_context_size(gt, engine_class);
|
||||
alloc_size = PAGE_ALIGN(real_size);
|
||||
total_size += alloc_size;
|
||||
|
||||
if (!blob)
|
||||
if (iosys_map_is_null(&guc->ads_map))
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
@ -473,15 +501,18 @@ static int guc_prep_golden_context(struct intel_guc *guc,
|
|||
* what comes before it in the context image (which is identical
|
||||
* on all engines).
|
||||
*/
|
||||
blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE;
|
||||
blob->ads.golden_context_lrca[guc_class] = addr_ggtt;
|
||||
ads_blob_write(guc, ads.eng_state_size[guc_class],
|
||||
real_size - LRC_SKIP_SIZE);
|
||||
ads_blob_write(guc, ads.golden_context_lrca[guc_class],
|
||||
addr_ggtt);
|
||||
|
||||
addr_ggtt += alloc_size;
|
||||
}
|
||||
|
||||
if (!blob)
|
||||
return total_size;
|
||||
/* Make sure current size matches what we calculated previously */
|
||||
if (guc->ads_golden_ctxt_size)
|
||||
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
|
||||
|
||||
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
|
@ -505,18 +536,16 @@ static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_
|
|||
|
||||
static void guc_init_golden_context(struct intel_guc *guc)
|
||||
{
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
struct intel_engine_cs *engine;
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
u32 addr_ggtt, offset;
|
||||
u32 total_size = 0, alloc_size, real_size;
|
||||
unsigned long offset;
|
||||
u32 addr_ggtt, total_size = 0, alloc_size, real_size;
|
||||
u8 engine_class, guc_class;
|
||||
u8 *ptr;
|
||||
|
||||
if (!intel_uc_uses_guc_submission(>->uc))
|
||||
return;
|
||||
|
||||
GEM_BUG_ON(!blob);
|
||||
GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
|
||||
|
||||
/*
|
||||
* Go back and fill in the golden context data now that it is
|
||||
|
@ -524,15 +553,13 @@ static void guc_init_golden_context(struct intel_guc *guc)
|
|||
*/
|
||||
offset = guc_ads_golden_ctxt_offset(guc);
|
||||
addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
|
||||
ptr = ((u8 *)blob) + offset;
|
||||
|
||||
for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
|
||||
if (engine_class == OTHER_CLASS)
|
||||
continue;
|
||||
|
||||
guc_class = engine_class_to_guc_class(engine_class);
|
||||
|
||||
if (!blob->system_info.engine_enabled_masks[guc_class])
|
||||
if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class]))
|
||||
continue;
|
||||
|
||||
real_size = intel_engine_context_size(gt, engine_class);
|
||||
|
@ -543,24 +570,26 @@ static void guc_init_golden_context(struct intel_guc *guc)
|
|||
if (!engine) {
|
||||
drm_err(>->i915->drm, "No engine state recorded for class %d!\n",
|
||||
engine_class);
|
||||
blob->ads.eng_state_size[guc_class] = 0;
|
||||
blob->ads.golden_context_lrca[guc_class] = 0;
|
||||
ads_blob_write(guc, ads.eng_state_size[guc_class], 0);
|
||||
ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(blob->ads.eng_state_size[guc_class] !=
|
||||
GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
|
||||
real_size - LRC_SKIP_SIZE);
|
||||
GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt);
|
||||
GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
|
||||
|
||||
addr_ggtt += alloc_size;
|
||||
|
||||
shmem_read(engine->default_state, 0, ptr, real_size);
|
||||
ptr += alloc_size;
|
||||
shmem_read_to_iosys_map(engine->default_state, 0, &guc->ads_map,
|
||||
offset, real_size);
|
||||
offset += alloc_size;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
|
||||
}
|
||||
|
||||
static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob)
|
||||
static void guc_capture_list_init(struct intel_guc *guc)
|
||||
{
|
||||
int i, j;
|
||||
u32 addr_ggtt, offset;
|
||||
|
@ -572,11 +601,11 @@ static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *
|
|||
|
||||
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
|
||||
for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
|
||||
blob->ads.capture_instance[i][j] = addr_ggtt;
|
||||
blob->ads.capture_class[i][j] = addr_ggtt;
|
||||
ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt);
|
||||
ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt);
|
||||
}
|
||||
|
||||
blob->ads.capture_global[i] = addr_ggtt;
|
||||
ads_blob_write(guc, ads.capture_global[i], addr_ggtt);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -584,47 +613,52 @@ static void __guc_ads_init(struct intel_guc *guc)
|
|||
{
|
||||
struct intel_gt *gt = guc_to_gt(guc);
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
|
||||
offsetof(struct __guc_ads_blob, system_info));
|
||||
u32 base;
|
||||
|
||||
/* GuC scheduling policies */
|
||||
guc_policies_init(guc, &blob->policies);
|
||||
guc_policies_init(guc);
|
||||
|
||||
/* System info */
|
||||
fill_engine_enable_masks(gt, &blob->system_info);
|
||||
fill_engine_enable_masks(gt, &info_map);
|
||||
|
||||
blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] =
|
||||
hweight8(gt->info.sseu.slice_mask);
|
||||
blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] =
|
||||
gt->info.vdbox_sfc_access;
|
||||
ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
|
||||
hweight8(gt->info.sseu.slice_mask));
|
||||
ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
|
||||
gt->info.vdbox_sfc_access);
|
||||
|
||||
if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) {
|
||||
u32 distdbreg = intel_uncore_read(gt->uncore,
|
||||
GEN12_DIST_DBS_POPULATED);
|
||||
blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] =
|
||||
((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
|
||||
GEN12_DOORBELLS_PER_SQIDI) + 1;
|
||||
ads_blob_write(guc,
|
||||
system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
|
||||
((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
|
||||
& GEN12_DOORBELLS_PER_SQIDI) + 1);
|
||||
}
|
||||
|
||||
/* Golden contexts for re-initialising after a watchdog reset */
|
||||
guc_prep_golden_context(guc, blob);
|
||||
guc_prep_golden_context(guc);
|
||||
|
||||
guc_mapping_table_init(guc_to_gt(guc), &blob->system_info);
|
||||
guc_mapping_table_init(guc_to_gt(guc), &info_map);
|
||||
|
||||
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
|
||||
|
||||
/* Capture list for hang debug */
|
||||
guc_capture_list_init(guc, blob);
|
||||
guc_capture_list_init(guc);
|
||||
|
||||
/* ADS */
|
||||
blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
|
||||
blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
|
||||
ads_blob_write(guc, ads.scheduler_policies, base +
|
||||
offsetof(struct __guc_ads_blob, policies));
|
||||
ads_blob_write(guc, ads.gt_system_info, base +
|
||||
offsetof(struct __guc_ads_blob, system_info));
|
||||
|
||||
/* MMIO save/restore list */
|
||||
guc_mmio_reg_state_init(guc, blob);
|
||||
guc_mmio_reg_state_init(guc);
|
||||
|
||||
/* Private Data */
|
||||
blob->ads.private_data = base + guc_ads_private_data_offset(guc);
|
||||
ads_blob_write(guc, ads.private_data, base +
|
||||
guc_ads_private_data_offset(guc));
|
||||
|
||||
i915_gem_object_flush_map(guc->ads_vma->obj);
|
||||
}
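Every pointer handed to the GuC in __guc_ads_init() above is the GGTT offset of the ADS vma plus the field's offset inside struct __guc_ads_blob, never a CPU address; with the iosys_map conversion the offsetof() form simply replaces the old ptr_offset() on a CPU pointer. A hedged one-liner showing the address math (helper name invented for illustration):

	/* Illustrative: GuC-visible address of a field inside the ADS blob. */
	static u32 example_ads_field_ggtt_addr(struct intel_guc *guc, size_t field_offset)
	{
		return intel_guc_ggtt_offset(guc, guc->ads_vma) + field_offset;
	}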
|
||||
|
@ -638,6 +672,7 @@ static void __guc_ads_init(struct intel_guc *guc)
|
|||
*/
|
||||
int intel_guc_ads_create(struct intel_guc *guc)
|
||||
{
|
||||
void *ads_blob;
|
||||
u32 size;
|
||||
int ret;
|
||||
|
||||
|
@ -653,7 +688,7 @@ int intel_guc_ads_create(struct intel_guc *guc)
|
|||
guc->ads_regset_size = ret;
|
||||
|
||||
/* Likewise the golden contexts: */
|
||||
ret = guc_prep_golden_context(guc, NULL);
|
||||
ret = guc_prep_golden_context(guc);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
guc->ads_golden_ctxt_size = ret;
|
||||
|
@ -662,10 +697,15 @@ int intel_guc_ads_create(struct intel_guc *guc)
|
|||
size = guc_ads_blob_size(guc);
|
||||
|
||||
ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
|
||||
(void **)&guc->ads_blob);
|
||||
&ads_blob);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (i915_gem_object_is_lmem(guc->ads_vma->obj))
|
||||
iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)ads_blob);
|
||||
else
|
||||
iosys_map_set_vaddr(&guc->ads_map, ads_blob);
|
||||
|
||||
__guc_ads_init(guc);
|
||||
|
||||
return 0;
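The create path above decides once, at allocation time, whether the ADS lives in LMEM (reached through an __iomem mapping) or in system memory, and records that choice in guc->ads_map; all later accesses go through the map. A minimal sketch of that selection, assuming an already-mapped buffer (names are illustrative):

	/* Illustrative: bind an iosys_map to either an I/O or a plain mapping. */
	static void example_init_blob_map(struct iosys_map *map, void *vaddr, bool is_lmem)
	{
		if (is_lmem)
			iosys_map_set_vaddr_iomem(map, (void __iomem *)vaddr);
		else
			iosys_map_set_vaddr(map, vaddr);
	}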
|
||||
|
@ -686,7 +726,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc)
|
|||
void intel_guc_ads_destroy(struct intel_guc *guc)
|
||||
{
|
||||
i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
|
||||
guc->ads_blob = NULL;
|
||||
iosys_map_clear(&guc->ads_map);
|
||||
kfree(guc->ads_regset);
|
||||
}
|
||||
|
||||
|
@ -698,8 +738,8 @@ static void guc_ads_private_data_reset(struct intel_guc *guc)
|
|||
if (!size)
|
||||
return;
|
||||
|
||||
memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0,
|
||||
size);
|
||||
iosys_map_memset(&guc->ads_map, guc_ads_private_data_offset(guc),
|
||||
0, size);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -722,18 +762,16 @@ void intel_guc_ads_reset(struct intel_guc *guc)
|
|||
|
||||
u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
|
||||
{
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
|
||||
u32 offset = base + ptr_offset(blob, engine_usage);
|
||||
|
||||
return offset;
|
||||
return intel_guc_ggtt_offset(guc, guc->ads_vma) +
|
||||
offsetof(struct __guc_ads_blob, engine_usage);
|
||||
}
|
||||
|
||||
struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
|
||||
struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct intel_guc *guc = &engine->gt->uc.guc;
|
||||
struct __guc_ads_blob *blob = guc->ads_blob;
|
||||
u8 guc_class = engine_class_to_guc_class(engine->class);
|
||||
size_t offset = offsetof(struct __guc_ads_blob,
|
||||
engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
|
||||
|
||||
return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
|
||||
return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#define _INTEL_GUC_ADS_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/iosys-map.h>
|
||||
|
||||
struct intel_guc;
|
||||
struct drm_printer;
|
||||
|
@ -18,7 +19,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
|
|||
void intel_guc_ads_reset(struct intel_guc *guc);
|
||||
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
|
||||
struct drm_printer *p);
|
||||
struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
|
||||
struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine);
|
||||
u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
|
||||
|
||||
#endif
|
||||
@@ -46,8 +46,8 @@
#define GUC_VIDEO_CLASS 1
#define GUC_VIDEOENHANCE_CLASS 2
#define GUC_BLITTER_CLASS 3
#define GUC_RESERVED_CLASS 4
#define GUC_LAST_ENGINE_CLASS GUC_RESERVED_CLASS
#define GUC_COMPUTE_CLASS 4
#define GUC_LAST_ENGINE_CLASS GUC_COMPUTE_CLASS
#define GUC_MAX_ENGINE_CLASSES 16
#define GUC_MAX_INSTANCES_PER_CLASS 32

@ -156,23 +156,37 @@ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
|
|||
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \
|
||||
)

/* the GuC arrays don't include OTHER_CLASS */
static u8 engine_class_guc_class_map[] = {
	[RENDER_CLASS] = GUC_RENDER_CLASS,
	[COPY_ENGINE_CLASS] = GUC_BLITTER_CLASS,
	[VIDEO_DECODE_CLASS] = GUC_VIDEO_CLASS,
	[VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS,
	[COMPUTE_CLASS] = GUC_COMPUTE_CLASS,
};

static u8 guc_class_engine_class_map[] = {
	[GUC_RENDER_CLASS] = RENDER_CLASS,
	[GUC_BLITTER_CLASS] = COPY_ENGINE_CLASS,
	[GUC_VIDEO_CLASS] = VIDEO_DECODE_CLASS,
	[GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS,
	[GUC_COMPUTE_CLASS] = COMPUTE_CLASS,
};

static inline u8 engine_class_to_guc_class(u8 class)
{
	BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS);
	BUILD_BUG_ON(GUC_BLITTER_CLASS != COPY_ENGINE_CLASS);
	BUILD_BUG_ON(GUC_VIDEO_CLASS != VIDEO_DECODE_CLASS);
	BUILD_BUG_ON(GUC_VIDEOENHANCE_CLASS != VIDEO_ENHANCEMENT_CLASS);
	BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS + 1);
	GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS);

	return class;
	return engine_class_guc_class_map[class];
}
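Once the compute class is added, the i915 and GuC class numbers stop lining up (i915's COMPUTE_CLASS maps to GUC_COMPUTE_CLASS, which reuses the old reserved slot), so the identity conversion gives way to the lookup tables above. A small self-check style sketch over those tables (purely illustrative, not part of the patch):

	/* Illustrative: every supported engine class should round-trip. */
	static void example_check_class_round_trip(void)
	{
		u8 class;

		for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
			if (class == OTHER_CLASS)
				continue;
			WARN_ON(guc_class_to_engine_class(engine_class_to_guc_class(class)) != class);
		}
	}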
|
||||
|
||||
static inline u8 guc_class_to_engine_class(u8 guc_class)
|
||||
{
|
||||
BUILD_BUG_ON(ARRAY_SIZE(guc_class_engine_class_map) != GUC_LAST_ENGINE_CLASS + 1);
|
||||
GEM_BUG_ON(guc_class > GUC_LAST_ENGINE_CLASS);
|
||||
GEM_BUG_ON(guc_class == GUC_RESERVED_CLASS);
|
||||
|
||||
return guc_class;
|
||||
return guc_class_engine_class_map[guc_class];
|
||||
}
|
||||
|
||||
/* Work item for submitting workloads into work queue of GuC. */
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "intel_mchbar_regs.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_regs.h"
|
||||
#include "gt/intel_rps.h"
|
||||
|
||||
static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
|
@ -115,7 +116,7 @@ static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
|
|||
{
|
||||
u32 request[] = {
|
||||
GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
|
||||
SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2),
|
||||
SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
|
||||
id,
|
||||
};
|
||||
|
||||
|
@ -579,10 +580,10 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
|
|||
|
||||
static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct intel_rps *rps = &slpc_to_gt(slpc)->rps;
|
||||
u32 rp_state_cap;
|
||||
|
||||
rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore,
|
||||
GEN6_RP_STATE_CAP);
|
||||
rp_state_cap = intel_rps_read_state_cap(rps);
|
||||
|
||||
slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) *
|
||||
GT_FREQUENCY_MULTIPLIER;
|
||||
|
|
|
@ -174,11 +174,8 @@ static inline void init_sched_state(struct intel_context *ce)
|
|||
__maybe_unused
|
||||
static bool sched_state_is_init(struct intel_context *ce)
|
||||
{
|
||||
/*
|
||||
* XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
|
||||
* suspend.
|
||||
*/
|
||||
return !(ce->guc_state.sched_state &=
|
||||
/* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
|
||||
return !(ce->guc_state.sched_state &
|
||||
~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
|
||||
}
|
||||
|
||||
|
@@ -1139,6 +1136,9 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
@@ -1153,17 +1153,17 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = READ_ONCE(rec->last_switch_in_stamp);
		*id = READ_ONCE(rec->current_context_index);
		*total = READ_ONCE(rec->total_runtime);
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
		    READ_ONCE(rec->current_context_index) == *id &&
		    READ_ONCE(rec->total_runtime) == *total)
		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}
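The retry loop above is the whole synchronization story: read the fields, read them again, and only accept the snapshot once two consecutive reads agree, bounded at six attempts. A generic sketch of the same idea, with hypothetical names, for readers who want the pattern in isolation:

	/* Illustrative only: re-read a producer-updated pair until it is stable. */
	static void example_stable_read(const u32 *a, const u32 *b, u32 *out_a, u32 *out_b)
	{
		int tries = 0;

		do {
			*out_a = READ_ONCE(*a);
			*out_b = READ_ONCE(*b);

			if (READ_ONCE(*a) == *out_a && READ_ONCE(*b) == *out_b)
				break;
		} while (++tries < 6);
	}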
|
||||
|
@ -1818,24 +1818,11 @@ int intel_guc_submission_init(struct intel_guc *guc)
|
|||
*/
|
||||
GEM_BUG_ON(!guc->lrc_desc_pool);
|
||||
|
||||
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
|
||||
|
||||
spin_lock_init(&guc->submission_state.lock);
|
||||
INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
|
||||
ida_init(&guc->submission_state.guc_ids);
|
||||
INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
|
||||
INIT_WORK(&guc->submission_state.destroyed_worker,
|
||||
destroyed_worker_func);
|
||||
INIT_WORK(&guc->submission_state.reset_fail_worker,
|
||||
reset_fail_worker_func);
|
||||
|
||||
guc->submission_state.guc_ids_bitmap =
|
||||
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
|
||||
if (!guc->submission_state.guc_ids_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&guc->timestamp.lock);
|
||||
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
|
||||
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
|
||||
guc->timestamp.shift = gpm_timestamp_shift(gt);
|
||||
|
||||
|
@ -3608,6 +3595,9 @@ static int guc_resume(struct intel_engine_cs *engine)
|
|||
setup_hwsp(engine);
|
||||
start_engine(engine);
|
||||
|
||||
if (engine->class == RENDER_CLASS)
|
||||
xehp_enable_ccs_engines(engine);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3789,7 +3779,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
|
|||
guc_default_irqs(engine);
|
||||
guc_init_breadcrumbs(engine);
|
||||
|
||||
if (engine->class == RENDER_CLASS)
|
||||
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
|
||||
rcs_submission_override(engine);
|
||||
|
||||
lrc_init_wa_ctx(engine);
|
||||
|
@ -3831,6 +3821,20 @@ static bool __guc_submission_selected(struct intel_guc *guc)
|
|||
|
||||
void intel_guc_submission_init_early(struct intel_guc *guc)
|
||||
{
|
||||
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
|
||||
|
||||
spin_lock_init(&guc->submission_state.lock);
|
||||
INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
|
||||
ida_init(&guc->submission_state.guc_ids);
|
||||
INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
|
||||
INIT_WORK(&guc->submission_state.destroyed_worker,
|
||||
destroyed_worker_func);
|
||||
INIT_WORK(&guc->submission_state.reset_fail_worker,
|
||||
reset_fail_worker_func);
|
||||
|
||||
spin_lock_init(&guc->timestamp.lock);
|
||||
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
|
||||
|
||||
guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
|
||||
guc->submission_supported = __guc_submission_supported(guc);
|
||||
guc->submission_selected = __guc_submission_selected(guc);
|
||||
|
@@ -4022,10 +4026,9 @@ static void guc_handle_context_reset(struct intel_guc *guc,
		capture_error_state(guc, ce);
		guc_context_replay(ce);
	} else {
		drm_err(&guc_to_gt(guc)->i915->drm,
			"Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d",
			ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce),
			context_blocked(ce));
		drm_info(&guc_to_gt(guc)->i915->drm,
			 "Ignoring context reset notification of banned context 0x%04X on %s",
			 ce->guc_id.id, ce->engine->name);
	}
}

@@ -154,6 +154,10 @@ static int intel_guc_multi_lrc_basic(void *arg)
	int ret;

	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
		/* We don't support breadcrumb handshake on these classes */
		if (class == COMPUTE_CLASS || class == RENDER_CLASS)
			continue;

		ret = __intel_guc_multi_lrc_basic(gt, class);
		if (ret)
			return ret;

@ -835,21 +835,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5)
|
||||
i915->drm.driver_features &= ~DRIVER_ATOMIC;
|
||||
|
||||
/*
|
||||
* Check if we support fake LMEM -- for now we only unleash this for
|
||||
* the live selftests(test-and-exit).
|
||||
*/
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
if (IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) {
|
||||
if (GRAPHICS_VER(i915) >= 9 && i915_selftest.live < 0 &&
|
||||
i915->params.fake_lmem_start) {
|
||||
mkwrite_device_info(i915)->memory_regions =
|
||||
REGION_SMEM | REGION_LMEM | REGION_STOLEN_SMEM;
|
||||
GEM_BUG_ON(!HAS_LMEM(i915));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = pci_enable_device(pdev);
|
||||
if (ret)
|
||||
goto out_fini;
|
||||
|
|
|
@ -1147,11 +1147,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
|||
#define IS_ICL_WITH_PORT_F(dev_priv) \
|
||||
IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF)
|
||||
|
||||
#define IS_TGL_U(dev_priv) \
|
||||
IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULT)
|
||||
|
||||
#define IS_TGL_Y(dev_priv) \
|
||||
IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX)
|
||||
#define IS_TGL_UY(dev_priv) \
|
||||
IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_UY)
|
||||
|
||||
#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until))
|
||||
|
||||
|
@ -1170,11 +1167,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
|||
IS_DISPLAY_STEP(__i915, since, until))
|
||||
|
||||
#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \
|
||||
((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
|
||||
(IS_TGL_UY(__i915) && \
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \
|
||||
(IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
|
||||
(IS_TIGERLAKE(__i915) && !IS_TGL_UY(__i915)) && \
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_RKL_DISPLAY_STEP(p, since, until) \
|
||||
|
@ -1244,6 +1241,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
|||
ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
|
||||
#define VEBOX_MASK(gt) \
|
||||
ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS)
|
||||
#define CCS_MASK(gt) \
|
||||
ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS)
|
||||
|
||||
/*
|
||||
* The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
|
||||
|
@ -1340,17 +1339,28 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
|||
|
||||
/*
|
||||
* Set this flag, when platform requires 64K GTT page sizes or larger for
|
||||
* device local memory access. Also this flag implies that we require or
|
||||
* at least support the compact PT layout for the ppGTT when using the 64K
|
||||
* GTT pages.
|
||||
* device local memory access.
|
||||
*/
|
||||
#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
|
||||
|
||||
/*
|
||||
* Set this flag when platform doesn't allow both 64k pages and 4k pages in
|
||||
* the same PT. this flag means we need to support compact PT layout for the
|
||||
* ppGTT when using the 64K GTT pages.
|
||||
*/
|
||||
#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
|
||||
|
||||
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
|
||||
|
||||
#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
|
||||
#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM)
|
||||
|
||||
/*
|
||||
* Platform has the dedicated compression control state for each lmem surfaces
|
||||
* stored in lmem to support the 3D and media compression formats.
|
||||
*/
|
||||
#define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs)
|
||||
|
||||
#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc)
|
||||
|
||||
#define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu)
|
||||
|
|
|
@ -195,11 +195,6 @@ i915_param_named(enable_gvt, bool, 0400,
|
|||
"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)
|
||||
i915_param_named_unsafe(fake_lmem_start, ulong, 0400,
|
||||
"Fake LMEM start offset (default: 0)");
|
||||
#endif
|
||||
|
||||
#if CONFIG_DRM_I915_REQUEST_TIMEOUT
|
||||
i915_param_named_unsafe(request_timeout_ms, uint, 0600,
|
||||
"Default request/fence/batch buffer expiration timeout.");
|
||||
|
|
|
@ -72,7 +72,6 @@ struct drm_printer;
|
|||
param(int, fastboot, -1, 0600) \
|
||||
param(int, enable_dpcd_backlight, -1, 0600) \
|
||||
param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
|
||||
param(unsigned long, fake_lmem_start, 0, IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) ? 0400 : 0) \
|
||||
param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
|
||||
/* leave bools at the end to not create holes */ \
|
||||
param(bool, enable_hangcheck, true, 0600) \
|
||||
|
|
|
@ -1005,6 +1005,7 @@ static const struct intel_device_info adl_p_info = {
|
|||
XE_HP_PAGE_SIZES, \
|
||||
.dma_mask_size = 46, \
|
||||
.has_64bit_reloc = 1, \
|
||||
.has_flat_ccs = 1, \
|
||||
.has_global_mocs = 1, \
|
||||
.has_gt_uc = 1, \
|
||||
.has_llc = 1, \
|
||||
|
@ -1030,6 +1031,7 @@ static const struct intel_device_info xehpsdv_info = {
|
|||
PLATFORM(INTEL_XEHPSDV),
|
||||
.display = { },
|
||||
.has_64k_pages = 1,
|
||||
.needs_compact_pt = 1,
|
||||
.platform_engine_mask =
|
||||
BIT(RCS0) | BIT(BCS0) |
|
||||
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
|
||||
|
@ -1049,6 +1051,7 @@ static const struct intel_device_info dg2_info = {
|
|||
PLATFORM(INTEL_DG2),
|
||||
.has_guc_deprivilege = 1,
|
||||
.has_64k_pages = 1,
|
||||
.needs_compact_pt = 1,
|
||||
.platform_engine_mask =
|
||||
BIT(RCS0) | BIT(BCS0) |
|
||||
BIT(VECS0) | BIT(VECS1) |
|
||||
|
|
|
@ -4374,6 +4374,10 @@ void i915_perf_init(struct drm_i915_private *i915)
|
|||
|
||||
/* XXX const struct i915_perf_ops! */
|
||||
|
||||
/* i915_perf is not enabled for DG2 yet */
|
||||
if (IS_DG2(i915))
|
||||
return;
|
||||
|
||||
perf->oa_formats = oa_formats;
|
||||
if (IS_HASWELL(i915)) {
|
||||
perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
|
||||
|
|
|
@ -971,6 +971,10 @@
|
|||
#define GEN11_VEBOX2_RING_BASE 0x1d8000
|
||||
#define XEHP_VEBOX3_RING_BASE 0x1e8000
|
||||
#define XEHP_VEBOX4_RING_BASE 0x1f8000
|
||||
#define GEN12_COMPUTE0_RING_BASE 0x1a000
|
||||
#define GEN12_COMPUTE1_RING_BASE 0x1c000
|
||||
#define GEN12_COMPUTE2_RING_BASE 0x1e000
|
||||
#define GEN12_COMPUTE3_RING_BASE 0x26000
|
||||
#define BLT_RING_BASE 0x22000
|
||||
|
||||
|
||||
|
@ -8460,6 +8464,20 @@ enum skl_power_gate {
|
|||
#define SGGI_DIS REG_BIT(15)
|
||||
#define SGR_DIS REG_BIT(13)
|
||||
|
||||
#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
|
||||
#define XEHPSDV_CCS_BASE_SHIFT 8
|
||||
|
||||
/* gamt regs */
|
||||
#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
|
||||
#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */
|
||||
#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV 0x5FF101FF /* max/min for LRA1/2 */
|
||||
#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL 0x67F1427F /* " " */
|
||||
#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT 0x5FF101FF /* " " */
|
||||
|
||||
#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */
|
||||
#define MMCD_PCLA (1 << 31)
|
||||
#define MMCD_HOTSPOT_EN (1 << 27)
|
||||
|
||||
#define _ICL_PHY_MISC_A 0x64C00
|
||||
#define _ICL_PHY_MISC_B 0x64C04
|
||||
#define _DG2_PHY_MISC_TC1 0x64C14 /* TC1="PHY E" but offset as if "PHY F" */
|
||||
|
@ -8823,12 +8841,6 @@ enum skl_power_gate {
|
|||
#define DSB_ENABLE (1 << 31)
|
||||
#define DSB_STATUS (1 << 0)
|
||||
|
||||
#define TGL_ROOT_DEVICE_ID 0x9A00
|
||||
#define TGL_ROOT_DEVICE_MASK 0xFF00
|
||||
#define TGL_ROOT_DEVICE_SKU_MASK 0xF
|
||||
#define TGL_ROOT_DEVICE_SKU_ULX 0x2
|
||||
#define TGL_ROOT_DEVICE_SKU_ULT 0x4
|
||||
|
||||
#define CLKREQ_POLICY _MMIO(0x101038)
|
||||
#define CLKREQ_POLICY_MEM_UP_OVRD REG_BIT(1)
|
||||
|
||||
|
|
|
@ -19,6 +19,9 @@ struct i915_ttm_buddy_manager {
|
|||
struct drm_buddy mm;
|
||||
struct list_head reserved;
|
||||
struct mutex lock;
|
||||
unsigned long visible_size;
|
||||
unsigned long visible_avail;
|
||||
unsigned long visible_reserved;
|
||||
u64 default_page_size;
|
||||
};
|
||||
|
||||
|
@ -79,7 +82,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
|
|||
lpfn = pages;
|
||||
}
|
||||
|
||||
if (size > mm->size) {
|
||||
if (size > lpfn << PAGE_SHIFT) {
|
||||
err = -E2BIG;
|
||||
goto err_free_res;
|
||||
}
|
||||
|
@ -87,6 +90,12 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
|
|||
n_pages = size >> ilog2(mm->chunk_size);
|
||||
|
||||
mutex_lock(&bman->lock);
|
||||
if (lpfn <= bman->visible_size && n_pages > bman->visible_avail) {
|
||||
mutex_unlock(&bman->lock);
|
||||
err = -ENOSPC;
|
||||
goto err_free_res;
|
||||
}
|
||||
|
||||
err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
|
||||
(u64)lpfn << PAGE_SHIFT,
|
||||
(u64)n_pages << PAGE_SHIFT,
|
||||
|
@ -107,6 +116,38 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
|
|||
mutex_unlock(&bman->lock);
|
||||
}
|
||||
|
||||
if (lpfn <= bman->visible_size) {
|
||||
bman_res->used_visible_size = bman_res->base.num_pages;
|
||||
} else {
|
||||
struct drm_buddy_block *block;
|
||||
|
||||
list_for_each_entry(block, &bman_res->blocks, link) {
|
||||
unsigned long start =
|
||||
drm_buddy_block_offset(block) >> PAGE_SHIFT;
|
||||
|
||||
if (start < bman->visible_size) {
|
||||
unsigned long end = start +
|
||||
(drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
|
||||
|
||||
bman_res->used_visible_size +=
|
||||
min(end, bman->visible_size) - start;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bman_res->used_visible_size) {
|
||||
mutex_lock(&bman->lock);
|
||||
bman->visible_avail -= bman_res->used_visible_size;
|
||||
mutex_unlock(&bman->lock);
|
||||
}
|
||||
|
||||
if (place->lpfn - place->fpfn == n_pages)
|
||||
bman_res->base.start = place->fpfn;
|
||||
else if (lpfn <= bman->visible_size)
|
||||
bman_res->base.start = 0;
|
||||
else
|
||||
bman_res->base.start = bman->visible_size;
|
||||
|
||||
*res = &bman_res->base;
|
||||
return 0;
|
||||
|
||||
|
@ -128,6 +169,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
|
|||
|
||||
mutex_lock(&bman->lock);
|
||||
drm_buddy_free_list(&bman->mm, &bman_res->blocks);
|
||||
bman->visible_avail += bman_res->used_visible_size;
|
||||
mutex_unlock(&bman->lock);
|
||||
|
||||
ttm_resource_fini(man, res);
|
||||
|
@ -143,6 +185,12 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man,
|
|||
mutex_lock(&bman->lock);
|
||||
drm_printf(printer, "default_page_size: %lluKiB\n",
|
||||
bman->default_page_size >> 10);
|
||||
drm_printf(printer, "visible_avail: %lluMiB\n",
|
||||
(u64)bman->visible_avail << PAGE_SHIFT >> 20);
|
||||
drm_printf(printer, "visible_size: %lluMiB\n",
|
||||
(u64)bman->visible_size << PAGE_SHIFT >> 20);
|
||||
drm_printf(printer, "visible_reserved: %lluMiB\n",
|
||||
(u64)bman->visible_reserved << PAGE_SHIFT >> 20);
|
||||
|
||||
drm_buddy_print(&bman->mm, printer);
|
||||
|
||||
|
@ -164,6 +212,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
|
|||
* @type: Memory type we want to manage
|
||||
* @use_tt: Set use_tt for the manager
|
||||
* @size: The size in bytes to manage
|
||||
* @visible_size: The CPU visible size in bytes to manage
|
||||
* @default_page_size: The default minimum page size in bytes for allocations,
|
||||
* this must be at least as large as @chunk_size, and can be overridden by
|
||||
* setting the BO page_alignment, to be larger or smaller as needed.
|
||||
|
@ -187,7 +236,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
|
|||
*/
|
||||
int i915_ttm_buddy_man_init(struct ttm_device *bdev,
|
||||
unsigned int type, bool use_tt,
|
||||
u64 size, u64 default_page_size,
|
||||
u64 size, u64 visible_size, u64 default_page_size,
|
||||
u64 chunk_size)
|
||||
{
|
||||
struct ttm_resource_manager *man;
|
||||
|
@ -206,6 +255,8 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev,
|
|||
INIT_LIST_HEAD(&bman->reserved);
|
||||
GEM_BUG_ON(default_page_size < chunk_size);
|
||||
bman->default_page_size = default_page_size;
|
||||
bman->visible_size = visible_size >> PAGE_SHIFT;
|
||||
bman->visible_avail = bman->visible_size;
|
||||
|
||||
man = &bman->manager;
|
||||
man->use_tt = use_tt;
|
||||
|
@ -250,6 +301,8 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
|
|||
mutex_lock(&bman->lock);
|
||||
drm_buddy_free_list(mm, &bman->reserved);
|
||||
drm_buddy_fini(mm);
|
||||
bman->visible_avail += bman->visible_reserved;
|
||||
WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
|
||||
mutex_unlock(&bman->lock);
|
||||
|
||||
ttm_resource_manager_cleanup(man);
|
||||
|
@ -273,6 +326,7 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
|
|||
{
|
||||
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
|
||||
struct drm_buddy *mm = &bman->mm;
|
||||
unsigned long fpfn = start >> PAGE_SHIFT;
|
||||
unsigned long flags = 0;
|
||||
int ret;
|
||||
|
||||
|
@ -284,8 +338,37 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
|
|||
size, mm->chunk_size,
|
||||
&bman->reserved,
|
||||
flags);
|
||||
|
||||
if (fpfn < bman->visible_size) {
|
||||
unsigned long lpfn = fpfn + (size >> PAGE_SHIFT);
|
||||
unsigned long visible = min(lpfn, bman->visible_size) - fpfn;
|
||||
|
||||
bman->visible_reserved += visible;
|
||||
bman->visible_avail -= visible;
|
||||
}
|
||||
mutex_unlock(&bman->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_buddy_man_visible_size - Return the size of the CPU visible portion
|
||||
* in pages.
|
||||
* @man: The buddy allocator ttm manager
|
||||
*/
|
||||
u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man)
|
||||
{
|
||||
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
|
||||
|
||||
return bman->visible_size;
|
||||
}
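The used_visible_size accounting earlier in this file boils down to clamping each buddy block against the CPU-visible window returned by the helper above. A stripped-down sketch of that clamp (illustrative helper, not driver code):

	/* Illustrative: pages of [start, end) that fall inside the visible window. */
	static unsigned long example_visible_pages(unsigned long start, unsigned long end,
						   unsigned long visible_size)
	{
		if (start >= visible_size)
			return 0;

		return min(end, visible_size) - start;
	}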
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man,
|
||||
u64 size)
|
||||
{
|
||||
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
|
||||
|
||||
bman->visible_size = size;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -21,6 +21,8 @@ struct drm_buddy;
|
|||
* @base: struct ttm_resource base class we extend
|
||||
* @blocks: the list of struct i915_buddy_block for this resource/allocation
|
||||
* @flags: DRM_BUDDY_*_ALLOCATION flags
|
||||
* @used_visible_size: How much of this resource, if any, uses the CPU visible
|
||||
* portion, in pages.
|
||||
* @mm: the struct i915_buddy_mm for this resource
|
||||
*
|
||||
* Extends the struct ttm_resource to manage an address space allocation with
|
||||
|
@ -30,6 +32,7 @@ struct i915_ttm_buddy_resource {
|
|||
struct ttm_resource base;
|
||||
struct list_head blocks;
|
||||
unsigned long flags;
|
||||
unsigned long used_visible_size;
|
||||
struct drm_buddy *mm;
|
||||
};
|
||||
|
||||
|
@ -48,11 +51,19 @@ to_ttm_buddy_resource(struct ttm_resource *res)
|
|||
|
||||
int i915_ttm_buddy_man_init(struct ttm_device *bdev,
|
||||
unsigned type, bool use_tt,
|
||||
u64 size, u64 default_page_size, u64 chunk_size);
|
||||
u64 size, u64 visible_size,
|
||||
u64 default_page_size, u64 chunk_size);
|
||||
int i915_ttm_buddy_man_fini(struct ttm_device *bdev,
|
||||
unsigned int type);
|
||||
|
||||
int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
|
||||
u64 start, u64 size);
|
||||
|
||||
u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man,
|
||||
u64 size);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -540,6 +540,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
|
|||
void __iomem *ptr;
|
||||
int err;
|
||||
|
||||
if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
|
||||
return IO_ERR_PTR(-EINVAL);
|
||||
|
||||
if (!i915_gem_object_is_lmem(vma->obj)) {
|
||||
if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
|
||||
err = -ENODEV;
|
||||
|
@ -757,6 +760,14 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
|||
end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
|
||||
GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
|
||||
|
||||
alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
|
||||
/*
|
||||
* for compact-pt we round up the reservation to prevent
|
||||
* any smaller pages being used within the same PDE
|
||||
*/
|
||||
if (NEEDS_COMPACT_PT(vma->vm->i915))
|
||||
size = round_up(size, alignment);
|
||||
|
||||
/* If binding the object/GGTT view requires more space than the entire
|
||||
* aperture has, reject it early before evicting everything in a vain
|
||||
* attempt to find space.
|
||||
|
@ -769,6 +780,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
|||
}
|
||||
|
||||
color = 0;
|
||||
|
||||
if (i915_vm_has_cache_coloring(vma->vm))
|
||||
color = vma->obj->cache_level;
|
||||
|
||||
|
@ -1609,15 +1621,27 @@ void i915_vma_reopen(struct i915_vma *vma)
|
|||
void i915_vma_release(struct kref *ref)
|
||||
{
|
||||
struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
|
||||
|
||||
i915_vm_put(vma->vm);
|
||||
i915_active_fini(&vma->active);
|
||||
GEM_WARN_ON(vma->resource);
|
||||
i915_vma_free(vma);
|
||||
}
|
||||
|
||||
static void force_unbind(struct i915_vma *vma)
|
||||
{
|
||||
if (!drm_mm_node_allocated(&vma->node))
|
||||
return;
|
||||
|
||||
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
|
||||
WARN_ON(__i915_vma_unbind(vma));
|
||||
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
|
||||
}
|
||||
|
||||
static void release_references(struct i915_vma *vma)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
|
||||
if (drm_mm_node_allocated(&vma->node)) {
|
||||
mutex_lock(&vma->vm->mutex);
|
||||
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
|
||||
WARN_ON(__i915_vma_unbind(vma));
|
||||
mutex_unlock(&vma->vm->mutex);
|
||||
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
|
||||
}
|
||||
GEM_BUG_ON(i915_vma_is_active(vma));
|
||||
|
||||
spin_lock(&obj->vma.lock);
|
||||
|
@ -1627,11 +1651,49 @@ void i915_vma_release(struct kref *ref)
|
|||
spin_unlock(&obj->vma.lock);
|
||||
|
||||
__i915_vma_remove_closed(vma);
|
||||
i915_vm_put(vma->vm);
|
||||
|
||||
i915_active_fini(&vma->active);
|
||||
GEM_WARN_ON(vma->resource);
|
||||
i915_vma_free(vma);
|
||||
__i915_vma_put(vma);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_destroy_locked - Remove all weak reference to the vma and put
|
||||
* the initial reference.
|
||||
*
|
||||
* This function should be called when it's decided the vma isn't needed
|
||||
* anymore. The caller must assure that it doesn't race with another lookup
|
||||
* plus destroy, typically by taking an appropriate reference.
|
||||
*
|
||||
* Current callsites are
|
||||
* - __i915_gem_object_pages_fini()
|
||||
* - __i915_vm_close() - Blocks the above function by taking a reference on
|
||||
* the object.
|
||||
* - __i915_vma_parked() - Blocks the above functions by taking an open-count on
|
||||
* the vm and a reference on the object.
|
||||
*
|
||||
* Because of locks taken during destruction, a vma is also guaranteed to
|
||||
* stay alive while the following locks are held if it was looked up while
|
||||
* holding one of the locks:
|
||||
* - vm->mutex
|
||||
* - obj->vma.lock
|
||||
* - gt->closed_lock
|
||||
*
|
||||
* A vma user can also temporarily keep the vma alive while holding a vma
|
||||
* reference.
|
||||
*/
|
||||
void i915_vma_destroy_locked(struct i915_vma *vma)
|
||||
{
|
||||
lockdep_assert_held(&vma->vm->mutex);
|
||||
|
||||
force_unbind(vma);
|
||||
release_references(vma);
|
||||
}
|
||||
|
||||
void i915_vma_destroy(struct i915_vma *vma)
|
||||
{
|
||||
mutex_lock(&vma->vm->mutex);
|
||||
force_unbind(vma);
|
||||
mutex_unlock(&vma->vm->mutex);
|
||||
release_references(vma);
|
||||
}
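i915_vma_destroy_locked() and i915_vma_destroy() differ only in who takes vm->mutex around the forced unbind; the kerneldoc above spells out the lifetime guarantees callers rely on. A hedged caller-side sketch:

	/* Illustrative: pick the flavour that matches the locking context. */
	static void example_drop_vma(struct i915_vma *vma, bool vm_mutex_held)
	{
		if (vm_mutex_held)
			i915_vma_destroy_locked(vma);	/* vm->mutex already held */
		else
			i915_vma_destroy(vma);		/* takes vm->mutex internally */
	}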
|
||||
|
||||
void i915_vma_parked(struct intel_gt *gt)
|
||||
|
@ -1665,7 +1727,7 @@ void i915_vma_parked(struct intel_gt *gt)
|
|||
|
||||
if (i915_gem_object_trylock(obj, NULL)) {
|
||||
INIT_LIST_HEAD(&vma->closed_link);
|
||||
__i915_vma_put(vma);
|
||||
i915_vma_destroy(vma);
|
||||
i915_gem_object_unlock(obj);
|
||||
} else {
|
||||
/* back you go.. */
|
||||
|
|
|
@@ -236,6 +236,9 @@ static inline void __i915_vma_put(struct i915_vma *vma)
	kref_put(&vma->ref, i915_vma_release);
}

void i915_vma_destroy_locked(struct i915_vma *vma);
void i915_vma_destroy(struct i915_vma *vma);

#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)

static inline void i915_vma_lock(struct i915_vma *vma)

@ -170,6 +170,10 @@ static const u16 subplatform_portf_ids[] = {
|
|||
INTEL_ICL_PORT_F_IDS(0),
|
||||
};
|
||||
|
||||
static const u16 subplatform_uy_ids[] = {
|
||||
INTEL_TGL_12_GT2_IDS(0),
|
||||
};
|
||||
|
||||
static const u16 subplatform_n_ids[] = {
|
||||
INTEL_ADLN_IDS(0),
|
||||
};
|
||||
|
@ -214,6 +218,9 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
|
|||
} else if (find_devid(devid, subplatform_portf_ids,
|
||||
ARRAY_SIZE(subplatform_portf_ids))) {
|
||||
mask = BIT(INTEL_SUBPLATFORM_PORTF);
|
||||
} else if (find_devid(devid, subplatform_uy_ids,
|
||||
ARRAY_SIZE(subplatform_uy_ids))) {
|
||||
mask = BIT(INTEL_SUBPLATFORM_UY);
|
||||
} else if (find_devid(devid, subplatform_n_ids,
|
||||
ARRAY_SIZE(subplatform_n_ids))) {
|
||||
mask = BIT(INTEL_SUBPLATFORM_N);
|
||||
|
@ -222,25 +229,6 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
|
|||
mask = BIT(INTEL_SUBPLATFORM_RPL_S);
|
||||
}
|
||||
|
||||
if (IS_TIGERLAKE(i915)) {
|
||||
struct pci_dev *root, *pdev = to_pci_dev(i915->drm.dev);
|
||||
|
||||
root = list_first_entry(&pdev->bus->devices, typeof(*root), bus_list);
|
||||
|
||||
drm_WARN_ON(&i915->drm, mask);
|
||||
drm_WARN_ON(&i915->drm, (root->device & TGL_ROOT_DEVICE_MASK) !=
|
||||
TGL_ROOT_DEVICE_ID);
|
||||
|
||||
switch (root->device & TGL_ROOT_DEVICE_SKU_MASK) {
|
||||
case TGL_ROOT_DEVICE_SKU_ULX:
|
||||
mask = BIT(INTEL_SUBPLATFORM_ULX);
|
||||
break;
|
||||
case TGL_ROOT_DEVICE_SKU_ULT:
|
||||
mask = BIT(INTEL_SUBPLATFORM_ULT);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK);
|
||||
|
||||
RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
|
||||
|
|
|
@ -106,6 +106,9 @@ enum intel_platform {
|
|||
/* ICL */
|
||||
#define INTEL_SUBPLATFORM_PORTF (0)
|
||||
|
||||
/* TGL */
|
||||
#define INTEL_SUBPLATFORM_UY (0)
|
||||
|
||||
/* DG2 */
|
||||
#define INTEL_SUBPLATFORM_G10 0
|
||||
#define INTEL_SUBPLATFORM_G11 1
|
||||
|
@ -131,8 +134,10 @@ enum intel_ppgtt_type {
|
|||
/* Keep has_* in alphabetical order */ \
|
||||
func(has_64bit_reloc); \
|
||||
func(has_64k_pages); \
|
||||
func(needs_compact_pt); \
|
||||
func(gpu_reset_clobbers_display); \
|
||||
func(has_reset_engine); \
|
||||
func(has_flat_ccs); \
|
||||
func(has_global_mocs); \
|
||||
func(has_gt_uc); \
|
||||
func(has_guc_deprivilege); \
|
||||
|
|
|
@ -97,10 +97,14 @@ static int iomemtest(struct intel_memory_region *mem,
|
|||
bool test_all,
|
||||
const void *caller)
|
||||
{
|
||||
resource_size_t last = resource_size(&mem->region) - PAGE_SIZE;
|
||||
resource_size_t page;
|
||||
resource_size_t last, page;
|
||||
int err;
|
||||
|
||||
if (mem->io_size < PAGE_SIZE)
|
||||
return 0;
|
||||
|
||||
last = mem->io_size - PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Quick test to check read/write access to the iomap (backing store).
|
||||
*
|
||||
|
@ -217,6 +221,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
|
|||
resource_size_t size,
|
||||
resource_size_t min_page_size,
|
||||
resource_size_t io_start,
|
||||
resource_size_t io_size,
|
||||
u16 type,
|
||||
u16 instance,
|
||||
const struct intel_memory_region_ops *ops)
|
||||
|
@ -231,6 +236,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
|
|||
mem->i915 = i915;
|
||||
mem->region = (struct resource)DEFINE_RES_MEM(start, size);
|
||||
mem->io_start = io_start;
|
||||
mem->io_size = io_size;
|
||||
mem->min_page_size = min_page_size;
|
||||
mem->ops = ops;
|
||||
mem->total = size;
|
||||
|
|
|
@ -67,10 +67,8 @@ struct intel_memory_region {
|
|||
struct io_mapping iomap;
|
||||
struct resource region;
|
||||
|
||||
/* For fake LMEM */
|
||||
struct drm_mm_node fake_mappable;
|
||||
|
||||
resource_size_t io_start;
|
||||
resource_size_t io_size;
|
||||
resource_size_t min_page_size;
|
||||
resource_size_t total;
|
||||
resource_size_t avail;
|
||||
|
@ -81,8 +79,6 @@ struct intel_memory_region {
|
|||
char name[16];
|
||||
bool private; /* not for userspace */
|
||||
|
||||
dma_addr_t remap_addr;
|
||||
|
||||
struct {
|
||||
struct mutex lock; /* Protects access to objects */
|
||||
struct list_head list;
|
||||
|
@ -103,6 +99,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
|
|||
resource_size_t size,
|
||||
resource_size_t min_page_size,
|
||||
resource_size_t io_start,
|
||||
resource_size_t io_size,
|
||||
u16 type,
|
||||
u16 instance,
|
||||
const struct intel_memory_region_ops *ops);
|
||||
|
|
|
@ -87,6 +87,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
|
|||
|
||||
ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
|
||||
resource_size(&mem->region),
|
||||
mem->io_size,
|
||||
mem->min_page_size, PAGE_SIZE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -199,12 +200,25 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
|
|||
struct ttm_resource *res;
|
||||
int ret;
|
||||
|
||||
if (flags & I915_BO_ALLOC_CONTIGUOUS)
|
||||
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
|
||||
if (mem->io_size && mem->io_size < mem->total) {
|
||||
if (flags & I915_BO_ALLOC_GPU_ONLY) {
|
||||
place.flags |= TTM_PL_FLAG_TOPDOWN;
|
||||
} else {
|
||||
place.fpfn = 0;
|
||||
place.lpfn = mem->io_size >> PAGE_SHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
mock_bo.base.size = size;
|
||||
place.flags = flags;
|
||||
mock_bo.bdev = &mem->i915->bdev;
|
||||
|
||||
ret = man->func->alloc(man, &mock_bo, &place, &res);
|
||||
if (ret == -ENOSPC)
|
||||
ret = -ENXIO;
|
||||
if (!ret)
|
||||
res->bo = NULL; /* Rather blow up, then some uaf */
|
||||
return ret ? ERR_PTR(ret) : res;
|
||||
}
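For a region whose CPU-visible window (io_size) is smaller than its total size, the allocation above either confines the buffer to that window or, for GPU-only buffers, pushes it topdown so the window stays free for objects that must be mappable. A compressed sketch of that decision (illustrative, assumes a struct ttm_place already initialised to defaults):

	/* Illustrative: constrain placement for a small-BAR style region. */
	static void example_pick_placement(struct ttm_place *place, u64 io_size, u64 total,
					   bool gpu_only)
	{
		if (!io_size || io_size >= total)
			return;	/* whole region is CPU visible, nothing to do */

		if (gpu_only)
			place->flags |= TTM_PL_FLAG_TOPDOWN;	/* keep visible window free */
		else
			place->lpfn = io_size >> PAGE_SHIFT;	/* must be CPU mappable */
	}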
|
||||
|
||||
|
@ -219,6 +233,11 @@ void intel_region_ttm_resource_free(struct intel_memory_region *mem,
|
|||
struct ttm_resource *res)
|
||||
{
|
||||
struct ttm_resource_manager *man = mem->region_private;
|
||||
struct ttm_buffer_object mock_bo = {};
|
||||
|
||||
mock_bo.base.size = res->num_pages << PAGE_SHIFT;
|
||||
mock_bo.bdev = &mem->i915->bdev;
|
||||
res->bo = &mock_bo;
|
||||
|
||||
man->func->free(man, res);
|
||||
}
|
||||
|
|
|
@@ -165,7 +165,7 @@ void intel_step_init(struct drm_i915_private *i915)
	} else if (IS_ROCKETLAKE(i915)) {
		revids = rkl_revids;
		size = ARRAY_SIZE(rkl_revids);
	} else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) {
	} else if (IS_TGL_UY(i915)) {
		revids = tgl_uy_revids;
		size = ARRAY_SIZE(tgl_uy_revids);
	} else if (IS_TIGERLAKE(i915)) {

@@ -27,9 +27,11 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
+#include "gt/intel_gtt.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
@@ -239,6 +241,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
 			 u64 hole_start, u64 hole_end,
 			 unsigned long end_time)
 {
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
 	I915_RND_STATE(seed_prng);
 	struct i915_vma_resource *mock_vma_res;
 	unsigned int size;
@@ -252,9 +256,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		I915_RND_SUBSTATE(prng, seed_prng);
 		struct drm_i915_gem_object *obj;
 		unsigned int *order, count, n;
-		u64 hole_size;
+		u64 hole_size, aligned_size;
 
-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
 		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
 			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
 		count = hole_size >> 1;
@@ -275,8 +280,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		}
 		GEM_BUG_ON(!order);
 
-		GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
-		GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end);
+		GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
+		GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end);
 
 		/* Ignore allocation failures (i.e. don't report them as
 		 * a test failure) as we are purposefully allocating very
@@ -299,10 +304,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		}
 
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 			intel_wakeref_t wakeref;
 
-			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
+			GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);
 
 			if (igt_timeout(end_time,
 					"%s timed out before %d/%d\n",
@@ -345,7 +350,7 @@ alloc_vm_end:
 			}
 
 			mock_vma_res->bi.pages = obj->mm.pages;
-			mock_vma_res->node_size = BIT_ULL(size);
+			mock_vma_res->node_size = BIT_ULL(aligned_size);
 			mock_vma_res->start = addr;
 
 			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
@@ -356,7 +361,7 @@ alloc_vm_end:
 
 		i915_random_reorder(order, count, &prng);
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 			intel_wakeref_t wakeref;
 
 			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
@@ -400,8 +405,10 @@ static int fill_hole(struct i915_address_space *vm,
 {
 	const u64 hole_size = hole_end - hole_start;
 	struct drm_i915_gem_object *obj;
+	const unsigned int min_alignment =
+		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
 	const unsigned long max_pages =
-		min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT);
+		min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment));
 	const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
 	unsigned long npages, prime, flags;
 	struct i915_vma *vma;
@@ -442,14 +449,17 @@ static int fill_hole(struct i915_address_space *vm,
 
 			offset = p->offset;
 			list_for_each_entry(obj, &objects, st_link) {
+				u64 aligned_size = round_up(obj->base.size,
+							    min_alignment);
+
 				vma = i915_vma_instance(obj, vm, NULL);
 				if (IS_ERR(vma))
 					continue;
 
 				if (p->step < 0) {
-					if (offset < hole_start + obj->base.size)
+					if (offset < hole_start + aligned_size)
 						break;
-					offset -= obj->base.size;
+					offset -= aligned_size;
 				}
 
 				err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -471,22 +481,25 @@ static int fill_hole(struct i915_address_space *vm,
 				i915_vma_unpin(vma);
 
 				if (p->step > 0) {
-					if (offset + obj->base.size > hole_end)
+					if (offset + aligned_size > hole_end)
 						break;
-					offset += obj->base.size;
+					offset += aligned_size;
 				}
 			}
 
 			offset = p->offset;
 			list_for_each_entry(obj, &objects, st_link) {
+				u64 aligned_size = round_up(obj->base.size,
+							    min_alignment);
+
 				vma = i915_vma_instance(obj, vm, NULL);
 				if (IS_ERR(vma))
 					continue;
 
 				if (p->step < 0) {
-					if (offset < hole_start + obj->base.size)
+					if (offset < hole_start + aligned_size)
 						break;
-					offset -= obj->base.size;
+					offset -= aligned_size;
 				}
 
 				if (!drm_mm_node_allocated(&vma->node) ||
@@ -507,22 +520,25 @@ static int fill_hole(struct i915_address_space *vm,
 				}
 
 				if (p->step > 0) {
-					if (offset + obj->base.size > hole_end)
+					if (offset + aligned_size > hole_end)
 						break;
-					offset += obj->base.size;
+					offset += aligned_size;
 				}
 			}
 
 			offset = p->offset;
 			list_for_each_entry_reverse(obj, &objects, st_link) {
+				u64 aligned_size = round_up(obj->base.size,
+							    min_alignment);
+
 				vma = i915_vma_instance(obj, vm, NULL);
 				if (IS_ERR(vma))
 					continue;
 
 				if (p->step < 0) {
-					if (offset < hole_start + obj->base.size)
+					if (offset < hole_start + aligned_size)
 						break;
-					offset -= obj->base.size;
+					offset -= aligned_size;
 				}
 
 				err = i915_vma_pin(vma, 0, 0, offset | flags);
@@ -544,22 +560,25 @@ static int fill_hole(struct i915_address_space *vm,
 				i915_vma_unpin(vma);
 
 				if (p->step > 0) {
-					if (offset + obj->base.size > hole_end)
+					if (offset + aligned_size > hole_end)
 						break;
-					offset += obj->base.size;
+					offset += aligned_size;
 				}
 			}
 
 			offset = p->offset;
 			list_for_each_entry_reverse(obj, &objects, st_link) {
+				u64 aligned_size = round_up(obj->base.size,
+							    min_alignment);
+
 				vma = i915_vma_instance(obj, vm, NULL);
 				if (IS_ERR(vma))
 					continue;
 
 				if (p->step < 0) {
-					if (offset < hole_start + obj->base.size)
+					if (offset < hole_start + aligned_size)
 						break;
-					offset -= obj->base.size;
+					offset -= aligned_size;
 				}
 
 				if (!drm_mm_node_allocated(&vma->node) ||
@@ -580,9 +599,9 @@ static int fill_hole(struct i915_address_space *vm,
 				}
 
 				if (p->step > 0) {
-					if (offset + obj->base.size > hole_end)
+					if (offset + aligned_size > hole_end)
 						break;
-					offset += obj->base.size;
+					offset += aligned_size;
 				}
 			}
 		}
@@ -612,6 +631,7 @@ static int walk_hole(struct i915_address_space *vm,
 	const u64 hole_size = hole_end - hole_start;
 	const unsigned long max_pages =
 		min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT);
+	unsigned long min_alignment;
 	unsigned long flags;
 	u64 size;
 
@@ -621,6 +641,8 @@ static int walk_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	for_each_prime_number_from(size, 1, max_pages) {
 		struct drm_i915_gem_object *obj;
 		struct i915_vma *vma;
@@ -639,7 +661,7 @@ static int walk_hole(struct i915_address_space *vm,
 
 		for (addr = hole_start;
 		     addr + obj->base.size < hole_end;
-		     addr += obj->base.size) {
+		     addr += round_up(obj->base.size, min_alignment)) {
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
 				pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n",
@@ -691,6 +713,7 @@ static int pot_hole(struct i915_address_space *vm,
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	unsigned int min_alignment;
 	unsigned long flags;
 	unsigned int pot;
 	int err = 0;
@@ -699,6 +722,8 @@ static int pot_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -711,13 +736,13 @@ static int pot_hole(struct i915_address_space *vm,
 
 	/* Insert a pair of pages across every pot boundary within the hole */
 	for (pot = fls64(hole_end - 1) - 1;
-	     pot > ilog2(2 * I915_GTT_PAGE_SIZE);
+	     pot > ilog2(2 * min_alignment);
 	     pot--) {
 		u64 step = BIT_ULL(pot);
 		u64 addr;
 
-		for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
-		     addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE;
+		for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
+		     addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
 		     addr += step) {
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
@@ -762,6 +787,7 @@ static int drunk_hole(struct i915_address_space *vm,
 		      unsigned long end_time)
 {
 	I915_RND_STATE(prng);
+	unsigned int min_alignment;
 	unsigned int size;
 	unsigned long flags;
 
@@ -769,15 +795,18 @@ static int drunk_hole(struct i915_address_space *vm,
 	if (i915_is_ggtt(vm))
 		flags |= PIN_GLOBAL;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (size = 12; (hole_end - hole_start) >> size; size++) {
 		struct drm_i915_gem_object *obj;
 		unsigned int *order, count, n;
 		struct i915_vma *vma;
-		u64 hole_size;
+		u64 hole_size, aligned_size;
 		int err = -ENODEV;
 
-		hole_size = (hole_end - hole_start) >> size;
+		aligned_size = max_t(u32, ilog2(min_alignment), size);
+		hole_size = (hole_end - hole_start) >> aligned_size;
 		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
 			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
 		count = hole_size >> 1;
@@ -817,7 +846,7 @@ static int drunk_hole(struct i915_address_space *vm,
 		GEM_BUG_ON(vma->size != BIT_ULL(size));
 
 		for (n = 0; n < count; n++) {
-			u64 addr = hole_start + order[n] * BIT_ULL(size);
+			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
 
 			err = i915_vma_pin(vma, 0, 0, addr | flags);
 			if (err) {
@@ -869,11 +898,14 @@ static int __shrink_hole(struct i915_address_space *vm,
 {
 	struct drm_i915_gem_object *obj;
 	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	unsigned int min_alignment;
 	unsigned int order = 12;
 	LIST_HEAD(objects);
 	int err = 0;
 	u64 addr;
 
+	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
+
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (addr = hole_start; addr < hole_end; ) {
 		struct i915_vma *vma;
@@ -914,7 +946,7 @@ static int __shrink_hole(struct i915_address_space *vm,
 		}
 
 		i915_vma_unpin(vma);
-		addr += size;
+		addr += round_up(size, min_alignment);
 
 		/*
 		 * Since we are injecting allocation faults at random intervals,
@@ -1038,6 +1070,118 @@ err_purge:
 	return err;
 }
 
+static int misaligned_case(struct i915_address_space *vm, struct intel_memory_region *mr,
+			   u64 addr, u64 size, unsigned long flags)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err = 0;
+	u64 expected_vma_size, expected_node_size;
+	bool is_stolen = mr->type == INTEL_MEMORY_STOLEN_SYSTEM ||
+			 mr->type == INTEL_MEMORY_STOLEN_LOCAL;
+
+	obj = i915_gem_object_create_region(mr, size, 0, 0);
+	if (IS_ERR(obj)) {
+		/* if iGVT-g or DMAR is active, stolen mem will be uninitialized */
+		if (PTR_ERR(obj) == -ENODEV && is_stolen)
+			return 0;
+		return PTR_ERR(obj);
+	}
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_put;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, addr | flags);
+	if (err)
+		goto err_put;
+	i915_vma_unpin(vma);
+
+	if (!drm_mm_node_allocated(&vma->node)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	if (i915_vma_misplaced(vma, 0, 0, addr | flags)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1));
+	expected_node_size = expected_vma_size;
+
+	if (NEEDS_COMPACT_PT(vm->i915) && i915_gem_object_is_lmem(obj)) {
+		/* compact-pt should expand lmem node to 2MB */
+		expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
+		expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+	}
+
+	if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
+		err = i915_vma_unbind_unlocked(vma);
+		err = -EBADSLT;
+		goto err_put;
+	}
+
+	err = i915_vma_unbind_unlocked(vma);
+	if (err)
+		goto err_put;
+
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+err_put:
+	i915_gem_object_put(obj);
+	cleanup_freed_objects(vm->i915);
+	return err;
+}
+
+static int misaligned_pin(struct i915_address_space *vm,
+			  u64 hole_start, u64 hole_end,
+			  unsigned long end_time)
+{
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	int err = 0;
+	u64 hole_size = hole_end - hole_start;
+
+	if (i915_is_ggtt(vm))
+		flags |= PIN_GLOBAL;
+
+	for_each_memory_region(mr, vm->i915, id) {
+		u64 min_alignment = i915_vm_min_alignment(vm, (enum intel_memory_type)id);
+		u64 size = min_alignment;
+		u64 addr = round_down(hole_start + (hole_size / 2), min_alignment);
+
+		/* avoid -ENOSPC on very small hole setups */
+		if (hole_size < 3 * min_alignment)
+			continue;
+
+		/* we can't test < 4k alignment due to flags being encoded in lower bits */
+		if (min_alignment != I915_GTT_PAGE_SIZE_4K) {
+			err = misaligned_case(vm, mr, addr + (min_alignment / 2), size, flags);
+			/* misaligned should error with -EINVAL*/
+			if (!err)
+				err = -EBADSLT;
+			if (err != -EINVAL)
+				return err;
+		}
+
+		/* test for vma->size expansion to min page size */
+		err = misaligned_case(vm, mr, addr, PAGE_SIZE, flags);
+		if (err)
+			return err;
+
+		/* test for intermediate size not expanding vma->size for large alignments */
+		err = misaligned_case(vm, mr, addr, size / 2, flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 			  int (*func)(struct i915_address_space *vm,
 				      u64 hole_start, u64 hole_end,
@@ -1107,6 +1251,11 @@ static int igt_ppgtt_shrink_boom(void *arg)
 	return exercise_ppgtt(arg, shrink_boom);
 }
 
+static int igt_ppgtt_misaligned_pin(void *arg)
+{
+	return exercise_ppgtt(arg, misaligned_pin);
+}
+
 static int sort_holes(void *priv, const struct list_head *A,
 		      const struct list_head *B)
 {
@@ -1179,6 +1328,11 @@ static int igt_ggtt_lowlevel(void *arg)
 	return exercise_ggtt(arg, lowlevel_hole);
 }
 
+static int igt_ggtt_misaligned_pin(void *arg)
+{
+	return exercise_ggtt(arg, misaligned_pin);
+}
+
 static int igt_ggtt_page(void *arg)
 {
 	const unsigned int count = PAGE_SIZE/sizeof(u32);
@@ -2151,12 +2305,14 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_ppgtt_fill),
 		SUBTEST(igt_ppgtt_shrink),
 		SUBTEST(igt_ppgtt_shrink_boom),
+		SUBTEST(igt_ppgtt_misaligned_pin),
 		SUBTEST(igt_ggtt_lowlevel),
 		SUBTEST(igt_ggtt_drunk),
 		SUBTEST(igt_ggtt_walk),
 		SUBTEST(igt_ggtt_pot),
 		SUBTEST(igt_ggtt_fill),
 		SUBTEST(igt_ggtt_page),
+		SUBTEST(igt_ggtt_misaligned_pin),
 		SUBTEST(igt_cs_tlb),
 	};
 
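The GTT selftest changes above all derive their stride from i915_vm_min_alignment(), so that on platforms with a 64K minimum GTT page size every probed offset stays suitably aligned. Below is a small standalone sketch of that rounding; it is not kernel code, and the 64K minimum and the 1GiB hole are assumed values standing in for i915_vm_min_alignment() and the real hole geometry.

/*
 * Illustration only: mirrors the max_t(u32, ilog2(min_alignment), size)
 * step used by lowlevel_hole()/drunk_hole() after this series.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int ilog2_u64(uint64_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	const uint64_t min_alignment = 64 * 1024;	/* assumed DG2-style minimum */
	const uint64_t hole_start = 0, hole_end = 1ull << 30; /* assumed 1GiB hole */
	unsigned int size = 12;				/* log2 of a 4K step */
	unsigned int aligned_size;
	uint64_t count, n;

	/* widen the per-slot shift so every address is min_alignment aligned */
	aligned_size = ilog2_u64(min_alignment) > size ? ilog2_u64(min_alignment) : size;
	count = (hole_end - hole_start) >> aligned_size;

	for (n = 0; n < 4 && n < count; n++)
		printf("slot %llu -> addr %#llx\n",
		       (unsigned long long)n,
		       (unsigned long long)(hole_start + (n << aligned_size)));
	return 0;
}

With these assumed numbers the probed addresses advance in 64K steps (0x0, 0x10000, 0x20000, ...) even though the caller asked for a 4K-sized step.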
@@ -17,6 +17,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_engine_pm.h"
@@ -170,7 +171,7 @@ static int igt_mock_reserve(void *arg)
 	if (!order)
 		return 0;
 
-	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
 	if (IS_ERR(mem)) {
 		pr_err("failed to create memory region\n");
 		err = PTR_ERR(mem);
@@ -383,7 +384,7 @@ static int igt_mock_splintered_region(void *arg)
 	 */
 
 	size = (SZ_4G - 1) & PAGE_MASK;
-	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
+	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
 	if (IS_ERR(mem))
 		return PTR_ERR(mem);
 
@@ -471,7 +472,7 @@ static int igt_mock_max_segment(void *arg)
 	 */
 
 	size = SZ_8G;
-	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
+	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
 	if (IS_ERR(mem))
 		return PTR_ERR(mem);
 
@@ -512,6 +513,147 @@ out_put:
 	return err;
 }
 
+static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mr = obj->mm.region;
+	struct i915_ttm_buddy_resource *bman_res =
+		to_ttm_buddy_resource(obj->mm.res);
+	struct drm_buddy *mm = bman_res->mm;
+	struct drm_buddy_block *block;
+	u64 total;
+
+	total = 0;
+	list_for_each_entry(block, &bman_res->blocks, link) {
+		u64 start = drm_buddy_block_offset(block);
+		u64 end = start + drm_buddy_block_size(mm, block);
+
+		if (start < mr->io_size)
+			total += min_t(u64, end, mr->io_size) - start;
+	}
+
+	return total;
+}
+
+static int igt_mock_io_size(void *arg)
+{
+	struct intel_memory_region *mr = arg;
+	struct drm_i915_private *i915 = mr->i915;
+	struct drm_i915_gem_object *obj;
+	u64 mappable_theft_total;
+	u64 io_size;
+	u64 total;
+	u64 ps;
+	u64 rem;
+	u64 size;
+	I915_RND_STATE(prng);
+	LIST_HEAD(objects);
+	int err = 0;
+
+	ps = SZ_4K;
+	if (i915_prandom_u64_state(&prng) & 1)
+		ps = SZ_64K; /* For something like DG2 */
+
+	div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
+	total = round_down(total, ps);
+	total = max_t(u64, total, SZ_1G);
+
+	div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
+	io_size = round_down(io_size, ps);
+	io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */
+
+	pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
+		__func__, ps, io_size, total);
+
+	mr = mock_region_create(i915, 0, total, ps, 0, io_size);
+	if (IS_ERR(mr)) {
+		err = PTR_ERR(mr);
+		goto out_err;
+	}
+
+	mappable_theft_total = 0;
+	rem = total - io_size;
+	do {
+		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
+		size = round_down(size, ps);
+		size = max(size, ps);
+
+		obj = igt_object_create(mr, &objects, size,
+					I915_BO_ALLOC_GPU_ONLY);
+		if (IS_ERR(obj)) {
+			pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
+			       __func__, rem, size);
+			err = PTR_ERR(obj);
+			goto out_close;
+		}
+
+		mappable_theft_total += igt_object_mappable_total(obj);
+		rem -= size;
+	} while (rem);
+
+	pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
+		__func__,
+		(u64)mappable_theft_total >> 20,
+		(u64)io_size >> 20,
+		(u64)total >> 20);
+
+	/*
+	 * Even if we allocate all of the non-mappable portion, we should still
+	 * be able to dip into the mappable portion.
+	 */
+	obj = igt_object_create(mr, &objects, io_size,
+				I915_BO_ALLOC_GPU_ONLY);
+	if (IS_ERR(obj)) {
+		pr_err("%s allocation unexpectedly failed\n", __func__);
+		err = PTR_ERR(obj);
+		goto out_close;
+	}
+
+	close_objects(mr, &objects);
+
+	rem = io_size;
+	do {
+		div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
+		size = round_down(size, ps);
+		size = max(size, ps);
+
+		obj = igt_object_create(mr, &objects, size, 0);
+		if (IS_ERR(obj)) {
+			pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
+			       __func__, rem, size);
+			err = PTR_ERR(obj);
+			goto out_close;
+		}
+
+		if (igt_object_mappable_total(obj) != size) {
+			pr_err("%s allocation is not mappable(size=%llx)\n",
+			       __func__, size);
+			err = -EINVAL;
+			goto out_close;
+		}
+		rem -= size;
+	} while (rem);
+
+	/*
+	 * We assume CPU access is required by default, which should result in a
+	 * failure here, even though the non-mappable portion is free.
+	 */
+	obj = igt_object_create(mr, &objects, ps, 0);
+	if (!IS_ERR(obj)) {
+		pr_err("%s allocation unexpectedly succeeded\n", __func__);
+		err = -EINVAL;
+		goto out_close;
+	}
+
+out_close:
+	close_objects(mr, &objects);
+	intel_memory_region_destroy(mr);
+out_err:
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 static int igt_gpu_write_dw(struct intel_context *ce,
 			    struct i915_vma *vma,
 			    u32 dword,
@@ -680,8 +822,14 @@ static int igt_lmem_create_with_ps(void *arg)
 
 	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_pin_pages(obj);
-	if (err)
+	if (err) {
+		if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
+			pr_info("%s not enough lmem for ps(%u) err=%d\n",
+				__func__, ps, err);
+			err = 0;
+		}
 		goto out_put;
+	}
 
 	daddr = i915_gem_object_get_dma_address(obj, 0);
 	if (!IS_ALIGNED(daddr, ps)) {
@@ -1179,6 +1327,7 @@ int intel_memory_region_mock_selftests(void)
 		SUBTEST(igt_mock_contiguous),
 		SUBTEST(igt_mock_splintered_region),
 		SUBTEST(igt_mock_max_segment),
+		SUBTEST(igt_mock_io_size),
 	};
 	struct intel_memory_region *mem;
 	struct drm_i915_private *i915;
@@ -1188,7 +1337,7 @@ int intel_memory_region_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
 	if (IS_ERR(mem)) {
 		pr_err("failed to create memory region\n");
 		err = PTR_ERR(mem);

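igt_mock_io_size() above splits a mock region into a CPU-mappable window of io_size bytes and a GPU-only remainder, then counts how much of each buddy allocation lands below io_size. The standalone sketch below restates that accounting outside the kernel; the block layout and the 256MiB window are made-up values, not taken from the test.

/*
 * Illustration only: the part of each block that falls below io_size is
 * the CPU-mappable share, as in igt_object_mappable_total().
 */
#include <stdint.h>
#include <stdio.h>

struct block {
	uint64_t start;
	uint64_t size;
};

static uint64_t mappable_part(const struct block *b, uint64_t io_size)
{
	uint64_t end = b->start + b->size;

	if (b->start >= io_size)
		return 0;
	return (end < io_size ? end : io_size) - b->start;
}

int main(void)
{
	const uint64_t io_size = 256ull << 20;	/* assumed 256MiB mappable window */
	const struct block blocks[] = {
		{ .start = 0,            .size = 128ull << 20 }, /* fully mappable */
		{ .start = 192ull << 20, .size = 128ull << 20 }, /* straddles io_size */
		{ .start = 512ull << 20, .size = 64ull << 20 },  /* GPU-only portion */
	};
	uint64_t total = 0;

	for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
		total += mappable_part(&blocks[i], io_size);

	printf("mappable bytes: %llu MiB\n", (unsigned long long)(total >> 20));
	return 0;
}

For these assumed blocks the first contributes 128MiB, the straddling one only the 64MiB below the window, and the GPU-only one nothing, i.e. 192MiB of "mappable theft" in the test's terminology.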
@@ -22,17 +22,12 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj,
 
 static int mock_region_get_pages(struct drm_i915_gem_object *obj)
 {
-	unsigned int flags;
 	struct sg_table *pages;
 	int err;
 
-	flags = 0;
-	if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
-		flags |= TTM_PL_FLAG_CONTIGUOUS;
-
 	obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
 						      obj->base.size,
-						      flags);
+						      obj->flags);
 	if (IS_ERR(obj->mm.res))
 		return PTR_ERR(obj->mm.res);
 
@@ -107,7 +102,8 @@ mock_region_create(struct drm_i915_private *i915,
 		   resource_size_t start,
 		   resource_size_t size,
 		   resource_size_t min_page_size,
-		   resource_size_t io_start)
+		   resource_size_t io_start,
+		   resource_size_t io_size)
 {
 	int instance = ida_alloc_max(&i915->selftest.mock_region_instances,
 				     TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1,
@@ -117,6 +113,7 @@ mock_region_create(struct drm_i915_private *i915,
 		return ERR_PTR(instance);
 
 	return intel_memory_region_create(i915, start, size, min_page_size,
-					  io_start, INTEL_MEMORY_MOCK, instance,
+					  io_start, io_size,
+					  INTEL_MEMORY_MOCK, instance,
 					  &mock_region_ops);
 }

@@ -16,6 +16,7 @@ mock_region_create(struct drm_i915_private *i915,
 		   resource_size_t start,
 		   resource_size_t size,
 		   resource_size_t min_page_size,
-		   resource_size_t io_start);
+		   resource_size_t io_start,
+		   resource_size_t io_size);
 
 #endif /* !__MOCK_REGION_H */

@@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 {
 	/**
 	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
 	 * the user with the GTT offset at which this object will be pinned.
+	 *
 	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
 	 * presumed_offset of the object.
+	 *
 	 * During execbuffer2 the kernel populates it with the value of the
 	 * current GTT offset of the object, for future presumed_offset writes.
+	 *
+	 * See struct drm_i915_gem_create_ext for the rules when dealing with
+	 * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with
+	 * minimum page sizes, like DG2.
 	 */
 	__u64 offset;
 
@@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext {
 	 *
 	 * The (page-aligned) allocated size for the object will be returned.
 	 *
-	 * Note that for some devices we have might have further minimum
-	 * page-size restrictions(larger than 4K), like for device local-memory.
-	 * However in general the final size here should always reflect any
-	 * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
-	 * extension to place the object in device local-memory.
+	 *
+	 * DG2 64K min page size implications:
+	 *
+	 * On discrete platforms, starting from DG2, we have to contend with GTT
+	 * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
+	 * objects. Specifically the hardware only supports 64K or larger GTT
+	 * page sizes for such memory. The kernel will already ensure that all
+	 * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
+	 * sizes underneath.
+	 *
+	 * Note that the returned size here will always reflect any required
+	 * rounding up done by the kernel, i.e 4K will now become 64K on devices
+	 * such as DG2.
+	 *
+	 * Special DG2 GTT address alignment requirement:
+	 *
+	 * The GTT alignment will also need to be at least 2M for such objects.
+	 *
+	 * Note that due to how the hardware implements 64K GTT page support, we
+	 * have some further complications:
+	 *
+	 *   1) The entire PDE (which covers a 2MB virtual address range), must
+	 *   contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
+	 *   PDE is forbidden by the hardware.
+	 *
+	 *   2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
+	 *   objects.
+	 *
+	 * To keep things simple for userland, we mandate that any GTT mappings
+	 * must be aligned to and rounded up to 2MB. The kernel will internally
+	 * pad them out to the next 2MB boundary. As this only wastes virtual
+	 * address space and avoids userland having to copy any needlessly
+	 * complicated PDE sharing scheme (coloring) and only affects DG2, this
+	 * is deemed to be a good compromise.
 	 */
 	__u64 size;
 	/**
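For userspace the documented DG2 rules boil down to two roundings: object sizes come back 64K-granular, and pinned GTT offsets for local-memory objects must sit on a 2MiB boundary. Below is a userspace-side sketch of that arithmetic; the helper and the sample values are assumptions for illustration, not part of the uapi itself.

/*
 * Illustration only: what the documented DG2 constraints imply for a
 * hypothetical allocator padding sizes and EXEC_OBJECT_PINNED offsets.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_64K	(64ull * 1024)
#define SZ_2M	(2ull * 1024 * 1024)

static uint64_t round_up_u64(uint64_t x, uint64_t a)
{
	/* a must be a power of two, which both alignments are */
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	uint64_t requested = 4096;	/* a single 4K page */
	uint64_t gtt_offset = 0x123000;	/* arbitrary, only 4K-aligned */

	/* On DG2-class devices the kernel reports the 64K-rounded size back. */
	printf("allocated size: %#llx\n",
	       (unsigned long long)round_up_u64(requested, SZ_64K));

	/* Pinned GTT mappings of device-local objects must sit on 2MiB bounds. */
	printf("usable pinned offset: %#llx\n",
	       (unsigned long long)round_up_u64(gtt_offset, SZ_2M));
	return 0;
}

With these assumed inputs a 4K request is reported as 0x10000 bytes, and the 0x123000 offset has to be padded up to 0x200000 before it can be used with EXEC_OBJECT_PINNED.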