Merge tag 'drm-intel-gt-next-2022-05-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:

- Add kerneldoc for engine class enum (Matt Roper)
- Add compute engine ABI (Matt Roper)

Driver Changes:

- Define GuC firmware version for DG2 (John Harrison)
- Clear SET_PREDICATE_RESULT prior to executing the ring (Chris Wilson)
- Fix race in __i915_vma_remove_closed (Karol Herbst)
- Add register for compute engine's MMIO-based TLB invalidation (Matt Roper)
- Xe_HP SDV and DG2 have up to 4 CCS engines (Daniele Ceraolo Spurio)
- Add initial Ponte Vecchio definitions (Stuart Summers)
- Document the eviction of the Flat-CCS objects (Ramalingam C)
- Use existing uncore helper to read gpm_timestamp (Umesh Nerlige Ramappa)
- Fix issue with LRI relative addressing (Akeem G Abodunrin)
- Skip poisoning SET_PREDICATE_RESULT on dg2 (Chris Wilson)
- Optimize the ccs_sz calculation per chunk (Ramalingam C)
- Remove superfluous string helper include (Jani Nikula)
- Fix assert in i915_ggtt_pin (Tvrtko Ursulin)
- Use IOMEM_ERR_PTR() directly (Kefeng Wang)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YnNxCm1pyflu3taj@tursulin-mobl2
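Editor's note: the compute engine class added by this tag is reported to userspace through the pre-existing engine-info query. A minimal sketch of how an application might look for the new class (not part of this merge; the render-node path is an assumption and error handling is abbreviated):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
        /* Assumed device node; real code would enumerate /dev/dri. */
        int fd = open("/dev/dri/renderD128", O_RDWR);
        struct drm_i915_query_item item = {
                .query_id = DRM_I915_QUERY_ENGINE_INFO,
        };
        struct drm_i915_query query = {
                .num_items = 1,
                .items_ptr = (uintptr_t)&item,
        };
        struct drm_i915_query_engine_info *info;
        uint32_t i;

        /* First call only fills item.length with the buffer size needed. */
        if (fd < 0 || ioctl(fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
                return 1;

        info = calloc(1, item.length);
        item.data_ptr = (uintptr_t)info;
        if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
                return 1;

        for (i = 0; i < info->num_engines; i++)
                if (info->engines[i].engine.engine_class == I915_ENGINE_CLASS_COMPUTE)
                        printf("found ccs%u\n",
                               info->engines[i].engine.engine_instance);
        return 0;
}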
commit af3847a747
@@ -5,6 +5,7 @@
 #include "gen8_engine_cs.h"
 #include "i915_drv.h"
+#include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_lrc.h"
 #include "intel_ring.h"
 
@@ -385,6 +386,59 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	return 0;
 }
 
+static int __gen125_emit_bb_start(struct i915_request *rq,
+				  u64 offset, u32 len,
+				  const unsigned int flags,
+				  u32 arb)
+{
+	struct intel_context *ce = rq->context;
+	u32 wa_offset = lrc_indirect_bb(ce);
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 12);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | arb;
+
+	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+		MI_SRM_LRM_GLOBAL_GTT |
+		MI_LRI_LRM_CS_MMIO;
+	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
+	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
+	*cs++ = 0;
+
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
+		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+
+	/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
+	*cs++ = MI_BATCH_BUFFER_START_GEN8;
+	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
+	*cs++ = 0;
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen125_emit_bb_start_noarb(struct i915_request *rq,
+			       u64 offset, u32 len,
+			       const unsigned int flags)
+{
+	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
+}
+
+int gen125_emit_bb_start(struct i915_request *rq,
+			 u64 offset, u32 len,
+			 const unsigned int flags)
+{
+	return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
+}
+
 int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
@@ -31,6 +31,13 @@ int gen8_emit_bb_start(struct i915_request *rq,
 		       u64 offset, u32 len,
 		       const unsigned int flags);
 
+int gen125_emit_bb_start_noarb(struct i915_request *rq,
+			       u64 offset, u32 len,
+			       const unsigned int flags);
+int gen125_emit_bb_start(struct i915_request *rq,
+			 u64 offset, u32 len,
+			 const unsigned int flags);
+
 u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
@@ -148,6 +148,7 @@
 		(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
 		 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define RING_PREDICATE_RESULT(base)		_MMIO((base) + 0x3b8) /* gen12+ */
 #define MI_PREDICATE_RESULT_2(base)		_MMIO((base) + 0x3bc)
 #define LOWER_SLICE_ENABLED			(1 << 0)
 #define LOWER_SLICE_DISABLED			(0 << 0)

@@ -193,6 +194,7 @@
 #define RING_TIMESTAMP_UDW(base)		_MMIO((base) + 0x358 + 4)
 #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
 #define RING_CTX_TIMESTAMP(base)		_MMIO((base) + 0x3a8) /* gen8+ */
+#define RING_PREDICATE_RESULT(base)		_MMIO((base) + 0x3b8)
 #define RING_FORCE_TO_NONPRIV(base, i)		_MMIO(((base) + 0x4D0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RW	(0 << 28) /* CFL+ & Gen11+ */
@@ -47,7 +47,7 @@ static const u8 uabi_classes[] = {
 	[COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
 	[VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
 	[VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
-	/* TODO: Add COMPUTE_CLASS mapping once ABI is available */
+	[COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE,
 };
 
 static int engine_cmp(void *priv, const struct list_head *A,
@@ -3433,10 +3433,17 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 		}
 	}
 
-	if (intel_engine_has_preemption(engine))
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else
-		engine->emit_bb_start = gen8_emit_bb_start_noarb;
+	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
+		if (intel_engine_has_preemption(engine))
+			engine->emit_bb_start = gen125_emit_bb_start;
+		else
+			engine->emit_bb_start = gen125_emit_bb_start_noarb;
+	} else {
+		if (intel_engine_has_preemption(engine))
+			engine->emit_bb_start = gen8_emit_bb_start;
+		else
+			engine->emit_bb_start = gen8_emit_bb_start_noarb;
+	}
 
 	engine->busyness = execlists_engine_busyness;
 }
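The Xe_HP gate above compares a packed full IP version. A standalone model of the comparison, assuming the upstream IP_VER() packing of (version << 8) | release:

#include <assert.h>
#include <stdint.h>

/* Assumed to match the upstream IP_VER() macro: (version << 8) | release. */
#define IP_VER(ver, rel)        (((ver) << 8) | (rel))

int main(void)
{
        uint32_t xehp = IP_VER(12, 50); /* 0x0c32, the Xe_HP cutoff above */
        uint32_t tgl  = IP_VER(12, 0);  /* 0x0c00 */
        uint32_t dg2  = IP_VER(12, 55); /* 0x0c37 */

        /* Packing preserves ordering, so release compares within a version. */
        assert(dg2 >= xehp);
        assert(tgl < xehp);
        return 0;
}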
@@ -39,6 +39,8 @@
 #define   MI_GLOBAL_GTT    (1<<22)
 
 #define MI_NOOP			MI_INSTR(0, 0)
+#define MI_SET_PREDICATE	MI_INSTR(0x01, 0)
+#define   MI_SET_PREDICATE_DISABLE	(0 << 0)
 #define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
 #define MI_WAIT_FOR_EVENT	MI_INSTR(0x03, 0)
 #define   MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
@@ -1175,6 +1175,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
 		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
 		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
+		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
 	};
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
@@ -1007,6 +1007,7 @@
 #define GEN12_VD_TLB_INV_CR			_MMIO(0xcedc)
 #define GEN12_VE_TLB_INV_CR			_MMIO(0xcee0)
 #define GEN12_BLT_TLB_INV_CR			_MMIO(0xcee4)
+#define GEN12_COMPCTX_TLB_INV_CR		_MMIO(0xcf04)
 
 #define GEN12_MERT_MOD_CTRL			_MMIO(0xcf28)
 #define RENDER_MOD_CTRL				_MMIO(0xcf2c)
@@ -904,6 +904,24 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 		     engine->name);
 }
 
+static u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+	return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+	void *ptr;
+
+	GEM_BUG_ON(!ce->wa_bb_page);
+
+	ptr = ce->lrc_reg_state;
+	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+	ptr += context_wa_bb_offset(ce);
+
+	return ptr;
+}
+
 void lrc_init_state(struct intel_context *ce,
 		    struct intel_engine_cs *engine,
 		    void *state)
@@ -922,6 +940,10 @@ void lrc_init_state(struct intel_context *ce,
 	/* Clear the ppHWSP (inc. per-context counters) */
 	memset(state, 0, PAGE_SIZE);
 
+	/* Clear the indirect wa and storage */
+	if (ce->wa_bb_page)
+		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
+
 	/*
 	 * The second page of the context object contains some registers which
 	 * must be set up prior to the first execution.
@@ -929,6 +951,35 @@ void lrc_init_state(struct intel_context *ce,
 	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
 }
 
+u32 lrc_indirect_bb(const struct intel_context *ce)
+{
+	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
+}
+
+static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
+{
+	/* If predication is active, this will be noop'ed */
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
+	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
+	*cs++ = 0;
+	*cs++ = 0; /* No predication */
+
+	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
+	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
+	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
+
+	/* Instructions are no longer predicated (disabled), we can proceed */
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
+	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
+	*cs++ = 0;
+	*cs++ = 1; /* enable predication before the next BB */
+
+	*cs++ = MI_BATCH_BUFFER_END;
+	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
+
+	return cs;
+}
+
 static struct i915_vma *
 __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 {
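For orientation, the per-context WA page layout implied by the workaround above can be modelled standalone. The two constants mirror the diff; treating PAGE_SIZE as 4KiB is an assumption of this sketch:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE               4096u
#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(uint64_t))
#define DG2_PREDICATE_RESULT_BB 2048u

int main(void)
{
        /* The fixup BB lives in the upper half of the WA page and must end
         * before the predicate-result qword it writes to. */
        assert(DG2_PREDICATE_RESULT_BB < DG2_PREDICATE_RESULT_WA);
        /* The 11 dwords emitted by setup_predicate_disable_wa() fit easily. */
        assert(DG2_PREDICATE_RESULT_WA - DG2_PREDICATE_RESULT_BB >=
               11 * sizeof(uint32_t));
        return 0;
}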
@@ -1240,24 +1291,6 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
 	return cs;
 }
 
-static u32 context_wa_bb_offset(const struct intel_context *ce)
-{
-	return PAGE_SIZE * ce->wa_bb_page;
-}
-
-static u32 *context_indirect_bb(const struct intel_context *ce)
-{
-	void *ptr;
-
-	GEM_BUG_ON(!ce->wa_bb_page);
-
-	ptr = ce->lrc_reg_state;
-	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
-	ptr += context_wa_bb_offset(ce);
-
-	return ptr;
-}
-
 static void
 setup_indirect_ctx_bb(const struct intel_context *ce,
 		      const struct intel_engine_cs *engine,
@@ -1271,9 +1304,11 @@ setup_indirect_ctx_bb(const struct intel_context *ce,
 	while ((unsigned long)cs % CACHELINE_BYTES)
 		*cs++ = MI_NOOP;
 
+	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
+	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
+
 	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
-			       i915_ggtt_offset(ce->state) +
-			       context_wa_bb_offset(ce),
+			       lrc_indirect_bb(ce),
 			       (cs - start) * sizeof(*cs));
 }
@@ -145,4 +145,9 @@ static inline void lrc_runtime_stop(struct intel_context *ce)
 	WRITE_ONCE(stats->active, 0);
 }
 
+#define DG2_PREDICATE_RESULT_WA (PAGE_SIZE - sizeof(u64))
+#define DG2_PREDICATE_RESULT_BB (2048)
+
+u32 lrc_indirect_bb(const struct intel_context *ce);
+
 #endif /* __INTEL_LRC_H__ */
@@ -485,16 +485,21 @@ static bool wa_1209644611_applies(int ver, u32 size)
  * And CCS data can be copied in and out of CCS region through
  * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
  *
- * When we exhaust the lmem, if the object's placements support smem, then we can
- * directly decompress the compressed lmem object into smem and start using it
- * from smem itself.
+ * I915 supports Flat-CCS on lmem only objects. When an object has smem in
+ * its preference list, on memory pressure, i915 needs to migrate the lmem
+ * content into smem. If the lmem object is Flat-CCS compressed by userspace,
+ * then i915 needs to decompress it. But i915 lacks the required information
+ * for such decompression. Hence i915 supports Flat-CCS only on lmem only
+ * objects.
  *
- * But when we need to swapout the compressed lmem object into a smem region
- * though objects' placement doesn't support smem, then we copy the lmem content
- * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
- * When the object is referred, lmem content will be swapped in along with
- * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
- * location.
+ * When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can
+ * be temporarily evicted to smem, along with the auxiliary CCS state, where
+ * it can be potentially swapped-out at a later point, if required.
+ * If userspace later touches the evicted pages, then we always move
+ * the backing memory back to lmem, which includes restoring the saved CCS
+ * state, and potentially performing any required swap-in.
 *
 * For the migration of the lmem objects with smem in placement list, such as
 * {lmem, smem}, objects are treated as non Flat-CCS capable objects.
 */
 
 static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
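A rough standalone model of the Flat-CCS sizing this code relies on, assuming the upstream 1:256 main-surface-to-CCS ratio (NUM_BYTES_PER_CCS_BYTE):

#include <stdint.h>
#include <stdio.h>

#define NUM_BYTES_PER_CCS_BYTE 256u

/* Hypothetical stand-alone equivalent of the driver's GET_CCS_BYTES(). */
static uint32_t get_ccs_bytes(uint32_t size)
{
        return (size + NUM_BYTES_PER_CCS_BYTE - 1) / NUM_BYTES_PER_CCS_BYTE;
}

int main(void)
{
        /* An 8MiB chunk of lmem needs only 32KiB of CCS backing. */
        printf("%u\n", get_ccs_bytes(8u << 20)); /* prints 32768 */
        return 0;
}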
@@ -647,17 +652,9 @@ static int scatter_list_length(struct scatterlist *sg)
 
 static void
 calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
-		   int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
-		   u32 ccs_bytes_to_cpy)
+		   int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
 {
 	if (ccs_bytes_to_cpy) {
-		/*
-		 * We can only copy the ccs data corresponding to
-		 * the CHUNK_SZ of lmem which is
-		 * GET_CCS_BYTES(i915, CHUNK_SZ))
-		 */
-		*ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ));
-
 		if (!src_is_lmem)
 			/*
 			 * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
@@ -707,10 +704,10 @@ intel_context_migrate_copy(struct intel_context *ce,
 	struct drm_i915_private *i915 = ce->engine->i915;
 	u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
 	enum i915_cache_level ccs_cache_level;
-	int src_sz, dst_sz, ccs_sz;
 	u32 src_offset, dst_offset;
 	u8 src_access, dst_access;
 	struct i915_request *rq;
+	int src_sz, dst_sz;
 	bool ccs_is_src;
 	int err;
@@ -791,7 +788,7 @@ intel_context_migrate_copy(struct intel_context *ce,
 		if (err)
 			goto out_rq;
 
-		calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz,
+		calculate_chunk_sz(i915, src_is_lmem, &src_sz,
 				   bytes_to_cpy, ccs_bytes_to_cpy);
 
 		len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
@@ -825,37 +822,35 @@ intel_context_migrate_copy(struct intel_context *ce,
 		bytes_to_cpy -= len;
 
 		if (ccs_bytes_to_cpy) {
+			int ccs_sz;
+
 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 			if (err)
 				goto out_rq;
 
+			ccs_sz = GET_CCS_BYTES(i915, len);
 			err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
 				       ccs_is_src ? src_offset : dst_offset,
 				       ccs_sz);
 			if (err < 0)
 				goto out_rq;
 			if (err < ccs_sz) {
 				err = -EINVAL;
 				goto out_rq;
 			}
 
 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 			if (err)
 				goto out_rq;
 
-			/*
-			 * Using max of src_sz and dst_sz, as we need to
-			 * pass the lmem size corresponding to the ccs
-			 * blocks we need to handle.
-			 */
-			ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
-				       ccs_is_src ? dst_sz : ccs_sz);
-
 			err = emit_copy_ccs(rq, dst_offset, dst_access,
-					    src_offset, src_access, ccs_sz);
+					    src_offset, src_access, len);
 			if (err)
 				goto out_rq;
 
 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 			if (err)
 				goto out_rq;
 
-			/* Converting back to ccs bytes */
-			ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
 			ccs_bytes_to_cpy -= ccs_sz;
 		}
@@ -10,8 +10,6 @@
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
 
-#include "linux/string_helpers.h"
-
 void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
 			 u8 max_subslices, u8 max_eus_per_subslice)
 {
@@ -128,6 +128,27 @@ static int context_flush(struct intel_context *ce, long timeout)
 	return err;
 }
 
+static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
+{
+	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
+		return ~0u;
+
+	if (GRAPHICS_VER(engine->i915) < 12)
+		return 0xfff;
+
+	switch (engine->class) {
+	default:
+	case RENDER_CLASS:
+	case COMPUTE_CLASS:
+		return 0x07ff;
+	case COPY_ENGINE_CLASS:
+		return 0x0fff;
+	case VIDEO_DECODE_CLASS:
+	case VIDEO_ENHANCEMENT_CLASS:
+		return 0x3fff;
+	}
+}
+
 static int live_lrc_layout(void *arg)
 {
 	struct intel_gt *gt = arg;
@@ -167,6 +188,7 @@ static int live_lrc_layout(void *arg)
 		dw = 0;
 		do {
 			u32 lri = READ_ONCE(hw[dw]);
+			u32 lri_mask;
 
 			if (lri == 0) {
 				dw++;
@@ -194,6 +216,18 @@ static int live_lrc_layout(void *arg)
 				break;
 			}
 
+			/*
+			 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
+			 * opcode is set on Gen12+ devices, HW does not
+			 * care about certain register address offsets, and
+			 * instead check the following for valid address
+			 * ranges on specific engines:
+			 * RCS && CCS: BITS(0 - 10)
+			 * BCS: BITS(0 - 11)
+			 * VECS && VCS: BITS(0 - 13)
+			 */
+			lri_mask = get_lri_mask(engine, lri);
+
 			lri &= 0x7f;
 			lri++;
 			dw++;
@@ -201,7 +235,7 @@ static int live_lrc_layout(void *arg)
 			while (lri) {
 				u32 offset = READ_ONCE(hw[dw]);
 
-				if (offset != lrc[dw]) {
+				if ((offset ^ lrc[dw]) & lri_mask) {
 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
 					       engine->name, dw, offset, lrc[dw]);
 					err = -EINVAL;
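The masked comparison introduced above can be illustrated standalone; the offsets here are made up for the example:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t lri_mask  = 0x07ff;    /* RCS/CCS on Gen12+, per get_lri_mask() */
        uint32_t hw_offset = 0x2244;    /* as read back from the HW image */
        uint32_t sw_offset = 0x0244;    /* as written in the SW image */

        /* High bits differ but the low 11 bits agree, so with MI_LRI bit 19
         * relative addressing these name the same register. */
        assert(((hw_offset ^ sw_offset) & lri_mask) == 0);
        return 0;
}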
@@ -911,6 +945,19 @@ create_user_vma(struct i915_address_space *vm, unsigned long size)
 	return vma;
 }
 
+static u32 safe_poison(u32 offset, u32 poison)
+{
+	/*
+	 * Do not enable predication as it will nop all subsequent commands,
+	 * not only disabling the tests (by preventing all the other SRM) but
+	 * also preventing the arbitration events at the end of the request.
+	 */
+	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
+		poison &= ~REG_BIT(0);
+
+	return poison;
+}
+
 static struct i915_vma *
 store_context(struct intel_context *ce, struct i915_vma *scratch)
 {
@@ -1120,7 +1167,9 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
 			*cs++ = MI_LOAD_REGISTER_IMM(len);
 			while (len--) {
 				*cs++ = hw[dw];
-				*cs++ = poison;
+				*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
+									  MI_LRI_LRM_CS_MMIO),
+						    poison);
 				dw += 2;
 			}
 		} while (dw < PAGE_SIZE / sizeof(u32) &&
@@ -1200,20 +1200,6 @@ static u32 gpm_timestamp_shift(struct intel_gt *gt)
 	return 3 - shift;
 }
 
-static u64 gpm_timestamp(struct intel_gt *gt)
-{
-	u32 lo, hi, old_hi, loop = 0;
-
-	hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-	do {
-		lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
-		old_hi = hi;
-		hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
-	} while (old_hi != hi && loop++ < 2);
-
-	return ((u64)hi << 32) | lo;
-}
-
 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -1223,7 +1209,8 @@ static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
 	lockdep_assert_held(&guc->timestamp.lock);
 
 	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
-	gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
+					  MISC_STATUS1) >> guc->timestamp.shift;
 	gt_stamp_lo = lower_32_bits(gpm_ts);
 	*now = ktime_get();
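The open-coded helper removed above is the classic split 64-bit counter read that intel_uncore_read64_2x32() now centralizes: sample HI, then LO, then HI again, retrying if the upper half moved mid-read. A self-contained sketch of the pattern, with a stubbed MMIO read in place of real hardware access:

#include <stdint.h>
#include <stdio.h>

/* Stub counter standing in for the hardware register pair; it carries over
 * from the low word into the high word on the first low read. */
static uint64_t fake_counter = 0xffffffffull;

static uint32_t read_lo(void) { return (uint32_t)fake_counter++; }
static uint32_t read_hi(void) { return (uint32_t)(fake_counter >> 32); }

static uint64_t read64_2x32(void)
{
        uint32_t lo, hi, old_hi;
        int loop = 0;

        hi = read_hi();
        do {
                lo = read_lo();
                old_hi = hi;
                hi = read_hi();
        } while (old_hi != hi && loop++ < 2);   /* re-read on carry */

        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        /* The retry path fires once and a coherent value is returned. */
        printf("%#llx\n", (unsigned long long)read64_2x32());
        return 0;
}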
@@ -3910,6 +3897,8 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
 	 */
 
 	engine->emit_bb_start = gen8_emit_bb_start;
+	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+		engine->emit_bb_start = gen125_emit_bb_start;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 	 * firmware as TGL.
 	 */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+	fw_def(DG2,          0, guc_def(dg2,  70, 1, 2)) \
 	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
 	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
 	fw_def(DG1,          0, guc_def(dg1,  70, 1, 1)) \
@@ -81,6 +81,7 @@ static const char * const uabi_class_names[] = {
 	[I915_ENGINE_CLASS_COPY] = "copy",
 	[I915_ENGINE_CLASS_VIDEO] = "video",
 	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+	[I915_ENGINE_CLASS_COMPUTE] = "compute",
 };
 
 static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
@@ -13,7 +13,7 @@
 
 #include "gt/intel_engine_types.h"
 
-#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
 struct drm_i915_private;
 
@@ -1059,6 +1059,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P)
 #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV)
 #define IS_DG2(dev_priv)	IS_PLATFORM(dev_priv, INTEL_DG2)
+#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO)
+
 #define IS_DG2_G10(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
 #define IS_DG2_G11(dev_priv) \
@@ -1037,7 +1037,8 @@ static const struct intel_device_info xehpsdv_info = {
 		BIT(RCS0) | BIT(BCS0) |
 		BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
 		BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) |
-		BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7),
+		BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7) |
+		BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
 	.require_force_probe = 1,
 };
 
@@ -1056,7 +1057,8 @@ static const struct intel_device_info xehpsdv_info = {
 	.platform_engine_mask = \
 		BIT(RCS0) | BIT(BCS0) | \
 		BIT(VECS0) | BIT(VECS1) | \
-		BIT(VCS0) | BIT(VCS2)
+		BIT(VCS0) | BIT(VCS2) | \
+		BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3)
 
 __maybe_unused
 static const struct intel_device_info dg2_info = {
@@ -1074,6 +1076,27 @@ static const struct intel_device_info ats_m_info = {
 	.require_force_probe = 1,
 };
 
+#define XE_HPC_FEATURES \
+	XE_HP_FEATURES, \
+	.dma_mask_size = 52
+
+__maybe_unused
+static const struct intel_device_info pvc_info = {
+	XE_HPC_FEATURES,
+	XE_HPM_FEATURES,
+	DGFX_FEATURES,
+	.graphics.rel = 60,
+	.media.rel = 60,
+	PLATFORM(INTEL_PONTEVECCHIO),
+	.display = { 0 },
+	.has_flat_ccs = 0,
+	.platform_engine_mask =
+		BIT(BCS0) |
+		BIT(VCS0) |
+		BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
+	.require_force_probe = 1,
+};
+
 #undef PLATFORM
 
 /*
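A toy model of how the added BIT(CCSn) entries compose an engine mask; the engine IDs below are illustrative, not the driver's real enum values:

#include <stdint.h>
#include <stdio.h>

enum { RCS0, BCS0, VCS0, VCS1, VECS0, CCS0, CCS1, CCS2, CCS3 }; /* toy IDs */
#define BIT(n) (1u << (n))

int main(void)
{
        /* Ponte Vecchio-style mask: one copy, one video, four compute. */
        uint32_t mask = BIT(BCS0) | BIT(VCS0) |
                        BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3);
        uint32_t ccs  = BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3);

        printf("compute engines: %d\n", __builtin_popcount(mask & ccs)); /* 4 */
        return 0;
}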
@@ -548,7 +548,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	int err;
 
 	if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
-		return IO_ERR_PTR(-EINVAL);
+		return IOMEM_ERR_PTR(-EINVAL);
 
 	if (!i915_gem_object_is_lmem(vma->obj)) {
 		if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
@@ -601,7 +601,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 err_unpin:
 	__i915_vma_unpin(vma);
 err:
-	return IO_ERR_PTR(err);
+	return IOMEM_ERR_PTR(err);
 }
 
 void i915_vma_flush_writes(struct i915_vma *vma)
@@ -1565,9 +1565,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (ww)
 		return __i915_ggtt_pin(vma, ww, align, flags);
 
-#ifdef CONFIG_LOCKDEP
-	WARN_ON(dma_resv_held(vma->obj->base.resv));
-#endif
+	lockdep_assert_not_held(&vma->obj->base.resv->lock.base);
 
 	for_i915_gem_ww(&_ww, err, true) {
 		err = i915_gem_object_lock(vma->obj, &_ww);
@@ -1615,17 +1613,17 @@ void i915_vma_close(struct i915_vma *vma)
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	struct intel_gt *gt = vma->vm->gt;
-
-	spin_lock_irq(&gt->closed_lock);
 	list_del_init(&vma->closed_link);
-	spin_unlock_irq(&gt->closed_lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
+	struct intel_gt *gt = vma->vm->gt;
+
+	spin_lock_irq(&gt->closed_lock);
 	if (i915_vma_is_closed(vma))
 		__i915_vma_remove_closed(vma);
+	spin_unlock_irq(&gt->closed_lock);
 }
 
 static void force_unbind(struct i915_vma *vma)
@@ -1641,6 +1639,7 @@ static void force_unbind(struct i915_vma *vma)
 static void release_references(struct i915_vma *vma, bool vm_ddestroy)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct intel_gt *gt = vma->vm->gt;
 
 	GEM_BUG_ON(i915_vma_is_active(vma));
 
@@ -1651,7 +1650,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
 
 	spin_unlock(&obj->vma.lock);
 
+	spin_lock_irq(&gt->closed_lock);
 	__i915_vma_remove_closed(vma);
+	spin_unlock_irq(&gt->closed_lock);
 
 	if (vm_ddestroy)
 		i915_vm_resv_put(vma->vm);
@@ -317,7 +317,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
-#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
 
 /**
  * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
@@ -72,6 +72,7 @@ static const char * const platform_names[] = {
 	PLATFORM_NAME(ALDERLAKE_P),
 	PLATFORM_NAME(XEHPSDV),
 	PLATFORM_NAME(DG2),
+	PLATFORM_NAME(PONTEVECCHIO),
 };
 #undef PLATFORM_NAME
 
@@ -88,6 +88,7 @@ enum intel_platform {
 	INTEL_ALDERLAKE_P,
 	INTEL_XEHPSDV,
 	INTEL_DG2,
+	INTEL_PONTEVECCHIO,
 	INTEL_MAX_PLATFORMS
 };
 
@@ -154,21 +154,71 @@ enum i915_mocs_table_index {
 	I915_MOCS_CACHED,
 };
 
-/*
+/**
+ * enum drm_i915_gem_engine_class - uapi engine type enumeration
+ *
  * Different engines serve different roles, and there may be more than one
- * engine serving each role. enum drm_i915_gem_engine_class provides a
- * classification of the role of the engine, which may be used when requesting
- * operations to be performed on a certain subset of engines, or for providing
- * information about that group.
+ * engine serving each role. This enum provides a classification of the role
+ * of the engine, which may be used when requesting operations to be performed
+ * on a certain subset of engines, or for providing information about that
+ * group.
  */
 enum drm_i915_gem_engine_class {
+	/**
+	 * @I915_ENGINE_CLASS_RENDER:
+	 *
+	 * Render engines support instructions used for 3D, Compute (GPGPU),
+	 * and programmable media workloads. These instructions fetch data and
+	 * dispatch individual work items to threads that operate in parallel.
+	 * The threads run small programs (called "kernels" or "shaders") on
+	 * the GPU's execution units (EUs).
+	 */
 	I915_ENGINE_CLASS_RENDER	= 0,
+
+	/**
+	 * @I915_ENGINE_CLASS_COPY:
+	 *
+	 * Copy engines (also referred to as "blitters") support instructions
+	 * that move blocks of data from one location in memory to another,
+	 * or that fill a specified location of memory with fixed data.
+	 * Copy engines can perform pre-defined logical or bitwise operations
+	 * on the source, destination, or pattern data.
+	 */
 	I915_ENGINE_CLASS_COPY		= 1,
+
+	/**
+	 * @I915_ENGINE_CLASS_VIDEO:
+	 *
+	 * Video engines (also referred to as "bit stream decode" (BSD) or
+	 * "vdbox") support instructions that perform fixed-function media
+	 * decode and encode.
+	 */
 	I915_ENGINE_CLASS_VIDEO		= 2,
+
+	/**
+	 * @I915_ENGINE_CLASS_VIDEO_ENHANCE:
+	 *
+	 * Video enhancement engines (also referred to as "vebox") support
+	 * instructions related to image enhancement.
+	 */
 	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
 
-	/* should be kept compact */
+	/**
+	 * @I915_ENGINE_CLASS_COMPUTE:
+	 *
+	 * Compute engines support a subset of the instructions available
+	 * on render engines: compute engines support Compute (GPGPU) and
+	 * programmable media workloads, but do not support the 3D pipeline.
+	 */
+	I915_ENGINE_CLASS_COMPUTE	= 4,
+
+	/* Values in this enum should be kept compact. */
+
+	/**
+	 * @I915_ENGINE_CLASS_INVALID:
+	 *
+	 * Placeholder value to represent an invalid engine class assignment.
+	 */
 	I915_ENGINE_CLASS_INVALID	= -1
 };
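With the enum published, userspace can also place a compute engine into a context's engine map through the pre-existing I915_CONTEXT_PARAM_ENGINES uAPI. A hedged sketch (context creation and error handling omitted):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int use_compute_engine(int fd, uint32_t ctx_id)
{
        /* One-slot engine map holding ccs0. */
        I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
                .engines = {
                        { .engine_class = I915_ENGINE_CLASS_COMPUTE,
                          .engine_instance = 0 },
                },
        };
        struct drm_i915_gem_context_param param = {
                .ctx_id = ctx_id,
                .param = I915_CONTEXT_PARAM_ENGINES,
                .size = sizeof(engines),
                .value = (uintptr_t)&engines,
        };

        /* Batches submitted with engine index 0 now run on the CCS. */
        return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);
}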