OpenCloudOS-Kernel/drivers/gpu/drm/i915/gt/intel_gpu_commands.h


/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2003-2018 Intel Corporation
*/
#ifndef _INTEL_GPU_COMMANDS_H_
#define _INTEL_GPU_COMMANDS_H_
#include <linux/bitops.h>
/*
* Target address alignments required for GPU access e.g.
* MI_STORE_DWORD_IMM.
*/
#define alignof_dword 4
#define alignof_qword 8
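/*
 * For example, a caller emitting the qword variant of MI_STORE_DWORD_IMM
 * would first check IS_ALIGNED(addr, alignof_qword) on the target GPU
 * address (IS_ALIGNED() being the generic kernel helper).
 */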
/*
* Instruction field definitions used by the command parser
*/
#define INSTR_CLIENT_SHIFT 29
#define INSTR_MI_CLIENT 0x0
#define INSTR_BC_CLIENT 0x2
#define INSTR_RC_CLIENT 0x3
#define INSTR_SUBCLIENT_SHIFT 27
#define INSTR_SUBCLIENT_MASK 0x18000000
#define INSTR_MEDIA_SUBCLIENT 0x2
#define INSTR_26_TO_24_MASK 0x7000000
#define INSTR_26_TO_24_SHIFT 24
/*
* Memory interface instructions used by the kernel
*/
#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
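/*
 * For illustration: MI_INSTR(0x0a, 0) (MI_BATCH_BUFFER_END, below) expands
 * to 0x0a << 23 == 0x05000000, i.e. the opcode lands in bits 28:23 with the
 * MI client (INSTR_MI_CLIENT == 0x0) in bits 31:29; 'flags' carries the
 * command-specific low bits, usually including the dword length.
 */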
/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
#define MI_GLOBAL_GTT (1<<22)
#define MI_NOOP MI_INSTR(0, 0)
#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0)
#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1)
#define MI_FLUSH MI_INSTR(0x04, 0)
#define MI_READ_FLUSH (1 << 0)
#define MI_EXE_FLUSH (1 << 1)
#define MI_NO_WRITE_FLUSH (1 << 2)
#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */
#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */
#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */
#define MI_REPORT_HEAD MI_INSTR(0x07, 0)
#define MI_ARB_ON_OFF MI_INSTR(0x08, 0)
#define MI_ARB_ENABLE (1<<0)
#define MI_ARB_DISABLE (0<<0)
#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0)
#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0)
#define MI_SUSPEND_FLUSH_EN (1<<0)
#define MI_SET_APPID MI_INSTR(0x0e, 0)
#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0)
#define MI_OVERLAY_CONTINUE (0x0<<21)
#define MI_OVERLAY_ON (0x1<<21)
#define MI_OVERLAY_OFF (0x2<<21)
#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2)
#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1)
#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
/* IVB has funny definitions for which plane to flip. */
#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19)
#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19)
#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19)
#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
/* SKL ones */
#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8)
#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8)
#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */
#define MI_SEMAPHORE_GLOBAL_GTT (1<<22)
#define MI_SEMAPHORE_UPDATE (1<<21)
#define MI_SEMAPHORE_COMPARE (1<<20)
#define MI_SEMAPHORE_REGISTER (1<<18)
#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */
#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */
#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */
#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */
#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */
#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */
#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */
#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */
#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */
#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */
#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */
#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */
#define MI_SEMAPHORE_SYNC_INVALID (3<<16)
#define MI_SEMAPHORE_SYNC_MASK (3<<16)
#define MI_SET_CONTEXT MI_INSTR(0x18, 0)
#define MI_MM_SPACE_GTT (1<<8)
#define MI_MM_SPACE_PHYSICAL (0<<8)
#define MI_SAVE_EXT_STATE_EN (1<<3)
#define MI_RESTORE_EXT_STATE_EN (1<<2)
#define MI_FORCE_RESTORE (1<<1)
#define MI_RESTORE_INHIBIT (1<<0)
#define HSW_MI_RS_SAVE_STATE_EN (1<<3)
#define HSW_MI_RS_RESTORE_STATE_EN (1<<2)
#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12)
#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
#define MI_SEMAPHORE_TOKEN_SHIFT 5
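/*
 * A minimal sketch (illustrative, not used by the driver) of a GEN8+
 * semaphore busy-wait: poll the dword at 'addr' until it is >= 'seqno'.
 * The helper name and parameters are assumptions for this example.
 */
static inline u32 *__example_emit_semaphore_wait(u32 *cs, u32 seqno, u64 addr)
{
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_GTE_SDD;
        *cs++ = seqno;                  /* semaphore data to compare against */
        *cs++ = (u32)addr;              /* semaphore address, low dword */
        *cs++ = (u32)(addr >> 32);      /* semaphore address, high dword */
        return cs;
}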
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
#define MI_USE_GGTT (1 << 22) /* g4x+ */
#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
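/*
 * A minimal sketch (illustrative only) of a GEN4+ dword store through the
 * GGTT: MI_INSTR(0x20, 2) is a 4-dword packet (header, address low/high,
 * data). The helper name and parameters are assumptions for this example.
 */
static inline u32 *__example_emit_store_dw(u32 *cs, u64 addr, u32 value)
{
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = (u32)addr;              /* must be alignof_dword aligned */
        *cs++ = (u32)(addr >> 32);
        *cs++ = value;
        return cs;
}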
/*
* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
* - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
* simply ignores the register load under certain conditions.
* - One can actually load arbitrarily many registers: simply issue x
* address/value pairs. Don't overdo it, though; x <= 2^4 must hold!
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
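/*
 * A minimal sketch (illustrative only) of loading a single register by its
 * MMIO offset, honouring the MI_NOOP advice above. The helper name and
 * parameters are assumptions for this example.
 */
static inline u32 *__example_emit_lri(u32 *cs, u32 reg_offset, u32 value)
{
        *cs++ = MI_NOOP;        /* guard against the hw dropping the LRI */
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = reg_offset;     /* MMIO offset of the target register */
        *cs++ = value;
        return cs;
}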
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
#define MI_FLUSH_DW_OP_MASK (3<<14)
#define MI_FLUSH_DW_NOTIFY (1<<8)
#define MI_INVALIDATE_BSD (1<<7)
#define MI_FLUSH_DW_USE_GTT (1<<2)
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
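/*
 * A minimal sketch (illustrative only) of a GEN6 MI_FLUSH_DW with a
 * post-sync dword write to a GGTT address: with a length of 1 in the header
 * the packet is 3 dwords (header, address, data); callers typically pad
 * with an MI_NOOP to keep the ring emission an even number of dwords.
 * The helper name and parameters are assumptions for this example.
 */
static inline u32 *__example_emit_flush_dw(u32 *cs, u32 gtt_addr, u32 value)
{
        *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
        *cs++ = gtt_addr | MI_FLUSH_DW_USE_GTT; /* address dword */
        *cs++ = value;                          /* data to store */
        return cs;
}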
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
#define MI_BATCH_NON_SECURE_I965 (1<<8)
#define MI_BATCH_PPGTT_HSW (1<<8)
#define MI_BATCH_NON_SECURE_HSW (1<<13)
#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only */
/*
* 3D instructions used by the kernel
*/
#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
#define GEN9_MEDIA_POOL_ENABLE (1 << 31)
#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24))
#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19))
#define SC_UPDATE_SCISSOR (0x1<<1)
#define SC_ENABLE_MASK (0x1<<0)
#define SC_ENABLE (0x1<<0)
#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16))
#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
#define SCI_YMIN_MASK (0xffff<<16)
#define SCI_XMIN_MASK (0xffff<<0)
#define SCI_YMAX_MASK (0xffff<<16)
#define SCI_XMAX_MASK (0xffff<<0)
#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19))
#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1)
#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4)
#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2))
#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22)
#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
#define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22)
#define XY_MONO_SRC_COPY_IMM_BLT (2 << 29 | 0x71 << 22 | 5)
#define BLT_WRITE_A (2<<20)
#define BLT_WRITE_RGB (1<<20)
#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A)
#define BLT_DEPTH_8 (0<<24)
#define BLT_DEPTH_16_565 (1<<24)
#define BLT_DEPTH_16_1555 (2<<24)
#define BLT_DEPTH_32 (3<<24)
#define BLT_ROP_SRC_COPY (0xcc<<16)
#define BLT_ROP_COLOR_COPY (0xf0<<16)
#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */
#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */
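/*
 * A minimal sketch (illustrative only) of a 32bpp XY_SRC_COPY_BLT packet
 * (8 dwords on pre-gen8 addressing, hence a length field of 8 - 2). The
 * helper name and the pitch/size parameters are assumptions for this
 * example; both surfaces are assumed linear, copied from (0, 0).
 */
static inline u32 *__example_emit_src_copy(u32 *cs, u32 dst, u32 src,
                                           u16 width, u16 height, u16 pitch)
{
        *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
        *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | pitch;
        *cs++ = 0;                              /* dst (x1, y1) */
        *cs++ = (u32)height << 16 | width;      /* dst (x2, y2) */
        *cs++ = dst;                            /* dst offset */
        *cs++ = 0;                              /* src (x1, y1) */
        *cs++ = pitch;                          /* src pitch */
        *cs++ = src;                            /* src offset */
        return cs;
}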
#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
#define ASYNC_FLIP (1<<22)
#define DISPLAY_PLANE_A (0<<20)
#define DISPLAY_PLANE_B (1<<20)
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */
#define PIPE_CONTROL_FLUSH_L3 (1<<27)
#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
#define PIPE_CONTROL_MMIO_WRITE (1<<23)
#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21)
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
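/*
 * A minimal sketch (illustrative only) of a 6-dword GEN8-style PIPE_CONTROL:
 * header, flags, post-sync address low/high, then two data dwords. The
 * helper name and parameters are assumptions for this example.
 */
static inline u32 *__example_emit_pipe_control(u32 *cs, u32 flags, u32 offset)
{
        *cs++ = GFX_OP_PIPE_CONTROL(6);
        *cs++ = flags;  /* e.g. PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE */
        *cs++ = offset; /* post-sync write address, low dword */
        *cs++ = 0;      /* address, high dword */
        *cs++ = 0;      /* data, low dword */
        *cs++ = 0;      /* data, high dword */
        return cs;
}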
#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
/* Opcodes for MI_MATH_INSTR */
#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
/* Registers used as operands in MI_MATH_INSTR */
#define MI_MATH_REG(x) (x)
#define MI_MATH_REG_SRCA 0x20
#define MI_MATH_REG_SRCB 0x21
#define MI_MATH_REG_ACCU 0x31
#define MI_MATH_REG_ZF 0x32
#define MI_MATH_REG_CF 0x33
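/*
 * A minimal sketch (illustrative only) of an ALU sequence adding two
 * general-purpose registers: load GPR0/GPR1 into the SRCA/SRCB operands,
 * add, and store the accumulator back to GPR0.
 */
static inline u32 *__example_emit_math_add(u32 *cs)
{
        *cs++ = MI_MATH(4);     /* four ALU instruction dwords follow */
        *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
        *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
        *cs++ = MI_MATH_ADD;
        *cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
        return cs;
}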
/*
* Commands used only by the command parser
*/
#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
#define MI_ARB_CHECK MI_INSTR(0x05, 0)
#define MI_RS_CONTROL MI_INSTR(0x06, 0)
#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0)
#define MI_PREDICATE MI_INSTR(0x0C, 0)
#define MI_RS_CONTEXT MI_INSTR(0x0F, 0)
#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0)
#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
#define MI_URB_CLEAR MI_INSTR(0x19, 0)
#define MI_UPDATE_GTT MI_INSTR(0x23, 0)
#define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
#define STATE_BASE_ADDRESS \
((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
#define BASE_ADDRESS_MODIFY REG_BIT(0)
#define PIPELINE_SELECT \
((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16))
#define PIPELINE_SELECT_MEDIA REG_BIT(0)
#define GFX_OP_3DSTATE_VF_STATISTICS \
((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16))
#define MEDIA_VFE_STATE \
((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16))
#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \
((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16))
#define MEDIA_OBJECT \
((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16))
#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
#define GFX_OP_3DSTATE_SO_DECL_LIST \
((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16))
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
/*
* Used to convert any address to canonical form.
* Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
* MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
* addresses to be in a canonical form:
* "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
* canonical form [63:48] == [47]."
*/
#define GEN8_HIGH_ADDRESS_BIT 47
static inline u64 gen8_canonical_addr(u64 address)
{
        return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
}

static inline u64 gen8_noncanonical_addr(u64 address)
{
        return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
}
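/*
 * For example, an address with bit 47 set, such as 0x0000800000000000ull,
 * becomes 0xffff800000000000ull in canonical form; gen8_noncanonical_addr()
 * masks it back down to bits [47:0].
 */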
static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags)
{
        *cs++ = MI_BATCH_BUFFER_START | flags;
        *cs++ = addr;
        return cs;
}
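/*
 * Illustrative use (the flag choice here is an assumption): chaining into a
 * non-privileged batch at 'batch_addr' on gen6/gen7 might look like
 * cs = __gen6_emit_bb_start(cs, batch_addr, MI_BATCH_NON_SECURE_I965);
 */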
#endif /* _INTEL_GPU_COMMANDS_H_ */