drm/radeon: add IB and fence dispatch functions for CIK gfx (v7)
For gfx ring only. Compute is still todo. v2: add documentation v3: update to latest reset changes, integrate emit update patch. v4: fix count on wait_reg_mem for HDP flush v5: use old hdp flush method for fence v6: set valid bit for IB v7: cleanup for release Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
841cf442fd
commit
2cae3bc3f3
|
@ -1492,6 +1492,140 @@ static void cik_gpu_init(struct radeon_device *rdev)
|
|||
udelay(50);
|
||||
}
|
||||
|
||||
/*
|
||||
* GPU scratch register helper functions.
|
||||
*/
|
||||
/**
|
||||
* cik_scratch_init - setup driver info for CP scratch regs
|
||||
*
|
||||
* @rdev: radeon_device pointer
|
||||
*
|
||||
* Set up the number and offset of the CP scratch registers.
|
||||
* NOTE: use of CP scratch registers is a legacy interface and
|
||||
* is not used by default on newer asics (r6xx+). On newer asics,
|
||||
* memory buffers are used for fences rather than scratch regs.
|
||||
*/
|
||||
static void cik_scratch_init(struct radeon_device *rdev)
|
||||
{
|
||||
int i;
|
||||
|
||||
rdev->scratch.num_reg = 7;
|
||||
rdev->scratch.reg_base = SCRATCH_REG0;
|
||||
for (i = 0; i < rdev->scratch.num_reg; i++) {
|
||||
rdev->scratch.free[i] = true;
|
||||
rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* cik_fence_ring_emit - emit a fence on the gfx ring
|
||||
*
|
||||
* @rdev: radeon_device pointer
|
||||
* @fence: radeon fence object
|
||||
*
|
||||
* Emits a fence sequence number on the gfx ring and flushes
|
||||
* GPU caches.
|
||||
*/
|
||||
void cik_fence_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_fence *fence)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[fence->ring];
|
||||
u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
|
||||
|
||||
/* EVENT_WRITE_EOP - flush caches, send int */
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
|
||||
radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
|
||||
EOP_TC_ACTION_EN |
|
||||
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
|
||||
EVENT_INDEX(5)));
|
||||
radeon_ring_write(ring, addr & 0xfffffffc);
|
||||
radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
|
||||
radeon_ring_write(ring, fence->seq);
|
||||
radeon_ring_write(ring, 0);
|
||||
/* HDP flush */
|
||||
/* We should be using the new WAIT_REG_MEM special op packet here
|
||||
* but it causes the CP to hang
|
||||
*/
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
|
||||
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
|
||||
WRITE_DATA_DST_SEL(0)));
|
||||
radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
|
||||
radeon_ring_write(ring, 0);
|
||||
radeon_ring_write(ring, 0);
|
||||
}
|
||||
|
||||
void cik_semaphore_ring_emit(struct radeon_device *rdev,
|
||||
struct radeon_ring *ring,
|
||||
struct radeon_semaphore *semaphore,
|
||||
bool emit_wait)
|
||||
{
|
||||
uint64_t addr = semaphore->gpu_addr;
|
||||
unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
|
||||
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
|
||||
radeon_ring_write(ring, addr & 0xffffffff);
|
||||
radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
|
||||
}
|
||||
|
||||
/*
|
||||
* IB stuff
|
||||
*/
|
||||
/**
|
||||
* cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
|
||||
*
|
||||
* @rdev: radeon_device pointer
|
||||
* @ib: radeon indirect buffer object
|
||||
*
|
||||
* Emits a DE (drawing engine) or CE (constant engine) IB
|
||||
* on the gfx ring. IBs are usually generated by userspace
|
||||
* acceleration drivers and submitted to the kernel for
|
||||
* scheduling on the ring. This function schedules the IB
|
||||
* on the gfx ring for execution by the GPU.
|
||||
*/
|
||||
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
u32 header, control = INDIRECT_BUFFER_VALID;
|
||||
|
||||
if (ib->is_const_ib) {
|
||||
/* set switch buffer packet before const IB */
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
|
||||
radeon_ring_write(ring, 0);
|
||||
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
|
||||
} else {
|
||||
u32 next_rptr;
|
||||
if (ring->rptr_save_reg) {
|
||||
next_rptr = ring->wptr + 3 + 4;
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
|
||||
radeon_ring_write(ring, ((ring->rptr_save_reg -
|
||||
PACKET3_SET_UCONFIG_REG_START) >> 2));
|
||||
radeon_ring_write(ring, next_rptr);
|
||||
} else if (rdev->wb.enabled) {
|
||||
next_rptr = ring->wptr + 5 + 4;
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
|
||||
radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
|
||||
radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
|
||||
radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
|
||||
radeon_ring_write(ring, next_rptr);
|
||||
}
|
||||
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
}
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->id << 24) : 0);
|
||||
|
||||
radeon_ring_write(ring, header);
|
||||
radeon_ring_write(ring,
|
||||
#ifdef __BIG_ENDIAN
|
||||
(2 << 0) |
|
||||
#endif
|
||||
(ib->gpu_addr & 0xFFFFFFFC));
|
||||
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
|
||||
radeon_ring_write(ring, control);
|
||||
}
|
||||
|
||||
/*
|
||||
* CP.
|
||||
* On CIK, gfx and compute now have independent command processors.
|
||||
|
|
|
@ -188,6 +188,21 @@
|
|||
|
||||
#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0
|
||||
|
||||
#define GPU_HDP_FLUSH_REQ 0x54DC
|
||||
#define GPU_HDP_FLUSH_DONE 0x54E0
|
||||
#define CP0 (1 << 0)
|
||||
#define CP1 (1 << 1)
|
||||
#define CP2 (1 << 2)
|
||||
#define CP3 (1 << 3)
|
||||
#define CP4 (1 << 4)
|
||||
#define CP5 (1 << 5)
|
||||
#define CP6 (1 << 6)
|
||||
#define CP7 (1 << 7)
|
||||
#define CP8 (1 << 8)
|
||||
#define CP9 (1 << 9)
|
||||
#define SDMA0 (1 << 10)
|
||||
#define SDMA1 (1 << 11)
|
||||
|
||||
#define GRBM_CNTL 0x8000
|
||||
#define GRBM_READ_TIMEOUT(x) ((x) << 0)
|
||||
|
||||
|
@ -492,6 +507,49 @@
|
|||
# define RASTER_CONFIG_RB_MAP_2 2
|
||||
# define RASTER_CONFIG_RB_MAP_3 3
|
||||
|
||||
#define VGT_EVENT_INITIATOR 0x28a90
|
||||
# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
|
||||
# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
|
||||
# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
|
||||
# define CACHE_FLUSH_TS (4 << 0)
|
||||
# define CACHE_FLUSH (6 << 0)
|
||||
# define CS_PARTIAL_FLUSH (7 << 0)
|
||||
# define VGT_STREAMOUT_RESET (10 << 0)
|
||||
# define END_OF_PIPE_INCR_DE (11 << 0)
|
||||
# define END_OF_PIPE_IB_END (12 << 0)
|
||||
# define RST_PIX_CNT (13 << 0)
|
||||
# define VS_PARTIAL_FLUSH (15 << 0)
|
||||
# define PS_PARTIAL_FLUSH (16 << 0)
|
||||
# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
|
||||
# define ZPASS_DONE (21 << 0)
|
||||
# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
|
||||
# define PERFCOUNTER_START (23 << 0)
|
||||
# define PERFCOUNTER_STOP (24 << 0)
|
||||
# define PIPELINESTAT_START (25 << 0)
|
||||
# define PIPELINESTAT_STOP (26 << 0)
|
||||
# define PERFCOUNTER_SAMPLE (27 << 0)
|
||||
# define SAMPLE_PIPELINESTAT (30 << 0)
|
||||
# define SO_VGT_STREAMOUT_FLUSH (31 << 0)
|
||||
# define SAMPLE_STREAMOUTSTATS (32 << 0)
|
||||
# define RESET_VTX_CNT (33 << 0)
|
||||
# define VGT_FLUSH (36 << 0)
|
||||
# define BOTTOM_OF_PIPE_TS (40 << 0)
|
||||
# define DB_CACHE_FLUSH_AND_INV (42 << 0)
|
||||
# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
|
||||
# define FLUSH_AND_INV_DB_META (44 << 0)
|
||||
# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
|
||||
# define FLUSH_AND_INV_CB_META (46 << 0)
|
||||
# define CS_DONE (47 << 0)
|
||||
# define PS_DONE (48 << 0)
|
||||
# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
|
||||
# define THREAD_TRACE_START (51 << 0)
|
||||
# define THREAD_TRACE_STOP (52 << 0)
|
||||
# define THREAD_TRACE_FLUSH (54 << 0)
|
||||
# define THREAD_TRACE_FINISH (55 << 0)
|
||||
# define PIXEL_PIPE_STAT_CONTROL (56 << 0)
|
||||
# define PIXEL_PIPE_STAT_DUMP (57 << 0)
|
||||
# define PIXEL_PIPE_STAT_RESET (58 << 0)
|
||||
|
||||
#define SCRATCH_REG0 0x30100
|
||||
#define SCRATCH_REG1 0x30104
|
||||
#define SCRATCH_REG2 0x30108
|
||||
|
@ -508,6 +566,8 @@
|
|||
|
||||
#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x301C8
|
||||
|
||||
#define CP_WAIT_REG_MEM_TIMEOUT 0x301D0
|
||||
|
||||
#define GRBM_GFX_INDEX 0x30800
|
||||
#define INSTANCE_INDEX(x) ((x) << 0)
|
||||
#define SH_INDEX(x) ((x) << 8)
|
||||
|
@ -597,11 +657,63 @@
|
|||
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PACKET3_DRAW_PREAMBLE 0x36
|
||||
#define PACKET3_WRITE_DATA 0x37
|
||||
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
|
||||
/* 0 - register
|
||||
* 1 - memory (sync - via GRBM)
|
||||
* 2 - gl2
|
||||
* 3 - gds
|
||||
* 4 - reserved
|
||||
* 5 - memory (async - direct)
|
||||
*/
|
||||
#define WR_ONE_ADDR (1 << 16)
|
||||
#define WR_CONFIRM (1 << 20)
|
||||
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
*/
|
||||
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
* 2 - ce
|
||||
*/
|
||||
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PACKET3_MEM_SEMAPHORE 0x39
|
||||
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
|
||||
# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
|
||||
/* client code field of the MEM_SEMAPHORE packet; takes the code as argument */
#              define PACKET3_SEM_CLIENT_CODE(x)      ((x) << 24)  /* 0 = CP, 1 = CB, 2 = DB */
|
||||
# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
|
||||
# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
|
||||
#define PACKET3_COPY_DW 0x3B
|
||||
#define PACKET3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
|
||||
/* 0 - always
|
||||
* 1 - <
|
||||
* 2 - <=
|
||||
* 3 - ==
|
||||
* 4 - !=
|
||||
* 5 - >=
|
||||
* 6 - >
|
||||
*/
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
|
||||
/* 0 - reg
|
||||
* 1 - mem
|
||||
*/
|
||||
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
|
||||
/* 0 - wait_reg_mem
|
||||
* 1 - wr_wait_wr_reg
|
||||
*/
|
||||
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
|
||||
/* 0 - me
|
||||
* 1 - pfp
|
||||
*/
|
||||
#define PACKET3_INDIRECT_BUFFER 0x3F
|
||||
#define INDIRECT_BUFFER_TCL2_VOLATILE (1 << 22)
|
||||
#define INDIRECT_BUFFER_VALID (1 << 23)
|
||||
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define PACKET3_COPY_DATA 0x40
|
||||
#define PACKET3_PFP_SYNC_ME 0x42
|
||||
#define PACKET3_SURFACE_SYNC 0x43
|
||||
|
@ -646,12 +758,12 @@
|
|||
#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
|
||||
#define EOP_TCL1_ACTION_EN (1 << 16)
|
||||
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
|
||||
#define CACHE_POLICY(x) ((x) << 25)
|
||||
#define EOP_CACHE_POLICY(x) ((x) << 25)
|
||||
/* 0 - LRU
|
||||
* 1 - Stream
|
||||
* 2 - Bypass
|
||||
*/
|
||||
#define TCL2_VOLATILE (1 << 27)
|
||||
#define EOP_TCL2_VOLATILE (1 << 27)
|
||||
#define DATA_SEL(x) ((x) << 29)
|
||||
/* 0 - discard
|
||||
* 1 - send low 32bit data
|
||||
|
@ -693,6 +805,8 @@
|
|||
#define PACKET3_SET_SH_REG_OFFSET 0x77
|
||||
#define PACKET3_SET_QUEUE_REG 0x78
|
||||
#define PACKET3_SET_UCONFIG_REG 0x79
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x00030000
|
||||
#define PACKET3_SET_UCONFIG_REG_END 0x00031000
|
||||
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
|
||||
#define PACKET3_SCRATCH_RAM_READ 0x7E
|
||||
#define PACKET3_LOAD_CONST_RAM 0x80
|
||||
|
@ -702,6 +816,6 @@
|
|||
#define PACKET3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PACKET3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
|
||||
|
||||
#define PACKET3_SWITCH_BUFFER 0x8B
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue