2018-11-01 02:54:06 +08:00
|
|
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
[AMDGPU] Remove -amdgpu-spill-sgpr-to-smem.
Summary: The implementation was never completed and never used except in tests.
Reviewers: arsenm, mareko
Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69163
llvm-svn: 375293
2019-10-19 05:48:22 +08:00
|
|
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
2018-11-01 02:54:06 +08:00
|
|
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
[AMDGPU] Remove -amdgpu-spill-sgpr-to-smem.
Summary: The implementation was never completed and never used except in tests.
Reviewers: arsenm, mareko
Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69163
llvm-svn: 375293
2019-10-19 05:48:22 +08:00
|
|
|
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
2016-09-03 14:57:55 +08:00
|
|
|
|
|
|
|
; XXX - Why does it like to use vcc?
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}spill_m0:
|
2016-10-29 03:43:31 +08:00
|
|
|
|
2016-11-26 01:37:09 +08:00
|
|
|
; GCN-DAG: s_cmp_lg_u32
|
2016-09-03 14:57:55 +08:00
|
|
|
|
2016-11-26 01:37:09 +08:00
|
|
|
; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
|
2018-11-07 14:57:03 +08:00
|
|
|
; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2
|
2016-09-03 14:57:55 +08:00
|
|
|
|
2016-11-26 01:37:09 +08:00
|
|
|
; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
|
|
|
|
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
|
2018-11-07 14:57:03 +08:00
|
|
|
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Spill
|
2016-11-26 01:37:09 +08:00
|
|
|
|
2016-09-03 14:57:55 +08:00
|
|
|
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
|
|
|
|
|
|
|
; GCN: [[ENDIF]]:
|
2018-11-07 14:57:03 +08:00
|
|
|
; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2
|
2016-11-26 01:37:09 +08:00
|
|
|
; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
|
2016-09-03 14:57:55 +08:00
|
|
|
|
2018-11-07 14:57:03 +08:00
|
|
|
; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12 ; 4-byte Folded Reload
|
2016-09-03 14:57:55 +08:00
|
|
|
; TOVMEM: s_waitcnt vmcnt(0)
|
2016-11-26 01:37:09 +08:00
|
|
|
; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
|
|
|
|
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
2016-09-03 14:57:55 +08:00
|
|
|
|
2016-11-26 01:37:09 +08:00
|
|
|
; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
|
2017-03-22 05:39:51 +08:00
|
|
|
; Test kernel: keeps a value that was produced in m0 live across a
; conditional branch. The entry block defines %m0, the %if block does
; unrelated work, and %endif consumes %m0 again through an m0-constrained
; inline asm operand. The check lines above expect the m0 copy to be saved
; before the branch (to a VGPR lane or to memory, depending on the RUN line)
; and restored into m0 in the join block.
define amdgpu_kernel void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
|
2016-09-03 14:57:55 +08:00
|
|
|
entry:
|
2019-06-15 05:16:06 +08:00
|
|
|
; Write 0 to m0 and capture the register as %m0 ("={m0}" output constraint).
%m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
|
2016-09-03 14:57:55 +08:00
|
|
|
%cmp0 = icmp eq i32 %cond, 0
|
|
|
|
br i1 %cmp0, label %if, label %endif
|
|
|
|
|
|
|
|
|
|
if:
|
|
|
|
; Side-effecting inline asm with no operands; gives the %if block a body.
call void asm sideeffect "v_nop", ""() #0
|
|
|
|
br label %endif
|
|
|
|
|
|
|
|
|
|
endif:
|
2019-06-15 05:16:06 +08:00
|
|
|
; %m0 is passed back in through the {m0} input constraint, so the value
; defined in the entry block must be in m0 again at this point.
%foo = call i32 asm sideeffect "s_add_i32 $0, $1, 1", "=s,{m0}"(i32 %m0) #0
|
2016-09-03 14:57:55 +08:00
|
|
|
store i32 %foo, i32 addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Internal-linkage array of 64 floats in address space 3 with an undef
; initializer. @spill_kill_m0_lds loads element 0 from it on its %if path.
@lds = internal addrspace(3) global [64 x float] undef
|
|
|
|
|
2016-11-30 03:39:53 +08:00
|
|
|
; m0 is killed, so it does not need to be preserved across the spill in the entry block.
|
|
|
|
; GCN-LABEL: {{^}}spill_kill_m0_lds:
|
2016-11-26 01:37:09 +08:00
|
|
|
; GCN: s_mov_b32 m0, s6
|
|
|
|
; GCN: v_interp_mov_f32
|
|
|
|
|
2016-09-03 14:57:55 +08:00
|
|
|
; GCN-NOT: v_readlane_b32 m0
|
2016-11-26 01:37:09 +08:00
|
|
|
; GCN-NOT: s_buffer_store_dword m0
|
|
|
|
; GCN-NOT: s_buffer_load_dword m0
|
2018-02-14 02:00:25 +08:00
|
|
|
; Pixel-shader test (amdgpu_ps calling convention). %m0 arrives as an inreg
; i32 argument and is used as the last operand of llvm.amdgcn.interp.mov in
; both %main_body and %else. The %if path instead loads from @lds. Both
; paths feed a phi in %endif whose value is packed and exported.
; The negative checks above require that no v_readlane_b32,
; s_buffer_store_dword or s_buffer_load_dword on m0 follows the
; v_interp_mov_f32 match.
define amdgpu_ps void @spill_kill_m0_lds(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %m0) #0 {
|
2016-09-03 14:57:55 +08:00
|
|
|
main_body:
|
2017-02-16 10:01:13 +08:00
|
|
|
; First use of %m0: it is the last operand of the interp.mov intrinsic.
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
|
2016-11-30 03:39:53 +08:00
|
|
|
%cmp = fcmp ueq float 0.000000e+00, %tmp
|
2016-09-03 14:57:55 +08:00
|
|
|
br i1 %cmp, label %if, label %else
|
|
|
|
|
2016-11-30 03:39:53 +08:00
|
|
|
if: ; preds = %main_body
|
2016-09-03 14:57:55 +08:00
|
|
|
; Address of element 0 of the @lds array.
%lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
|
[AMDGPU] Don't force WQM for DS op
Summary:
Previously, all DS ops forced WQM in a pixel shader. That was a hack to
allow for graphics frontends using ds_swizzle to implement explicit
derivatives, on SI/CI at least where DPP is not available. But it forced
WQM for _any_ DS op.
With this commit, DS ops no longer force WQM. Both graphics frontends
(Mesa and LLPC) need to change to issue an explicit llvm.amdgcn.wqm
intrinsic call when calculating explicit derivatives.
The required Mesa change is: "amd/common: use llvm.amdgcn.wqm for
explicit derivatives".
Subscribers: qcolombet, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D46051
Change-Id: I9b745b626fa91bbd66456e6cf41ee07eeea42f81
llvm-svn: 331633
2018-05-07 21:21:26 +08:00
|
|
|
%lds_data_ = load float, float addrspace(3)* %lds_ptr
|
|
|
|
; The loaded value is passed through llvm.amdgcn.wqm.f32 before it reaches
; the phi in %endif.
%lds_data = call float @llvm.amdgcn.wqm.f32(float %lds_data_)
|
2016-09-03 14:57:55 +08:00
|
|
|
br label %endif
|
|
|
|
|
2016-11-30 03:39:53 +08:00
|
|
|
else: ; preds = %main_body
|
2017-02-16 10:01:13 +08:00
|
|
|
; Second use of %m0, with the same operands as the call in %main_body.
%interp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
|
2016-11-30 03:39:53 +08:00
|
|
|
br label %endif
|
|
|
|
|
|
|
|
|
|
endif: ; preds = %else, %if
|
|
|
|
; Select the value from whichever path was taken, pack it twice into a
; <2 x half> with cvt.pkrtz, and pass the packed value to the export call.
%export = phi float [ %lds_data, %if ], [ %interp, %else ]
|
2017-02-22 08:27:34 +08:00
|
|
|
%tmp4 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %export, float %export)
|
|
|
|
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp4, <2 x half> %tmp4, i1 true, i1 true) #0
|
2016-11-30 03:39:53 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-02-22 08:02:21 +08:00
|
|
|
; Declarations of the AMDGPU intrinsics referenced by the test functions.
; interp.mov, cvt.pkrtz and wqm.f32 carry attribute group #1; the export
; intrinsics carry attribute group #0.
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
|
|
|
; NOTE(review): llvm.amdgcn.exp.f32 is not called in the visible part of this
; file; it may be used elsewhere or be a leftover declaration - confirm.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
2017-02-22 08:27:34 +08:00
|
|
|
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
|
|
|
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
[AMDGPU] Don't force WQM for DS op
Summary:
Previously, all DS ops forced WQM in a pixel shader. That was a hack to
allow for graphics frontends using ds_swizzle to implement explicit
derivatives, on SI/CI at least where DPP is not available. But it forced
WQM for _any_ DS op.
With this commit, DS ops no longer force WQM. Both graphics frontends
(Mesa and LLPC) need to change to issue an explicit llvm.amdgcn.wqm
intrinsic call when calculating explicit derivatives.
The required Mesa change is: "amd/common: use llvm.amdgcn.wqm for
explicit derivatives".
Subscribers: qcolombet, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D46051
Change-Id: I9b745b626fa91bbd66456e6cf41ee07eeea42f81
llvm-svn: 331633
2018-05-07 21:21:26 +08:00
|
|
|
declare float @llvm.amdgcn.wqm.f32(float) #1
|
2016-09-03 14:57:55 +08:00
|
|
|
|
|
|
|
; Attribute group #0: used on both test functions, on the inline-asm call
; sites, and on the export intrinsic declarations and call.
attributes #0 = { nounwind }
|
2017-02-16 10:01:13 +08:00
|
|
|
; Attribute group #1: used on the interp.mov, cvt.pkrtz and wqm.f32
; intrinsic declarations.
attributes #1 = { nounwind readnone }
|