forked from OSchip/llvm-project
[amdgpu] Add `llvm.amdgcn.endpgm` support.
- `llvm.amdgcn.endpgm` is added to enable "abort" support. Differential Revision: https://reviews.llvm.org/D90809
This commit is contained in:
parent
16e5a347e7
commit
23c6d1501d
|
@ -214,6 +214,8 @@ BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
|
|||
BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
|
||||
BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
|
||||
|
||||
BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600-NI only builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -16,3 +16,9 @@ void test_ds_fmax(float src) {
|
|||
__shared__ float shared;
|
||||
volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @_Z6endpgmv(
|
||||
// CHECK: call void @llvm.amdgcn.endpgm()
|
||||
__global__ void endpgm() {
|
||||
__builtin_amdgcn_endpgm();
|
||||
}
|
||||
|
|
|
@ -1577,6 +1577,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
|
|||
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
|
||||
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
|
||||
|
||||
def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
|
||||
Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
|
||||
>;
|
||||
|
||||
// Copies the active channels of the source value to the destination value,
|
||||
// with the guarantee that the source value is computed as if the entire
|
||||
// program were executed in Whole Wavefront Mode, i.e. with all channels
|
||||
|
|
|
@ -1118,6 +1118,7 @@ let isTerminator = 1 in {
|
|||
def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> {
|
||||
let isBarrier = 1;
|
||||
let isReturn = 1;
|
||||
let hasSideEffects = 1;
|
||||
}
|
||||
|
||||
def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> {
|
||||
|
@ -1328,6 +1329,11 @@ def : GCNPat <
|
|||
(S_ENDPGM (i16 0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(int_amdgcn_endpgm),
|
||||
(S_ENDPGM (i16 0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(i64 (ctpop i64:$src)),
|
||||
(i64 (REG_SEQUENCE SReg_64,
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
define amdgpu_kernel void @test0() {
|
||||
; CHECK-LABEL: test0:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
tail call void @llvm.amdgcn.endpgm()
|
||||
unreachable
|
||||
}
|
||||
|
||||
define void @test1() {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_endpgm
|
||||
tail call void @llvm.amdgcn.endpgm()
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test2(i32* %p, i32 %x) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; CHECK-NEXT: s_cbranch_scc0 BB2_2
|
||||
; CHECK-NEXT: ; %bb.1: ; %else
|
||||
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s2
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s1
|
||||
; CHECK-NEXT: flat_store_dword v[0:1], v2
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: BB2_2: ; %then
|
||||
; CHECK-NEXT: s_endpgm
|
||||
%cond = icmp sgt i32 %x, 0
|
||||
br i1 %cond, label %then, label %else
|
||||
|
||||
then:
|
||||
tail call void @llvm.amdgcn.endpgm()
|
||||
unreachable
|
||||
|
||||
else:
|
||||
store i32 %x, i32* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.endpgm()
|
Loading…
Reference in New Issue