forked from OSchip/llvm-project
[AMDGPU] Added __builtin_amdgcn_ds_bvh_stack_rtn
Differential Revision: https://reviews.llvm.org/D133966
This commit is contained in:
parent
55e6078d21
commit
e540965915
|
@ -281,6 +281,8 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64, "V4iIbV2iIbV2iV4iIb",
|
|||
TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtn, "UiUIi", "n", "gfx11-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl, "UWiUIi", "n", "gfx11-insts")
|
||||
|
||||
// Prototype "V2UiUiUiV4UiIi": the result is a 2-element unsigned int vector;
// the operands are (unsigned int, unsigned int, 4-element unsigned int vector,
// int), and the 'I' prefix requires the final int to be an integer constant
// expression. Attribute "n" marks the builtin nothrow; the builtin is only
// available with the "gfx11-insts" target feature.
TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_rtn, "V2UiUiUiV4UiIi", "n", "gfx11-insts")
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Special builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -16897,6 +16897,21 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
|
|||
RayInverseDir, TextureDescr});
|
||||
}
|
||||
|
||||
case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
  // Evaluate every builtin operand in source order and forward them
  // unchanged to the intrinsic.
  SmallVector<Value *, 4> Operands;
  const unsigned NumOperands = E->getNumArgs();
  for (unsigned Idx = 0; Idx != NumOperands; ++Idx)
    Operands.push_back(EmitScalarExpr(E->getArg(Idx)));

  Function *Intrin = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
  Value *Pair = Builder.CreateCall(Intrin, Operands);

  // The call result is read as a two-member aggregate, while the builtin's
  // own result type is a vector: pull out both members, then insert them
  // into lanes 0 and 1 of a poison vector of that type.
  Value *Lane0 = Builder.CreateExtractValue(Pair, 0);
  Value *Lane1 = Builder.CreateExtractValue(Pair, 1);
  llvm::Type *VecTy = ConvertType(E->getType());
  Value *Partial = Builder.CreateInsertElement(PoisonValue::get(VecTy), Lane0,
                                               static_cast<uint64_t>(0));
  return Builder.CreateInsertElement(Partial, Lane1, 1);
}
|
||||
|
||||
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
|
||||
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
|
||||
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -verify -S -emit-llvm -o - %s
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef uint uint2 __attribute__((ext_vector_type(2)));
|
||||
typedef uint uint4 __attribute__((ext_vector_type(4)));
|
||||
|
||||
// Negative test: the fourth operand of __builtin_amdgcn_ds_bvh_stack_rtn is
// given the runtime kernel argument `offset` instead of a literal, and the
// compiler must reject that call with the diagnostic annotated on the line.
kernel void builtins_amdgcn_bvh_err(global uint2* out, uint addr, uint data, uint4 data1, uint offset) {
|
||||
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, offset); // expected-error {{'__builtin_amdgcn_ds_bvh_stack_rtn' must be a constant integer}}
|
||||
}
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
typedef uint uint2 __attribute__((ext_vector_type(2)));
|
||||
typedef uint uint4 __attribute__((ext_vector_type(4)));
|
||||
|
||||
// CHECK-LABEL: @test_s_sendmsg_rtn(
|
||||
// CHECK: call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
|
||||
|
@ -18,3 +20,14 @@ void test_s_sendmsg_rtn(global uint* out) {
|
|||
// Stores the value returned by __builtin_amdgcn_s_sendmsg_rtnl(0) through `out`.
void test_s_sendmsg_rtnl(global ulong* out) {
|
||||
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_ds_bvh_stack_rtn(
|
||||
// CHECK: %0 = tail call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
|
||||
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
|
||||
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
|
||||
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
|
||||
// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
|
||||
// Positive test: calls the builtin with the literal 128 as its fourth operand
// and stores the uint2 result through `out`. The FileCheck lines above pin the
// emitted IR: one intrinsic call returning { i32, i32 }, two extractvalue
// instructions, and two insertelement instructions building the <2 x i32>.
// The parameter names must stay as they are, since the IR patterns refer to
// %addr, %data and %data1.
void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
|
||||
{
|
||||
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue