forked from OSchip/llvm-project
AMDGPU: Emit llvm.fshr for __builtin_amdgcn_alignbit
The llvm.amdgcn.alignbit intrinsic and llvm.fshr are equivalent. The generic rotate builtins do not directly map to the fshr intrinsic.
This commit is contained in:
parent
b20a1d840f
commit
3f533006ba
|
@ -13609,6 +13609,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
|
|||
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
|
||||
case AMDGPU::BI__builtin_r600_read_tidig_z:
|
||||
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
|
||||
case AMDGPU::BI__builtin_amdgcn_alignbit: {
|
||||
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
|
||||
return Builder.CreateCall(F, { Src0, Src1, Src2 });
|
||||
}
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -596,7 +596,7 @@ kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: @test_alignbit(
|
||||
// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2)
|
||||
// CHECK: tail call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2)
|
||||
kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
|
||||
*out = __builtin_amdgcn_alignbit(src0, src1, src2);
|
||||
}
|
||||
|
|
|
@ -1371,8 +1371,8 @@ def int_amdgcn_writelane :
|
|||
[IntrNoMem, IntrConvergent]
|
||||
>;
|
||||
|
||||
def int_amdgcn_alignbit :
|
||||
GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
|
||||
// FIXME: Deprecated. This is equivalent to llvm.fshr
|
||||
def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
|
Loading…
Reference in New Issue