AMDGPU: Emit llvm.fshr for __builtin_amdgcn_alignbit

__builtin_amdgcn_alignbit and llvm.fshr are equivalent, so emit the generic
intrinsic directly. The generic rotate builtins do not directly map to the
fshr intrinsic.
This commit is contained in:
Matt Arsenault 2020-03-19 21:40:58 -04:00 committed by Matt Arsenault
parent b20a1d840f
commit 3f533006ba
3 changed files with 10 additions and 3 deletions

View File

@ -13609,6 +13609,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
case AMDGPU::BI__builtin_r600_read_tidig_z:
return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
case AMDGPU::BI__builtin_amdgcn_alignbit: {
  // Lower the builtin to the generic funnel-shift-right intrinsic. The three
  // call arguments are emitted in source order (braced-init-list evaluation
  // is left-to-right) and forwarded unchanged; the intrinsic is overloaded on
  // the type of the first operand.
  llvm::Value *Operands[] = {
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1)),
      EmitScalarExpr(E->getArg(2)),
  };
  Function *FunnelShiftRight =
      CGM.getIntrinsic(Intrinsic::fshr, Operands[0]->getType());
  return Builder.CreateCall(FunnelShiftRight, Operands);
}
default:
return nullptr;
}

View File

@ -596,7 +596,7 @@ kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
}
// CHECK-LABEL: @test_alignbit(
// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2)
// CHECK: tail call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2)
// Calls __builtin_amdgcn_alignbit with the three uint kernel arguments and
// stores the result through `out`.
kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_alignbit(src0, src1, src2);
}

View File

@ -1371,8 +1371,8 @@ def int_amdgcn_writelane :
[IntrNoMem, IntrConvergent]
>;
def int_amdgcn_alignbit :
GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
// FIXME: Deprecated. This is equivalent to llvm.fshr
def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;