forked from OSchip/llvm-project
AMDGPU: Basic folds for fmed3 intrinsic
Constant fold, canonicalize constants to RHS, reduce to minnum/maxnum when inputs are nan/undef. llvm-svn: 296409
This commit is contained in:
parent
65da45763e
commit
cdb468c0f9
|
@ -560,6 +560,22 @@ public:
|
|||
Type *ResultType,
|
||||
const Twine &Name = "");
|
||||
|
||||
/// Create a call to intrinsic \p ID with 2 operands which is mangled on the
|
||||
/// first type.
|
||||
CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID,
|
||||
Value *LHS, Value *RHS,
|
||||
const Twine &Name = "");
|
||||
|
||||
/// Create call to the minnum intrinsic.
|
||||
CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
|
||||
return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
|
||||
}
|
||||
|
||||
/// Create call to the maxnum intrinsic.
|
||||
CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
|
||||
return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
|
||||
}
|
||||
|
||||
private:
|
||||
/// \brief Create a call to a masked intrinsic with given Id.
|
||||
CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops,
|
||||
|
|
|
@ -157,6 +157,19 @@ inline match_combine_or<match_zero, match_neg_zero> m_AnyZero() {
|
|||
return m_CombineOr(m_Zero(), m_NegZero());
|
||||
}
|
||||
|
||||
struct match_nan {
|
||||
template <typename ITy> bool match(ITy *V) {
|
||||
if (const auto *C = dyn_cast<ConstantFP>(V)) {
|
||||
const APFloat &APF = C->getValueAPF();
|
||||
return APF.isNaN();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
|
||||
inline match_nan m_NaN() { return match_nan(); }
|
||||
|
||||
struct apint_match {
|
||||
const APInt *&Res;
|
||||
apint_match(const APInt *&R) : Res(R) {}
|
||||
|
|
|
@ -482,3 +482,11 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
|
|||
getInt32(DerivedOffset)};
|
||||
return createCallHelper(FnGCRelocate, Args, this, Name);
|
||||
}
|
||||
|
||||
/// Emit a call to the two-operand intrinsic \p ID, using the type of \p LHS
/// as the single overload type when looking up the declaration.
CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID,
                                               Value *LHS, Value *RHS,
                                               const Twine &Name) {
  // The declaration is fetched from the module that owns BB's parent.
  Module *OwningModule = BB->getParent()->getParent();
  Type *OverloadTy = LHS->getType();
  Function *Callee = Intrinsic::getDeclaration(OwningModule, ID, {OverloadTy});

  Value *Operands[] = {LHS, RHS};
  return createCallHelper(Callee, Operands, this, Name);
}
|
||||
|
|
|
@ -1533,6 +1533,27 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
|
||||
//
|
||||
// A single NaN input is folded to minnum, so we rely on that folding for
|
||||
// handling NaNs.
|
||||
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
|
||||
const APFloat &Src2) {
|
||||
APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
|
||||
|
||||
APFloat::cmpResult Cmp0 = Max3.compare(Src0);
|
||||
assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
|
||||
if (Cmp0 == APFloat::cmpEqual)
|
||||
return maxnum(Src1, Src2);
|
||||
|
||||
APFloat::cmpResult Cmp1 = Max3.compare(Src1);
|
||||
assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
|
||||
if (Cmp1 == APFloat::cmpEqual)
|
||||
return maxnum(Src0, Src2);
|
||||
|
||||
return maxnum(Src0, Src1);
|
||||
}
|
||||
|
||||
// Returns true iff the 2 intrinsics have the same operands, limiting the
|
||||
// comparison to the first NumOperands.
|
||||
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
|
||||
|
@ -3331,6 +3352,61 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
return II;
|
||||
|
||||
break;
|
||||
|
||||
}
|
||||
case Intrinsic::amdgcn_fmed3: {
  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
  // for the shader.

  Value *Src0 = II->getArgOperand(0);
  Value *Src1 = II->getArgOperand(1);
  Value *Src2 = II->getArgOperand(2);

  bool Swap = false;
  // Canonicalize constants to RHS operands.
  //
  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
    std::swap(Src0, Src1);
    Swap = true;
  }

  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
    std::swap(Src1, Src2);
    Swap = true;
  }

  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
    std::swap(Src0, Src1);
    Swap = true;
  }

  if (Swap) {
    II->setArgOperand(0, Src0);
    II->setArgOperand(1, Src1);
    II->setArgOperand(2, Src2);
    return II;
  }

  // A NaN or undef operand reduces fmed3 to minnum of the other two
  // operands. Canonicalization only moves constants past non-constants, so
  // when several operands are constants a NaN/undef can still sit in Src0 or
  // Src1 (e.g. fmed3(nan, 1.0, 2.0)). All three positions are therefore
  // checked; otherwise such a call would reach the constant folder below,
  // which requires NaN-free inputs. Src2 is tested first so the operand that
  // canonicalization normally produces keeps its existing fold.
  auto IsNaNOrUndef = [](Value *V) {
    return match(V, m_NaN()) || isa<UndefValue>(V);
  };

  Value *MinLHS = nullptr;
  Value *MinRHS = nullptr;
  if (IsNaNOrUndef(Src2)) {
    MinLHS = Src0;
    MinRHS = Src1;
  } else if (IsNaNOrUndef(Src0)) {
    MinLHS = Src1;
    MinRHS = Src2;
  } else if (IsNaNOrUndef(Src1)) {
    MinLHS = Src0;
    MinRHS = Src2;
  }

  if (MinLHS) {
    CallInst *NewCall = Builder->CreateMinNum(MinLHS, MinRHS);
    NewCall->copyFastMathFlags(II);
    NewCall->takeName(II);
    return replaceInstUsesWith(*II, NewCall);
  }

  // Fold calls whose operands are all floating-point constants. No operand
  // can be NaN at this point because of the check above.
  const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0);
  const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1);
  const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2);
  if (C0 && C1 && C2) {
    APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                 C2->getValueAPF());
    return replaceInstUsesWith(*II,
                               ConstantFP::get(Builder->getContext(), Result));
  }

  break;
}
|
||||
case Intrinsic::stackrestore: {
|
||||
// If the save is right next to the restore, remove the restore. This can
|
||||
|
|
|
@ -1025,3 +1025,185 @@ define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw)
|
|||
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; llvm.amdgcn.fmed3
|
||||
; --------------------------------------------------------------------
|
||||
|
||||
declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: @fmed3_f32(
|
||||
; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
|
||||
define float @fmed3_f32(float %x, float %y, float %z) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
|
||||
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
|
||||
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
|
||||
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
|
||||
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
|
||||
; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
|
||||
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
|
||||
define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_undef_x_y_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_undef_x_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
|
||||
; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
|
||||
%med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_x_undef_y_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_x_undef_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_x_y_undef_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_x_y_undef_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
|
||||
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
|
||||
define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; This can return any of the qnans.
|
||||
; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
|
||||
; CHECK: ret float 0x7FF8002000000000
|
||||
define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src0_0_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src0_0_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src0_1_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src0_1_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src1_0_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src1_0_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src1_1_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src1_1_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src2_0_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src2_0_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_constant_src2_1_f32(
|
||||
; CHECK: ret float 5.000000e-01
|
||||
define float @fmed3_constant_src2_1_f32(float %x, float %y) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
|
||||
; CHECK: ret float %x
|
||||
define float @fmed3_x_qnan0_qnan1_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
|
||||
; CHECK: ret float %x
|
||||
define float @fmed3_qnan0_x_qnan1_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
|
||||
ret float %med3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
|
||||
; CHECK: ret float %x
|
||||
define float @fmed3_qnan0_qnan1_x_f32(float %x) {
|
||||
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
|
||||
ret float %med3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue