forked from OSchip/llvm-project
[NVPTX] Upgrade NVVM intrinsics in InstCombineCalls.
Summary: There are many NVVM intrinsics that we can't entirely get rid of, but that nonetheless often correspond to target-generic LLVM intrinsics. For example, if flush denormals to zero (ftz) is enabled, we can convert @llvm.nvvm.ceil.ftz.f to @llvm.ceil.f32. On the other hand, if ftz is disabled, we can't do this, because @llvm.ceil.f32 will be lowered to a non-ftz PTX instruction. In this case, we can, however, simplify the non-ftz nvvm ceil intrinsic, @llvm.nvvm.ceil.f, to @llvm.ceil.f32. These transformations are particularly useful because they let us constant fold instructions that appear in libdevice, the bitcode library that ships with CUDA and essentially functions as its libm. Reviewers: tra Subscribers: hfinkel, majnemer, llvm-commits Differential Revision: https://reviews.llvm.org/D28794 llvm-svn: 293244
This commit is contained in:
parent
322c127bee
commit
698c31b8db
|
@ -1498,6 +1498,253 @@ static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert NVVM intrinsics to target-generic LLVM code where possible.
|
||||||
|
static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
|
||||||
|
// Each NVVM intrinsic we can simplify can be replaced with one of:
|
||||||
|
//
|
||||||
|
// * an LLVM intrinsic,
|
||||||
|
// * an LLVM cast operation,
|
||||||
|
// * an LLVM binary operation, or
|
||||||
|
// * ad-hoc LLVM IR for the particular operation.
|
||||||
|
|
||||||
|
// Some transformations are only valid when the module's
|
||||||
|
// flush-denormals-to-zero (ftz) setting is true/false, whereas other
|
||||||
|
// transformations are valid regardless of the module's ftz setting.
|
||||||
|
enum FtzRequirementTy {
|
||||||
|
FTZ_Any, // Any ftz setting is ok.
|
||||||
|
FTZ_MustBeOn, // Transformation is valid only if ftz is on.
|
||||||
|
FTZ_MustBeOff, // Transformation is valid only if ftz is off.
|
||||||
|
};
|
||||||
|
// Classes of NVVM intrinsics that can't be replaced one-to-one with a
|
||||||
|
// target-generic intrinsic, cast op, or binary op but that we can nonetheless
|
||||||
|
// simplify.
|
||||||
|
enum SpecialCase {
|
||||||
|
SPC_Reciprocal,
|
||||||
|
};
|
||||||
|
|
||||||
|
// SimplifyAction is a poor-man's variant (plus an additional flag) that
|
||||||
|
// represents how to replace an NVVM intrinsic with target-generic LLVM IR.
|
||||||
|
struct SimplifyAction {
|
||||||
|
// Invariant: At most one of these Optionals has a value.
|
||||||
|
Optional<Intrinsic::ID> IID;
|
||||||
|
Optional<Instruction::CastOps> CastOp;
|
||||||
|
Optional<Instruction::BinaryOps> BinaryOp;
|
||||||
|
Optional<SpecialCase> Special;
|
||||||
|
|
||||||
|
FtzRequirementTy FtzRequirement = FTZ_Any;
|
||||||
|
|
||||||
|
SimplifyAction() = default;
|
||||||
|
|
||||||
|
SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
|
||||||
|
: IID(IID), FtzRequirement(FtzReq) {}
|
||||||
|
|
||||||
|
// Cast operations don't have anything to do with FTZ, so we skip that
|
||||||
|
// argument.
|
||||||
|
SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
|
||||||
|
|
||||||
|
SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
|
||||||
|
: BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
|
||||||
|
|
||||||
|
SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
|
||||||
|
: Special(Special), FtzRequirement(FtzReq) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try to generate a SimplifyAction describing how to replace our
|
||||||
|
// IntrinsicInstr with target-generic LLVM IR.
|
||||||
|
const SimplifyAction Action = [II]() -> SimplifyAction {
|
||||||
|
switch (II->getIntrinsicID()) {
|
||||||
|
|
||||||
|
// NVVM intrinsics that map directly to LLVM intrinsics.
|
||||||
|
case Intrinsic::nvvm_ceil_d:
|
||||||
|
return {Intrinsic::ceil, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_ceil_f:
|
||||||
|
return {Intrinsic::ceil, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_ceil_ftz_f:
|
||||||
|
return {Intrinsic::ceil, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_fabs_d:
|
||||||
|
return {Intrinsic::fabs, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_fabs_f:
|
||||||
|
return {Intrinsic::fabs, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_fabs_ftz_f:
|
||||||
|
return {Intrinsic::fabs, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_floor_d:
|
||||||
|
return {Intrinsic::floor, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_floor_f:
|
||||||
|
return {Intrinsic::floor, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_floor_ftz_f:
|
||||||
|
return {Intrinsic::floor, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_fma_rn_d:
|
||||||
|
return {Intrinsic::fma, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_fma_rn_f:
|
||||||
|
return {Intrinsic::fma, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_fma_rn_ftz_f:
|
||||||
|
return {Intrinsic::fma, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_fmax_d:
|
||||||
|
return {Intrinsic::maxnum, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_fmax_f:
|
||||||
|
return {Intrinsic::maxnum, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_fmax_ftz_f:
|
||||||
|
return {Intrinsic::maxnum, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_fmin_d:
|
||||||
|
return {Intrinsic::minnum, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_fmin_f:
|
||||||
|
return {Intrinsic::minnum, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_fmin_ftz_f:
|
||||||
|
return {Intrinsic::minnum, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_round_d:
|
||||||
|
return {Intrinsic::round, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_round_f:
|
||||||
|
return {Intrinsic::round, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_round_ftz_f:
|
||||||
|
return {Intrinsic::round, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_sqrt_rn_d:
|
||||||
|
return {Intrinsic::sqrt, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_sqrt_f:
|
||||||
|
// nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
|
||||||
|
// ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
|
||||||
|
// the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
|
||||||
|
// the versions with explicit ftz-ness.
|
||||||
|
return {Intrinsic::sqrt, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_sqrt_rn_f:
|
||||||
|
return {Intrinsic::sqrt, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_sqrt_rn_ftz_f:
|
||||||
|
return {Intrinsic::sqrt, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_trunc_d:
|
||||||
|
return {Intrinsic::trunc, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_trunc_f:
|
||||||
|
return {Intrinsic::trunc, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_trunc_ftz_f:
|
||||||
|
return {Intrinsic::trunc, FTZ_MustBeOn};
|
||||||
|
|
||||||
|
// NVVM intrinsics that map to LLVM cast operations.
|
||||||
|
//
|
||||||
|
// Note that llvm's target-generic conversion operators correspond to the rz
|
||||||
|
// (round to zero) versions of the nvvm conversion intrinsics, even though
|
||||||
|
// most everything else here uses the rn (round to nearest even) nvvm ops.
|
||||||
|
case Intrinsic::nvvm_d2i_rz:
|
||||||
|
case Intrinsic::nvvm_f2i_rz:
|
||||||
|
case Intrinsic::nvvm_d2ll_rz:
|
||||||
|
case Intrinsic::nvvm_f2ll_rz:
|
||||||
|
return {Instruction::FPToSI};
|
||||||
|
case Intrinsic::nvvm_d2ui_rz:
|
||||||
|
case Intrinsic::nvvm_f2ui_rz:
|
||||||
|
case Intrinsic::nvvm_d2ull_rz:
|
||||||
|
case Intrinsic::nvvm_f2ull_rz:
|
||||||
|
return {Instruction::FPToUI};
|
||||||
|
case Intrinsic::nvvm_i2d_rz:
|
||||||
|
case Intrinsic::nvvm_i2f_rz:
|
||||||
|
case Intrinsic::nvvm_ll2d_rz:
|
||||||
|
case Intrinsic::nvvm_ll2f_rz:
|
||||||
|
return {Instruction::SIToFP};
|
||||||
|
case Intrinsic::nvvm_ui2d_rz:
|
||||||
|
case Intrinsic::nvvm_ui2f_rz:
|
||||||
|
case Intrinsic::nvvm_ull2d_rz:
|
||||||
|
case Intrinsic::nvvm_ull2f_rz:
|
||||||
|
return {Instruction::UIToFP};
|
||||||
|
|
||||||
|
// NVVM intrinsics that map to LLVM binary ops.
|
||||||
|
case Intrinsic::nvvm_add_rn_d:
|
||||||
|
return {Instruction::FAdd, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_add_rn_f:
|
||||||
|
return {Instruction::FAdd, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_add_rn_ftz_f:
|
||||||
|
return {Instruction::FAdd, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_mul_rn_d:
|
||||||
|
return {Instruction::FMul, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_mul_rn_f:
|
||||||
|
return {Instruction::FMul, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_mul_rn_ftz_f:
|
||||||
|
return {Instruction::FMul, FTZ_MustBeOn};
|
||||||
|
case Intrinsic::nvvm_div_rn_d:
|
||||||
|
return {Instruction::FDiv, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_div_rn_f:
|
||||||
|
return {Instruction::FDiv, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_div_rn_ftz_f:
|
||||||
|
return {Instruction::FDiv, FTZ_MustBeOn};
|
||||||
|
|
||||||
|
// The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
|
||||||
|
// need special handling.
|
||||||
|
//
|
||||||
|
// We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just
|
||||||
|
// as well.
|
||||||
|
case Intrinsic::nvvm_rcp_rn_d:
|
||||||
|
return {SPC_Reciprocal, FTZ_Any};
|
||||||
|
case Intrinsic::nvvm_rcp_rn_f:
|
||||||
|
return {SPC_Reciprocal, FTZ_MustBeOff};
|
||||||
|
case Intrinsic::nvvm_rcp_rn_ftz_f:
|
||||||
|
return {SPC_Reciprocal, FTZ_MustBeOn};
|
||||||
|
|
||||||
|
// We do not currently simplify intrinsics that give an approximate answer.
|
||||||
|
// These include:
|
||||||
|
//
|
||||||
|
// - nvvm_cos_approx_{f,ftz_f}
|
||||||
|
// - nvvm_ex2_approx_{d,f,ftz_f}
|
||||||
|
// - nvvm_lg2_approx_{d,f,ftz_f}
|
||||||
|
// - nvvm_sin_approx_{f,ftz_f}
|
||||||
|
// - nvvm_sqrt_approx_{f,ftz_f}
|
||||||
|
// - nvvm_rsqrt_approx_{d,f,ftz_f}
|
||||||
|
// - nvvm_div_approx_{ftz_d,ftz_f,f}
|
||||||
|
// - nvvm_rcp_approx_ftz_d
|
||||||
|
//
|
||||||
|
// Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
|
||||||
|
// means that fastmath is enabled in the intrinsic. Unfortunately only
|
||||||
|
// binary operators (currently) have a fastmath bit in SelectionDAG, so this
|
||||||
|
// information gets lost and we can't select on it.
|
||||||
|
//
|
||||||
|
// TODO: div and rcp are lowered to a binary op, so these we could in theory
|
||||||
|
// lower them to "fast fdiv".
|
||||||
|
|
||||||
|
default:
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
|
// If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
|
||||||
|
// can bail out now. (Notice that in the case that IID is not an NVVM
|
||||||
|
// intrinsic, we don't have to look up any module metadata, as
|
||||||
|
// FtzRequirementTy will be FTZ_Any.)
|
||||||
|
if (Action.FtzRequirement != FTZ_Any) {
|
||||||
|
bool FtzEnabled =
|
||||||
|
II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
|
||||||
|
"true";
|
||||||
|
|
||||||
|
if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simplify to target-generic intrinsic.
|
||||||
|
if (Action.IID) {
|
||||||
|
SmallVector<Value *, 4> Args(II->arg_operands());
|
||||||
|
// All the target-generic intrinsics currently of interest to us have one
|
||||||
|
// type argument, equal to that of the nvvm intrinsic's argument.
|
||||||
|
ArrayRef<Type *> Tys = {II->getArgOperand(0)->getType()};
|
||||||
|
return CallInst::Create(
|
||||||
|
Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simplify to target-generic binary op.
|
||||||
|
if (Action.BinaryOp)
|
||||||
|
return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
|
||||||
|
II->getArgOperand(1), II->getName());
|
||||||
|
|
||||||
|
// Simplify to target-generic cast op.
|
||||||
|
if (Action.CastOp)
|
||||||
|
return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
|
||||||
|
II->getName());
|
||||||
|
|
||||||
|
// All that's left are the special cases.
|
||||||
|
if (!Action.Special)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
switch (*Action.Special) {
|
||||||
|
case SPC_Reciprocal:
|
||||||
|
// Simplify reciprocal.
|
||||||
|
return BinaryOperator::Create(
|
||||||
|
Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
|
||||||
|
II->getArgOperand(0), II->getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
|
Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
|
||||||
removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
|
removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -1587,6 +1834,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||||
if (Changed) return II;
|
if (Changed) return II;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
|
||||||
|
return I;
|
||||||
|
|
||||||
auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
|
auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
|
||||||
unsigned DemandedWidth) {
|
unsigned DemandedWidth) {
|
||||||
APInt UndefElts(Width, 0);
|
APInt UndefElts(Width, 0);
|
||||||
|
|
|
@ -0,0 +1,471 @@
|
||||||
|
; Check that nvvm intrinsics get simplified to target-generic intrinsics where
|
||||||
|
; possible.
|
||||||
|
;
|
||||||
|
; We run this test twice; once with ftz on, and again with ftz off. Behold the
|
||||||
|
; hackery:
|
||||||
|
|
||||||
|
; RUN: cat %s > %t.ftz
|
||||||
|
; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "true" }' >> %t.ftz
|
||||||
|
; RUN: opt < %t.ftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ
|
||||||
|
|
||||||
|
; RUN: cat %s > %t.noftz
|
||||||
|
; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "false" }' >> %t.noftz
|
||||||
|
; RUN: opt < %t.noftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ
|
||||||
|
|
||||||
|
; We handle nvvm intrinsics with ftz variants as follows:
|
||||||
|
; - If the module is in ftz mode, the ftz variant is transformed into the
|
||||||
|
; regular llvm intrinsic, and the non-ftz variant is left alone.
|
||||||
|
; - If the module is not in ftz mode, it's the reverse: Only the non-ftz
|
||||||
|
; variant is transformed, and the ftz variant is left alone.
|
||||||
|
|
||||||
|
; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.
|
||||||
|
|
||||||
|
; CHECK-LABEL: @ceil_double
|
||||||
|
define double @ceil_double(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.ceil.f64
|
||||||
|
%ret = call double @llvm.nvvm.ceil.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @ceil_float
|
||||||
|
define float @ceil_float(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.ceil.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.ceil.f
|
||||||
|
%ret = call float @llvm.nvvm.ceil.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @ceil_float_ftz
|
||||||
|
define float @ceil_float_ftz(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
|
||||||
|
; FTZ: call float @llvm.ceil.f32
|
||||||
|
%ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fabs_double
|
||||||
|
define double @fabs_double(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.fabs.f64
|
||||||
|
%ret = call double @llvm.nvvm.fabs.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fabs_float
|
||||||
|
define float @fabs_float(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.fabs.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.fabs.f
|
||||||
|
%ret = call float @llvm.nvvm.fabs.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fabs_float_ftz
|
||||||
|
define float @fabs_float_ftz(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
|
||||||
|
; FTZ: call float @llvm.fabs.f32
|
||||||
|
%ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @floor_double
|
||||||
|
define double @floor_double(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.floor.f64
|
||||||
|
%ret = call double @llvm.nvvm.floor.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @floor_float
|
||||||
|
define float @floor_float(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.floor.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.floor.f
|
||||||
|
%ret = call float @llvm.nvvm.floor.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @floor_float_ftz
|
||||||
|
define float @floor_float_ftz(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.floor.ftz.f
|
||||||
|
; FTZ: call float @llvm.floor.f32
|
||||||
|
%ret = call float @llvm.nvvm.floor.ftz.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fma_double
|
||||||
|
define double @fma_double(double %a, double %b, double %c) #0 {
|
||||||
|
; CHECK: call double @llvm.fma.f64
|
||||||
|
%ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fma_float
|
||||||
|
define float @fma_float(float %a, float %b, float %c) #0 {
|
||||||
|
; NOFTZ: call float @llvm.fma.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.fma.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fma_float_ftz
|
||||||
|
define float @fma_float_ftz(float %a, float %b, float %c) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
|
||||||
|
; FTZ: call float @llvm.fma.f32
|
||||||
|
%ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fmax_double
|
||||||
|
define double @fmax_double(double %a, double %b) #0 {
|
||||||
|
; CHECK: call double @llvm.maxnum.f64
|
||||||
|
%ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fmax_float
|
||||||
|
define float @fmax_float(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: call float @llvm.maxnum.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.fmax.f
|
||||||
|
%ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fmax_float_ftz
|
||||||
|
define float @fmax_float_ftz(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
|
||||||
|
; FTZ: call float @llvm.maxnum.f32
|
||||||
|
%ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @fmin_double
|
||||||
|
define double @fmin_double(double %a, double %b) #0 {
|
||||||
|
; CHECK: call double @llvm.minnum.f64
|
||||||
|
%ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fmin_float
|
||||||
|
define float @fmin_float(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: call float @llvm.minnum.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.fmin.f
|
||||||
|
%ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @fmin_float_ftz
|
||||||
|
define float @fmin_float_ftz(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
|
||||||
|
; FTZ: call float @llvm.minnum.f32
|
||||||
|
%ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @round_double
|
||||||
|
define double @round_double(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.round.f64
|
||||||
|
%ret = call double @llvm.nvvm.round.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @round_float
|
||||||
|
define float @round_float(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.round.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.round.f
|
||||||
|
%ret = call float @llvm.nvvm.round.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @round_float_ftz
|
||||||
|
define float @round_float_ftz(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.round.ftz.f
|
||||||
|
; FTZ: call float @llvm.round.f32
|
||||||
|
%ret = call float @llvm.nvvm.round.ftz.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @trunc_double
|
||||||
|
define double @trunc_double(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.trunc.f64
|
||||||
|
%ret = call double @llvm.nvvm.trunc.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @trunc_float
|
||||||
|
define float @trunc_float(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.trunc.f32
|
||||||
|
; FTZ: call float @llvm.nvvm.trunc.f
|
||||||
|
%ret = call float @llvm.nvvm.trunc.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @trunc_float_ftz
|
||||||
|
define float @trunc_float_ftz(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
|
||||||
|
; FTZ: call float @llvm.trunc.f32
|
||||||
|
%ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check NVVM intrinsics that correspond to LLVM cast operations.
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_d2i
|
||||||
|
define i32 @test_d2i(double %a) #0 {
|
||||||
|
; CHECK: fptosi double %a to i32
|
||||||
|
%ret = call i32 @llvm.nvvm.d2i.rz(double %a)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_f2i
|
||||||
|
define i32 @test_f2i(float %a) #0 {
|
||||||
|
; CHECK: fptosi float %a to i32
|
||||||
|
%ret = call i32 @llvm.nvvm.f2i.rz(float %a)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_d2ll
|
||||||
|
define i64 @test_d2ll(double %a) #0 {
|
||||||
|
; CHECK: fptosi double %a to i64
|
||||||
|
%ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
|
||||||
|
ret i64 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_f2ll
|
||||||
|
define i64 @test_f2ll(float %a) #0 {
|
||||||
|
; CHECK: fptosi float %a to i64
|
||||||
|
%ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
|
||||||
|
ret i64 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_d2ui
|
||||||
|
define i32 @test_d2ui(double %a) #0 {
|
||||||
|
; CHECK: fptoui double %a to i32
|
||||||
|
%ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_f2ui
|
||||||
|
define i32 @test_f2ui(float %a) #0 {
|
||||||
|
; CHECK: fptoui float %a to i32
|
||||||
|
%ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_d2ull
|
||||||
|
define i64 @test_d2ull(double %a) #0 {
|
||||||
|
; CHECK: fptoui double %a to i64
|
||||||
|
%ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
|
||||||
|
ret i64 %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_f2ull
|
||||||
|
define i64 @test_f2ull(float %a) #0 {
|
||||||
|
; CHECK: fptoui float %a to i64
|
||||||
|
%ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
|
||||||
|
ret i64 %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_i2d
|
||||||
|
define double @test_i2d(i32 %a) #0 {
|
||||||
|
; CHECK: sitofp i32 %a to double
|
||||||
|
%ret = call double @llvm.nvvm.i2d.rz(i32 %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_i2f
|
||||||
|
define float @test_i2f(i32 %a) #0 {
|
||||||
|
; CHECK: sitofp i32 %a to float
|
||||||
|
%ret = call float @llvm.nvvm.i2f.rz(i32 %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ll2d
|
||||||
|
define double @test_ll2d(i64 %a) #0 {
|
||||||
|
; CHECK: sitofp i64 %a to double
|
||||||
|
%ret = call double @llvm.nvvm.ll2d.rz(i64 %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ll2f
|
||||||
|
define float @test_ll2f(i64 %a) #0 {
|
||||||
|
; CHECK: sitofp i64 %a to float
|
||||||
|
%ret = call float @llvm.nvvm.ll2f.rz(i64 %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ui2d
|
||||||
|
define double @test_ui2d(i32 %a) #0 {
|
||||||
|
; CHECK: uitofp i32 %a to double
|
||||||
|
%ret = call double @llvm.nvvm.ui2d.rz(i32 %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ui2f
|
||||||
|
define float @test_ui2f(i32 %a) #0 {
|
||||||
|
; CHECK: uitofp i32 %a to float
|
||||||
|
%ret = call float @llvm.nvvm.ui2f.rz(i32 %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ull2d
|
||||||
|
define double @test_ull2d(i64 %a) #0 {
|
||||||
|
; CHECK: uitofp i64 %a to double
|
||||||
|
%ret = call double @llvm.nvvm.ull2d.rz(i64 %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_ull2f
|
||||||
|
define float @test_ull2f(i64 %a) #0 {
|
||||||
|
; CHECK: uitofp i64 %a to float
|
||||||
|
%ret = call float @llvm.nvvm.ull2f.rz(i64 %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check NVVM intrinsics that map to LLVM binary operations.
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_add_rn_d
|
||||||
|
define double @test_add_rn_d(double %a, double %b) #0 {
|
||||||
|
; CHECK: fadd
|
||||||
|
%ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_add_rn_f
|
||||||
|
define float @test_add_rn_f(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: fadd
|
||||||
|
; FTZ: call float @llvm.nvvm.add.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; With ftz off, the ftz variant must be left alone; check for its full name so
; that the pattern cannot be satisfied by the non-ftz intrinsic.
; CHECK-LABEL: @test_add_rn_f_ftz
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.add.rn.ftz.f
; FTZ: fadd
  %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
  ret float %ret
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_mul_rn_d
|
||||||
|
define double @test_mul_rn_d(double %a, double %b) #0 {
|
||||||
|
; CHECK: fmul
|
||||||
|
%ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_mul_rn_f
|
||||||
|
define float @test_mul_rn_f(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: fmul
|
||||||
|
; FTZ: call float @llvm.nvvm.mul.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; With ftz off, the ftz variant must be left alone; check for its full name so
; that the pattern cannot be satisfied by the non-ftz intrinsic.
; CHECK-LABEL: @test_mul_rn_f_ftz
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.mul.rn.ftz.f
; FTZ: fmul
  %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
  ret float %ret
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_div_rn_d
|
||||||
|
define double @test_div_rn_d(double %a, double %b) #0 {
|
||||||
|
; CHECK: fdiv
|
||||||
|
%ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_div_rn_f
|
||||||
|
define float @test_div_rn_f(float %a, float %b) #0 {
|
||||||
|
; NOFTZ: fdiv
|
||||||
|
; FTZ: call float @llvm.nvvm.div.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; With ftz off, the ftz variant must be left alone; check for its full name so
; that the pattern cannot be satisfied by the non-ftz intrinsic.
; CHECK-LABEL: @test_div_rn_f_ftz
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.div.rn.ftz.f
; FTZ: fdiv
  %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
  ret float %ret
}
|
||||||
|
|
||||||
|
; Check NVVM intrinsics that require us to emit custom IR.
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_rcp_rn_f
|
||||||
|
define float @test_rcp_rn_f(float %a) #0 {
|
||||||
|
; NOFTZ: fdiv float 1.0{{.*}} %a
|
||||||
|
; FTZ: call float @llvm.nvvm.rcp.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.rcp.rn.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; With ftz off, the ftz variant must be left alone; check for its full name so
; that the pattern cannot be satisfied by the non-ftz intrinsic.
; CHECK-LABEL: @test_rcp_rn_f_ftz
define float @test_rcp_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.rcp.rn.ftz.f
; FTZ: fdiv float 1.0{{.*}} %a
  %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
  ret float %ret
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test_sqrt_rn_d
|
||||||
|
define double @test_sqrt_rn_d(double %a) #0 {
|
||||||
|
; CHECK: call double @llvm.sqrt.f64(double %a)
|
||||||
|
%ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
|
||||||
|
ret double %ret
|
||||||
|
}
|
||||||
|
; nvvm.sqrt.f is a special case: it adopts the ftz-ness of the surrounding code, so it becomes llvm.sqrt.f32 whether ftz is on or off.
|
||||||
|
; CHECK-LABEL: @test_sqrt_f
|
||||||
|
define float @test_sqrt_f(float %a) #0 {
|
||||||
|
; CHECK: call float @llvm.sqrt.f32(float %a)
|
||||||
|
%ret = call float @llvm.nvvm.sqrt.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: @test_sqrt_rn_f
|
||||||
|
define float @test_sqrt_rn_f(float %a) #0 {
|
||||||
|
; NOFTZ: call float @llvm.sqrt.f32(float %a)
|
||||||
|
; FTZ: call float @llvm.nvvm.sqrt.rn.f
|
||||||
|
%ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
; With ftz off, the ftz variant must be left alone; check for its full name so
; that the pattern cannot be satisfied by the non-ftz intrinsic.
; CHECK-LABEL: @test_sqrt_rn_f_ftz
define float @test_sqrt_rn_f_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.sqrt.rn.ftz.f
; FTZ: call float @llvm.sqrt.f32(float %a)
  %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
  ret float %ret
}
|
||||||
|
|
||||||
|
declare double @llvm.nvvm.add.rn.d(double, double)
|
||||||
|
declare float @llvm.nvvm.add.rn.f(float, float)
|
||||||
|
declare float @llvm.nvvm.add.rn.ftz.f(float, float)
|
||||||
|
declare double @llvm.nvvm.ceil.d(double)
|
||||||
|
declare float @llvm.nvvm.ceil.f(float)
|
||||||
|
declare float @llvm.nvvm.ceil.ftz.f(float)
|
||||||
|
declare float @llvm.nvvm.d2f.rm(double)
|
||||||
|
declare float @llvm.nvvm.d2f.rm.ftz(double)
|
||||||
|
declare float @llvm.nvvm.d2f.rp(double)
|
||||||
|
declare float @llvm.nvvm.d2f.rp.ftz(double)
|
||||||
|
declare float @llvm.nvvm.d2f.rz(double)
|
||||||
|
declare float @llvm.nvvm.d2f.rz.ftz(double)
|
||||||
|
declare i32 @llvm.nvvm.d2i.rz(double)
|
||||||
|
declare i64 @llvm.nvvm.d2ll.rz(double)
|
||||||
|
declare i32 @llvm.nvvm.d2ui.rz(double)
|
||||||
|
declare i64 @llvm.nvvm.d2ull.rz(double)
|
||||||
|
declare double @llvm.nvvm.div.rn.d(double, double)
|
||||||
|
declare float @llvm.nvvm.div.rn.f(float, float)
|
||||||
|
declare float @llvm.nvvm.div.rn.ftz.f(float, float)
|
||||||
|
declare i16 @llvm.nvvm.f2h.rz(float)
|
||||||
|
declare i16 @llvm.nvvm.f2h.rz.ftz(float)
|
||||||
|
declare i32 @llvm.nvvm.f2i.rz(float)
|
||||||
|
declare i32 @llvm.nvvm.f2i.rz.ftz(float)
|
||||||
|
declare i64 @llvm.nvvm.f2ll.rz(float)
|
||||||
|
declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
|
||||||
|
declare i32 @llvm.nvvm.f2ui.rz(float)
|
||||||
|
declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
|
||||||
|
declare i64 @llvm.nvvm.f2ull.rz(float)
|
||||||
|
declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
|
||||||
|
declare double @llvm.nvvm.fabs.d(double)
|
||||||
|
declare float @llvm.nvvm.fabs.f(float)
|
||||||
|
declare float @llvm.nvvm.fabs.ftz.f(float)
|
||||||
|
declare double @llvm.nvvm.floor.d(double)
|
||||||
|
declare float @llvm.nvvm.floor.f(float)
|
||||||
|
declare float @llvm.nvvm.floor.ftz.f(float)
|
||||||
|
declare double @llvm.nvvm.fma.rn.d(double, double, double)
|
||||||
|
declare float @llvm.nvvm.fma.rn.f(float, float, float)
|
||||||
|
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
|
||||||
|
declare double @llvm.nvvm.fmax.d(double, double)
|
||||||
|
declare float @llvm.nvvm.fmax.f(float, float)
|
||||||
|
declare float @llvm.nvvm.fmax.ftz.f(float, float)
|
||||||
|
declare double @llvm.nvvm.fmin.d(double, double)
|
||||||
|
declare float @llvm.nvvm.fmin.f(float, float)
|
||||||
|
declare float @llvm.nvvm.fmin.ftz.f(float, float)
|
||||||
|
declare double @llvm.nvvm.i2d.rz(i32)
|
||||||
|
declare float @llvm.nvvm.i2f.rz(i32)
|
||||||
|
declare double @llvm.nvvm.ll2d.rz(i64)
|
||||||
|
declare float @llvm.nvvm.ll2f.rz(i64)
|
||||||
|
declare double @llvm.nvvm.lohi.i2d(i32, i32)
|
||||||
|
declare double @llvm.nvvm.mul.rn.d(double, double)
|
||||||
|
declare float @llvm.nvvm.mul.rn.f(float, float)
|
||||||
|
declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
|
||||||
|
declare double @llvm.nvvm.rcp.rm.d(double)
|
||||||
|
declare double @llvm.nvvm.rcp.rn.d(double)
|
||||||
|
declare float @llvm.nvvm.rcp.rn.f(float)
|
||||||
|
declare float @llvm.nvvm.rcp.rn.ftz.f(float)
|
||||||
|
declare double @llvm.nvvm.round.d(double)
|
||||||
|
declare float @llvm.nvvm.round.f(float)
|
||||||
|
declare float @llvm.nvvm.round.ftz.f(float)
|
||||||
|
declare float @llvm.nvvm.sqrt.f(float)
|
||||||
|
declare double @llvm.nvvm.sqrt.rn.d(double)
|
||||||
|
declare float @llvm.nvvm.sqrt.rn.f(float)
|
||||||
|
declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
|
||||||
|
declare double @llvm.nvvm.trunc.d(double)
|
||||||
|
declare float @llvm.nvvm.trunc.f(float)
|
||||||
|
declare float @llvm.nvvm.trunc.ftz.f(float)
|
||||||
|
declare double @llvm.nvvm.ui2d.rz(i32)
|
||||||
|
declare float @llvm.nvvm.ui2f.rn(i32)
|
||||||
|
declare float @llvm.nvvm.ui2f.rz(i32)
|
||||||
|
declare double @llvm.nvvm.ull2d.rz(i64)
|
||||||
|
declare float @llvm.nvvm.ull2f.rz(i64)
|
Loading…
Reference in New Issue