forked from OSchip/llvm-project
DAG: Enhance isKnownNeverNaN
Add a parameter for testing specifically for sNaNs - at least one instruction pattern on AMDGPU needs to check specifically for this. Also handle more cases, and add a target hook for custom nodes, similar to the hooks for known bits. llvm-svn: 338910
This commit is contained in:
parent
ed69e1bc98
commit
c3dc8e65e2
|
@ -1484,8 +1484,15 @@ public:
|
|||
/// X|Cst == X+Cst iff X&Cst = 0.
|
||||
bool isBaseWithConstantOffset(SDValue Op) const;
|
||||
|
||||
/// Test whether the given SDValue is known to never be NaN.
|
||||
bool isKnownNeverNaN(SDValue Op) const;
|
||||
/// Test whether the given SDValue is known to never be NaN. If \p SNaN is
|
||||
/// true, returns true if \p Op is known to never be a signaling NaN (it may
|
||||
/// still be a qNaN).
|
||||
bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
|
||||
|
||||
/// \returns true if \p Op is known to never be a signaling NaN.
|
||||
bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
|
||||
return isKnownNeverNaN(Op, true, Depth);
|
||||
}
|
||||
|
||||
/// Test whether the given floating point SDValue is known to never be
|
||||
/// positive or negative zero.
|
||||
|
|
|
@ -2868,6 +2868,13 @@ public:
|
|||
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
|
||||
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
|
||||
|
||||
/// If \p SNaN is false, \returns true if \p Op is known to never be any
|
||||
/// NaN. If \p SNaN is true, returns true if \p Op is known to never be a
|
||||
/// signaling NaN.
|
||||
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
|
||||
const SelectionDAG &DAG,
|
||||
bool SNaN = false,
|
||||
unsigned Depth = 0) const;
|
||||
struct DAGCombinerInfo {
|
||||
void *DC; // The DAG Combiner object.
|
||||
CombineLevel Level;
|
||||
|
|
|
@ -3622,21 +3622,102 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
|
||||
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
|
||||
// If we're told that NaNs won't happen, assume they won't.
|
||||
if (getTarget().Options.NoNaNsFPMath)
|
||||
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
|
||||
return true;
|
||||
|
||||
if (Op->getFlags().hasNoNaNs())
|
||||
return true;
|
||||
if (Depth == 6)
|
||||
return false; // Limit search depth.
|
||||
|
||||
// TODO: Handle vectors.
|
||||
// If the value is a constant, we can obviously see if it is a NaN or not.
|
||||
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
|
||||
return !C->getValueAPF().isNaN();
|
||||
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return !C->getValueAPF().isNaN() ||
|
||||
(SNaN && !C->getValueAPF().isSignaling());
|
||||
}
|
||||
|
||||
// TODO: Recognize more cases here.
|
||||
unsigned Opcode = Op.getOpcode();
|
||||
switch (Opcode) {
|
||||
case ISD::FADD:
|
||||
case ISD::FSUB:
|
||||
case ISD::FMUL: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
|
||||
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
|
||||
}
|
||||
case ISD::FCANONICALIZE:
|
||||
case ISD::FEXP:
|
||||
case ISD::FEXP2:
|
||||
case ISD::FTRUNC:
|
||||
case ISD::FFLOOR:
|
||||
case ISD::FCEIL:
|
||||
case ISD::FROUND:
|
||||
case ISD::FRINT:
|
||||
case ISD::FNEARBYINT: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
|
||||
}
|
||||
case ISD::FABS:
|
||||
case ISD::FNEG:
|
||||
case ISD::FCOPYSIGN: {
|
||||
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
|
||||
}
|
||||
case ISD::SELECT:
|
||||
return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
|
||||
isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
|
||||
case ISD::FDIV:
|
||||
case ISD::FREM:
|
||||
case ISD::FSIN:
|
||||
case ISD::FCOS: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
// TODO: Need isKnownNeverInfinity
|
||||
return false;
|
||||
}
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::FP_ROUND: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
|
||||
}
|
||||
case ISD::SINT_TO_FP:
|
||||
case ISD::UINT_TO_FP:
|
||||
return true;
|
||||
case ISD::FMA:
|
||||
case ISD::FMAD: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
|
||||
isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
|
||||
isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
|
||||
}
|
||||
case ISD::FSQRT: // Need is known positive
|
||||
case ISD::FLOG:
|
||||
case ISD::FLOG2:
|
||||
case ISD::FLOG10:
|
||||
case ISD::FPOWI:
|
||||
case ISD::FPOW: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
// TODO: Refine on operand
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
// TODO: Handle FMINNUM/FMAXNUM/FMINNAN/FMAXNAN when there is an agreement on
|
||||
// what they should do.
|
||||
default:
|
||||
if (Opcode >= ISD::BUILTIN_OP_END ||
|
||||
Opcode == ISD::INTRINSIC_WO_CHAIN ||
|
||||
Opcode == ISD::INTRINSIC_W_CHAIN ||
|
||||
Opcode == ISD::INTRINSIC_VOID) {
|
||||
return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
|
||||
|
|
|
@ -1711,6 +1711,19 @@ bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
return false;
|
||||
}
|
||||
|
||||
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
|
||||
const SelectionDAG &DAG,
|
||||
bool SNaN,
|
||||
unsigned Depth) const {
|
||||
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
|
||||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
|
||||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
|
||||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
|
||||
"Should use isKnownNeverNaN if you don't know whether Op"
|
||||
" is a target node!");
|
||||
return false;
|
||||
}
|
||||
|
||||
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
|
||||
// work with truncating build vectors and vectors with elements of less than
|
||||
// 8 bits.
|
||||
|
|
|
@ -4320,3 +4320,86 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
|
||||
const SelectionDAG &DAG,
|
||||
bool SNaN,
|
||||
unsigned Depth) const {
|
||||
unsigned Opcode = Op.getOpcode();
|
||||
switch (Opcode) {
|
||||
case AMDGPUISD::FMIN_LEGACY:
|
||||
case AMDGPUISD::FMAX_LEGACY: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
|
||||
// TODO: Can check no nans on one of the operands for each one, but which
|
||||
// one?
|
||||
return false;
|
||||
}
|
||||
case AMDGPUISD::FMUL_LEGACY: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
|
||||
DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
|
||||
}
|
||||
case AMDGPUISD::FMED3:
|
||||
case AMDGPUISD::FMIN3:
|
||||
case AMDGPUISD::FMAX3:
|
||||
case AMDGPUISD::FMAD_FTZ: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
|
||||
DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
|
||||
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
|
||||
}
|
||||
case AMDGPUISD::CVT_F32_UBYTE0:
|
||||
case AMDGPUISD::CVT_F32_UBYTE1:
|
||||
case AMDGPUISD::CVT_F32_UBYTE2:
|
||||
case AMDGPUISD::CVT_F32_UBYTE3:
|
||||
return true;
|
||||
|
||||
case AMDGPUISD::RCP:
|
||||
case AMDGPUISD::RSQ:
|
||||
case AMDGPUISD::RCP_LEGACY:
|
||||
case AMDGPUISD::RSQ_LEGACY:
|
||||
case AMDGPUISD::RSQ_CLAMP: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
|
||||
// TODO: Need is known positive check.
|
||||
return false;
|
||||
}
|
||||
case AMDGPUISD::LDEXP: {
|
||||
if (SNaN)
|
||||
return true;
|
||||
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
|
||||
}
|
||||
case AMDGPUISD::DIV_SCALE:
|
||||
case AMDGPUISD::DIV_FMAS:
|
||||
case AMDGPUISD::DIV_FIXUP:
|
||||
case AMDGPUISD::TRIG_PREOP:
|
||||
// TODO: Refine on operands.
|
||||
return SNaN;
|
||||
case AMDGPUISD::SIN_HW:
|
||||
case AMDGPUISD::COS_HW: {
|
||||
// TODO: Need check for infinity
|
||||
return SNaN;
|
||||
}
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntrinsicID
|
||||
= cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
// TODO: Handle more intrinsics
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_cubeid:
|
||||
return true;
|
||||
|
||||
case Intrinsic::amdgcn_frexp_mant:
|
||||
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -246,6 +246,11 @@ public:
|
|||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const override;
|
||||
|
||||
bool isKnownNeverNaNForTargetNode(SDValue Op,
|
||||
const SelectionDAG &DAG,
|
||||
bool SNaN = false,
|
||||
unsigned Depth = 0) const override;
|
||||
|
||||
/// Helper function that adds Reg to the LiveIn list of the DAG's
|
||||
/// MachineFunction.
|
||||
///
|
||||
|
|
|
@ -6745,13 +6745,6 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
|
|||
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
|
||||
}
|
||||
|
||||
static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
|
||||
if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
|
||||
return true;
|
||||
|
||||
return DAG.isKnownNeverNaN(Op);
|
||||
}
|
||||
|
||||
static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
|
||||
const GCNSubtarget *ST, unsigned MaxDepth=5) {
|
||||
// If source is a result of another standard FP operation it is already in
|
||||
|
@ -6846,7 +6839,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
|
|||
|
||||
bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
|
||||
|
||||
if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
|
||||
if ((IsIEEEMode || DAG.isKnownNeverSNaN(N0)) &&
|
||||
isCanonicalized(DAG, N0, ST))
|
||||
return N0;
|
||||
|
||||
|
@ -6991,7 +6984,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
|
|||
// then give the other result, which is different from med3 with a NaN
|
||||
// input.
|
||||
SDValue Var = Op0.getOperand(0);
|
||||
if (!isKnownNeverSNan(DAG, Var))
|
||||
if (!DAG.isKnownNeverSNaN(Var))
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
|
||||
|
|
|
@ -53,9 +53,30 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad
|
|||
|
||||
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
|
||||
; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
|
||||
; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], [[SIGNBIT]], 1.0
|
||||
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%add = fadd nnan float %a, 0.5
|
||||
%max = call float @llvm.maxnum.f32(float %add, float -0.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 1.0)
|
||||
|
||||
store float %med, float addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Weird inconsistency in how -0.0 is treated. It is accepted if the
|
||||
; clamp is matched through med3, but not if matched directly. Is this correct?
|
||||
|
||||
; GCN-LABEL: {{^}}v_clamp_negzero_maybe_snan_f32:
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[A]]
|
||||
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
|
||||
define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -352,13 +373,15 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #
|
|||
|
||||
; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
|
||||
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
|
||||
define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 0.0)
|
||||
%a.nnan = fadd nnan float %a, 0.5
|
||||
%max = call float @llvm.maxnum.f32(float %a.nnan, float 0.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 1.0)
|
||||
|
||||
store float %med, float addrspace(1)* %out.gep
|
||||
|
@ -367,13 +390,14 @@ define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, f
|
|||
|
||||
; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
|
||||
; GCN: v_add_f32_e64 [[ADD:v[0-9]+]], [[A]], 0.5 clamp{{$}}
|
||||
define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 0.0)
|
||||
%add = fadd float %a, 0.5
|
||||
%max = call float @llvm.maxnum.f32(float %add, float 0.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 1.0)
|
||||
|
||||
store float %med, float addrspace(1)* %out.gep
|
||||
|
|
|
@ -458,7 +458,10 @@ define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrsp
|
|||
}
|
||||
|
||||
; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
|
||||
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
||||
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
|
||||
; GCN-NOT: v_mul
|
||||
; GCN-NOT: v_max
|
||||
; GCN-NEXT: ; return
|
||||
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
|
||||
entry:
|
||||
%v = fmul float %arg, 15.0
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
|
||||
|
@ -22,87 +19,82 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(float addrspace(1)*
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
|
||||
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
|
||||
; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_f32:
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.add = fadd nnan float %a, 1.0
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 2.0)
|
||||
%max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
|
||||
store float %med, float addrspace(1)* %outgep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
|
||||
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
|
||||
; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_commute0_f32:
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.add = fadd nnan float %a, 1.0
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float 2.0, float %a)
|
||||
%max = call float @llvm.maxnum.f32(float 2.0, float %a.add)
|
||||
%med = call float @llvm.minnum.f32(float 4.0, float %max)
|
||||
|
||||
store float %med, float addrspace(1)* %outgep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
|
||||
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
|
||||
; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_commute1_f32:
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.add = fadd nnan float %a, 1.0
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 2.0)
|
||||
%max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float 4.0, float %max)
|
||||
|
||||
store float %med, float addrspace(1)* %outgep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_constant_order_f32:
|
||||
; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.add = fadd nnan float %a, 1.0
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 4.0)
|
||||
%max = call float @llvm.maxnum.f32(float %a.add, float 4.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 2.0)
|
||||
|
||||
store float %med, float addrspace(1)* %outgep
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
|
||||
; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_multi_use_f32:
|
||||
; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.add = fadd nnan float %a, 1.0
|
||||
|
||||
%max = call float @llvm.maxnum.f32(float %a, float 2.0)
|
||||
%max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
|
||||
store volatile float %med, float addrspace(1)* %outgep
|
||||
|
@ -118,8 +110,9 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, dou
|
|||
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
|
||||
%a = load double, double addrspace(1)* %gep0
|
||||
%a.add = fadd nnan double %a, 1.0
|
||||
|
||||
%max = call double @llvm.maxnum.f64(double %a, double 2.0)
|
||||
%max = call double @llvm.maxnum.f64(double %a.add, double 2.0)
|
||||
%med = call double @llvm.minnum.f64(double %max, double 4.0)
|
||||
|
||||
store double %med, double addrspace(1)* %outgep
|
||||
|
@ -142,19 +135,17 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %o
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
|
||||
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
|
||||
; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
||||
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
|
||||
define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%a = load float, float addrspace(1)* %gep0
|
||||
%a.nnan = fadd nnan float %a, 1.0
|
||||
|
||||
; fmax_legacy
|
||||
%cmp0 = fcmp ule float %a, 2.0
|
||||
%max = select i1 %cmp0, float 2.0, float %a
|
||||
%cmp0 = fcmp ule float %a.nnan, 2.0
|
||||
%max = select i1 %cmp0, float 2.0, float %a.nnan
|
||||
|
||||
; fmin_legacy
|
||||
%cmp1 = fcmp uge float %max, 4.0
|
||||
|
|
|
@ -24,16 +24,16 @@ declare float @llvm.fabs.f32(float) #1
|
|||
; VI: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
|
||||
define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
|
||||
%a11 = fadd fast float %y, -1.0
|
||||
%a11 = fadd float %y, -1.0
|
||||
%a12 = call float @llvm.fabs.f32(float %a11)
|
||||
%a13 = fadd fast float %x, -1.0
|
||||
%a13 = fadd float %x, -1.0
|
||||
%a14 = call float @llvm.fabs.f32(float %a13)
|
||||
%a15 = fcmp ogt float %a12, %a14
|
||||
%a16 = select i1 %a15, float %a12, float %a14
|
||||
%a17 = fmul fast float %a16, 2.0
|
||||
%a18 = fmul fast float %a17, %a17
|
||||
%a19 = fmul fast float %a18, %a17
|
||||
%a20 = fsub fast float 1.0, %a19
|
||||
%a17 = fmul float %a16, 2.0
|
||||
%a18 = fmul float %a17, %a17
|
||||
%a19 = fmul float %a18, %a17
|
||||
%a20 = fsub float 1.0, %a19
|
||||
store float %a20, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -123,16 +123,16 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
|
|||
%x = bitcast i16 %x.arg to half
|
||||
%y = bitcast i16 %y.arg to half
|
||||
%z = bitcast i16 %z.arg to half
|
||||
%a11 = fadd fast half %y, -1.0
|
||||
%a11 = fadd half %y, -1.0
|
||||
%a12 = call half @llvm.fabs.f16(half %a11)
|
||||
%a13 = fadd fast half %x, -1.0
|
||||
%a13 = fadd half %x, -1.0
|
||||
%a14 = call half @llvm.fabs.f16(half %a13)
|
||||
%a15 = fcmp ogt half %a12, %a14
|
||||
%a16 = select i1 %a15, half %a12, half %a14
|
||||
%a17 = fmul fast half %a16, 2.0
|
||||
%a18 = fmul fast half %a17, %a17
|
||||
%a19 = fmul fast half %a18, %a17
|
||||
%a20 = fsub fast half 1.0, %a19
|
||||
%a17 = fmul half %a16, 2.0
|
||||
%a18 = fmul half %a17, %a17
|
||||
%a19 = fmul half %a18, %a17
|
||||
%a20 = fsub half 1.0, %a19
|
||||
store half %a20, half addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -0,0 +1,598 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; Mostly overlaps with fmed3.ll to stress specific cases of
|
||||
; isKnownNeverSNaN.
|
||||
|
||||
define float @v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, |v0|, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%known.not.snan = call float @llvm.fabs.f32(float %a.nnan.add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e64 v0, -v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%known.not.snan = fsub float -0.0, %a.nnan.add
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32(half %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f16_e32 v0, 1.0, v0
|
||||
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fadd nnan half %a, 1.0
|
||||
%known.not.snan = fpext half %a.nnan.add to float
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32(double %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
|
||||
; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fadd nnan double %a, 1.0
|
||||
%known.not.snan = fptrunc double %a.nnan.add to float
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32(float %a, float %sign) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: s_brev_b32 s6, -2
|
||||
; GCN-NEXT: v_bfi_b32 v0, s6, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%known.not.snan = call float @llvm.copysign.f32(float %a.nnan.add, float %sign)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
; Canonicalize always quiets, so nothing is necessary.
|
||||
define float @v_test_known_canonicalize_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_canonicalize_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.canonicalize.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%known.not.snan = call float @llvm.minnum.f32(float %a.nnan.add, float %b.nnan.add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%known.not.snan = call float @llvm.minnum.f32(float %a, float %b.nnan.add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%known.not.snan = call float @llvm.minnum.f32(float %a.nnan.add, float %b)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_max3_f32 v0, v0, v1, 2.0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%known.not.snan = call float @llvm.maxnum.f32(float %a.nnan.add, float %b.nnan.add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_max3_f32 v0, v0, v1, 2.0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%known.not.snan = call float @llvm.maxnum.f32(float %a, float %b.nnan.add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_max3_f32 v0, v0, v1, 2.0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%known.not.snan = call float @llvm.maxnum.f32(float %a.nnan.add, float %b)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_select_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_select_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%known.not.snan = select i1 %cmp, float %a.nnan.add, float %b.nnan.add
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
|
||||
; GCN-LABEL: v_select_possible_nan_lhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%b.nnan.add = fadd nnan float %b, 1.0
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%known.not.snan = select i1 %cmp, float %a, float %b.nnan.add
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_select_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
|
||||
; GCN-LABEL: v_select_possible_nan_rhs_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rcp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.nnan.add = fdiv nnan float 1.0, %a
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%known.not.snan = select i1 %cmp, float %a.nnan.add, float %b
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = fadd float %a, %b
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = fsub float %a, %b
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = fmul float %a, %b
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = uitofp i32 %a to float
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_i32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = sitofp i32 %a to float
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fma_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fma_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_fma_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mac_f32_e32 v2, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v2, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
|
||||
define float @v_test_known_not_snan_sin_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_sin_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, 0.15915494, v0
|
||||
; GCN-NEXT: v_fract_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_sin_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.sin.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_cos_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_cos_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_f32_e32 v0, 0.15915494, v0
|
||||
; GCN-NEXT: v_fract_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_cos_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.cos.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_exp_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.exp2.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_trunc_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.trunc.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_floor_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_floor_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_floor_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.floor.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
; Exercises llvm.ceil.f32 feeding the maxnum/minnum clamp; the checks expect
; the clamp to fold into a single v_med3_f32.
; NOTE(review): the v_ceil_f32_e32 check line was written by hand; regenerate
; with utils/update_llc_test_checks.py to confirm the exact output.
define float @v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_ceil_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.ceil.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_round_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_round_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_brev_b32 s6, -2
|
||||
; GCN-NEXT: v_trunc_f32_e32 v2, v0
|
||||
; GCN-NEXT: v_bfi_b32 v1, s6, 1.0, v0
|
||||
; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
|
||||
; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, 0.5
|
||||
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
|
||||
; GCN-NEXT: v_add_f32_e32 v0, v2, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.round.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_rint_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_rint_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rndne_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.rint.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32(float %a) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_rndne_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.nearbyint.f32(float %a)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_ldexp_f32 v0, v0, v1
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.amdgcn.fmed3.f32(float %a, float %b, float %c)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
; Feeds a nested minnum of three unconstrained arguments (checked below as a
; single v_min3_f32) into the maxnum(2.0)/minnum(4.0) clamp.
; Despite the "known_not_snan" name, the checks expect the clamp to stay as
; separate v_max_f32/v_min_f32 instructions rather than v_med3_f32.
; NOTE(review): presumably the min3 result is not treated as known-never-sNaN;
; confirm against the target's isKnownNeverNaNForTargetNode hook.
define float @v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_min3_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%min0 = call float @llvm.minnum.f32(float %a, float %b)
|
||||
%known.not.snan = call float @llvm.minnum.f32(float %min0, float %c)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32(i8 %char) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%cvt = uitofp i8 %char to float
|
||||
%max = call float @llvm.maxnum.f32(float %cvt, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
; Negative case: frexp.mant of an unconstrained argument feeds the
; maxnum(2.0)/minnum(4.0) clamp, and the checks expect separate
; v_max_f32/v_min_f32 instructions (no v_med3_f32).
; The %known.not.snan value name is reused from the positive tests and does
; not describe this input; the following test feeds frexp.mant from an fadd
; and does expect v_med3_f32.
define float @v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
|
||||
; GCN-LABEL: v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
|
||||
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.amdgcn.frexp.mant.f32(float %arg)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
|
||||
; GCN-LABEL: v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fadd float %arg, 1.0
|
||||
%known.not.snan = call float @llvm.amdgcn.frexp.mant.f32(float %add)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
define float @v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
|
||||
; GCN-LABEL: v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%known.not.snan = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
|
||||
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
|
||||
%med = call float @llvm.minnum.f32(float %max, float 4.0)
|
||||
ret float %med
|
||||
}
|
||||
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare float @llvm.sin.f32(float) #1
|
||||
declare float @llvm.cos.f32(float) #1
|
||||
declare float @llvm.exp2.f32(float) #1
|
||||
declare float @llvm.trunc.f32(float) #1
|
||||
declare float @llvm.floor.f32(float) #1
|
||||
declare float @llvm.ceil.f32(float) #1
|
||||
declare float @llvm.round.f32(float) #1
|
||||
declare float @llvm.rint.f32(float) #1
|
||||
declare float @llvm.nearbyint.f32(float) #1
|
||||
declare float @llvm.canonicalize.f32(float) #1
|
||||
declare float @llvm.minnum.f32(float, float) #1
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
declare float @llvm.copysign.f32(float, float) #1
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
declare float @llvm.fmuladd.f32(float, float, float) #1
|
||||
declare float @llvm.amdgcn.ldexp.f32(float, i32) #1
|
||||
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
|
||||
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
|
||||
declare float @llvm.amdgcn.frexp.mant.f32(float) #1
|
||||
declare float @llvm.amdgcn.cubeid(float, float, float) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
Loading…
Reference in New Issue