forked from OSchip/llvm-project
AMDGPU : Add intrinsics for compare with the full wavefront result
Differential Revision: http://reviews.llvm.org/D22482 llvm-svn: 276998
This commit is contained in:
parent
89217f8835
commit
07e03712d3
|
@ -407,6 +407,14 @@ def int_amdgcn_lerp :
|
|||
GCCBuiltin<"__builtin_amdgcn_lerp">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
// Integer compare that yields an i64 instead of an i1.
//   operands 0, 1 - the values to compare; any integer type, both the same
//   operand 2     - i32 selecting the comparison predicate
// Declared as not touching memory and as convergent.
def int_amdgcn_icmp : Intrinsic<
  [llvm_i64_ty],
  [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrConvergent]
>;
|
||||
|
||||
// Floating-point compare that yields an i64 instead of an i1.
//   operands 0, 1 - the values to compare; any FP type, both the same
//   operand 2     - i32 selecting the comparison predicate
// Declared as not touching memory and as convergent.
def int_amdgcn_fcmp : Intrinsic<
  [llvm_i64_ty],
  [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrConvergent]
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CI+ Intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -2658,6 +2658,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(RETURN)
|
||||
NODE_NAME_CASE(DWORDADDR)
|
||||
NODE_NAME_CASE(FRACT)
|
||||
NODE_NAME_CASE(SETCC)
|
||||
NODE_NAME_CASE(CLAMP)
|
||||
NODE_NAME_CASE(COS_HW)
|
||||
NODE_NAME_CASE(SIN_HW)
|
||||
|
|
|
@ -223,6 +223,9 @@ enum NodeType : unsigned {
|
|||
DWORDADDR,
|
||||
FRACT,
|
||||
CLAMP,
|
||||
// This is SETCC with the full mask result which is used for a compare with a
|
||||
// result bit per item in the wavefront.
|
||||
SETCC,
|
||||
|
||||
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
|
||||
// Denormals handled on some parts.
|
||||
|
|
|
@ -144,6 +144,11 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
|
|||
// out = (src1 > src0) ? 1 : 0
|
||||
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
|
||||
|
||||
// Type profile for the AMDGPU setcc node: one result, three operands.
//   result 0      - always i64
//   operands 1, 2 - the compared values; constrained to the same type
//   operand 3     - OtherVT (the condition code)
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [
  SDTCisVT<0, i64>,
  SDTCisSameAs<1, 2>,
  SDTCisVT<3, OtherVT>
]>;
|
||||
|
||||
// DAG node for AMDGPUISD::SETCC, typed by AMDGPUSetCCOp; no node properties.
def AMDGPUsetcc : SDNode<
  "AMDGPUISD::SETCC", AMDGPUSetCCOp
>;
|
||||
|
||||
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
|
||||
SDTIntToFPOp, []>;
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/Analysis.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
|
||||
|
@ -2213,6 +2214,34 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
|
||||
Denominator, Numerator);
|
||||
}
|
||||
case Intrinsic::amdgcn_icmp: {
  // Operand 0 is the intrinsic ID, operands 1 and 2 are the values to compare
  // and operand 3 is the predicate, encoded as an ICmpInst::Predicate value.
  //
  // The predicate has to be a constant to be translated into a condition
  // code. dyn_cast yields null for anything else, so bail out with undef
  // rather than dereferencing a null pointer.
  const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  if (!CD)
    return DAG.getUNDEF(VT);

  int CondCode = CD->getSExtValue();

  // Constants outside the integer-predicate range do not name a comparison;
  // the result is undef in that case.
  if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
      CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE)
    return DAG.getUNDEF(VT);

  ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
  ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
  return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
                     Op.getOperand(2), DAG.getCondCode(CCOpcode));
}
|
||||
case Intrinsic::amdgcn_fcmp: {
  // Operand 0 is the intrinsic ID, operands 1 and 2 are the values to compare
  // and operand 3 is the predicate, encoded as an FCmpInst::Predicate value.
  //
  // The predicate has to be a constant to be translated into a condition
  // code. dyn_cast yields null for anything else, so bail out with undef
  // rather than dereferencing a null pointer.
  const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  if (!CD)
    return DAG.getUNDEF(VT);

  int CondCode = CD->getSExtValue();

  // Only predicates strictly between FCMP_FALSE and FCMP_TRUE are lowered;
  // the two trivial predicates and any out-of-range constant give undef.
  // NOTE(review): FCMP_ORD and FCMP_UNO also pass this check - confirm that
  // selection patterns exist for the condition codes they map to.
  if (CondCode <= FCmpInst::Predicate::FCMP_FALSE ||
      CondCode >= FCmpInst::Predicate::FCMP_TRUE)
    return DAG.getUNDEF(VT);

  FCmpInst::Predicate FcInput = static_cast<FCmpInst::Predicate>(CondCode);
  ISD::CondCode CCOpcode = getFCmpCondCode(FcInput);
  return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
                     Op.getOperand(2), DAG.getCondCode(CCOpcode));
}
|
||||
case Intrinsic::amdgcn_fmul_legacy:
|
||||
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
|
|
|
@ -2365,6 +2365,71 @@ def : Pat <
|
|||
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// V_ICMPIntrinsic Pattern.
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Selects `inst` for an AMDGPUsetcc node whose two sources have type `vt` and
// whose condition code is `cond`. The sources are forwarded to the
// instruction unchanged and in the same order.
class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt>
  : Pat <(AMDGPUsetcc vt:$src0, vt:$src1, cond),
         (inst $src0, $src1)>;
|
||||
|
||||
// 32-bit integer compares: equality, then unsigned, then signed orderings.
def : ICMP_Pattern <COND_EQ,  V_CMP_EQ_I32_e64, i32>;
def : ICMP_Pattern <COND_NE,  V_CMP_NE_I32_e64, i32>;
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
def : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
def : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
|
||||
|
||||
// 64-bit integer compares: equality, then unsigned, then signed orderings.
def : ICMP_Pattern <COND_EQ,  V_CMP_EQ_I64_e64, i64>;
def : ICMP_Pattern <COND_NE,  V_CMP_NE_I64_e64, i64>;
def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
def : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
def : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
|
||||
|
||||
// Selects `inst` for an i64 AMDGPUsetcc node comparing two `vt` values under
// condition code `cond`. Each source is matched through VOP3Mods, so the
// modifiers it captures are passed to the instruction next to the source
// itself. Clamp and output modifier are always set to NONE.
class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
  (i64 (AMDGPUsetcc
         (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
         (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
         cond)),
  (inst $src0_modifiers, $src0,
        $src1_modifiers, $src1,
        DSTCLAMP.NONE, DSTOMOD.NONE)
>;
|
||||
|
||||
// f32 compares for the ordered condition codes.
// NOTE(review): COND_ONE selects V_CMP_NEQ_F32_e64, the same instruction the
// COND_UNE pattern uses - confirm NEQ has the intended result when an operand
// is NaN.
def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64,  f32>;
def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64,  f32>;
def : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64,  f32>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64,  f32>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64,  f32>;
|
||||
|
||||
// f64 compares for the ordered condition codes.
// NOTE(review): COND_ONE selects V_CMP_NEQ_F64_e64, the same instruction the
// COND_UNE pattern uses - confirm NEQ has the intended result when an operand
// is NaN.
def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64,  f64>;
def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
def : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64,  f64>;
def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64,  f64>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64,  f64>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64,  f64>;
|
||||
|
||||
// f32 compares for the unordered condition codes. Apart from UNE, each one
// selects the "N"-prefixed form of the opposite ordered compare
// (e.g. UGT -> NLE).
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
|
||||
|
||||
// f64 compares for the unordered condition codes. Apart from UNE, each one
// selects the "N"-prefixed form of the opposite ordered compare
// (e.g. UGT -> NLE).
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SMRD Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,228 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
|
||||
declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
|
||||
; fabs on the second source is expected to fold into the compare as a |...|
; source modifier. Predicate 1 is FCMP_OEQ.
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %abs_a = call float @llvm.fabs.f32(float %a)
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %abs_a, i32 1)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; fabs on both sources is expected to fold into the compare as |...| source
; modifiers. Predicate 1 is FCMP_OEQ.
; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
define void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
  %abs_a = call float @llvm.fabs.f32(float %a)
  %abs_src = call float @llvm.fabs.f32(float %src)
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %abs_src, float %abs_a, i32 1)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate -1 is not a valid floating-point predicate, so no compare
; instruction is expected.
; GCN-LABEL: {{^}}v_fcmp:
; GCN-NOT: v_cmp_eq_f32_e64
define void @v_fcmp(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 1 is FCMP_OEQ (ordered, equal).
; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
; GCN: v_cmp_eq_f32_e64
define void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 6 is FCMP_ONE (ordered, not equal).
; GCN-LABEL: {{^}}v_fcmp_f32_one:
; GCN: v_cmp_neq_f32_e64
define void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 2 is FCMP_OGT (ordered, greater than).
; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
; GCN: v_cmp_gt_f32_e64
define void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 3 is FCMP_OGE (ordered, greater than or equal).
; GCN-LABEL: {{^}}v_fcmp_f32_oge:
; GCN: v_cmp_ge_f32_e64
define void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 4 is FCMP_OLT (ordered, less than).
; GCN-LABEL: {{^}}v_fcmp_f32_olt:
; GCN: v_cmp_lt_f32_e64
define void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 5 is FCMP_OLE (ordered, less than or equal).
; GCN-LABEL: {{^}}v_fcmp_f32_ole:
; GCN: v_cmp_le_f32_e64
define void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
|
||||
; Predicate 9 is FCMP_UEQ (unordered or equal).
; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
; GCN: v_cmp_nlg_f32_e64
define void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 14 is FCMP_UNE (unordered or not equal).
; GCN-LABEL: {{^}}v_fcmp_f32_une:
; GCN: v_cmp_neq_f32_e64
define void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 10 is FCMP_UGT (unordered or greater than).
; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
; GCN: v_cmp_nle_f32_e64
define void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 11 is FCMP_UGE (unordered or greater than or equal).
; GCN-LABEL: {{^}}v_fcmp_f32_uge:
; GCN: v_cmp_nlt_f32_e64
define void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 12 is FCMP_ULT (unordered or less than).
; GCN-LABEL: {{^}}v_fcmp_f32_ult:
; GCN: v_cmp_nge_f32_e64
define void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 13 is FCMP_ULE (unordered or less than or equal).
; GCN-LABEL: {{^}}v_fcmp_f32_ule:
; GCN: v_cmp_ngt_f32_e64
define void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 1 is FCMP_OEQ (ordered, equal).
; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
; GCN: v_cmp_eq_f64_e64
define void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 6 is FCMP_ONE (ordered, not equal).
; GCN-LABEL: {{^}}v_fcmp_f64_one:
; GCN: v_cmp_neq_f64_e64
define void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 2 is FCMP_OGT (ordered, greater than).
; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
; GCN: v_cmp_gt_f64_e64
define void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 3 is FCMP_OGE (ordered, greater than or equal).
; GCN-LABEL: {{^}}v_fcmp_f64_oge:
; GCN: v_cmp_ge_f64_e64
define void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 4 is FCMP_OLT (ordered, less than).
; GCN-LABEL: {{^}}v_fcmp_f64_olt:
; GCN: v_cmp_lt_f64_e64
define void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 5 is FCMP_OLE (ordered, less than or equal).
; GCN-LABEL: {{^}}v_fcmp_f64_ole:
; GCN: v_cmp_le_f64_e64
define void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 9 is FCMP_UEQ (unordered or equal).
; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
; GCN: v_cmp_nlg_f64_e64
define void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 14 is FCMP_UNE (unordered or not equal).
; GCN-LABEL: {{^}}v_fcmp_f64_une:
; GCN: v_cmp_neq_f64_e64
define void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 10 is FCMP_UGT (unordered or greater than).
; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
; GCN: v_cmp_nle_f64_e64
define void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 11 is FCMP_UGE (unordered or greater than or equal).
; GCN-LABEL: {{^}}v_fcmp_f64_uge:
; GCN: v_cmp_nlt_f64_e64
define void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 12 is FCMP_ULT (unordered or less than).
; GCN-LABEL: {{^}}v_fcmp_f64_ult:
; GCN: v_cmp_nge_f64_e64
define void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 13 is FCMP_ULE (unordered or less than or equal).
; GCN-LABEL: {{^}}v_fcmp_f64_ule:
; GCN: v_cmp_ngt_f64_e64
define void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
  %mask = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
attributes #0 = { nounwind readnone convergent }
|
|
@ -0,0 +1,172 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
|
||||
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
|
||||
|
||||
; Predicate 32 is ICMP_EQ.
; GCN-LABEL: {{^}}v_icmp_i32_eq:
; GCN: v_cmp_eq_i32_e64
define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 30 lies below the first integer predicate, so no compare
; instruction is expected.
; GCN-LABEL: {{^}}v_icmp:
; GCN-NOT: v_cmp_eq_i32_e64
define void @v_icmp(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
; Predicate 33 is ICMP_NE.
; GCN-LABEL: {{^}}v_icmp_i32_ne:
; GCN: v_cmp_ne_i32_e64
define void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 34 is ICMP_UGT (unsigned greater than).
; GCN-LABEL: {{^}}v_icmp_u32_ugt:
; GCN: v_cmp_gt_u32_e64
define void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 35 is ICMP_UGE (unsigned greater than or equal).
; GCN-LABEL: {{^}}v_icmp_u32_uge:
; GCN: v_cmp_ge_u32_e64
define void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 36 is ICMP_ULT (unsigned less than).
; GCN-LABEL: {{^}}v_icmp_u32_ult:
; GCN: v_cmp_lt_u32_e64
define void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 37 is ICMP_ULE (unsigned less than or equal).
; GCN-LABEL: {{^}}v_icmp_u32_ule:
; GCN: v_cmp_le_u32_e64
define void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 38 is ICMP_SGT (signed greater than).
; The function carries no attribute group, like the other tests in this file;
; the only group the file defines is #0, used by the intrinsic declarations.
; GCN-LABEL: {{^}}v_icmp_i32_sgt:
; GCN: v_cmp_gt_i32_e64
define void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 39 is ICMP_SGE (signed greater than or equal).
; GCN-LABEL: {{^}}v_icmp_i32_sge:
; GCN: v_cmp_ge_i32_e64
define void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 40 is ICMP_SLT (signed less than).
; GCN-LABEL: {{^}}v_icmp_i32_slt:
; GCN: v_cmp_lt_i32_e64
define void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
; Predicate 41 is ICMP_SLE (signed less than or equal).
; GCN-LABEL: {{^}}v_icmp_i32_sle:
; GCN: v_cmp_le_i32_e64
define void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 32 is ICMP_EQ.
; GCN-LABEL: {{^}}v_icmp_i64_eq:
; GCN: v_cmp_eq_i64_e64
define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 33 is ICMP_NE.
; GCN-LABEL: {{^}}v_icmp_i64_ne:
; GCN: v_cmp_ne_i64_e64
define void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 34 is ICMP_UGT (unsigned greater than).
; GCN-LABEL: {{^}}v_icmp_u64_ugt:
; GCN: v_cmp_gt_u64_e64
define void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 35 is ICMP_UGE (unsigned greater than or equal).
; GCN-LABEL: {{^}}v_icmp_u64_uge:
; GCN: v_cmp_ge_u64_e64
define void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 36 is ICMP_ULT (unsigned less than).
; GCN-LABEL: {{^}}v_icmp_u64_ult:
; GCN: v_cmp_lt_u64_e64
define void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 37 is ICMP_ULE (unsigned less than or equal).
; GCN-LABEL: {{^}}v_icmp_u64_ule:
; GCN: v_cmp_le_u64_e64
define void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 38 is ICMP_SGT (signed greater than).
; GCN-LABEL: {{^}}v_icmp_i64_sgt:
; GCN: v_cmp_gt_i64_e64
define void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 39 is ICMP_SGE (signed greater than or equal).
; GCN-LABEL: {{^}}v_icmp_i64_sge:
; GCN: v_cmp_ge_i64_e64
define void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
; Predicate 40 is ICMP_SLT (signed less than).
; GCN-LABEL: {{^}}v_icmp_i64_slt:
; GCN: v_cmp_lt_i64_e64
define void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
; Predicate 41 is ICMP_SLE (signed less than or equal).
; GCN-LABEL: {{^}}v_icmp_i64_sle:
; GCN: v_cmp_le_i64_e64
define void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
|
||||
|
||||
attributes #0 = { nounwind readnone convergent }
|
Loading…
Reference in New Issue