forked from OSchip/llvm-project
AMDGPU/GlobalISel: Combine FMIN_LEGACY/FMAX_LEGACY
Try out using combine definition rules. This really should be a post-legalizer combine, but the combiner pass is currently pre-legalize. Most of the target combines are really post-legalize, so we should probably move the pass.
This commit is contained in:
parent
bc1148e7bc
commit
6fb544d1d2
|
@ -8,8 +8,25 @@
|
|||
|
||||
include "llvm/Target/GlobalISel/Combine.td"
|
||||
|
||||
// TODO: This really belongs after legalization after scalarization.
|
||||
// TODO: GICombineRules should accept subtarget predicates
|
||||
|
||||
// Match data (a C++ FMinFMaxLegacyInfo) carried as $matchinfo by the
// fcmp_select_to_fmin_fmax_legacy rule.
def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">;
|
||||
|
||||
// Combine rule rooted at a G_SELECT. The match step calls
// matchFMinFMaxLegacy, which decides whether the rule applies and fills in
// $matchinfo; the apply step calls applySelectFCmpToFMinToFMaxLegacy with
// that same match data to perform the rewrite.
def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
|
||||
(defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo),
|
||||
(match (wip_match_opcode G_SELECT):$select,
|
||||
[{ return matchFMinFMaxLegacy(*${select}, MRI, *MF, ${matchinfo}); }]),
|
||||
(apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
|
||||
|
||||
|
||||
// Combines which should only apply on SI/CI (gfx6/gfx7)
|
||||
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
|
||||
|
||||
|
||||
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
|
||||
elide_br_by_inverting_cond]> {
|
||||
elide_br_by_inverting_cond,
|
||||
gfx6gfx7_combines]> {
|
||||
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
|
||||
}
|
||||
|
|
|
@ -141,6 +141,9 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
|
|||
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
|
||||
|
||||
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
|
||||
def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
|
||||
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
|
||||
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
|
||||
|
@ -179,7 +182,6 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
|
|||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
|
||||
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
|
||||
|
||||
|
||||
class GISelSop2Pat <
|
||||
SDPatternOperator node,
|
||||
Instruction inst,
|
||||
|
|
|
@ -21,12 +21,112 @@
|
|||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace MIPatternMatch;
|
||||
|
||||
// Match data describing one G_SELECT whose condition is a G_FCMP: it is
// filled in by matchFMinFMaxLegacy and consumed by
// applySelectFCmpToFMinToFMaxLegacy.
struct FMinFMaxLegacyInfo {
|
||||
// First operand of the matched compare.
Register LHS;
|
||||
// Second operand of the matched compare.
Register RHS;
|
||||
// Operand 2 of the select (the "true" value).
Register True;
|
||||
// Operand 3 of the select (the "false" value).
Register False;
|
||||
// Predicate of the matched compare.
CmpInst::Predicate Pred;
|
||||
};
|
||||
|
||||
// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
|
||||
/// Decide whether the G_SELECT \p MI can be rewritten as a
/// G_AMDGPU_FMIN_LEGACY / G_AMDGPU_FMAX_LEGACY.
///
/// The rewrite is accepted only when all of the following hold:
///  - the subtarget reports hasFminFmaxLegacy();
///  - the select result is a 32-bit scalar;
///  - the select condition has exactly one non-debug use and is defined by a
///    G_FCMP;
///  - the two selected values are exactly the two compare operands, in either
///    order;
///  - the compare predicate is not one of FALSE, OEQ, ONE, ORD, UNO, UEQ, UNE
///    or TRUE.
///
/// \param MI   the G_SELECT under consideration.
/// \param MRI  register info used for the type, use-count and pattern queries.
/// \param MF   function whose subtarget is queried.
/// \param Info receives the compare operands and predicate plus the select's
///             true/false values. It may be partially written even when the
///             function returns false.
/// \returns true when the combine applies.
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  const Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg) != LLT::scalar(32))
    return false;

  // Require the select to be the condition's only non-debug user.
  const Register CondReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(CondReg))
    return false;

  // The condition must come from a floating-point compare; capture its
  // predicate and both operands.
  const bool IsFCmp =
      mi_match(CondReg, MRI,
               m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)));
  if (!IsFCmp)
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // The selected values have to be the compared values, straight or swapped.
  const bool SameOrder = Info.LHS == Info.True && Info.RHS == Info.False;
  const bool SwappedOrder = Info.LHS == Info.False && Info.RHS == Info.True;
  if (!SameOrder && !SwappedOrder)
    return false;

  // Everything except the equality, ordered/unordered and constant predicates
  // is accepted.
  switch (Info.Pred) {
  default:
    return true;
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  }
}
|
||||
|
||||
/// Replace the matched G_SELECT \p MI with a G_AMDGPU_FMIN_LEGACY or
/// G_AMDGPU_FMAX_LEGACY built from the compare operands recorded in \p Info.
///
/// The new instruction defines the select's result operand, inherits the
/// select's flags, and is inserted at the select's position; the select is
/// then erased.
///
/// \param MI   the G_SELECT to replace.
/// \param Info match data describing the feeding compare and the select's
///             true/false values.
static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {
  // Classify the predicate along two axes: less-than vs. greater-than, and
  // unordered vs. ordered.
  bool IsLessThan = false;
  bool IsUnordered = false;
  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    IsLessThan = true;
    IsUnordered = true;
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT:
    IsLessThan = true;
    IsUnordered = false;
    break;
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
    IsLessThan = false;
    IsUnordered = true;
    break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
    IsLessThan = false;
    IsUnordered = false;
    break;
  default:
    llvm_unreachable("predicate should not have matched");
  }

  // True when the select yields the compare's LHS if the compare holds.
  const bool PicksLHSWhenTrue = Info.LHS == Info.True;

  // "x < y ? x : y" and "x > y ? y : x" are minimums; the remaining two shapes
  // are maximums.
  const unsigned Opc = (IsLessThan == PicksLHSWhenTrue)
                           ? AMDGPU::G_AMDGPU_FMIN_LEGACY
                           : AMDGPU::G_AMDGPU_FMAX_LEGACY;

  // We need to permute the operands to get the correct NaN behavior. The
  // selected operand is the second one based on the failing compare with NaN,
  // so permute it based on the compare type the hardware uses. Operands end up
  // swapped for an unordered compare that picks LHS, and for an ordered
  // compare that picks RHS.
  const bool SwapOperands = IsUnordered == PicksLHSWhenTrue;
  Register Src0 = Info.LHS;
  Register Src1 = Info.RHS;
  if (SwapOperands) {
    Src0 = Info.RHS;
    Src1 = Info.LHS;
  }

  MachineIRBuilder Builder(MI);
  Builder.buildInstr(Opc, {MI.getOperand(0)}, {Src0, Src1}, MI.getFlags());

  MI.eraseFromParent();
}
|
||||
|
||||
|
||||
#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
#include "AMDGPUGenGICombiner.inc"
|
||||
#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
|
|
|
@ -2802,6 +2802,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case AMDGPU::G_FCANONICALIZE:
|
||||
case AMDGPU::G_INTRINSIC_TRUNC:
|
||||
case AMDGPU::G_AMDGPU_FFBH_U32:
|
||||
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
|
||||
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
|
||||
return getDefaultMappingVOP(MI);
|
||||
case AMDGPU::G_UMULH:
|
||||
case AMDGPU::G_SMULH: {
|
||||
|
|
|
@ -2205,6 +2205,18 @@ def G_AMDGPU_BUFFER_STORE_FORMAT_D16 : BufferStoreGenericInstruction;
|
|||
def G_AMDGPU_TBUFFER_STORE_FORMAT : TBufferStoreGenericInstruction;
|
||||
def G_AMDGPU_TBUFFER_STORE_FORMAT_D16 : TBufferStoreGenericInstruction;
|
||||
|
||||
// Generic AMDGPU opcode for the legacy floating-point minimum. It has a
// single result and two sources that all share one type (type0), and is
// declared free of side effects.
def G_AMDGPU_FMIN_LEGACY : AMDGPUGenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src0, type0:$src1);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Generic AMDGPU opcode for the legacy floating-point maximum. It has a
// single result and two sources that all share one type (type0), and is
// declared free of side effects.
def G_AMDGPU_FMAX_LEGACY : AMDGPUGenericInstruction {
|
||||
let OutOperandList = (outs type0:$dst);
|
||||
let InOperandList = (ins type0:$src0, type0:$src1);
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
|
||||
// operand Expects a MachineMemOperand in addition to explicit
|
||||
// operands.
|
||||
|
|
|
@ -0,0 +1,255 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
|
||||
|
||||
define float @v_test_fmax_legacy_ogt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ogt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ogt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ogt float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_oge_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_oge_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_oge_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp oge float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_uge_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_uge_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_uge_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp uge float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_ugt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ugt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ugt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ugt float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_ole_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ole_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ole_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_le_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ole float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_olt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_olt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_olt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp olt float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_ule_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ule_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ule_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ule float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_ult_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ult_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ult_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nge_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ult float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_oge_f32_fneg_lhs(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_lhs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e64 v0, -v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_oge_f32_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v1, -v0, s[4:5]
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.neg = fneg float %a
|
||||
%cmp = fcmp oge float %a.neg, %b
|
||||
%val = select i1 %cmp, float %a.neg, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_oge_f32_fneg_rhs(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_rhs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_max_legacy_f32_e64 v0, v0, -v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_oge_f32_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], v0, -v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%b.neg = fneg float %b
|
||||
%cmp = fcmp oge float %a, %b.neg
|
||||
%val = select i1 %cmp, float %a, float %b.neg
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_ord(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_ord:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_ord:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ord float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ule_f32_multi_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GFX6-NEXT: s_mov_b32 m0, -1
|
||||
; GFX6-NEXT: ds_write_b32 v0, v1
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ule_f32_multi_use:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GFX8-NEXT: s_mov_b32 m0, -1
|
||||
; GFX8-NEXT: ds_write_b32 v0, v1
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ogt float %a, %b
|
||||
%val0 = select i1 %cmp, float %a, float %b
|
||||
%val1 = zext i1 %cmp to i32
|
||||
store i32 %val1, i32 addrspace(3)* undef
|
||||
ret float %val0
|
||||
}
|
||||
|
||||
define double @v_test_fmax_legacy_ult_f64(double %a, double %b) {
|
||||
; GFX6-LABEL: v_test_fmax_legacy_ult_f64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmax_legacy_ult_f64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3]
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ult double %a, %b
|
||||
%val = select i1 %cmp, double %b, double %a
|
||||
ret double %val
|
||||
}
|
|
@ -0,0 +1,384 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX6 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
|
||||
|
||||
; TODO: Merge with DAG test
|
||||
|
||||
define float @v_test_fmin_legacy_ole_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ole_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ole_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_le_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ole float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_olt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_olt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_olt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp olt float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ule_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ule_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ule_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ule float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ult_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ult_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ult_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nge_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ult float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ogt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ogt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ogt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ogt float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_oge_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_oge_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_oge_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp oge float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_uge_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_uge_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_uge_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp uge float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ugt_f32(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ugt_f32:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e32 v0, v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ugt_f32:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nle_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ugt float %a, %b
|
||||
%val = select i1 %cmp, float %b, float %a
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ole_f32_fneg_lhs(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_lhs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e64 v0, -v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ole_f32_fneg_lhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_le_f32_e64 s[4:5], -v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v1, -v0, s[4:5]
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%a.neg = fneg float %a
|
||||
%cmp = fcmp ole float %a.neg, %b
|
||||
%val = select i1 %cmp, float %a.neg, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ole_f32_fneg_rhs(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_rhs:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_min_legacy_f32_e64 v0, v0, -v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ole_f32_fneg_rhs:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_le_f32_e64 s[4:5], v0, -v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%b.neg = fneg float %b
|
||||
%cmp = fcmp ole float %a, %b.neg
|
||||
%val = select i1 %cmp, float %a, float %b.neg
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ule_f32_multi_use:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GFX6-NEXT: s_mov_b32 m0, -1
|
||||
; GFX6-NEXT: ds_write_b32 v0, v1
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ule_f32_multi_use:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GFX8-NEXT: s_mov_b32 m0, -1
|
||||
; GFX8-NEXT: ds_write_b32 v0, v1
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ule float %a, %b
|
||||
%val0 = select i1 %cmp, float %a, float %b
|
||||
%val1 = zext i1 %cmp to i32
|
||||
store i32 %val1, i32 addrspace(3)* undef
|
||||
ret float %val0
|
||||
}
|
||||
|
||||
define double @v_test_fmin_legacy_ole_f64(double %a, double %b) {
|
||||
; GFX6-LABEL: v_test_fmin_legacy_ole_f64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fmin_legacy_ole_f64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_le_f64_e32 vcc, v[0:1], v[2:3]
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ole double %a, %b
|
||||
%val = select i1 %cmp, double %a, double %b
|
||||
ret double %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_oeq(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_oeq:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_oeq:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp oeq float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_one(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_one:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_lg_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_one:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_lg_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp one float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_ord(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_ord:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_ord:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ord float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_uno(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_uno:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_uno:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp uno float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_ueq(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_ueq:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_nlg_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_ueq:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_nlg_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp ueq float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_une(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_une:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, v0, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_une:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_neq_f32_e32 vcc, v0, v1
|
||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp une float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_true(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_true:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_true:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp true float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
||||
|
||||
define float @v_test_fcmp_select_false(float %a, float %b) {
|
||||
; GFX6-LABEL: v_test_fcmp_select_false:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: v_test_fcmp_select_false:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, v1
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%cmp = fcmp false float %a, %b
|
||||
%val = select i1 %cmp, float %a, float %b
|
||||
ret float %val
|
||||
}
|
Loading…
Reference in New Issue