forked from OSchip/llvm-project
[AMDGPU] Improve liveness copying in si-optimize-exec-masking-pre-ra
Further improve liveness copying for the CC register after optimization by mirroring live internal splits. This fixes a bug in register allocation that occurred when CC register liveness was extended across branches instead of being split. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D129557
This commit is contained in:
parent
9e6d1f4b5d
commit
547e3cba7d
|
@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
|
|||
if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
|
||||
return false;
|
||||
|
||||
// Cannot safely mirror live intervals with PHI nodes, so check for these
|
||||
// before optimization.
|
||||
SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
|
||||
LiveInterval *SelLI = &LIS->getInterval(SelReg);
|
||||
if (llvm::any_of(SelLI->vnis(),
|
||||
[](const VNInfo *VNI) {
|
||||
return VNI->isPHIDef();
|
||||
}))
|
||||
return false;
|
||||
|
||||
// TODO: Guard against implicit def operands?
|
||||
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
|
||||
<< *And);
|
||||
|
@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
|
|||
|
||||
LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
|
||||
|
||||
SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
|
||||
SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
|
||||
|
||||
LiveInterval *CmpLI =
|
||||
CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
|
||||
LiveInterval *SelLI =
|
||||
SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
|
||||
|
||||
// Update live intervals for CCReg before potentially removing CmpReg/SelReg,
|
||||
// and their associated liveness information.
|
||||
SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
|
||||
if (CCReg.isVirtual()) {
|
||||
// Note: this ignores that SelLI might have multiple internal values
|
||||
// or splits and simply extends the live range to cover all cases
|
||||
// where the result of the v_cndmask_b32 was live (e.g. loops).
|
||||
// This could yield worse register allocation in rare edge cases.
|
||||
SlotIndex EndIdx = AndIdx.getRegSlot();
|
||||
if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
|
||||
EndIdx = SelLI->endIndex();
|
||||
// Apply live ranges from SelLI to CCReg potentially matching splits
|
||||
// and extending to loop boundaries.
|
||||
|
||||
auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
|
||||
// Copy live ranges from SelLI, adjusting start and end as required
|
||||
auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
|
||||
assert(DefSegment != SelLI->end() &&
|
||||
"No live interval segment covering definition?");
|
||||
for (auto I = DefSegment; I != SelLI->end(); ++I) {
|
||||
SlotIndex Start = I->start < SelIdx.getRegSlot() ?
|
||||
SelIdx.getRegSlot() : I->start;
|
||||
SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
|
||||
I->end : AndIdx.getRegSlot();
|
||||
Dst.addSegment(LiveRange::Segment(Start, End, VNI));
|
||||
}
|
||||
// If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
|
||||
if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
|
||||
Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
|
||||
};
|
||||
|
||||
LiveInterval &CCLI = LIS->getInterval(CCReg);
|
||||
auto CCQ = CCLI.Query(SelIdx.getRegSlot());
|
||||
if (CCQ.valueIn()) {
|
||||
CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
|
||||
EndIdx, CCQ.valueIn()));
|
||||
}
|
||||
if (CCQ.valueIn())
|
||||
applyLiveRanges(CCLI, CCQ.valueIn());
|
||||
|
||||
if (CC->getSubReg()) {
|
||||
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
|
||||
|
@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
|
|||
Allocator, Mask,
|
||||
[=](LiveInterval::SubRange &SR) {
|
||||
auto CCQS = SR.Query(SelIdx.getRegSlot());
|
||||
if (CCQS.valueIn()) {
|
||||
SR.addSegment(LiveRange::Segment(
|
||||
SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
|
||||
}
|
||||
if (CCQS.valueIn())
|
||||
applyLiveRanges(SR, CCQS.valueIn());
|
||||
},
|
||||
*LIS->getSlotIndexes(), *TRI);
|
||||
CCLI.removeEmptySubRanges();
|
||||
|
@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
|
|||
|
||||
// Try to remove compare. Cmp value should not used in between of cmp
|
||||
// and s_and_b64 if VCC or just unused if any other register.
|
||||
if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
|
||||
LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
|
||||
if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
|
||||
(CmpReg == Register(CondReg) &&
|
||||
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
|
||||
[&](const MachineInstr &MI) {
|
||||
|
@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
|
|||
Cmp->eraseFromParent();
|
||||
|
||||
// Try to remove v_cndmask_b32.
|
||||
if (SelLI) {
|
||||
// Kill status must be checked before shrinking the live range.
|
||||
bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
|
||||
LIS->shrinkToUses(SelLI);
|
||||
bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
|
||||
if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
|
||||
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
|
||||
// Kill status must be checked before shrinking the live range.
|
||||
bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
|
||||
LIS->shrinkToUses(SelLI);
|
||||
bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
|
||||
if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
|
||||
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
|
||||
|
||||
LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
|
||||
LIS->RemoveMachineInstrFromMaps(*Sel);
|
||||
Sel->eraseFromParent();
|
||||
}
|
||||
LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
|
||||
LIS->RemoveMachineInstrFromMaps(*Sel);
|
||||
Sel->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,293 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -start-before=machine-scheduler -stop-after=virtregrewriter,0 -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
# If optimize-exec-mask-pre-ra over approximates live intervals (not replicating splits)
|
||||
# then this triggers a register allocation failure.
|
||||
|
||||
# CHECK-LABEL: name: test
|
||||
|
||||
name: test
|
||||
alignment: 1
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: sreg_32, preferred-register: '$vcc_lo' }
|
||||
- { id: 1, class: sreg_32, preferred-register: '$vcc_lo' }
|
||||
- { id: 2, class: sreg_32_xm0_xexec, preferred-register: '$vcc_lo' }
|
||||
liveins:
|
||||
- { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
%3:sgpr_64 = COPY $sgpr4_sgpr5
|
||||
%4:vgpr_32 = COPY $vgpr0
|
||||
%5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 16, 0
|
||||
S_BITCMP1_B32 %5, 0, implicit-def $scc
|
||||
%6:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
%7:sreg_32 = S_MOV_B32 -1
|
||||
%8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 136, 0
|
||||
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
%9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 8, 0
|
||||
%10:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 24, 0
|
||||
%11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 40, 0
|
||||
%12:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 48, 0
|
||||
%13:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 64, 0
|
||||
S_BITCMP1_B32 %11, 0, implicit-def $scc
|
||||
%14:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
S_BITCMP1_B32 %13, 0, implicit-def $scc
|
||||
%15:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
S_BITCMP1_B32 %13, 8, implicit-def $scc
|
||||
%2:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
%16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 72, 0
|
||||
%17:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 80, 0
|
||||
S_BITCMP1_B32 %17, 0, implicit-def $scc
|
||||
%18:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
S_BITCMP1_B32 %17, 8, implicit-def $scc
|
||||
%19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
%20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 88, 0
|
||||
%21:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 104, 0
|
||||
%22:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 120, 0
|
||||
S_BITCMP1_B32 %22, 0, implicit-def $scc
|
||||
%23:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
%24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 128, 0
|
||||
%25:sreg_64 = S_MOV_B64 0
|
||||
%26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %25, 0, 0
|
||||
%27:sreg_64 = S_MOV_B64_IMM_PSEUDO 4652218415073722368
|
||||
%28:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
|
||||
%29:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %14, implicit $exec
|
||||
%30:sreg_64 = S_MOV_B64_IMM_PSEUDO 4358002977218854975
|
||||
undef %31.sub1:sreg_64 = S_MOV_B32 -1064252416
|
||||
%32:sreg_32 = S_OR_B32 %19, %18, implicit-def dead $scc
|
||||
undef %33.sub1:sreg_64 = S_MOV_B32 2146435072
|
||||
%34:sreg_64 = S_MOV_B64_IMM_PSEUDO 4592094252754343337
|
||||
%35:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593089322246397463
|
||||
%36:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593150332132823898
|
||||
%37:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593971714784152002
|
||||
%38:sreg_64 = S_MOV_B64_IMM_PSEUDO 4594710915293070409
|
||||
%39:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595718710613720112
|
||||
%40:sreg_64 = S_MOV_B64_IMM_PSEUDO 4597174419628462798
|
||||
%41:sreg_64 = S_MOV_B64_IMM_PSEUDO 4598818590920614106
|
||||
%42:sreg_64 = S_MOV_B64_IMM_PSEUDO 4600877379321698716
|
||||
%43:sreg_64 = S_MOV_B64_IMM_PSEUDO 4604180019048437077
|
||||
undef %44.sub1:sreg_64 = S_MOV_B32 -1075489451
|
||||
%45:sreg_64 = S_MOV_B64_IMM_PSEUDO 4609176140021203710
|
||||
undef %46.sub1:sreg_64 = S_MOV_B32 -1132807010
|
||||
%47:sreg_64 = S_MOV_B64_IMM_PSEUDO 4508818957471820556
|
||||
%48:sreg_64 = S_MOV_B64_IMM_PSEUDO 4493147761815702327
|
||||
%49:sreg_64 = S_MOV_B64_IMM_PSEUDO 4523617260404727396
|
||||
%50:sreg_64 = S_MOV_B64_IMM_PSEUDO 4537941333260232368
|
||||
%51:sreg_64 = S_MOV_B64_IMM_PSEUDO 4551452160460988270
|
||||
%52:sreg_64 = S_MOV_B64_IMM_PSEUDO 4564047942395279280
|
||||
%53:sreg_64 = S_MOV_B64_IMM_PSEUDO 4575957461383652130
|
||||
%54:sreg_64 = S_MOV_B64_IMM_PSEUDO 4586165620538933921
|
||||
%55:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595172819793696017
|
||||
%56:sreg_64 = S_MOV_B64_IMM_PSEUDO 4602678819172646923
|
||||
undef %57.sub1:sreg_64 = S_MOV_B32 -1101341163
|
||||
%7:sreg_32 = IMPLICIT_DEF
|
||||
%58:sreg_32 = IMPLICIT_DEF
|
||||
%59:sreg_32 = COPY %27.sub0
|
||||
%60:vreg_64 = COPY %28
|
||||
%61:vreg_64 = COPY %28
|
||||
%62:vreg_64 = COPY %28
|
||||
%63:vreg_64 = COPY %28
|
||||
%64:vreg_64 = COPY %28
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.2:
|
||||
%65:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
|
||||
%66:sreg_32 = S_AND_B32 %65, %7, implicit-def dead $scc
|
||||
$exec_lo = S_MOV_B32_term %66
|
||||
S_CBRANCH_EXECZ %bb.18, implicit $exec
|
||||
S_BRANCH %bb.5
|
||||
|
||||
bb.3:
|
||||
%67:sreg_32 = S_AND_B32 $exec_lo, %6, implicit-def dead $scc
|
||||
$vcc_lo = COPY %67
|
||||
%58:sreg_32 = S_OR_B32 %58, $exec_lo, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc
|
||||
|
||||
bb.4:
|
||||
%64:vreg_64 = IMPLICIT_DEF
|
||||
%63:vreg_64 = IMPLICIT_DEF
|
||||
%62:vreg_64 = IMPLICIT_DEF
|
||||
%61:vreg_64 = IMPLICIT_DEF
|
||||
%60:vreg_64 = IMPLICIT_DEF
|
||||
%28:vreg_64 = IMPLICIT_DEF
|
||||
%68:sreg_32 = S_MOV_B32 -1
|
||||
S_BRANCH %bb.9
|
||||
|
||||
bb.5:
|
||||
S_CBRANCH_SCC1 %bb.18, implicit undef $scc
|
||||
|
||||
bb.6:
|
||||
%69:sreg_32_xm0_xexec = S_XOR_B32 %6, -1, implicit-def dead $scc
|
||||
%70:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 96, 0
|
||||
S_BITCMP1_B32 %70, 0, implicit-def $scc
|
||||
%71:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
S_BITCMP1_B32 %8.sub1, 0, implicit-def $scc
|
||||
%72:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
%73:sreg_32_xm0_xexec = S_XOR_B32 %72, -1, implicit-def dead $scc
|
||||
%74:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %73, implicit $exec
|
||||
%75:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %71, implicit $exec
|
||||
%76:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %69, implicit $exec
|
||||
S_BRANCH %bb.14
|
||||
|
||||
bb.7:
|
||||
%77:vreg_64 = COPY %10.sub0_sub1
|
||||
%78:vreg_64 = FLAT_LOAD_DWORDX2 %77, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%79:vreg_64 = COPY %10.sub2_sub3
|
||||
%80:vreg_64 = FLAT_LOAD_DWORDX2 %79, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%81:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %29, implicit $exec
|
||||
$vcc_lo = S_AND_B32 $exec_lo, %81, implicit-def dead $scc
|
||||
%82:sreg_64 = COPY %12.sub0_sub1
|
||||
S_CBRANCH_VCCNZ %bb.10, implicit killed $vcc
|
||||
S_BRANCH %bb.8
|
||||
|
||||
bb.8:
|
||||
%82:sreg_64 = S_MOV_B64 0
|
||||
S_BRANCH %bb.10
|
||||
|
||||
bb.9:
|
||||
%83:sreg_32 = S_XOR_B32 %68, -1, implicit-def dead $scc
|
||||
%84:sreg_32 = S_AND_B32 $exec_lo, %58, implicit-def $scc
|
||||
%59:sreg_32 = S_OR_B32 %84, %59, implicit-def $scc
|
||||
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%85:sreg_32 = S_ANDN2_B32 %7, $exec_lo, implicit-def dead $scc
|
||||
%86:sreg_32 = S_AND_B32 %83, $exec_lo, implicit-def dead $scc
|
||||
%7:sreg_32 = S_OR_B32 %85, %86, implicit-def dead $scc
|
||||
$exec_lo = S_ANDN2_B32_term $exec_lo, %59, implicit-def $scc
|
||||
S_CBRANCH_EXECNZ %bb.3, implicit $exec
|
||||
S_BRANCH %bb.19
|
||||
|
||||
bb.10:
|
||||
%87:sreg_64_xexec = S_LOAD_DWORDX2_IMM %26, 16, 0
|
||||
undef %88.sub1:sreg_64 = S_AND_B32 %87.sub1, 2147483647, implicit-def dead $scc
|
||||
%89:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e64 2, %87, 0, 0, implicit $mode, implicit $exec
|
||||
%90:vreg_64 = V_CVT_F64_I32_e32 %89, implicit $mode, implicit $exec
|
||||
%91:vreg_64 = nofpexcept V_FMA_F64_e64 0, 0, 0, %30, 0, %87, 0, 0, implicit $mode, implicit $exec
|
||||
%0:sreg_32 = nofpexcept V_CMP_LT_F64_e64 0, %27, 0, %91, 0, implicit $mode, implicit $exec
|
||||
%31.sub0:sreg_64 = COPY %27.sub0
|
||||
%1:sreg_32 = nofpexcept V_CMP_GT_F64_e64 0, %31, 0, %90, 0, implicit $mode, implicit $exec
|
||||
S_CBRANCH_SCC0 %bb.12, implicit undef $scc
|
||||
|
||||
bb.11:
|
||||
%92:sreg_32_xm0_xexec = S_OR_B32 %1, %0, implicit-def dead $scc
|
||||
undef %93.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub3, 0, 0, %92, implicit $exec
|
||||
%93.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub2, 0, 0, %92, implicit $exec
|
||||
S_BRANCH %bb.13
|
||||
|
||||
bb.12:
|
||||
%93:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
|
||||
|
||||
bb.13:
|
||||
%88.sub0:sreg_64 = COPY %87.sub0
|
||||
%94:vgpr_32 = COPY %16.sub0
|
||||
%95:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub0, 0, %94, %2, implicit $exec
|
||||
%96:vgpr_32 = COPY %16.sub1
|
||||
%97:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub1, 0, %96, %2, implicit $exec
|
||||
%98:vgpr_32 = V_CNDMASK_B32_e64 0, %97, 0, 2146959360, %18, implicit $exec
|
||||
dead %99:sreg_32 = S_AND_B32 %32, $exec_lo, implicit-def $scc
|
||||
%100:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc
|
||||
undef %101.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %95, 0, 0, %100, implicit $exec
|
||||
%101.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %98, 0, 0, %19, implicit $exec
|
||||
%64:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %64, 0, %101, 0, 0, implicit $mode, implicit $exec
|
||||
%63:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %63, 0, %21.sub0_sub1, 0, 0, implicit $mode, implicit $exec
|
||||
%62:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %62, 0, %12.sub2_sub3, 0, 0, implicit $mode, implicit $exec
|
||||
%33.sub0:sreg_64 = COPY %27.sub0
|
||||
%102:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, %33, 0, %88, 0, implicit $mode, implicit $exec
|
||||
%103:sreg_32 = nofpexcept V_CMP_EQ_F64_e64 0, 0, 0, %87, 0, implicit $mode, implicit $exec
|
||||
%104:sreg_32 = S_XOR_B32 %15, %103, implicit-def dead $scc
|
||||
dead %105:sreg_32 = S_AND_B32 %104, $exec_lo, implicit-def $scc
|
||||
%106:sgpr_32 = S_CSELECT_B32 0, 2146435072, implicit killed $scc
|
||||
%107:vgpr_32 = V_CNDMASK_B32_e64 0, %93.sub1, 0, %106, %102, implicit $exec
|
||||
undef %108.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %107, 0, 0, %23, implicit $exec
|
||||
%109:sreg_32_xm0_xexec = S_OR_B32 %23, %102, implicit-def dead $scc
|
||||
%108.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %93.sub0, 0, 0, %109, implicit $exec
|
||||
%61:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %61, 0, %108, 0, 0, implicit $mode, implicit $exec
|
||||
%110:vreg_64 = nofpexcept V_FMA_F64_e64 0, %78, 0, %35, 0, %34, 0, 0, implicit $mode, implicit $exec
|
||||
%111:vreg_64 = nofpexcept V_FMA_F64_e64 0, %110, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
|
||||
%112:vreg_64 = nofpexcept V_FMA_F64_e64 0, %111, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
|
||||
%113:vreg_64 = nofpexcept V_FMA_F64_e64 0, %112, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
|
||||
%114:vreg_64 = nofpexcept V_FMA_F64_e64 0, %113, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
|
||||
%115:vreg_64 = nofpexcept V_FMA_F64_e64 0, %114, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
|
||||
%116:vreg_64 = nofpexcept V_FMA_F64_e64 0, %115, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
|
||||
%117:vreg_64 = nofpexcept V_FMA_F64_e64 0, %116, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
|
||||
%118:vreg_64 = nofpexcept V_ADD_F64_e64 0, %117, 0, %43, 0, 0, implicit $mode, implicit $exec
|
||||
%44.sub0:sreg_64 = COPY %43.sub0
|
||||
%119:vreg_64 = nofpexcept V_ADD_F64_e64 0, %118, 0, %44, 0, 0, implicit $mode, implicit $exec
|
||||
%120:vreg_64 = nofpexcept V_ADD_F64_e64 0, %24, 0, %119, 0, 0, implicit $mode, implicit $exec
|
||||
%121:vreg_64 = nofpexcept V_MUL_F64_e64 0, %9, 0, %120, 0, 0, implicit $mode, implicit $exec
|
||||
%122:vreg_64 = nofpexcept V_MUL_F64_e64 0, %121, 0, %45, 0, 0, implicit $mode, implicit $exec
|
||||
%46.sub0:sreg_64 = COPY %30.sub0
|
||||
%123:vreg_64 = nofpexcept V_FMA_F64_e64 0, %122, 0, %46, 0, %20, 0, 0, implicit $mode, implicit $exec
|
||||
%124:vreg_64 = nofpexcept V_FMA_F64_e64 0, %123, 0, %48, 0, %47, 0, 0, implicit $mode, implicit $exec
|
||||
%125:vreg_64 = nofpexcept V_FMA_F64_e64 0, %124, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
|
||||
%126:vreg_64 = nofpexcept V_FMA_F64_e64 0, %125, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
|
||||
%127:vreg_64 = nofpexcept V_FMA_F64_e64 0, %126, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
|
||||
%128:vreg_64 = nofpexcept V_FMA_F64_e64 0, %127, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
|
||||
%129:vreg_64 = nofpexcept V_FMA_F64_e64 0, %128, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
|
||||
%130:vreg_64 = nofpexcept V_FMA_F64_e64 0, %129, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
|
||||
%131:vreg_64 = nofpexcept V_FMA_F64_e64 0, %130, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
|
||||
%132:vreg_64 = nofpexcept V_FMA_F64_e64 0, %131, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
|
||||
%60:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %60, 0, %132, 0, 0, implicit $mode, implicit $exec
|
||||
%133:vreg_64 = nofpexcept V_FMA_F64_e64 0, %80, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec
|
||||
%134:vreg_64 = nofpexcept V_FMA_F64_e64 0, %133, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec
|
||||
%135:vreg_64 = nofpexcept V_FMA_F64_e64 0, %134, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec
|
||||
%136:vreg_64 = nofpexcept V_FMA_F64_e64 0, %135, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec
|
||||
%137:vreg_64 = nofpexcept V_FMA_F64_e64 0, %136, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec
|
||||
%138:vreg_64 = nofpexcept V_FMA_F64_e64 0, %137, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec
|
||||
%139:vreg_64 = nofpexcept V_FMA_F64_e64 0, %138, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec
|
||||
%140:vreg_64 = nofpexcept V_MUL_F64_e64 0, %139, 0, %45, 0, 0, implicit $mode, implicit $exec
|
||||
%57.sub0:sreg_64 = COPY %48.sub0
|
||||
%141:vreg_64 = nofpexcept V_FMA_F64_e64 0, %140, 0, %57, 0, %47, 0, 0, implicit $mode, implicit $exec
|
||||
%142:vreg_64 = nofpexcept V_FMA_F64_e64 0, %141, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec
|
||||
%143:vreg_64 = nofpexcept V_FMA_F64_e64 0, %142, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec
|
||||
%144:vreg_64 = nofpexcept V_FMA_F64_e64 0, %143, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec
|
||||
%145:vreg_64 = nofpexcept V_FMA_F64_e64 0, %144, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec
|
||||
%146:vreg_64 = nofpexcept V_FMA_F64_e64 0, %145, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec
|
||||
%147:vreg_64 = nofpexcept V_FMA_F64_e64 0, %146, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec
|
||||
%148:vreg_64 = nofpexcept V_FMA_F64_e64 0, %147, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec
|
||||
%149:vreg_64 = nofpexcept V_FMA_F64_e64 0, %148, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec
|
||||
%28:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %28, 0, %149, 0, 0, implicit $mode, implicit $exec
|
||||
%58:sreg_32 = V_CMP_LE_U32_e64 %8.sub0, %4, implicit $exec
|
||||
%68:sreg_32 = S_MOV_B32 0
|
||||
S_BRANCH %bb.9
|
||||
|
||||
bb.14:
|
||||
S_CBRANCH_SCC1 %bb.17, implicit undef $scc
|
||||
S_BRANCH %bb.15
|
||||
|
||||
bb.15:
|
||||
%150:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %74, implicit $exec
|
||||
$vcc_lo = S_AND_B32 $exec_lo, %150, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.17, implicit killed $vcc
|
||||
S_BRANCH %bb.16
|
||||
|
||||
bb.16:
|
||||
%151:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %75, implicit $exec
|
||||
$vcc_lo = S_AND_B32 $exec_lo, %151, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.16, implicit killed $vcc
|
||||
S_BRANCH %bb.17
|
||||
|
||||
bb.17:
|
||||
%152:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %76, implicit $exec
|
||||
$vcc_lo = S_AND_B32 $exec_lo, %152, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.14, implicit killed $vcc
|
||||
S_BRANCH %bb.18
|
||||
|
||||
bb.18:
|
||||
$exec_lo = S_OR_B32 $exec_lo, %65, implicit-def $scc
|
||||
S_ENDPGM 0
|
||||
|
||||
bb.19:
|
||||
$exec_lo = S_OR_B32 $exec_lo, %59, implicit-def $scc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
...
|
Loading…
Reference in New Issue