diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp index 6909cac7b484..2ba81c7cfe22 100644 --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -30,6 +30,7 @@ #include #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -73,7 +74,7 @@ private: /// Given a bitmask \p UsedLanes for the used lanes on a def output of a /// COPY-like instruction determine the lanes used on the use operands /// and call addUsedLanesOnOperand() for them. - void transferUsedLanesStep(const MachineOperand &Def, LaneBitmask UsedLanes); + void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes); /// Given a use regiser operand \p Use and a mask of defined lanes, check /// if the operand belongs to a lowersToCopies() instruction, transfer the @@ -87,9 +88,21 @@ private: LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum, LaneBitmask DefinedLanes) const; + /// Given a mask \p UsedLanes used from the output of instruction \p MI + /// determine which lanes are used from operand \p MO of this instruction. 
+ LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes, + const MachineOperand &MO) const; + + bool runOnce(MachineFunction &MF); + LaneBitmask determineInitialDefinedLanes(unsigned Reg); LaneBitmask determineInitialUsedLanes(unsigned Reg); + bool isUndefRegAtInput(const MachineOperand &MO, + const VRegInfo &RegInfo) const; + + bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const; + const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; @@ -201,39 +214,40 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, PutInWorklist(MORegIdx); } -void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def, +void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes) { - const MachineInstr &MI = *Def.getParent(); + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); + addUsedLanesOnOperand(MO, UsedOnMO); + } +} + +LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, + LaneBitmask UsedLanes, + const MachineOperand &MO) const { + unsigned OpNum = MI.getOperandNo(&MO); + assert(lowersToCopies(MI) && DefinedByCopy[ + TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]); + switch (MI.getOpcode()) { case TargetOpcode::COPY: case TargetOpcode::PHI: - for (const MachineOperand &MO : MI.uses()) { - if (MO.isReg() && MO.isUse()) - addUsedLanesOnOperand(MO, UsedLanes); - } - break; + return UsedLanes; case TargetOpcode::REG_SEQUENCE: { - // Note: This loop makes the conservative assumption that subregister - // indices do not overlap or that we do not know how the overlap is - // resolved when lowering to copies. 
- for (unsigned I = 1, N = MI.getNumOperands(); I < N; I += 2) { - const MachineOperand &MO = MI.getOperand(I); - unsigned SubIdx = MI.getOperand(I + 1).getImm(); - LaneBitmask MOUsedLanes = - TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); - - addUsedLanesOnOperand(MO, MOUsedLanes); - } - break; + assert(OpNum % 2 == 1); + unsigned SubIdx = MI.getOperand(OpNum + 1).getImm(); + return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); } case TargetOpcode::INSERT_SUBREG: { - const MachineOperand &MO2 = MI.getOperand(2); unsigned SubIdx = MI.getOperand(3).getImm(); LaneBitmask MO2UsedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); - addUsedLanesOnOperand(MO2, MO2UsedLanes); + if (OpNum == 2) + return MO2UsedLanes; - const MachineOperand &MO1 = MI.getOperand(1); + const MachineOperand &Def = MI.getOperand(0); unsigned DefReg = Def.getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LaneBitmask MO1UsedLanes; @@ -241,16 +255,14 @@ void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def, MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx); else MO1UsedLanes = RC->LaneMask; - addUsedLanesOnOperand(MO1, MO1UsedLanes); - break; + + assert(OpNum == 1); + return MO1UsedLanes; } case TargetOpcode::EXTRACT_SUBREG: { - const MachineOperand &MO = MI.getOperand(1); + assert(OpNum == 1); unsigned SubIdx = MI.getOperand(2).getImm(); - LaneBitmask MOUsedLanes = - TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes); - addUsedLanesOnOperand(MO, MOUsedLanes); - break; + return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes); } default: llvm_unreachable("function must be called with COPY-like instruction"); @@ -423,6 +435,8 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { if (lowersToCopies(UseMI)) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO); + if (CrossCopy) + DEBUG(dbgs() << "Copy across incompatible classes: " << 
UseMI); } if (!CrossCopy) @@ -439,6 +453,119 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { return UsedLanes; } +bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO, + const VRegInfo &RegInfo) const { + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0; +} + +bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, + bool *CrossCopy) const { + if (!MO.isUse()) + return false; + const MachineInstr &MI = *MO.getParent(); + if (!lowersToCopies(MI)) + return false; + const MachineOperand &Def = MI.getOperand(0); + unsigned DefReg = Def.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + return false; + unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + if (!DefinedByCopy.test(DefRegIdx)) + return false; + + const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx]; + LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO); + if (UsedLanes != 0) + return false; + + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); + *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); + } + return true; +} + +bool DetectDeadLanes::runOnce(MachineFunction &MF) { + // First pass: Populate defs/uses of vregs with initial values + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Determine used/defined lanes and add copy instructions to worklist. + VRegInfo &Info = VRegInfos[RegIdx]; + Info.DefinedLanes = determineInitialDefinedLanes(Reg); + Info.UsedLanes = determineInitialUsedLanes(Reg); + } + + // Iterate as long as defined lanes/used lanes keep changing. 
+ while (!Worklist.empty()) { + unsigned RegIdx = Worklist.front(); + Worklist.pop_front(); + WorklistMembers.reset(RegIdx); + VRegInfo &Info = VRegInfos[RegIdx]; + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Transfer UsedLanes to operands of DefMI (backwards dataflow). + MachineOperand &Def = *MRI->def_begin(Reg); + const MachineInstr &MI = *Def.getParent(); + transferUsedLanesStep(MI, Info.UsedLanes); + // Transfer DefinedLanes to users of Reg (forward dataflow). + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) + transferDefinedLanesStep(MO, Info.DefinedLanes); + } + + DEBUG( + dbgs() << "Defined/Used lanes:\n"; + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << PrintReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } + dbgs() << "\n"; + ); + + bool Again = false; + // Mark operands as dead/unused. 
+ for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + const VRegInfo &RegInfo = VRegInfos[RegIdx]; + if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); + MO.setIsDead(); + } + if (MO.readsReg()) { + bool CrossCopy = false; + if (isUndefRegAtInput(MO, RegInfo)) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " + << MI); + MO.setIsUndef(); + } else if (isUndefInput(MO, &CrossCopy)) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " + << MI); + MO.setIsUndef(); + if (CrossCopy) + Again = true; + } + } + } + } + } + + return Again; +} + bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { // Don't bother if we won't track subregister liveness later. This pass is // required for correctness if subregister liveness is enabled because the @@ -458,69 +585,10 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { WorklistMembers.resize(NumVirtRegs); DefinedByCopy.resize(NumVirtRegs); - // First pass: Populate defs/uses of vregs with initial values - for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); - - // Determine used/defined lanes and add copy instructions to worklist. - VRegInfo &Info = VRegInfos[RegIdx]; - Info.DefinedLanes = determineInitialDefinedLanes(Reg); - Info.UsedLanes = determineInitialUsedLanes(Reg); - } - - // Iterate as long as defined lanes/used lanes keep changing. 
- while (!Worklist.empty()) { - unsigned RegIdx = Worklist.front(); - Worklist.pop_front(); - WorklistMembers.reset(RegIdx); - VRegInfo &Info = VRegInfos[RegIdx]; - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); - - // Transfer UsedLanes to operands of DefMI (backwards dataflow). - MachineOperand &Def = *MRI->def_begin(Reg); - transferUsedLanesStep(Def, Info.UsedLanes); - // Transfer DefinedLanes to users of Reg (forward dataflow). - for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) - transferDefinedLanesStep(MO, Info.DefinedLanes); - } - - DEBUG( - dbgs() << "Defined/Used lanes:\n"; - for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); - const VRegInfo &Info = VRegInfos[RegIdx]; - dbgs() << PrintReg(Reg, nullptr) - << " Used: " << PrintLaneMask(Info.UsedLanes) - << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; - } - dbgs() << "\n"; - ); - - // Mark operands as dead/unused. - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - for (MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - unsigned SubReg = MO.getSubReg(); - LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); - const VRegInfo &RegInfo = VRegInfos[RegIdx]; - if (RegInfo.UsedLanes == 0 && MO.isDef() && !MO.isDead()) { - DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); - MO.setIsDead(); - } - if (((RegInfo.UsedLanes & Mask) == 0 || - (RegInfo.DefinedLanes & Mask) == 0) && MO.readsReg()) { - DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI); - MO.setIsUndef(); - } - } - } - } + bool Again; + do { + Again = runOnce(MF); + } while(Again); DefinedByCopy.clear(); WorklistMembers.clear(); diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir 
b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir index d04b3f13e3f4..8c761298cd2e 100644 --- a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -5,6 +5,7 @@ define void @test2() { ret void } define void @test3() { ret void } define void @test4() { ret void } + define void @test5() { ret void } define void @loop0() { ret void } define void @loop1() { ret void } define void @loop2() { ret void } @@ -20,7 +21,7 @@ # CHECK: S_NOP 0, implicit %3:sub1 # CHECK: S_NOP 0, implicit undef %3:sub2 # CHECK: %4 = COPY %3:sub0_sub1 -# CHECK: %5 = COPY %3:sub2_sub3 +# CHECK: %5 = COPY undef %3:sub2_sub3 # CHECK: S_NOP 0, implicit %4:sub0 # CHECK: S_NOP 0, implicit %4:sub1 # CHECK: S_NOP 0, implicit undef %5:sub0 @@ -255,6 +256,25 @@ body: | S_NOP 0, implicit %1 ... --- +# Check that unused inputs are marked as undef, even if the vreg itself is +# used. +# CHECK-LABEL: name: test5 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %1 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %1:sub1 +name: test5 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + %1 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1 + S_NOP 0, implicit %1:sub1 +... +--- # Check "optimistic" dataflow fixpoint in phi-loops. 
# CHECK-LABEL: name: loop0 # CHECK: bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll index 1b386ff7d580..4b6f65a77b9a 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -6,10 +6,10 @@ target triple="amdgcn--" ; CHECK-LABEL: foobar: ; CHECK: s_load_dword s2, s[0:1], 0x9 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 -; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK: s_and_saveexec_b64 s[2:3], vcc +; CHECK: v_mbcnt_lo_u32_b32_e64 +; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0 +; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc ; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3] ; BB0_1: ; CHECK: s_load_dword s0, s[0:1], 0xa