From abda665f5f6b3f569ef28dba660234eee00a5168 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sun, 25 Jan 2009 03:53:59 +0000 Subject: [PATCH] Teach 2addr pass to do more commuting. If both uses of a two-address instruction are killed, but the first operand has a use before and after the def, commute if the second operand does not suffer from the same issue. %reg1028 = EXTRACT_SUBREG %reg1027, 1 %reg1029 = MOV8rr %reg1028 %reg1029 = SHR8ri %reg1029, 7, %EFLAGS insert => %reg1030 = MOV8rr %reg1028 %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS In this case, it might not be possible to coalesce the second MOV8rr instruction if the first one is coalesced. So it would be profitable to commute it: %reg1028 = EXTRACT_SUBREG %reg1027, 1 %reg1029 = MOV8rr %reg1028 %reg1029 = SHR8ri %reg1029, 7, %EFLAGS insert => %reg1030 = MOV8rr %reg1029 %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS llvm-svn: 62954 --- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 110 +++++++++++++++++- llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll | 2 +- .../CodeGen/X86/2008-07-19-movups-spills.ll | 2 +- llvm/test/CodeGen/X86/pmul.ll | 2 +- llvm/test/CodeGen/X86/twoaddr-coalesce.ll | 25 ++++ 5 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/X86/twoaddr-coalesce.ll diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 071b39954c09..5cf2ffd3771b 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -49,6 +49,7 @@ using namespace llvm; STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); +STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions 
re-materialized"); @@ -70,6 +71,15 @@ namespace { MachineBasicBlock *MBB, unsigned Loc, DenseMap &DistanceMap); + bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, + DenseMap &DistanceMap, + unsigned &LastDef); + + bool isProfitableToCommute(unsigned regB, unsigned regC, + MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Dist, + DenseMap &DistanceMap); + bool CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegC, unsigned Dist, @@ -230,8 +240,6 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isUse()) - continue; MachineInstr *UseMI = UseMO.getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { @@ -255,6 +263,82 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, return MBB == DefMI->getParent(); } +/// NoUseAfterLastDef - Return true if there are no intervening uses between the +/// last instruction in the MBB that defines the specified register and the +/// two-address instruction which is being processed. 
It also returns the last +/// def location by reference. +bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, + MachineBasicBlock *MBB, unsigned Dist, + DenseMap &DistanceMap, + unsigned &LastDef) { + LastDef = 0; + unsigned LastUse = Dist; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), + E = MRI->reg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MI->getParent() != MBB) + continue; + DenseMap::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + continue; + if (MO.isUse() && DI->second < LastUse) + LastUse = DI->second; + if (MO.isDef() && DI->second > LastDef) + LastDef = DI->second; + } + + return !(LastUse > LastDef && LastUse < Dist); +} + +/// isProfitableToCommute - Return true if it's potentially profitable to commute +/// the two-address instruction that's being processed. +bool +TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, + MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Dist, DenseMap &DistanceMap) { + // Determine if it's profitable to commute this two address instruction. In + // general, we want no uses between this instruction and the definition of + // the two-address register. + // e.g. + // %reg1028 = EXTRACT_SUBREG %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS + // insert => %reg1030 = MOV8rr %reg1028 + // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // In this case, it might not be possible to coalesce the second MOV8rr + // instruction if the first one is coalesced. 
So it would be profitable to + // commute it: + // %reg1028 = EXTRACT_SUBREG %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS + // insert => %reg1030 = MOV8rr %reg1029 + // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS + + if (!MI->killsRegister(regC)) + return false; + + // Ok, we have something like: + // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // let's see if it's worth commuting it. + + // If there is a use of regC between its last def (could be livein) and this + // instruction, then bail. + unsigned LastDefC = 0; + if (!NoUseAfterLastDef(regC, MBB, Dist, DistanceMap, LastDefC)) + return false; + + // If there is a use of regB between its last def (could be livein) and this + // instruction, then go ahead and make this transformation. + unsigned LastDefB = 0; + if (!NoUseAfterLastDef(regB, MBB, Dist, DistanceMap, LastDefB)) + return true; + + // Since there are no intervening uses for both registers, then commute + // if the def of regC is closer. Its live interval is shorter. + return LastDefB && LastDefC && LastDefC > LastDefB; +} + /// CommuteInstruction - Commute a two-address instruction and update the basic /// block, distance map, and live variables if needed. Return true if it is /// successful. @@ -419,6 +503,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } + // If it's profitable to commute the instruction, do so. 
+ if (TID.isCommutable() && mi->getNumOperands() >= 3) { + unsigned regC = mi->getOperand(3-si).getReg(); + if (isProfitableToCommute(regB, regC, mi, mbbi, Dist, DistanceMap)) + if (CommuteInstruction(mi, mbbi, regC, Dist, DistanceMap)) { + ++NumAggrCommuted; + ++NumCommuted; + regB = regC; + } + } + InstructionRearranged: const TargetRegisterClass* rc = MRI->getRegClass(regA); MachineInstr *DefMI = MRI->getVRegDef(regB); @@ -436,7 +531,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc); } - MachineBasicBlock::iterator prevMi = prior(mi); + MachineBasicBlock::iterator prevMI = prior(mi); + // Update DistanceMap. + DistanceMap.insert(std::make_pair(prevMI, Dist)); + DistanceMap[mi] = ++Dist; // Update live variables for regB. if (LV) { @@ -446,13 +544,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { varInfoB.UsedBlocks[mbbi->getNumber()] = true; if (LV->removeVirtualRegisterKilled(regB, mi)) - LV->addVirtualRegisterKilled(regB, prevMi); + LV->addVirtualRegisterKilled(regB, prevMI); if (LV->removeVirtualRegisterDead(regB, mi)) - LV->addVirtualRegisterDead(regB, prevMi); + LV->addVirtualRegisterDead(regB, prevMI); } - DOUT << "\t\tprepend:\t"; DEBUG(prevMi->print(*cerr.stream(), &TM)); + DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM)); // Replace all occurences of regB with regA. 
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { diff --git a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll index f78d52651ded..539fc15fcba5 100644 --- a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3 +; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 4 ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed} ; rdar://5761454 diff --git a/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll b/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll index ef5c7c50f66e..880035715f83 100644 --- a/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll +++ b/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 76 +; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75 ; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1 ; PR2539 diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 5ee09326dd1f..b619411eaff2 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t ; RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 19 +; RUN: grep mov %t | count 15 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/llvm/test/CodeGen/X86/twoaddr-coalesce.ll b/llvm/test/CodeGen/X86/twoaddr-coalesce.ll new file mode 100644 index 000000000000..c369d91f56ae --- /dev/null +++ b/llvm/test/CodeGen/X86/twoaddr-coalesce.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as < %s | llc -march=x86 -join-cross-class-copies -stats 
|& \ +; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted} +; rdar://6523745 + +@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main() nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; [#uses=2] + %0 = trunc i32 %i.0.reg2mem.0 to i8 ; [#uses=1] + %1 = sdiv i8 %0, 2 ; [#uses=1] + %2 = sext i8 %1 to i32 ; [#uses=1] + %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; [#uses=0] + %indvar.next = add i32 %i.0.reg2mem.0, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 258 ; [#uses=1] + br i1 %exitcond, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + ret i32 0 +} + +declare i32 @printf(i8*, ...) nounwind