optimize single MBB loops better. In particular, produce:

LBB1_57: #bb207.i movl 72(%esp), %ecx movb (%ecx,%eax), %cl movl 80(%esp), %edx movb %cl, 1(%edx,%eax) incl %eax cmpl $143, %eax jne LBB1_57 #bb207.i jmp LBB1_64 #cond_next255.i intead of: LBB1_57: #bb207.i movl 72(%esp), %ecx movb (%ecx,%eax), %cl movl 80(%esp), %edx movb %cl, 1(%edx,%eax) incl %eax cmpl $143, %eax je LBB1_64 #cond_next255.i jmp LBB1_57 #bb207.i This eliminates a branch per iteration of the loop. This hurted PPC particularly, because the extra branch meant another dispatch group for each iteration of the loop. llvm-svn: 31530
2006-11-08 01:03:21 +00:00 · 2006-11-08 01:03:21 +00:00 · bf3b57f221
parent 800596d60e
commit bf3b57f221
1 changed files with 17 additions and 0 deletions
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@ -687,6 +687,23 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
                                       !CurCond.empty(),
                                       ++MachineFunction::iterator(MBB));

+    // If this is a two-way branch, and the FBB branches to this block, reverse 
+    // the condition so the single-basic-block loop is faster.  Instead of:
+    //    Loop: xxx; jcc Out; jmp Loop
+    // we want:
+    //    Loop: xxx; jncc Loop; jmp Out
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+      std::vector<MachineOperand> NewCond(CurCond);
+      if (!TII->ReverseBranchCondition(NewCond)) {
+        TII->RemoveBranch(*MBB);
+        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+    
+    
    // If this branch is the only thing in its block, see if we can forward
    // other blocks across it.
    if (CurTBB && CurCond.empty() && CurFBB == 0 &&