Reassociate: Reprocess RedoInsts after each inst

Previously the RedoInsts was processed at the end of the block. However it was possible that it left behind some instructions that were not canonicalized. This should guarantee that any previous instruction in the basic block is canonicalized before we process a new instruction. llvm-svn: 258830
2016-01-26 18:42:36 +00:00 · 2016-01-26 18:42:36 +00:00 · 3d0c46d489
parent 3f81a9c654
commit 3d0c46d489
4 changed files with 101 additions and 33 deletions
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@ -163,7 +163,8 @@ namespace {
      AU.addPreserved<GlobalsAAWrapperPass>();
    }
  private:
-    void BuildRankMap(Function &F);
+    void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
+
    unsigned getRank(Value *V);
    void canonicalizeOperands(Instruction *I);
    void ReassociateExpression(BinaryOperator *I);
@ -246,7 +247,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
  return nullptr;
 }

-void Reassociate::BuildRankMap(Function &F) {
+void Reassociate::BuildRankMap(Function &F,
+                               ReversePostOrderTraversal<Function *> &RPOT) {
  unsigned i = 2;

  // Assign distinct ranks to function arguments.
@ -255,7 +257,6 @@ void Reassociate::BuildRankMap(Function &F) {
    DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
  }

-  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
         E = RPOT.end(); I != E; ++I) {
    BasicBlock *BB = *I;
@ -2259,13 +2260,28 @@ bool Reassociate::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

-  // Calculate the rank map for F
-  BuildRankMap(F);
+  // Reassociate needs for each instruction to have its operands already
+  // processed, so we first perform a RPOT of the basic blocks so that
+  // when we process a basic block, all its dominators have been processed
+  // before.
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+  BuildRankMap(F, RPOT);

  MadeChange = false;
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+  for (BasicBlock *BI : RPOT) {
+    // Use a worklist to keep track of which instructions have been processed
+    // (and which insts won't be optimized again) so when redoing insts,
+    // optimize insts rightaway which won't be processed later.
+    SmallSet<Instruction *, 8> Worklist;
+
+    // Insert all instructions in the BB
+    for (Instruction &I : *BI)
+      Worklist.insert(&I);
+
    // Optimize every instruction in the basic block.
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
+      // This instruction has been processed.
+      Worklist.erase(&*II);
      if (isInstructionTriviallyDead(&*II)) {
        EraseInst(&*II++);
      } else {
@ -2274,27 +2290,22 @@ bool Reassociate::runOnFunction(Function &F) {
        ++II;
      }

-    // Make a copy of all the instructions to be redone so we can remove dead
-    // instructions.
-    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
-    // Iterate over all instructions to be reevaluated and remove trivially dead
-    // instructions. If any operand of the trivially dead instruction becomes
-    // dead mark it for deletion as well. Continue this process until all
-    // trivially dead instructions have been removed.
-    while (!ToRedo.empty()) {
-      Instruction *I = ToRedo.pop_back_val();
-      if (isInstructionTriviallyDead(I))
-        RecursivelyEraseDeadInsts(I, ToRedo);
-    }
-
-    // Now that we have removed dead instructions, we can reoptimize the
-    // remaining instructions.
-    while (!RedoInsts.empty()) {
-      Instruction *I = RedoInsts.pop_back_val();
-      if (isInstructionTriviallyDead(I))
-        EraseInst(I);
-      else
-        OptimizeInst(I);
+      // If the above optimizations produced new instructions to optimize or
+      // made modifications which need to be redone, do them now if they won't
+      // be handled later.
+      while (!RedoInsts.empty()) {
+        Instruction *I = RedoInsts.pop_back_val();
+        // Process instructions that won't be processed later, either
+        // inside the block itself or in another basic block (based on rank),
+        // since these will be processed later.
+        if ((I->getParent() != BI || !Worklist.count(I)) &&
+            RankMap[I->getParent()] <= RankMap[BI]) {
+          if (isInstructionTriviallyDead(I))
+            EraseInst(I);
+          else
+            OptimizeInst(I);
+        }
+      }
    }
  }

--- a/llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll
+++ b/llvm/test/Transforms/Reassociate/prev_insts_canonicalized.ll
@ -0,0 +1,57 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; These tests make sure that before processing insts
+; any previous instructions are already canonicalized.
+define i32 @foo(i32 %in) {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: %factor = mul i32 %in, -4
+; CHECK-NEXT: %factor1 = mul i32 %in, 2
+; CHECK-NEXT: %_3 = add i32 %factor, 1
+; CHECK-NEXT: %_5 = add i32 %_3, %factor1
+; CHECK-NEXT: ret i32 %_5
+  %_0 = add i32 %in, 1
+  %_1 = mul i32 %in, -2
+  %_2 = add i32 %_0, %_1
+  %_3 = add i32 %_1, %_2
+  %_4 = add i32 %_3, 1
+  %_5 = add i32 %in, %_3
+  ret i32 %_5
+}
+
+; CHECK-LABEL: @foo1
+define void @foo1(float %in, i1 %cmp) {
+wrapper_entry:
+  br label %foo1
+
+for.body:
+  %0 = fadd float %in1, %in1
+  br label %foo1
+
+foo1:
+  %_0 = fmul fast float %in, -3.000000e+00
+  %_1 = fmul fast float %_0, 3.000000e+00
+  %in1 = fadd fast float -3.000000e+00, %_1
+  %in1use = fadd fast float %in1, %in1
+  br label %for.body
+
+
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float %in, i1 %cmp) {
+wrapper_entry:
+  br label %for.body
+
+for.body:
+; If the operands of the phi are sheduled for processing before
+; foo1 is processed, the invariant of reassociate are not preserved
+  %unused = phi float [%in1, %foo1], [undef, %wrapper_entry]
+  br label %foo1
+
+foo1:
+  %_0 = fmul fast float %in, -3.000000e+00
+  %_1 = fmul fast float %_0, 3.000000e+00
+  %in1 = fadd fast float -3.000000e+00, %_1
+  %in1use = fadd fast float %in1, %in1
+  br label %for.body
+}
--- a/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
+++ b/llvm/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@ -1,8 +1,8 @@
 ; RUN: opt < %s -reassociate -S | FileCheck %s
 ; CHECK-LABEL: faddsubAssoc1
-; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
-; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
-; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a
+; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]]
+; CHECK: fmul fast half [[TMP2]], 0xH4500
 ; CHECK: ret
 ; Input is A op (B op C)
 define half @faddsubAssoc1(half %a, half %b) {
--- a/llvm/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/llvm/test/Transforms/Reassociate/xor_reassoc.ll
@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
  %xor1 = xor i32 %xor, %and
  ret i32 %xor1
 ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %x, 123
-; CHECK: %xor1 = xor i32 %xor, %y
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
 ; CHECK: ret i32 %xor1
 }