Reassociate: Reprocess RedoInsts after each inst

Previously, RedoInsts was processed once at the end of the block.
However, it was possible that this left behind some instructions that
were not canonicalized.
Processing RedoInsts after each instruction guarantees that every
earlier instruction in the basic block has been canonicalized before
we process a new instruction.

llvm-svn: 258830
Aditya Nandakumar 2016-01-26 18:42:36 +00:00
parent 3f81a9c654
commit 3d0c46d489
4 changed files with 101 additions and 33 deletions
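The reordering described in the commit message can be illustrated with a small, self-contained sketch. This is plain C++ with made-up Inst/canonicalize names, not the actual pass code: the redo list is drained after every instruction instead of once at the end of the block, so anything an earlier instruction left in a non-canonical form is revisited before a later instruction that uses it.

#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-ins for the pass machinery; not LLVM's actual types.
struct Inst {
  std::string Name;
  bool Canonical;
};

// Hypothetical canonicalization step: marks the instruction canonical and,
// in a real pass, would queue any affected instructions onto Redo.
static void canonicalize(Inst &I, std::vector<Inst *> &Redo) {
  if (!I.Canonical) {
    I.Canonical = true;
    std::cout << "canonicalized " << I.Name << "\n";
  }
  (void)Redo;
}

static void processBlock(std::vector<Inst *> &Block) {
  std::vector<Inst *> Redo;
  for (Inst *I : Block) {
    canonicalize(*I, Redo);
    // After this change: drain the redo list after *each* instruction, so
    // every earlier instruction in the block is canonical before the next
    // one is visited. Previously this loop ran only once, after the block.
    while (!Redo.empty()) {
      Inst *R = Redo.back();
      Redo.pop_back();
      canonicalize(*R, Redo);
    }
  }
}

int main() {
  Inst A{"a", false}, B{"b", false};
  std::vector<Inst *> Block{&A, &B};
  processBlock(Block);
  return 0;
}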


@@ -163,7 +163,8 @@ namespace {
AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
void BuildRankMap(Function &F);
void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
unsigned getRank(Value *V);
void canonicalizeOperands(Instruction *I);
void ReassociateExpression(BinaryOperator *I);
@@ -246,7 +247,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
void Reassociate::BuildRankMap(Function &F) {
void Reassociate::BuildRankMap(Function &F,
ReversePostOrderTraversal<Function *> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
@@ -255,7 +257,6 @@ void Reassociate::BuildRankMap(Function &F) {
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
ReversePostOrderTraversal<Function*> RPOT(&F);
for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
E = RPOT.end(); I != E; ++I) {
BasicBlock *BB = *I;
@@ -2259,13 +2260,28 @@ bool Reassociate::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
// Calculate the rank map for F
BuildRankMap(F);
// Reassociate needs each instruction to have its operands already
// processed, so we first perform an RPO traversal of the basic blocks;
// that way, when we process a basic block, all of its dominators have
// already been processed.
ReversePostOrderTraversal<Function *> RPOT(&F);
BuildRankMap(F, RPOT);
MadeChange = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
for (BasicBlock *BI : RPOT) {
// Use a worklist to keep track of which instructions in this block have
// not yet been processed; when redoing instructions, anything no longer
// on the worklist will not be visited again and can be optimized right away.
SmallSet<Instruction *, 8> Worklist;
// Insert all instructions in the BB
for (Instruction &I : *BI)
Worklist.insert(&I);
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
// This instruction has been processed.
Worklist.erase(&*II);
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
@@ -2274,27 +2290,22 @@ bool Reassociate::runOnFunction(Function &F) {
++II;
}
// Make a copy of all the instructions to be redone so we can remove dead
// instructions.
SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
// Iterate over all instructions to be reevaluated and remove trivially dead
// instructions. If any operand of the trivially dead instruction becomes
// dead mark it for deletion as well. Continue this process until all
// trivially dead instructions have been removed.
while (!ToRedo.empty()) {
Instruction *I = ToRedo.pop_back_val();
if (isInstructionTriviallyDead(I))
RecursivelyEraseDeadInsts(I, ToRedo);
}
// Now that we have removed dead instructions, we can reoptimize the
// remaining instructions.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
if (isInstructionTriviallyDead(I))
EraseInst(I);
else
OptimizeInst(I);
// If the above optimizations produced new instructions to optimize or
// made modifications which need to be redone, do them now if they won't
// be handled later.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
// Only process instructions here if they will not be visited again
// later, either further down this block (still on the worklist) or in
// a basic block of higher rank; those are handled by the outer loops.
if ((I->getParent() != BI || !Worklist.count(I)) &&
RankMap[I->getParent()] <= RankMap[BI]) {
if (isInstructionTriviallyDead(I))
EraseInst(I);
else
OptimizeInst(I);
}
}
}
}
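As a reading aid, the two-part condition guarding the redo loop above can be restated as a single predicate. The following is only a sketch, not part of the commit: the helper name shouldProcessNow is made up, and it assumes Worklist holds the current block's not-yet-visited instructions and RankMap the block ranks computed by BuildRankMap.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Sketch only: restates the guard from the redo loop above. An instruction
// pulled off RedoInsts is handled immediately only when nothing later in the
// traversal will reach it anyway, i.e. it is not still pending in the current
// block's worklist and its parent block does not come later in the RPO order.
static bool shouldProcessNow(Instruction *I, BasicBlock *BI,
                             const SmallSet<Instruction *, 8> &Worklist,
                             const DenseMap<BasicBlock *, unsigned> &RankMap) {
  bool PendingInCurrentBlock = I->getParent() == BI && Worklist.count(I);
  bool VisitedLaterInRPO = RankMap.lookup(I->getParent()) > RankMap.lookup(BI);
  return !PendingInCurrentBlock && !VisitedLaterInRPO;
}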


@@ -0,0 +1,57 @@
; RUN: opt < %s -reassociate -S | FileCheck %s
; These tests make sure that, before an instruction is processed,
; any previous instructions have already been canonicalized.
define i32 @foo(i32 %in) {
; CHECK-LABEL: @foo
; CHECK-NEXT: %factor = mul i32 %in, -4
; CHECK-NEXT: %factor1 = mul i32 %in, 2
; CHECK-NEXT: %_3 = add i32 %factor, 1
; CHECK-NEXT: %_5 = add i32 %_3, %factor1
; CHECK-NEXT: ret i32 %_5
%_0 = add i32 %in, 1
%_1 = mul i32 %in, -2
%_2 = add i32 %_0, %_1
%_3 = add i32 %_1, %_2
%_4 = add i32 %_3, 1
%_5 = add i32 %in, %_3
ret i32 %_5
}
; CHECK-LABEL: @foo1
define void @foo1(float %in, i1 %cmp) {
wrapper_entry:
br label %foo1
for.body:
%0 = fadd float %in1, %in1
br label %foo1
foo1:
%_0 = fmul fast float %in, -3.000000e+00
%_1 = fmul fast float %_0, 3.000000e+00
%in1 = fadd fast float -3.000000e+00, %_1
%in1use = fadd fast float %in1, %in1
br label %for.body
}
; CHECK-LABEL: @foo2
define void @foo2(float %in, i1 %cmp) {
wrapper_entry:
br label %for.body
for.body:
; If the operands of the phi are scheduled for processing before
; foo1 is processed, the invariants of reassociate are not preserved.
%unused = phi float [%in1, %foo1], [undef, %wrapper_entry]
br label %foo1
foo1:
%_0 = fmul fast float %in, -3.000000e+00
%_1 = fmul fast float %_0, 3.000000e+00
%in1 = fadd fast float -3.000000e+00, %_1
%in1use = fadd fast float %in1, %in1
br label %for.body
}


@@ -1,8 +1,8 @@
; RUN: opt < %s -reassociate -S | FileCheck %s
; CHECK-LABEL: faddsubAssoc1
; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
; CHECK: fsub fast half [[TMP2]], [[TMP1]]
; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a
; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]]
; CHECK: fmul fast half [[TMP2]], 0xH4500
; CHECK: ret
; Input is A op (B op C)
define half @faddsubAssoc1(half %a, half %b) {


@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
; CHECK: %xor = xor i32 %x, 123
; CHECK: %xor1 = xor i32 %xor, %y
; CHECK: %xor = xor i32 %y, 123
; CHECK: %xor1 = xor i32 %xor, %x
; CHECK: ret i32 %xor1
}