forked from OSchip/llvm-project
[InstCombine] scalarizePHI should not assume the code it sees has been CSE'd
scalarizePHI only looked for phis that have exactly two uses: the "latch" use and an extract. Unfortunately, we cannot assume that all equivalent extracts have been CSE'd, since InstCombine itself may create an extract that duplicates an existing one. This change extends scalarizePHI to handle several distinct extracts at the same index. This should fix at least some of the performance regressions from PR27988. Differential Revision: http://reviews.llvm.org/D20983 llvm-svn: 271961
This commit is contained in:
parent
4bc848047b
commit
a0c6ae02a5
|
@ -62,21 +62,31 @@ static bool cheapToScalarize(Value *V, bool isConstant) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have a PHI node with a vector type that has only 2 uses: feed
|
// If we have a PHI node with a vector type that is only used to feed
|
||||||
// itself and be an operand of extractelement at a constant location,
|
// itself and be an operand of extractelement at a constant location,
|
||||||
// try to replace the PHI of the vector type with a PHI of a scalar type.
|
// try to replace the PHI of the vector type with a PHI of a scalar type.
|
||||||
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
|
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
|
||||||
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
|
SmallVector<Instruction *, 2> Extracts;
|
||||||
if (!PN->hasNUses(2))
|
// The users we want the PHI to have are:
|
||||||
return nullptr;
|
// 1) The EI ExtractElement (we already know this)
|
||||||
|
// 2) Possibly more ExtractElements with the same index.
|
||||||
|
// 3) Another operand, which will feed back into the PHI.
|
||||||
|
Instruction *PHIUser = nullptr;
|
||||||
|
for (auto U : PN->users()) {
|
||||||
|
if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
|
||||||
|
if (EI.getIndexOperand() == EU->getIndexOperand())
|
||||||
|
Extracts.push_back(EU);
|
||||||
|
else
|
||||||
|
return nullptr;
|
||||||
|
} else if (!PHIUser) {
|
||||||
|
PHIUser = cast<Instruction>(U);
|
||||||
|
} else {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If so, it's known at this point that one operand is PHI and the other is
|
if (!PHIUser)
|
||||||
// an extractelement node. Find the PHI user that is not the extractelement
|
return nullptr;
|
||||||
// node.
|
|
||||||
auto iu = PN->user_begin();
|
|
||||||
Instruction *PHIUser = dyn_cast<Instruction>(*iu);
|
|
||||||
if (PHIUser == cast<Instruction>(&EI))
|
|
||||||
PHIUser = cast<Instruction>(*(++iu));
|
|
||||||
|
|
||||||
// Verify that this PHI user has one use, which is the PHI itself,
|
// Verify that this PHI user has one use, which is the PHI itself,
|
||||||
// and that it is a binary operation which is cheap to scalarize.
|
// and that it is a binary operation which is cheap to scalarize.
|
||||||
|
@ -126,7 +136,11 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
|
||||||
scalarPHI->addIncoming(newEI, inBB);
|
scalarPHI->addIncoming(newEI, inBB);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return replaceInstUsesWith(EI, scalarPHI);
|
|
||||||
|
for (auto E : Extracts)
|
||||||
|
replaceInstUsesWith(*E, scalarPHI);
|
||||||
|
|
||||||
|
return &EI;
|
||||||
}
|
}
|
||||||
|
|
||||||
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||||
|
|
||||||
define void @f(i64 %val, i32 %limit, i32 *%ptr) {
|
define void @f(i64 %val, i32 %limit, i32 *%ptr) {
|
||||||
;CHECK: %0 = trunc i64
|
; CHECK-LABEL: @f
|
||||||
;CHECK: %1 = phi i32
|
; CHECK: %0 = trunc i64 %val to i32
|
||||||
|
; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
|
||||||
entry:
|
entry:
|
||||||
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
|
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
|
||||||
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
|
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||||
|
@ -25,18 +26,72 @@ ret:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @copy(i64 %val, i32 %limit, i32 *%ptr) {
|
||||||
|
; CHECK-LABEL: @copy
|
||||||
|
; CHECK: %0 = trunc i64 %val to i32
|
||||||
|
; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
|
||||||
|
entry:
|
||||||
|
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
|
||||||
|
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||||
|
%0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
|
||||||
|
%1 = trunc <16 x i64> %0 to <16 x i32>
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
|
||||||
|
%elt = extractelement <16 x i32> %2, i32 0
|
||||||
|
%eltcopy = extractelement <16 x i32> %2, i32 0
|
||||||
|
%end = icmp ult i32 %elt, %limit
|
||||||
|
%3 = add i32 10, %eltcopy
|
||||||
|
%4 = sext i32 %elt to i64
|
||||||
|
%5 = getelementptr i32, i32* %ptr, i64 %4
|
||||||
|
store i32 %3, i32* %5
|
||||||
|
%inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||||
|
br i1 %end, label %loop, label %ret
|
||||||
|
|
||||||
|
ret:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
|
||||||
|
; CHECK-LABEL: @nocopy
|
||||||
|
; CHECK-NOT: phi i32
|
||||||
|
; CHECK: phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
|
||||||
|
entry:
|
||||||
|
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
|
||||||
|
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
|
||||||
|
%0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
|
||||||
|
%1 = trunc <16 x i64> %0 to <16 x i32>
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
|
||||||
|
%elt = extractelement <16 x i32> %2, i32 0
|
||||||
|
%eltcopy = extractelement <16 x i32> %2, i32 1
|
||||||
|
%end = icmp ult i32 %elt, %limit
|
||||||
|
%3 = add i32 10, %eltcopy
|
||||||
|
%4 = sext i32 %elt to i64
|
||||||
|
%5 = getelementptr i32, i32* %ptr, i64 %4
|
||||||
|
store i32 %3, i32* %5
|
||||||
|
%inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||||
|
br i1 %end, label %loop, label %ret
|
||||||
|
|
||||||
|
ret:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
define i1 @g(<3 x i32> %input_2) {
|
define i1 @g(<3 x i32> %input_2) {
|
||||||
; CHECK: extractelement
|
; CHECK-LABEL: @g
|
||||||
|
; CHECK: extractelement <3 x i32> %input_2, i32 0
|
||||||
entry:
|
entry:
|
||||||
br label %for.cond
|
br label %for.cond
|
||||||
|
|
||||||
for.cond:
|
for.cond:
|
||||||
; CHECK: phi i32
|
|
||||||
%input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
|
%input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
|
||||||
%input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
|
%input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
|
||||||
br i1 undef, label %for.end, label %for.body
|
br i1 undef, label %for.end, label %for.body
|
||||||
|
|
||||||
; CHECK: extractelement
|
; CHECK: extractelement <3 x i32> %{{.*}}, i32 0
|
||||||
for.body:
|
for.body:
|
||||||
%dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
|
%dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
|
||||||
%sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
|
%sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
|
||||||
|
|
|
@ -43,7 +43,7 @@ for.end12: ; preds = %for.end, %entry
|
||||||
|
|
||||||
; CHECK-LABEL: @s173
|
; CHECK-LABEL: @s173
|
||||||
; CHECK: load <4 x float>, <4 x float>*
|
; CHECK: load <4 x float>, <4 x float>*
|
||||||
; CHECK: add nsw i64 %.lhs, 16000
|
; CHECK: add nsw i64 %1, 16000
|
||||||
; CHECK: ret i32 0
|
; CHECK: ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue