forked from OSchip/llvm-project
When we vectorize across multiple basic blocks we may vectorize PHINodes that create a cycle. We already break the cycle on phi-nodes, but arithmetic operations are still duplicated. This patch adds code that checks if the operation that we are vectorizing was vectorized during the visit of the operands and uses this value if it can.
llvm-svn: 186883
This commit is contained in:
parent
6c655f237e
commit
cf0dcdc71c
|
@ -264,12 +264,16 @@ private:
|
||||||
/// This is the recursive part of buildTree.
|
/// This is the recursive part of buildTree.
|
||||||
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
|
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
|
||||||
|
|
||||||
/// Vectorizer a single entry in the tree.
|
/// Vectorize a single entry in the tree.
|
||||||
Value *vectorizeTree(TreeEntry *E);
|
Value *vectorizeTree(TreeEntry *E);
|
||||||
|
|
||||||
/// Vectorizer a single entry in the tree, starting in \p VL.
|
/// Vectorize a single entry in the tree, starting in \p VL.
|
||||||
Value *vectorizeTree(ArrayRef<Value *> VL);
|
Value *vectorizeTree(ArrayRef<Value *> VL);
|
||||||
|
|
||||||
|
/// \returns the pointer to the vectorized value if \p VL is already
|
||||||
|
/// vectorized, or NULL. This may happen in cycles.
|
||||||
|
Value *alreadyVectorized(ArrayRef<Value *> VL);
|
||||||
|
|
||||||
/// \brief Take the pointer operand from the Load/Store instruction.
|
/// \brief Take the pointer operand from the Load/Store instruction.
|
||||||
/// \returns NULL if this is not a valid Load/Store instruction.
|
/// \returns NULL if this is not a valid Load/Store instruction.
|
||||||
static Value *getPointerOperand(Value *I);
|
static Value *getPointerOperand(Value *I);
|
||||||
|
@ -1117,6 +1121,16 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
|
||||||
return Vec;
|
return Vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) {
|
||||||
|
if (ScalarToTreeEntry.count(VL[0])) {
|
||||||
|
int Idx = ScalarToTreeEntry[VL[0]];
|
||||||
|
TreeEntry *En = &VectorizableTree[Idx];
|
||||||
|
if (En->isSame(VL) && En->VectorizedValue)
|
||||||
|
return En->VectorizedValue;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
|
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
|
||||||
if (ScalarToTreeEntry.count(VL[0])) {
|
if (ScalarToTreeEntry.count(VL[0])) {
|
||||||
int Idx = ScalarToTreeEntry[VL[0]];
|
int Idx = ScalarToTreeEntry[VL[0]];
|
||||||
|
@ -1206,6 +1220,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
|
|
||||||
Builder.SetInsertPoint(getLastInstruction(E->Scalars));
|
Builder.SetInsertPoint(getLastInstruction(E->Scalars));
|
||||||
Value *InVec = vectorizeTree(INVL);
|
Value *InVec = vectorizeTree(INVL);
|
||||||
|
|
||||||
|
if (Value *V = alreadyVectorized(E->Scalars))
|
||||||
|
return V;
|
||||||
|
|
||||||
CastInst *CI = dyn_cast<CastInst>(VL0);
|
CastInst *CI = dyn_cast<CastInst>(VL0);
|
||||||
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
|
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
|
||||||
E->VectorizedValue = V;
|
E->VectorizedValue = V;
|
||||||
|
@ -1222,9 +1240,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
Builder.SetInsertPoint(getLastInstruction(E->Scalars));
|
Builder.SetInsertPoint(getLastInstruction(E->Scalars));
|
||||||
Value *L = vectorizeTree(LHSV);
|
Value *L = vectorizeTree(LHSV);
|
||||||
Value *R = vectorizeTree(RHSV);
|
Value *R = vectorizeTree(RHSV);
|
||||||
Value *V;
|
|
||||||
|
if (Value *V = alreadyVectorized(E->Scalars))
|
||||||
|
return V;
|
||||||
|
|
||||||
CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
|
CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
|
||||||
|
Value *V;
|
||||||
if (Opcode == Instruction::FCmp)
|
if (Opcode == Instruction::FCmp)
|
||||||
V = Builder.CreateFCmp(P0, L, R);
|
V = Builder.CreateFCmp(P0, L, R);
|
||||||
else
|
else
|
||||||
|
@ -1245,6 +1266,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
Value *Cond = vectorizeTree(CondVec);
|
Value *Cond = vectorizeTree(CondVec);
|
||||||
Value *True = vectorizeTree(TrueVec);
|
Value *True = vectorizeTree(TrueVec);
|
||||||
Value *False = vectorizeTree(FalseVec);
|
Value *False = vectorizeTree(FalseVec);
|
||||||
|
|
||||||
|
if (Value *V = alreadyVectorized(E->Scalars))
|
||||||
|
return V;
|
||||||
|
|
||||||
Value *V = Builder.CreateSelect(Cond, True, False);
|
Value *V = Builder.CreateSelect(Cond, True, False);
|
||||||
E->VectorizedValue = V;
|
E->VectorizedValue = V;
|
||||||
return V;
|
return V;
|
||||||
|
@ -1281,6 +1306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order");
|
assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Value *V = alreadyVectorized(E->Scalars))
|
||||||
|
return V;
|
||||||
|
|
||||||
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
|
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
|
||||||
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
|
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
|
||||||
E->VectorizedValue = V;
|
E->VectorizedValue = V;
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.9.0"
|
||||||
|
|
||||||
|
|
||||||
|
; int foo(int *A) {
|
||||||
|
; int r = A[0], g = A[1], b = A[2];
|
||||||
|
; for (int i=0; i < A[13]; i++)
|
||||||
|
; r*=18; g*=19; b*=12;
|
||||||
|
; A[0] = r; A[1] = g; A[2] = b;
|
||||||
|
; }
|
||||||
|
|
||||||
|
;CHECK-LABEL: @foo
|
||||||
|
;CHECK: bitcast i32* %A to <3 x i32>*
|
||||||
|
;CHECK-NEXT: load <3 x i32>
|
||||||
|
;CHECK: phi <3 x i32>
|
||||||
|
;CHECK-NEXT: mul <3 x i32>
|
||||||
|
;CHECK-NOT: mul
|
||||||
|
;CHECK: phi <3 x i32>
|
||||||
|
;CHECK: bitcast i32* %A to <3 x i32>*
|
||||||
|
;CHECK-NEXT: store <3 x i32>
|
||||||
|
;CHECK-NEXT:ret i32 undef
|
||||||
|
define i32 @foo(i32* nocapture %A) {
|
||||||
|
entry:
|
||||||
|
%0 = load i32* %A, align 4
|
||||||
|
%arrayidx1 = getelementptr inbounds i32* %A, i64 1
|
||||||
|
%1 = load i32* %arrayidx1, align 4
|
||||||
|
%arrayidx2 = getelementptr inbounds i32* %A, i64 2
|
||||||
|
%2 = load i32* %arrayidx2, align 4
|
||||||
|
%arrayidx3 = getelementptr inbounds i32* %A, i64 13
|
||||||
|
%3 = load i32* %arrayidx3, align 4
|
||||||
|
%cmp18 = icmp sgt i32 %3, 0
|
||||||
|
br i1 %cmp18, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.body: ; preds = %entry, %for.body
|
||||||
|
%i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||||
|
%b.021 = phi i32 [ %mul5, %for.body ], [ %2, %entry ]
|
||||||
|
%g.020 = phi i32 [ %mul4, %for.body ], [ %1, %entry ]
|
||||||
|
%r.019 = phi i32 [ %mul, %for.body ], [ %0, %entry ]
|
||||||
|
%mul = mul nsw i32 %r.019, 18
|
||||||
|
%mul4 = mul nsw i32 %g.020, 19
|
||||||
|
%mul5 = mul nsw i32 %b.021, 12
|
||||||
|
%inc = add nsw i32 %i.022, 1
|
||||||
|
%cmp = icmp slt i32 %inc, %3
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body, %entry
|
||||||
|
%b.0.lcssa = phi i32 [ %2, %entry ], [ %mul5, %for.body ]
|
||||||
|
%g.0.lcssa = phi i32 [ %1, %entry ], [ %mul4, %for.body ]
|
||||||
|
%r.0.lcssa = phi i32 [ %0, %entry ], [ %mul, %for.body ]
|
||||||
|
store i32 %r.0.lcssa, i32* %A, align 4
|
||||||
|
store i32 %g.0.lcssa, i32* %arrayidx1, align 4
|
||||||
|
store i32 %b.0.lcssa, i32* %arrayidx2, align 4
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue