diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index dc1ca5c291e0..9a98c44e4ec4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3656,6 +3656,16 @@ void LoopVectorizationLegality::collectLoopUniforms() { // Start with the conditional branch and walk up the block. Worklist.push_back(Latch->getTerminator()->getOperand(0)); + // Also add all consecutive pointer values; these values will be uniform + // after vectorization (and subsequent cleanup) and, until revectorization is + // supported, all dependencies must also be uniform. + for (Loop::block_iterator B = TheLoop->block_begin(), + BE = TheLoop->block_end(); B != BE; ++B) + for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); + I != IE; ++I) + if (I->getType()->isPointerTy() && isConsecutivePtr(I)) + Worklist.insert(Worklist.end(), I->op_begin(), I->op_end()); + while (Worklist.size()) { Instruction *I = dyn_cast(Worklist.back()); Worklist.pop_back(); diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg new file mode 100644 index 000000000000..2e463005586f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll new file mode 100644 index 000000000000..6cd9c4d610b5 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] } + +@global_data = external global %struct.GlobalData, align 16 +@ntimes = external hidden unnamed_addr global i32, align 4 + +define signext i32 @s173() #0 { +entry: + %0 = load i32* @ntimes, align 4 + %cmp21 = icmp sgt i32 %0, 0 + br i1 %cmp21, label %for.cond1.preheader, label %for.end12 + +for.cond1.preheader: ; preds = %for.end, %entry + %nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv + %1 = load float* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv + %2 = load float* %arrayidx5, align 4 + %add = fadd float %1, %2 + %3 = add nsw i64 %indvars.iv, 16000 + %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3 + store float %add, float* %arrayidx8, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label %for.end, label %for.body3 + +for.end: ; preds = %for.body3 + %inc11 = add nsw i32 %nl.022, 1 + %4 = load i32* @ntimes, align 4 + %mul = mul nsw i32 %4, 10 + %cmp = icmp slt i32 %inc11, %mul + br i1 %cmp, label %for.cond1.preheader, label %for.end12 + +for.end12: ; preds = %for.end, %entry + ret i32 0 + +; CHECK-LABEL: @s173 +; CHECK: load <4 x float>* +; CHECK: add i64 %index, 16000 +; CHECK: ret i32 0 +} + +attributes #0 = { nounwind } +