From 0dd463facf61ad7142a2690acb4f61c336c892d3 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Wed, 19 Mar 2014 19:27:24 +0000 Subject: [PATCH] Support for generating vectors for loads with -1 stride This patch enables vectorization of loops containing backward array traversal (array stride is -1). Contributed-by: Chris Jenneisch llvm-svn: 204257 --- polly/include/polly/CodeGen/BlockGenerators.h | 10 ++++- polly/lib/CodeGen/BlockGenerators.cpp | 30 ++++++++++++--- .../CodeGen/simple_vec_stride_negative_one.ll | 38 +++++++++++++++++++ 3 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 polly/test/Isl/CodeGen/simple_vec_stride_negative_one.ll diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h index 759a104eac44..2ee009057a6e 100644 --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -265,7 +265,15 @@ private: /// %vector_ptr= bitcast double* %p to <4 x double>* /// %vec_full = load <4 x double>* %vector_ptr /// - Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap); + /// @param NegativeStride This is used to indicate a -1 stride. In such + /// a case we load the end of a base address and + /// shuffle the accesses in reverse order into the + /// vector. By default we would do only positive + /// strides. 
+ /// + Value *generateStrideOneLoad(const LoadInst *Load, + VectorValueMapT &ScalarMaps, + bool NegativeStride); /// @brief Load a vector initialized from a single scalar in memory /// diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 1ea5608a5619..0a9c1736abff 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -439,12 +439,18 @@ Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) { return PointerType::getUnqual(VectorType); } -Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, - ValueMapT &BBMap) { +Value * +VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, + VectorValueMapT &ScalarMaps, + bool NegativeStride = false) { + unsigned VectorWidth = getVectorWidth(); const Value *Pointer = Load->getPointerOperand(); - Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); - Value *NewPointer = - getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); + Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); + unsigned Offset = NegativeStride ? 
VectorWidth - 1 : 0; + + Value *NewPointer = NULL; + NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset], + VLTS[Offset], getLoopForInst(Load)); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); LoadInst *VecLoad = @@ -452,6 +458,16 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, if (!Aligned) VecLoad->setAlignment(8); + if (NegativeStride) { + SmallVector<Constant *, 16> Indices; + for (int i = VectorWidth - 1; i >= 0; i--) + Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i)); + Constant *SV = llvm::ConstantVector::get(Indices); + Value *RevVecLoad = Builder.CreateShuffleVector( + VecLoad, VecLoad, SV, Load->getName() + "_reverse"); + return RevVecLoad; + } + return VecLoad; } @@ -516,7 +532,9 @@ void VectorBlockGenerator::generateLoad(const LoadInst *Load, if (Access.isStrideZero(isl_map_copy(Schedule))) NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]); else if (Access.isStrideOne(isl_map_copy(Schedule))) - NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]); + NewLoad = generateStrideOneLoad(Load, ScalarMaps); + else if (Access.isStrideX(isl_map_copy(Schedule), -1)) + NewLoad = generateStrideOneLoad(Load, ScalarMaps, true); else NewLoad = generateUnknownStrideLoad(Load, ScalarMaps); diff --git a/polly/test/Isl/CodeGen/simple_vec_stride_negative_one.ll b/polly/test/Isl/CodeGen/simple_vec_stride_negative_one.ll new file mode 100644 index 000000000000..9741298a4dbe --- /dev/null +++ b/polly/test/Isl/CodeGen/simple_vec_stride_negative_one.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-codegen-isl -polly-codegen-scev %vector-opt -S < %s | FileCheck %s + +; ModuleID = 'reverse.c' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;int A[100]; +;void foo() { +; for (int i=3; i >= 0; i--) +; A[i]+=1; +;} + + +@A = common global [100 x i32] zeroinitializer, align 16 + +; Function Attrs: nounwind uwtable +define void @foo() #0 { 
+entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [100 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %1 = trunc i64 %indvars.iv to i32 + %cmp = icmp sgt i32 %1, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; CHECK: @foo +; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>* +; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> [[LOAD]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>