Support for generating vectors for loads with -1 stride

This patch enables vectorization of loops that traverse arrays backward
(i.e. with an array stride of -1).

Contributed-by: Chris Jenneisch <chrisj@codeaurora.org>

llvm-svn: 204257
@@ -265,7 +265,15 @@ private:
   ///        %vector_ptr= bitcast double* %p to <4 x double>*
   ///        %vec_full = load <4 x double>* %vector_ptr
   ///
-  Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap);
+  /// @param NegativeStride This is used to indicate a -1 stride. In such
+  ///                       a case we load the end of a base address and
+  ///                       shuffle the accesses in reverse order into the
+  ///                       vector. By default we would do only positive
+  ///                       strides.
+  ///
+  Value *generateStrideOneLoad(const LoadInst *Load,
+                               VectorValueMapT &ScalarMaps,
+                               bool NegativeStride);
 
   /// @brief Load a vector initialized from a single scalar in memory
   ///
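To make the new @param documentation concrete: for a -1 stride the code still emits a single wide, consecutive load, but it starts at the lowest address touched by the vector lanes and then reverses the lanes with a shuffle. A sketch of the intended IR for a 4-wide <4 x i32> access (not part of the patch; value names are hypothetical):

  ; %lowest_addr is the pointer computed for the last vector lane, which
  ; touches the lowest address when the stride is -1.
  %vector_ptr  = bitcast i32* %lowest_addr to <4 x i32>*
  %vec_full    = load <4 x i32>* %vector_ptr
  %vec_reverse = shufflevector <4 x i32> %vec_full, <4 x i32> %vec_full,
                               <4 x i32> <i32 3, i32 2, i32 1, i32 0>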
@@ -439,12 +439,18 @@ Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
   return PointerType::getUnqual(VectorType);
 }
 
-Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
-                                                   ValueMapT &BBMap) {
+Value *
+VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
+                                            VectorValueMapT &ScalarMaps,
+                                            bool NegativeStride = false) {
+  unsigned VectorWidth = getVectorWidth();
   const Value *Pointer = Load->getPointerOperand();
-  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
-  Value *NewPointer =
-      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
+  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
+  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
+
+  Value *NewPointer = NULL;
+  NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
+                           VLTS[Offset], getLoopForInst(Load));
   Value *VectorPtr =
       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
   LoadInst *VecLoad =
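The Offset computation above is the heart of the change: with a -1 stride, lane VectorWidth - 1 of the access touches the lowest address, so the pointer recorded for that lane (ScalarMaps[Offset]) is the base of the consecutive block the wide load reads. A worked example, assuming a 4-wide access to A[i] for i = 7, 6, 5, 4 (hypothetical values, not from the patch):

  @A = global [100 x i32] zeroinitializer
  ; Lane 3 accesses A[4], the lowest address, so the wide load starts there.
  %base = getelementptr [100 x i32]* @A, i64 0, i64 4
  %vptr = bitcast i32* %base to <4 x i32>*
  %wide = load <4 x i32>* %vptr        ; lanes hold A[4], A[5], A[6], A[7]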
@@ -452,6 +458,16 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
   if (!Aligned)
     VecLoad->setAlignment(8);
 
+  if (NegativeStride) {
+    SmallVector<Constant *, 16> Indices;
+    for (int i = VectorWidth - 1; i >= 0; i--)
+      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
+    Constant *SV = llvm::ConstantVector::get(Indices);
+    Value *RevVecLoad = Builder.CreateShuffleVector(
+        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
+    return RevVecLoad;
+  }
+
   return VecLoad;
 }
 
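The loop in the NegativeStride block builds the constant mask <VectorWidth-1, ..., 1, 0>; since both shufflevector operands are the same wide load, the shuffle simply permutes its lanes into reverse order. For VectorWidth = 4 (continuing the hypothetical example above):

  ; <A[4], A[5], A[6], A[7]>  becomes  <A[7], A[6], A[5], A[4]>
  %load_reverse = shufflevector <4 x i32> %wide, <4 x i32> %wide,
                                <4 x i32> <i32 3, i32 2, i32 1, i32 0>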
@@ -516,7 +532,9 @@ void VectorBlockGenerator::generateLoad(const LoadInst *Load,
   if (Access.isStrideZero(isl_map_copy(Schedule)))
     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
   else if (Access.isStrideOne(isl_map_copy(Schedule)))
-    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
+    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
+  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
+    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
   else
     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
 
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-codegen-isl -polly-codegen-scev %vector-opt -S < %s | FileCheck %s
+
+; ModuleID = 'reverse.c'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;int A[100];
+;void foo() {
+;  for (int i=3; i >= 0; i--)
+;    A[i]+=1;
+;}
+
+
+@A = common global [100 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [100 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %1 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %1, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; CHECK: @foo
+; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>*
+; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> [[LOAD]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>