forked from OSchip/llvm-project
CodeGen: Generate scalar code if vector instructions cannot be generated
This fixes two crashes that appeared in the following cases: - A load of a non-vectorizable type (e.g. float**) - An instruction that is not vectorizable (e.g. call) llvm-svn: 154586
This commit is contained in:
parent
2fca3c2c87
commit
4cb5461dae
|
@ -569,6 +569,12 @@ private:
|
|||
void copyStore(const StoreInst *Store, ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps);
|
||||
|
||||
void copyInstScalarized(const Instruction *Inst, ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps);
|
||||
|
||||
bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps);
|
||||
|
||||
bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
|
||||
|
||||
void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap,
|
||||
|
@ -680,18 +686,16 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
|
|||
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
Value *NewLoad;
|
||||
|
||||
if (GroupedUnrolling) {
|
||||
if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
|
||||
for (int i = 0; i < getVectorWidth(); i++)
|
||||
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
|
||||
GlobalMaps[i]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
MemoryAccess &Access = Statement.getAccessFor(Load);
|
||||
|
||||
Value *NewLoad;
|
||||
if (Access.isStrideZero(isl_set_copy(Domain)))
|
||||
NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
|
||||
else if (Access.isStrideOne(isl_set_copy(Domain)))
|
||||
|
@ -772,6 +776,63 @@ bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
bool HasVectorOperand = false;
|
||||
int VectorWidth = getVectorWidth();
|
||||
|
||||
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
||||
OE = Inst->op_end(); OI != OE; ++OI) {
|
||||
ValueMapT::iterator VecOp = VectorMap.find(*OI);
|
||||
|
||||
if (VecOp == VectorMap.end())
|
||||
continue;
|
||||
|
||||
HasVectorOperand = true;
|
||||
Value *NewVector = VecOp->second;
|
||||
|
||||
for (int i = 0; i < VectorWidth; ++i) {
|
||||
ValueMapT &SM = ScalarMaps[i];
|
||||
|
||||
// If there is one scalar extracted, all scalar elements should have
|
||||
// already been extracted by the code here. So no need to check for the
|
||||
// existance of all of them.
|
||||
if (SM.count(*OI))
|
||||
break;
|
||||
|
||||
SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
|
||||
}
|
||||
}
|
||||
|
||||
return HasVectorOperand;
|
||||
}
|
||||
|
||||
void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
bool HasVectorOperand;
|
||||
int VectorWidth = getVectorWidth();
|
||||
|
||||
HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
|
||||
|
||||
for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
|
||||
copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
|
||||
|
||||
if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
|
||||
return;
|
||||
|
||||
// Make the result available as vector value.
|
||||
VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
|
||||
Value *Vector = UndefValue::get(VectorType);
|
||||
|
||||
for (int i = 0; i < VectorWidth; i++)
|
||||
Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
|
||||
Builder.getInt32(i));
|
||||
|
||||
VectorMap[Inst] = Vector;
|
||||
}
|
||||
|
||||
int VectorBlockGenerator::getVectorWidth() {
|
||||
return GlobalMaps.size();
|
||||
}
|
||||
|
@ -805,11 +866,11 @@ void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
|
|||
return;
|
||||
}
|
||||
|
||||
llvm_unreachable("Cannot issue vector code for this instruction");
|
||||
// Fallthrough: We generate scalar instructions if we don't know how to
|
||||
// generate vector code.
|
||||
}
|
||||
|
||||
for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
|
||||
copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
|
||||
copyInstScalarized(Inst, VectorMap, ScalarMaps);
|
||||
}
|
||||
|
||||
void VectorBlockGenerator::copyBB() {
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [1024 x float] zeroinitializer, align 16
|
||||
@B = common global [1024 x float] zeroinitializer, align 16
|
||||
|
||||
declare float @foo(float) readnone
|
||||
|
||||
define void @simple_vec_call() nounwind {
|
||||
entry:
|
||||
br label %body
|
||||
|
||||
body:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
|
||||
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
|
||||
%value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
|
||||
%result = tail call float @foo(float %value) nounwind
|
||||
store float %result, float* %scevgep, align 4
|
||||
%indvar_next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar_next, 4
|
||||
br i1 %exitcond, label %return, label %body
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
|
||||
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
|
||||
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
|
||||
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
|
||||
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
|
||||
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
|
||||
; CHECK: %p_result = tail call float @foo(float %0) nounwind
|
||||
; CHECK: %p_result4 = tail call float @foo(float %1) nounwind
|
||||
; CHECK: %p_result5 = tail call float @foo(float %2) nounwind
|
||||
; CHECK: %p_result6 = tail call float @foo(float %3) nounwind
|
||||
; CHECK: %4 = insertelement <4 x float> undef, float %p_result, i32 0
|
||||
; CHECK: %5 = insertelement <4 x float> %4, float %p_result4, i32 1
|
||||
; CHECK: %6 = insertelement <4 x float> %5, float %p_result5, i32 2
|
||||
; CHECK: %7 = insertelement <4 x float> %6, float %p_result6, i32 3
|
||||
; CHECK: %vector_ptr = bitcast float* %p_scevgep to <4 x float>*
|
||||
; CHECK: store <4 x float> %7, <4 x float>* %vector_ptr, align 8
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [1024 x float] zeroinitializer, align 16
|
||||
@B = common global [1024 x float**] zeroinitializer, align 16
|
||||
|
||||
declare float** @foo(float) readnone
|
||||
|
||||
define void @simple_vec_call() nounwind {
|
||||
entry:
|
||||
br label %body
|
||||
|
||||
body:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
|
||||
%scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
|
||||
%value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
|
||||
%result = tail call float** @foo(float %value) nounwind
|
||||
store float** %result, float*** %scevgep, align 4
|
||||
%indvar_next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar_next, 4
|
||||
br i1 %exitcond, label %return, label %body
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
|
||||
; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
|
||||
; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
|
||||
; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
|
||||
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
|
||||
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
|
||||
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
|
||||
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
|
||||
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
|
||||
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
|
||||
; CHECK: %p_result = tail call float** @foo(float %0) nounwind
|
||||
; CHECK: %p_result4 = tail call float** @foo(float %1) nounwind
|
||||
; CHECK: %p_result5 = tail call float** @foo(float %2) nounwind
|
||||
; CHECK: %p_result6 = tail call float** @foo(float %3) nounwind
|
||||
; CHECK: store float** %p_result, float*** %p_scevgep, align 4
|
||||
; CHECK: store float** %p_result4, float*** %p_scevgep1, align 4
|
||||
; CHECK: store float** %p_result5, float*** %p_scevgep2, align 4
|
||||
; CHECK: store float** %p_result6, float*** %p_scevgep3, align 4
|
|
@ -0,0 +1,38 @@
|
|||
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [1024 x float**] zeroinitializer, align 16
|
||||
@B = common global [1024 x float**] zeroinitializer, align 16
|
||||
|
||||
declare float @foo(float) readnone
|
||||
|
||||
define void @simple_vec_call() nounwind {
|
||||
entry:
|
||||
br label %body
|
||||
|
||||
body:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
|
||||
%scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
|
||||
%value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
|
||||
store float** %value, float*** %scevgep, align 4
|
||||
%indvar_next = add i64 %indvar, 1
|
||||
%exitcond = icmp eq i64 %indvar_next, 4
|
||||
br i1 %exitcond, label %return, label %body
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
|
||||
; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
|
||||
; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
|
||||
; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
|
||||
; CHECK: %value_p_scalar_ = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
|
||||
; CHECK: %value_p_scalar_4 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
|
||||
; CHECK: %value_p_scalar_5 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
|
||||
; CHECK: %value_p_scalar_6 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
|
||||
; CHECK: store float** %value_p_scalar_, float*** %p_scevgep, align 4
|
||||
; CHECK: store float** %value_p_scalar_4, float*** %p_scevgep1, align 4
|
||||
; CHECK: store float** %value_p_scalar_5, float*** %p_scevgep2, align 4
|
||||
; CHECK: store float** %value_p_scalar_6, float*** %p_scevgep3, align 4
|
Loading…
Reference in New Issue