diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c44a34fe6673..955134077bb6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -585,6 +585,11 @@ protected:
   /// Returns (and creates if needed) the trip count of the widened loop.
   Value *getOrCreateVectorTripCount(Loop *NewLoop);
 
+  /// Returns a bitcasted value to the requested vector type.
+  /// Also handles bitcasts of vector<float> <-> vector<pointer> types.
+  Value *createBitOrPointerCast(Value *V, VectorType *DstVTy,
+                                const DataLayout &DL);
+
   /// Emit a bypass check to see if the vector trip count is zero, including if
   /// it overflows.
   void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
@@ -2866,6 +2871,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
   if (Instr != Group->getInsertPos())
     return;
 
+  const DataLayout &DL = Instr->getModule()->getDataLayout();
   Value *Ptr = getPointerOperand(Instr);
 
   // Prepare for the vector type of the interleaved load/store.
@@ -2940,7 +2946,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
         // If this member has different type, cast the result type.
         if (Member->getType() != ScalarTy) {
           VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
-          StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
+          StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
         }
 
         if (Group->isReverse())
@@ -2969,9 +2975,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
       if (Group->isReverse())
         StoredVec = reverseVector(StoredVec);
 
-      // If this member has different type, cast it to an unified type.
+      // If this member has different type, cast it to a unified type.
+
       if (StoredVec->getType() != SubVT)
-        StoredVec = Builder.CreateBitOrPointerCast(StoredVec, SubVT);
+        StoredVec = createBitOrPointerCast(StoredVec, SubVT, DL);
 
       StoredVecs.push_back(StoredVec);
     }
@@ -3272,6 +3279,36 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
   return VectorTripCount;
 }
 
+Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
+                                                   const DataLayout &DL) {
+  // Verify that V is a vector type with same number of elements as DstVTy.
+  unsigned VF = DstVTy->getNumElements();
+  VectorType *SrcVecTy = cast<VectorType>(V->getType());
+  assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
+  Type *SrcElemTy = SrcVecTy->getElementType();
+  Type *DstElemTy = DstVTy->getElementType();
+  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
+         "Vector elements must have same size");
+
+  // Do a direct cast if element types are castable.
+  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
+    return Builder.CreateBitOrPointerCast(V, DstVTy);
+  }
+  // V cannot be directly cast to the desired vector type.
+  // May happen when V is a floating point vector but DstVTy is a vector of
+  // pointers or vice-versa. Handle this using a two-step bitcast using an
+  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
+  assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
+         "Only one type should be a pointer type");
+  assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
+         "Only one type should be a floating point type");
+  Type *IntTy =
+      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
+  VectorType *VecIntTy = VectorType::get(IntTy, VF);
+  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
+  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
+}
+
 void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
                                                          BasicBlock *Bypass) {
   Value *Count = getOrCreateTripCount(L);
diff --git a/llvm/test/CodeGen/AArch64/loopvectorize_pr33804_double.ll b/llvm/test/CodeGen/AArch64/loopvectorize_pr33804_double.ll
new file mode 100644
index 000000000000..7468784f38dd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/loopvectorize_pr33804_double.ll
@@ -0,0 +1,66 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+
+; These tests check that we don't crash if the vectorizer decides to cast
+; a double value to be stored into a pointer type or vice-versa.
+
+; This test checks when a double value is stored into a pointer type.
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+source_filename = "bugpoint-output-26dbd81.bc"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%struct.CvNode1D = type { double, %struct.CvNode1D* }
+
+; CHECK-LABEL: @cvCalcEMD2
+; CHECK: vector.body
+; CHECK: store <{{[0-9]+}} x %struct.CvNode1D*>
+define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.body14.i.i
+
+for.body14.i.i: ; preds = %for.body14.i.i, %entry
+  %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0
+  store double 0xC415AF1D80000000, double* %val.i.i, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1
+  store %struct.CvNode1D* undef, %struct.CvNode1D** %next19.i.i, align 4
+  %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
+  %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
+  br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
+
+for.end22.i.i: ; preds = %for.body14.i.i
+  unreachable
+}
+
+; This test checks the case where a pointer value is stored into a double type.
+
+%struct.CvNode1D2 = type { %struct.CvNode1D2*, double }
+
+; CHECK-LABEL: @cvCalcEMD2_2
+; CHECK: vector.body
+; CHECK: store <{{[0-9]+}} x double>
+define void @cvCalcEMD2_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.body14.i.i
+
+for.body14.i.i: ; preds = %for.body14.i.i, %entry
+  %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0
+  store %struct.CvNode1D2* undef, %struct.CvNode1D2** %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1
+  store double 0xC415AF1D80000000, double* %val.i.i, align 4
+  %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
+  %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
+  br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
+
+for.end22.i.i: ; preds = %for.body14.i.i
+  unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll b/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll
new file mode 100644
index 000000000000..a218af3306d8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll
@@ -0,0 +1,66 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+
+; These tests check that we don't crash if the vectorizer decides to cast
+; a float value to be stored into a pointer type or vice-versa.
+
+; This test checks the case where a float value is stored into a pointer type.
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+source_filename = "bugpoint-output-26dbd81.bc"
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-unknown-linux-gnueabihf"
+
+%struct.CvNode1D = type { float, %struct.CvNode1D* }
+
+; CHECK-LABEL: @cvCalcEMD2
+; CHECK: vector.body
+; CHECK: store <{{[0-9]+}} x %struct.CvNode1D*>
+define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.body14.i.i
+
+for.body14.i.i: ; preds = %for.body14.i.i, %entry
+  %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0
+  store float 0xC415AF1D80000000, float* %val.i.i, align 4
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1
+  store %struct.CvNode1D* undef, %struct.CvNode1D** %next19.i.i, align 4
+  %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
+  %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
+  br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
+
+for.end22.i.i: ; preds = %for.body14.i.i
+  unreachable
+}
+
+; This test checks the case where a pointer value is stored into a float type.
+
+%struct.CvNode1D2 = type { %struct.CvNode1D2*, float }
+
+; CHECK-LABEL: @cvCalcEMD2_2
+; CHECK: vector.body
+; CHECK: store <{{[0-9]+}} x float>
+define void @cvCalcEMD2_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.body14.i.i
+
+for.body14.i.i: ; preds = %for.body14.i.i, %entry
+  %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ]
+  %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0
+  store %struct.CvNode1D2* undef, %struct.CvNode1D2** %next19.i.i, align 4
+  %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i
+  %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1
+  store float 0xC415AF1D80000000, float* %val.i.i, align 4
+  %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1
+  %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0
+  br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i
+
+for.end22.i.i: ; preds = %for.body14.i.i
+  unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+