forked from OSchip/llvm-project
[LoopVectorize] propagate fast-math-flags from induction instructions
This code assumed that FP math was only permissable if it was fully "fast", so it hard-coded "fast" when creating new instructions. The underlying code already allows matching recurrences/reductions that are only "reassoc", so this change should prevent the potential miscompile seen in the test diffs (we created "fast" ops even though none existed in the original code). I don't know if we need to create the temporary IRBuilder objects used here, so that could be follow-up clean-up. There's an open question about whether we should require "nsz" in addition to "reassoc" here. InstCombine uses that combo for its reassociative folds, but I think codegen is not as strict.
This commit is contained in:
parent
51bd42ef9b
commit
1bee549737
|
@ -395,13 +395,6 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
|
|||
/// we always assume predicated blocks have a 50% chance of executing.
|
||||
static unsigned getReciprocalPredBlockProb() { return 2; }
|
||||
|
||||
/// A helper function that adds a 'fast' flag to floating-point operations.
|
||||
static Value *addFastMathFlag(Value *V) {
|
||||
if (isa<FPMathOperator>(V))
|
||||
cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
|
||||
return V;
|
||||
}
|
||||
|
||||
/// A helper function that returns an integer or floating-point constant with
|
||||
/// value C.
|
||||
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
|
||||
|
@ -2247,7 +2240,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
|
|||
// floating-point arithmetic as appropriate.
|
||||
Value *ConstVF =
|
||||
getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue());
|
||||
Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));
|
||||
Value *Mul = Builder.CreateBinOp(MulOp, Step, ConstVF);
|
||||
|
||||
// Create a vector splat to use in the induction update.
|
||||
//
|
||||
|
@ -2274,8 +2267,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
|
|||
recordVectorLoopValueForInductionCast(II, EntryVal, LastInduction, CastDef,
|
||||
State, Part);
|
||||
|
||||
LastInduction = cast<Instruction>(addFastMathFlag(
|
||||
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
|
||||
LastInduction = cast<Instruction>(
|
||||
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
|
||||
LastInduction->setDebugLoc(EntryVal->getDebugLoc());
|
||||
}
|
||||
|
||||
|
@ -2407,6 +2400,11 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
|
|||
}
|
||||
};
|
||||
|
||||
// Fast-math-flags propagate from the original induction instruction.
|
||||
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
|
||||
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
|
||||
Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
|
||||
|
||||
// Now do the actual transformations, and start with creating the step value.
|
||||
Value *Step = CreateStepValue(ID.getStep());
|
||||
if (VF.isZero() || VF.isScalar()) {
|
||||
|
@ -2486,23 +2484,11 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
|
|||
Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));
|
||||
|
||||
// Add the consecutive indices to the vector value.
|
||||
// Floating-point operations inherit FMF via the builder's flags.
|
||||
Constant *Cv = ConstantVector::get(Indices);
|
||||
|
||||
Step = Builder.CreateVectorSplat(VLen, Step);
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the induction.
|
||||
FastMathFlags Flags;
|
||||
Flags.setFast();
|
||||
|
||||
Value *MulOp = Builder.CreateFMul(Cv, Step);
|
||||
if (isa<Instruction>(MulOp))
|
||||
// Have to check, MulOp may be a constant
|
||||
cast<Instruction>(MulOp)->setFastMathFlags(Flags);
|
||||
|
||||
Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
|
||||
if (isa<Instruction>(BOp))
|
||||
cast<Instruction>(BOp)->setFastMathFlags(Flags);
|
||||
return BOp;
|
||||
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
|
||||
|
@ -2547,15 +2533,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
|
|||
createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF);
|
||||
if (ScalarIVTy->isFloatingPointTy())
|
||||
StartIdx = Builder.CreateSIToFP(StartIdx, ScalarIVTy);
|
||||
StartIdx = addFastMathFlag(Builder.CreateBinOp(
|
||||
AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane)));
|
||||
StartIdx = Builder.CreateBinOp(
|
||||
AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane));
|
||||
// The step returned by `createStepForVF` is a runtime-evaluated value
|
||||
// when VF is scalable. Otherwise, it should be folded into a Constant.
|
||||
assert((VF.isScalable() || isa<Constant>(StartIdx)) &&
|
||||
"Expected StartIdx to be folded to a constant when VF is not "
|
||||
"scalable");
|
||||
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
|
||||
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
|
||||
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
|
||||
auto *Add = Builder.CreateBinOp(AddOp, ScalarIV, Mul);
|
||||
State.set(Def, Add, VPIteration(Part, Lane));
|
||||
recordVectorLoopValueForInductionCast(ID, EntryVal, Add, CastDef, State,
|
||||
Part, Lane);
|
||||
|
@ -3325,6 +3311,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
|
|||
return LoopVectorBody->getTerminator();
|
||||
return &*B.GetInsertPoint();
|
||||
};
|
||||
|
||||
switch (ID.getKind()) {
|
||||
case InductionDescriptor::IK_IntInduction: {
|
||||
assert(Index->getType() == StartValue->getType() &&
|
||||
|
@ -3352,22 +3339,9 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
|
|||
"Original bin op should be defined for FP induction");
|
||||
|
||||
Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the induction.
|
||||
FastMathFlags Flags;
|
||||
Flags.setFast();
|
||||
|
||||
Value *MulExp = B.CreateFMul(StepValue, Index);
|
||||
if (isa<Instruction>(MulExp))
|
||||
// We have to check, the MulExp may be a constant.
|
||||
cast<Instruction>(MulExp)->setFastMathFlags(Flags);
|
||||
|
||||
Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
|
||||
"induction");
|
||||
if (isa<Instruction>(BOp))
|
||||
cast<Instruction>(BOp)->setFastMathFlags(Flags);
|
||||
|
||||
return BOp;
|
||||
return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
|
||||
"induction");
|
||||
}
|
||||
case InductionDescriptor::IK_NoInduction:
|
||||
return nullptr;
|
||||
|
@ -3454,6 +3428,11 @@ void InnerLoopVectorizer::createInductionResumeValues(
|
|||
EndValue = VectorTripCount;
|
||||
} else {
|
||||
IRBuilder<> B(L->getLoopPreheader()->getTerminator());
|
||||
|
||||
// Fast-math-flags propagate from the original induction instruction.
|
||||
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
|
||||
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
|
||||
|
||||
Type *StepType = II.getStep()->getType();
|
||||
Instruction::CastOps CastOp =
|
||||
CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
|
||||
|
@ -3675,6 +3654,11 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
|
|||
assert(isa<PHINode>(UI) && "Expected LCSSA form");
|
||||
|
||||
IRBuilder<> B(MiddleBlock->getTerminator());
|
||||
|
||||
// Fast-math-flags propagate from the original induction instruction.
|
||||
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
|
||||
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
|
||||
|
||||
Value *CountMinusOne = B.CreateSub(
|
||||
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
|
||||
Value *CMO =
|
||||
|
@ -7889,9 +7873,9 @@ Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
|
|||
if (Ty->isFloatingPointTy()) {
|
||||
Constant *C = ConstantFP::get(Ty, (double)StartIdx);
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the unrolling.
|
||||
Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step));
|
||||
return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp));
|
||||
// Floating-point operations inherit FMF via the builder's flags.
|
||||
Value *MulOp = Builder.CreateFMul(C, Step);
|
||||
return Builder.CreateBinOp(BinOp, Val, MulOp);
|
||||
}
|
||||
Constant *C = ConstantInt::get(Ty, StartIdx);
|
||||
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
|
||||
|
|
|
@ -105,8 +105,8 @@ for.end: ; preds = %for.end.loopexit, %
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
|
||||
; 'fast' spontaneously. Something is wrong with FMF expectations/propagation.
|
||||
; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
|
||||
; 'fast' spontaneously - FMF should propagate from the original IR.
|
||||
|
||||
define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) {
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF(
|
||||
|
@ -123,15 +123,15 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
|
||||
; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i32 0
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i32 0
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
|
||||
; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
|
@ -142,7 +142,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
|
||||
; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
|
||||
; VEC4_INTERL1: middle.block:
|
||||
|
@ -181,22 +181,22 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
|
||||
; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i32 0
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i32 0
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
|
||||
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: vector.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
|
||||
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
|
||||
|
@ -204,7 +204,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
|
||||
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
|
||||
; VEC4_INTERL2: middle.block:
|
||||
|
@ -265,15 +265,15 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC2_INTERL1_PRED_STORE: vector.ph:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i32 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i32 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP4]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
|
@ -284,7 +284,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: middle.block:
|
||||
|
|
Loading…
Reference in New Issue