Revert "[SLP] Fix lookahead operand reordering for splat loads." due to build failures
This reverts commit 5efa78985b.

parent 511fa0800f
commit 9136145eb0
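In short: the patch being reverted had (a) added a TargetTransformInfo hook, isLegalBroadcastLoad(), (b) given getShuffleCost() a trailing ArrayRef<Value *> Args parameter so targets could see the shuffle's operands and recognize broadcasts that feed off loads, and (c) taught the SLP lookahead operand scoring to prefer splat loads where a load-plus-broadcast is a single instruction (x86 movddup). The hunks below undo all three and restore the original signatures.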
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -658,10 +658,6 @@ public:
   /// Return true if the target supports nontemporal load.
   bool isLegalNTLoad(Type *DataType, Align Alignment) const;
 
-  /// \Returns true if the target supports broadcasting a load to a vector of
-  /// type <NumElements x ElementTy>.
-  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
-
   /// Return true if the target supports masked scatter.
   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
   /// Return true if the target supports masked gather.
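For context, a client of the removed hook would have queried it roughly as in the sketch below. This is illustrative only; the helper name and parameters are invented here, not taken from the patch.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: ask the target whether splatting the result of a
// scalar load across NumLanes vector lanes is a single cheap instruction.
// isLegalBroadcastLoad() is the hook this commit removes.
static bool preferBroadcastOfLoad(const TargetTransformInfo &TTI,
                                  const LoadInst &LI, unsigned NumLanes) {
  return TTI.isLegalBroadcastLoad(LI.getType(), NumLanes);
}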
@@ -1048,14 +1044,11 @@ public:
   /// The exact mask may be passed as Mask, or else the array will be empty.
   /// The index and subtype parameters are used by the subvector insertion and
   /// extraction shuffle kinds to show the insert/extract point and the type of
-  /// the subvector being inserted/extracted. The operands of the shuffle can be
-  /// passed through \p Args, which helps improve the cost estimation in some
-  /// cases, like in broadcast loads.
+  /// the subvector being inserted/extracted.
   /// NOTE: For subvector extractions Tp represents the source type.
   InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask = None, int Index = 0,
-                                 VectorType *SubTp = nullptr,
-                                 ArrayRef<Value *> Args = None) const;
+                                 VectorType *SubTp = nullptr) const;
 
   /// Represents a hint about the context in which a cast is used.
   ///
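Under the signature being removed, a cost query could hand the shuffle's scalar operands to the target; a minimal sketch (the helper is a made-up name, mirroring the SLP call site further down in this diff):

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical query: cost of broadcasting a scalar into VecTy, with the
// operands exposed through the soon-to-be-removed Args parameter. After
// the revert this collapses back to
//   TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
static InstructionCost broadcastCost(const TargetTransformInfo &TTI,
                                     VectorType *VecTy,
                                     ArrayRef<Value *> ScalarOps) {
  return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                            /*Mask=*/None, /*Index=*/0,
                            /*SubTp=*/nullptr, /*Args=*/ScalarOps);
}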
@@ -1556,8 +1549,6 @@ public:
   virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
-  virtual bool isLegalBroadcastLoad(Type *ElementTy,
-                                    unsigned NumElements) const = 0;
   virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
   virtual bool forceScalarizeMaskedGather(VectorType *DataType,
@@ -1668,8 +1659,7 @@ public:
       ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
   virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                          ArrayRef<int> Mask, int Index,
-                                         VectorType *SubTp,
-                                         ArrayRef<Value *> Args) = 0;
+                                         VectorType *SubTp) = 0;
   virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
                                            Type *Src, CastContextHint CCH,
                                            TTI::TargetCostKind CostKind,
@@ -1962,10 +1952,6 @@ public:
   bool isLegalNTLoad(Type *DataType, Align Alignment) override {
     return Impl.isLegalNTLoad(DataType, Alignment);
   }
-  bool isLegalBroadcastLoad(Type *ElementTy,
-                            unsigned NumElements) const override {
-    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
-  }
   bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
     return Impl.isLegalMaskedScatter(DataType, Alignment);
   }
@@ -2193,9 +2179,8 @@ public:
   }
   InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask, int Index,
-                                 VectorType *SubTp,
-                                 ArrayRef<Value *> Args) override {
-    return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
+                                 VectorType *SubTp) override {
+    return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
   }
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    CastContextHint CCH,
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -256,10 +256,6 @@ public:
     return Alignment >= DataSize && isPowerOf2_32(DataSize);
   }
 
-  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const {
-    return false;
-  }
-
   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
     return false;
  }
@@ -492,8 +488,7 @@ public:
 
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                  ArrayRef<int> Mask, int Index,
-                                 VectorType *SubTp,
-                                 ArrayRef<Value *> Args = None) const {
+                                 VectorType *SubTp) const {
     return 1;
   }
 
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -871,8 +871,7 @@ public:
 
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask, int Index,
-                                 VectorType *SubTp,
-                                 ArrayRef<Value *> Args = None) {
+                                 VectorType *SubTp) {
 
     switch (improveShuffleKindFromMask(Kind, Mask)) {
     case TTI::SK_Broadcast:
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -396,11 +396,6 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
   return TTIImpl->isLegalNTLoad(DataType, Alignment);
 }
 
-bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy,
-                                               unsigned NumElements) const {
-  return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
-}
-
 bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
                                               Align Alignment) const {
   return TTIImpl->isLegalMaskedGather(DataType, Alignment);
@@ -745,11 +740,12 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getShuffleCost(
-    ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
-    VectorType *SubTp, ArrayRef<Value *> Args) const {
-  InstructionCost Cost =
-      TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
+InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
+                                                    VectorType *Ty,
+                                                    ArrayRef<int> Mask,
+                                                    int Index,
+                                                    VectorType *SubTp) const {
+  InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1085,8 +1085,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
 InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                            VectorType *BaseTp,
                                            ArrayRef<int> Mask, int Index,
-                                           VectorType *SubTp,
-                                           ArrayRef<Value *> Args) {
+                                           VectorType *SubTp) {
   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
@@ -1546,27 +1545,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute
   };
 
-  static const CostTblEntry SSE3BroadcastLoadTbl[] = {
-      {TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup
-  };
-
-  if (ST->hasSSE2()) {
-    bool IsLoad = !Args.empty() && llvm::all_of(Args, [](const Value *V) {
-      return isa<LoadInst>(V);
-    });
-    if (ST->hasSSE3() && IsLoad)
-      if (const auto *Entry =
-              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
-        assert(isLegalBroadcastLoad(
-                   BaseTp->getElementType(),
-                   cast<FixedVectorType>(BaseTp)->getNumElements()) &&
-               "Table entry missing from isLegalBroadcastLoad()");
-        return LT.first * Entry->Cost;
-      }
-
-    if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
-      return LT.first * Entry->Cost;
-  }
+  if (ST->hasSSE2())
+    if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
+      return LT.first * Entry->Cost;
 
   static const CostTblEntry SSE1ShuffleTbl[] = {
     { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
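The zero cost in the removed SSE3BroadcastLoadTbl entry encodes the fact that movddup can broadcast a double straight from memory: when every operand behind the shuffle is a load (the IsLoad scan over Args), the broadcast folds into the load and adds no extra cost on top of it.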
@@ -5131,13 +5112,6 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
   return true;
 }
 
-bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
-                                      unsigned NumElements) const {
-  // movddup
-  return ST->hasSSSE3() && NumElements == 2 &&
-         ElementTy == Type::getDoubleTy(ElementTy->getContext());
-}
-
 bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
   if (!isa<VectorType>(DataTy))
     return false;
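On x86 the removed hook recognized exactly one pattern, the two-lane broadcast of a double that movddup provides. Note that the feature checks disagree between the two removed pieces: this hook tests hasSSSE3() while the cost-table path above tests hasSSE3(), even though movddup itself is an SSE3 instruction.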
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -131,7 +131,7 @@ public:
                                   const Instruction *CxtI = nullptr);
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask, int Index,
-                                 VectorType *SubTp, ArrayRef<Value *> = None);
+                                 VectorType *SubTp);
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
@@ -226,7 +226,6 @@ public:
   bool isLegalMaskedStore(Type *DataType, Align Alignment);
   bool isLegalNTLoad(Type *DataType, Align Alignment);
   bool isLegalNTStore(Type *DataType, Align Alignment);
-  bool isLegalBroadcastLoad(Type *ElementTy, unsigned NumElements) const;
   bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
   bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
     return forceScalarizeMaskedGather(VTy, Alignment);
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1142,11 +1142,6 @@ public:
 
   /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
   static const int ScoreConsecutiveLoads = 4;
-  /// The same load multiple times. This should have a better score than
-  /// `ScoreSplat` because on x86, for a 2-lane vector, we can represent it
-  /// with `movddup (%reg), xmm0`, which has a throughput of 0.5, versus 0.5
-  /// for a vector load plus 1.0 for a broadcast.
-  static const int ScoreSplatLoads = 3;
   /// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
   static const int ScoreReversedLoads = 3;
   /// ExtractElementInst from same vector and consecutive indexes.
@@ -1173,18 +1168,9 @@ public:
   /// MainAltOps.
   static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
                              ScalarEvolution &SE, int NumLanes,
-                             ArrayRef<Value *> MainAltOps,
-                             const TargetTransformInfo *TTI) {
-    if (V1 == V2) {
-      if (isa<LoadInst>(V1)) {
-        // A broadcast of a load can be cheaper on some targets.
-        // TODO: For now accept a broadcast load with no other internal uses.
-        if (TTI->isLegalBroadcastLoad(V1->getType(), NumLanes) &&
-            (int)V1->getNumUses() == NumLanes)
-          return VLOperands::ScoreSplatLoads;
-      }
-      return VLOperands::ScoreSplat;
-    }
+                             ArrayRef<Value *> MainAltOps) {
+    if (V1 == V2)
+      return VLOperands::ScoreSplat;
 
     auto *LI1 = dyn_cast<LoadInst>(V1);
     auto *LI2 = dyn_cast<LoadInst>(V2);
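Condensed, the V1 == V2 special case being deleted scored a repeated operand as sketched below. This is a simplified restatement, not the literal code; ScoreSplatLoads = 3 per the hunk above, while ScoreSplat's value of 1 is assumed (it is not shown in this diff).

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Reverted heuristic: a splat of a load outscores a generic splat only if
// the target can broadcast directly from memory and the load has no users
// beyond the NumLanes lanes being vectorized.
static int scoreRepeatedOperand(Value *V, int NumLanes,
                                const TargetTransformInfo *TTI) {
  if (isa<LoadInst>(V) && TTI->isLegalBroadcastLoad(V->getType(), NumLanes) &&
      (int)V->getNumUses() == NumLanes)
    return 3; // ScoreSplatLoads in the reverted code
  return 1;   // ScoreSplat (assumed value)
}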
@ -1363,7 +1349,7 @@ public:
|
|||
|
||||
// Get the shallow score of V1 and V2.
|
||||
int ShallowScoreAtThisLevel =
|
||||
getShallowScore(LHS, RHS, DL, SE, getNumLanes(), MainAltOps, R.TTI);
|
||||
getShallowScore(LHS, RHS, DL, SE, getNumLanes(), MainAltOps);
|
||||
|
||||
// If reached MaxLevel,
|
||||
// or if V1 and V2 are not instructions,
|
||||
|
@@ -5252,9 +5238,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // broadcast.
       assert(VecTy == FinalVecTy &&
              "No reused scalars expected for broadcast.");
-      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
-                                 /*Mask=*/None, /*Index=*/0,
-                                 /*SubTp=*/nullptr, /*Args=*/VL);
+      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
     }
     InstructionCost ReuseShuffleCost = 0;
     if (NeedToShuffleReuses)
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -591,8 +591,8 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
 ; CHECK-LABEL: @ChecksExtractScores_different_vectors(
 ; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
-; CHECK-NEXT:    [[LOADA0:%.*]] = load double, double* [[IDX0]], align 4
-; CHECK-NEXT:    [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
 ; CHECK-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
 ; CHECK-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
@@ -601,21 +601,18 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
 ; CHECK-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
 ; CHECK-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
 ; CHECK-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[LOADA0]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[LOADA1]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]]
 ; CHECK-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
 ; CHECK-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %idx0 = getelementptr inbounds double, double* %array, i64 0
@@ -657,18 +654,19 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
 ; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
 ; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
-; CHECK-NEXT:    [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
-; CHECK-NEXT:    [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
-; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
 ; CHECK-NEXT:    ret double [[ADD3]]
 ;
 entry:
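The splat_loads delta shows the behavioral change directly: with the patch applied (the removed CHECK lines), the second array stayed as two scalar loads, each splatted via insertelement pairs, i.e. broadcastable loads; after the revert, SLP returns to a single <2 x double> load followed by a shufflevector and extract/insert traffic to materialize the two splats.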
--- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -253,16 +253,13 @@ define void @vecload_vs_broadcast4(double * noalias %from, double * noalias %to,
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
-; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
-; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -309,16 +306,13 @@ define void @shuffle_nodes_match2(double * noalias %from, double * noalias %to,
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
-; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
-; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i64 1
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void