forked from OSchip/llvm-project
[AArch64][CostModel] Add cost model for experimental.vector.splice
This patch adds a new ShuffleKind SK_Splice and then handle the cost in getShuffleCost, as in experimental.vector.reverse. Differential Revision: https://reviews.llvm.org/D104630
This commit is contained in:
parent
9ab99f773f
commit
a2c5c56055
|
@ -858,8 +858,11 @@ public:
|
||||||
SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
|
SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
|
||||||
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
|
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
|
||||||
///< with any shuffle mask.
|
///< with any shuffle mask.
|
||||||
SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
|
SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
|
||||||
///< shuffle mask.
|
///< shuffle mask.
|
||||||
|
SK_Splice ///< Concatenates elements from the first input vector
|
||||||
|
///< with elements of the second input vector. Returning
|
||||||
|
///< a vector of the same type as the input vectors.
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Kind of the reduction data.
|
/// Kind of the reduction data.
|
||||||
|
|
|
@ -825,6 +825,7 @@ public:
|
||||||
case TTI::SK_Transpose:
|
case TTI::SK_Transpose:
|
||||||
case TTI::SK_InsertSubvector:
|
case TTI::SK_InsertSubvector:
|
||||||
case TTI::SK_ExtractSubvector:
|
case TTI::SK_ExtractSubvector:
|
||||||
|
case TTI::SK_Splice:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return Kind;
|
return Kind;
|
||||||
|
@ -838,6 +839,7 @@ public:
|
||||||
case TTI::SK_Broadcast:
|
case TTI::SK_Broadcast:
|
||||||
return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
|
return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
|
||||||
case TTI::SK_Select:
|
case TTI::SK_Select:
|
||||||
|
case TTI::SK_Splice:
|
||||||
case TTI::SK_Reverse:
|
case TTI::SK_Reverse:
|
||||||
case TTI::SK_Transpose:
|
case TTI::SK_Transpose:
|
||||||
case TTI::SK_PermuteSingleSrc:
|
case TTI::SK_PermuteSingleSrc:
|
||||||
|
@ -1371,6 +1373,12 @@ public:
|
||||||
cast<VectorType>(Args[0]->getType()), None,
|
cast<VectorType>(Args[0]->getType()), None,
|
||||||
0, cast<VectorType>(RetTy));
|
0, cast<VectorType>(RetTy));
|
||||||
}
|
}
|
||||||
|
case Intrinsic::experimental_vector_splice: {
|
||||||
|
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
|
||||||
|
return thisT()->getShuffleCost(TTI::SK_Splice,
|
||||||
|
cast<VectorType>(Args[0]->getType()), None,
|
||||||
|
Index, cast<VectorType>(RetTy));
|
||||||
|
}
|
||||||
case Intrinsic::vector_reduce_add:
|
case Intrinsic::vector_reduce_add:
|
||||||
case Intrinsic::vector_reduce_mul:
|
case Intrinsic::vector_reduce_mul:
|
||||||
case Intrinsic::vector_reduce_and:
|
case Intrinsic::vector_reduce_and:
|
||||||
|
|
|
@ -18508,6 +18508,10 @@ bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
|
||||||
return ::isAllActivePredicate(N);
|
return ::isAllActivePredicate(N);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
|
||||||
|
return ::getPromotedVTForPredicate(VT);
|
||||||
|
}
|
||||||
|
|
||||||
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
|
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
|
||||||
SDValue Op, const APInt &OriginalDemandedBits,
|
SDValue Op, const APInt &OriginalDemandedBits,
|
||||||
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
|
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
|
||||||
|
|
|
@ -822,6 +822,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isAllActivePredicate(SDValue N) const;
|
bool isAllActivePredicate(SDValue N) const;
|
||||||
|
EVT getPromotedVTForPredicate(EVT VT) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Keep a pointer to the AArch64Subtarget around so that we can
|
/// Keep a pointer to the AArch64Subtarget around so that we can
|
||||||
|
|
|
@ -839,6 +839,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
|
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
|
||||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
|
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
|
||||||
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
|
{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
|
||||||
|
{ ISD::TRUNCATE, MVT::nxv16i1, MVT::nxv16i8, 1 },
|
||||||
{ ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
|
{ ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
|
||||||
{ ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
|
{ ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
|
||||||
{ ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
|
{ ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
|
||||||
|
@ -1897,6 +1898,55 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||||
CostKind);
|
CostKind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
|
||||||
|
static const CostTblEntry ShuffleTbl[] = {
|
||||||
|
{ TTI::SK_Splice, MVT::nxv16i8, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv8i16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv4i32, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv2i64, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv2f16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv4f16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv8f16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv2bf16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv4bf16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv8bf16, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv2f32, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv4f32, 1 },
|
||||||
|
{ TTI::SK_Splice, MVT::nxv2f64, 1 },
|
||||||
|
};
|
||||||
|
|
||||||
|
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||||
|
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext());
|
||||||
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||||
|
EVT PromotedVT = LT.second.getScalarType() == MVT::i1
|
||||||
|
? TLI->getPromotedVTForPredicate(EVT(LT.second))
|
||||||
|
: LT.second;
|
||||||
|
Type *PromotedVTy = EVT(PromotedVT).getTypeForEVT(Tp->getContext());
|
||||||
|
InstructionCost LegalizationCost = 0;
|
||||||
|
if (Index < 0) {
|
||||||
|
LegalizationCost =
|
||||||
|
getCmpSelInstrCost(Instruction::ICmp, PromotedVTy, PromotedVTy,
|
||||||
|
CmpInst::BAD_ICMP_PREDICATE, CostKind) +
|
||||||
|
getCmpSelInstrCost(Instruction::Select, PromotedVTy, LegalVTy,
|
||||||
|
CmpInst::BAD_ICMP_PREDICATE, CostKind);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Predicated splice are promoted when lowering. See AArch64ISelLowering.cpp
|
||||||
|
// Cost performed on a promoted type.
|
||||||
|
if (LT.second.getScalarType() == MVT::i1) {
|
||||||
|
LegalizationCost +=
|
||||||
|
getCastInstrCost(Instruction::ZExt, PromotedVTy, LegalVTy,
|
||||||
|
TTI::CastContextHint::None, CostKind) +
|
||||||
|
getCastInstrCost(Instruction::Trunc, LegalVTy, PromotedVTy,
|
||||||
|
TTI::CastContextHint::None, CostKind);
|
||||||
|
}
|
||||||
|
const auto *Entry =
|
||||||
|
CostTableLookup(ShuffleTbl, TTI::SK_Splice, PromotedVT.getSimpleVT());
|
||||||
|
assert(Entry && "Illegal Type for Splice");
|
||||||
|
LegalizationCost += Entry->Cost;
|
||||||
|
return LegalizationCost * LT.first;
|
||||||
|
}
|
||||||
|
|
||||||
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
VectorType *Tp,
|
VectorType *Tp,
|
||||||
ArrayRef<int> Mask, int Index,
|
ArrayRef<int> Mask, int Index,
|
||||||
|
@ -1993,6 +2043,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
||||||
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
|
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
|
||||||
return LT.first * Entry->Cost;
|
return LT.first * Entry->Cost;
|
||||||
}
|
}
|
||||||
|
if (Kind == TTI::SK_Splice && isa<ScalableVectorType>(Tp))
|
||||||
|
return getSpliceCost(Tp, Index);
|
||||||
return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
|
return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
|
||||||
}
|
}
|
||||||
|
|
|
@ -166,6 +166,8 @@ public:
|
||||||
bool IsPairwiseForm,
|
bool IsPairwiseForm,
|
||||||
TTI::TargetCostKind CostKind);
|
TTI::TargetCostKind CostKind);
|
||||||
|
|
||||||
|
InstructionCost getSpliceCost(VectorType *Tp, int Index);
|
||||||
|
|
||||||
InstructionCost getArithmeticInstrCost(
|
InstructionCost getArithmeticInstrCost(
|
||||||
unsigned Opcode, Type *Ty,
|
unsigned Opcode, Type *Ty,
|
||||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||||
|
; RUN: opt < %s -analyze -cost-model -S -mtriple=aarch64--linux-gnu | FileCheck %s
|
||||||
|
|
||||||
|
define void @vector_splice() #0 {
|
||||||
|
; CHECK-LABEL: 'vector_splice'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %splice.v16i8 = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %splice.v32i8 = call <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i16 = call <2 x i16> @llvm.experimental.vector.splice.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i16 = call <4 x i16> @llvm.experimental.vector.splice.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8i16 = call <8 x i16> @llvm.experimental.vector.splice.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16i16 = call <16 x i16> @llvm.experimental.vector.splice.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i32 = call <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %splice.v8i32 = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i64 = call <2 x i64> @llvm.experimental.vector.splice.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %splice.v4i64 = call <4 x i64> @llvm.experimental.vector.splice.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f16 = call <2 x half> @llvm.experimental.vector.splice.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4f16 = call <4 x half> @llvm.experimental.vector.splice.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8f16 = call <8 x half> @llvm.experimental.vector.splice.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16f16 = call <16 x half> @llvm.experimental.vector.splice.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f32 = call <2 x float> @llvm.experimental.vector.splice.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4f32 = call <4 x float> @llvm.experimental.vector.splice.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %splice.v8f32 = call <8 x float> @llvm.experimental.vector.splice.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2f64 = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %splice.v4f64 = call <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2bf16 = call <2 x bfloat> @llvm.experimental.vector.splice.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4bf16 = call <4 x bfloat> @llvm.experimental.vector.splice.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8bf16 = call <8 x bfloat> @llvm.experimental.vector.splice.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %splice.v16bf16 = call <16 x bfloat> @llvm.experimental.vector.splice.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %splice.v16i1 = call <16 x i1> @llvm.experimental.vector.splice.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %splice.v8i1 = call <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %splice.v4i1 = call <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice.v2i1 = call <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1)
|
||||||
|
;
|
||||||
|
%splice.v16i8 = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1)
|
||||||
|
%splice.v32i8 = call <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1)
|
||||||
|
%splice.v2i16 = call <2 x i16> @llvm.experimental.vector.splice.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice.v4i16 = call <4 x i16> @llvm.experimental.vector.splice.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice.v8i16 = call <8 x i16> @llvm.experimental.vector.splice.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice.v16i16 = call <16 x i16> @llvm.experimental.vector.splice.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice.v4i32 = call <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1)
|
||||||
|
%splice.v8i32 = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1)
|
||||||
|
%splice.v2i64 = call <2 x i64> @llvm.experimental.vector.splice.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1)
|
||||||
|
%splice.v4i64 = call <4 x i64> @llvm.experimental.vector.splice.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1)
|
||||||
|
%splice.v2f16 = call <2 x half> @llvm.experimental.vector.splice.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1)
|
||||||
|
%splice.v4f16 = call <4 x half> @llvm.experimental.vector.splice.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1)
|
||||||
|
%splice.v8f16 = call <8 x half> @llvm.experimental.vector.splice.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1)
|
||||||
|
%splice.v16f16 = call <16 x half> @llvm.experimental.vector.splice.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1)
|
||||||
|
%splice.v2f32 = call <2 x float> @llvm.experimental.vector.splice.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1)
|
||||||
|
%splice.v4f32 = call <4 x float> @llvm.experimental.vector.splice.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1)
|
||||||
|
%splice.v8f32 = call <8 x float> @llvm.experimental.vector.splice.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1)
|
||||||
|
%splice.v2f64 = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1)
|
||||||
|
%splice.v4f64 = call <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1)
|
||||||
|
%splice.v2bf16 = call <2 x bfloat> @llvm.experimental.vector.splice.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice.v4bf16 = call <4 x bfloat> @llvm.experimental.vector.splice.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice.v8bf16 = call <8 x bfloat> @llvm.experimental.vector.splice.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice.v16bf16 = call <16 x bfloat> @llvm.experimental.vector.splice.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice.v16i1 = call <16 x i1> @llvm.experimental.vector.splice.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice.v8i1 = call <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice.v4i1 = call <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice.v2i1 = call <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <2 x i1> @llvm.experimental.vector.splice.v2i1(<2 x i1>, <2 x i1>, i32)
|
||||||
|
declare <4 x i1> @llvm.experimental.vector.splice.v4i1(<4 x i1>, <4 x i1>, i32)
|
||||||
|
declare <8 x i1> @llvm.experimental.vector.splice.v8i1(<8 x i1>, <8 x i1>, i32)
|
||||||
|
declare <16 x i1> @llvm.experimental.vector.splice.v16i1(<16 x i1>, <16 x i1>, i32)
|
||||||
|
declare <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8>, <2 x i8>, i32)
|
||||||
|
declare <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8>, <16 x i8>, i32)
|
||||||
|
declare <32 x i8> @llvm.experimental.vector.splice.v32i8(<32 x i8>, <32 x i8>, i32)
|
||||||
|
declare <2 x i16> @llvm.experimental.vector.splice.v2i16(<2 x i16>, <2 x i16>, i32)
|
||||||
|
declare <4 x i16> @llvm.experimental.vector.splice.v4i16(<4 x i16>, <4 x i16>, i32)
|
||||||
|
declare <8 x i16> @llvm.experimental.vector.splice.v8i16(<8 x i16>, <8 x i16>, i32)
|
||||||
|
declare <16 x i16> @llvm.experimental.vector.splice.v16i16(<16 x i16>, <16 x i16>, i32)
|
||||||
|
declare <4 x i32> @llvm.experimental.vector.splice.v4i32(<4 x i32>, <4 x i32>, i32)
|
||||||
|
declare <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32>, <8 x i32>, i32)
|
||||||
|
declare <2 x i64> @llvm.experimental.vector.splice.v2i64(<2 x i64>, <2 x i64>, i32)
|
||||||
|
declare <4 x i64> @llvm.experimental.vector.splice.v4i64(<4 x i64>, <4 x i64>, i32)
|
||||||
|
declare <2 x half> @llvm.experimental.vector.splice.v2f16(<2 x half>, <2 x half>, i32)
|
||||||
|
declare <4 x half> @llvm.experimental.vector.splice.v4f16(<4 x half>, <4 x half>, i32)
|
||||||
|
declare <8 x half> @llvm.experimental.vector.splice.v8f16(<8 x half>, <8 x half>, i32)
|
||||||
|
declare <16 x half> @llvm.experimental.vector.splice.v16f16(<16 x half>, <16 x half>, i32)
|
||||||
|
declare <2 x bfloat> @llvm.experimental.vector.splice.v2bf16(<2 x bfloat>, <2 x bfloat>, i32)
|
||||||
|
declare <4 x bfloat> @llvm.experimental.vector.splice.v4bf16(<4 x bfloat>, <4 x bfloat>, i32)
|
||||||
|
declare <8 x bfloat> @llvm.experimental.vector.splice.v8bf16(<8 x bfloat>, <8 x bfloat>, i32)
|
||||||
|
declare <16 x bfloat> @llvm.experimental.vector.splice.v16bf16(<16 x bfloat>, <16 x bfloat>, i32)
|
||||||
|
declare <2 x float> @llvm.experimental.vector.splice.v2f32(<2 x float>, <2 x float>, i32)
|
||||||
|
declare <4 x float> @llvm.experimental.vector.splice.v4f32(<4 x float>, <4 x float>, i32)
|
||||||
|
declare <8 x float> @llvm.experimental.vector.splice.v8f32(<8 x float>, <8 x float>, i32)
|
||||||
|
declare <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float>, <16 x float>, i32)
|
||||||
|
declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32)
|
||||||
|
declare <4 x double> @llvm.experimental.vector.splice.v4f64(<4 x double>, <4 x double>, i32)
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+bf16" }
|
|
@ -1,4 +1,5 @@
|
||||||
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
|
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||||
|
; RUN: opt < %s -analyze -cost-model -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s
|
||||||
|
|
||||||
define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) {
|
define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) {
|
||||||
; CHECK-LABEL: 'vector_insert_extract'
|
; CHECK-LABEL: 'vector_insert_extract'
|
||||||
|
@ -231,4 +232,149 @@ declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
|
||||||
declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
|
declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
|
||||||
declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
|
declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
|
||||||
|
|
||||||
|
define void @vector_splice() #0 {
|
||||||
|
; CHECK-LABEL: 'vector_splice'
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i64_neg = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
|
||||||
|
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
|
||||||
|
|
||||||
|
%splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
|
||||||
|
%splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
|
||||||
|
;; negative Index
|
||||||
|
%splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2i64_neg= call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
|
||||||
|
%splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
|
||||||
|
declare <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
|
||||||
|
declare <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
|
||||||
|
declare <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
|
||||||
|
declare <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
|
||||||
|
declare <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
|
||||||
|
declare <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, i32)
|
||||||
|
declare <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, i32)
|
||||||
|
declare <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, i32)
|
||||||
|
declare <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
|
||||||
|
declare <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, i32)
|
||||||
|
declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
|
||||||
|
declare <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
|
||||||
|
declare <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
|
||||||
|
declare <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, i32)
|
||||||
|
declare <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
|
||||||
|
declare <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
|
||||||
|
declare <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
|
||||||
|
declare <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, i32)
|
||||||
|
declare <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
|
||||||
|
declare <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
|
||||||
|
declare <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
|
||||||
|
declare <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
|
||||||
|
declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
|
||||||
|
declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
|
||||||
|
declare <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, i32)
|
||||||
|
declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
|
||||||
|
declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
|
||||||
|
declare <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, i32)
|
||||||
|
|
||||||
attributes #0 = { "target-features"="+sve,+bf16" }
|
attributes #0 = { "target-features"="+sve,+bf16" }
|
||||||
|
|
Loading…
Reference in New Issue