forked from OSchip/llvm-project
[ARM] Add larger than legal ICmp costs
A v8i32 compare will produce a v8i1 predicate, but during codegen the v8i32 will be split into two v4i32, potentially requiring two v4i1 predicates to be merged into a single v8i1. Because this merging of two v4i1's into a v8i1 is very expensive, we need to make the cost of the compare equally high. This patch adds the cost of that to ARMTTIImpl::getCmpSelInstrCost. Because we don't know whether the user of the predicate can be split, and the cost model is mostly pre-instruction, we may be pessimistic but that should only be for larger and legal types. This also adds min/max detection to the costmodel where it can be detected, to keep those in line with the cost of simple min/max instructions. Otherwise for the most part, costs that were already expensive have become more expensive. Differential Revision: https://reviews.llvm.org/D96692
This commit is contained in:
parent
059cfe3093
commit
1a6744e3dc
|
@ -859,6 +859,51 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|||
return Cost;
|
||||
}
|
||||
|
||||
// If this is a vector min/max/abs, use the cost of that intrinsic directly
|
||||
// instead. Hopefully when min/max intrinsics are more prevalent this code
|
||||
// will not be needed.
|
||||
const Instruction *Sel = I;
|
||||
if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
|
||||
Sel->hasOneUse())
|
||||
Sel = cast<Instruction>(Sel->user_back());
|
||||
if (Sel && ValTy->isVectorTy()) {
|
||||
const Value *LHS, *RHS;
|
||||
SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor;
|
||||
unsigned IID = 0;
|
||||
switch (SPF) {
|
||||
case SPF_ABS:
|
||||
IID = Intrinsic::abs;
|
||||
break;
|
||||
case SPF_SMIN:
|
||||
IID = Intrinsic::smin;
|
||||
break;
|
||||
case SPF_SMAX:
|
||||
IID = Intrinsic::smax;
|
||||
break;
|
||||
case SPF_UMIN:
|
||||
IID = Intrinsic::umin;
|
||||
break;
|
||||
case SPF_UMAX:
|
||||
IID = Intrinsic::umax;
|
||||
break;
|
||||
case SPF_FMINNUM:
|
||||
IID = Intrinsic::minnum;
|
||||
break;
|
||||
case SPF_FMAXNUM:
|
||||
IID = Intrinsic::maxnum;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (IID) {
|
||||
// The ICmp is free, the select gets the cost of the min/max/etc
|
||||
if (Sel != I)
|
||||
return 0;
|
||||
IntrinsicCostAttributes CostAttrs(IID, ValTy, {ValTy, ValTy});
|
||||
return getIntrinsicInstrCost(CostAttrs, CostKind);
|
||||
}
|
||||
}
|
||||
|
||||
// On NEON a vector select gets lowered to vbsl.
|
||||
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
|
||||
// Lowering of some vector selects is currently far from perfect.
|
||||
|
@ -881,6 +926,41 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
|||
return LT.first;
|
||||
}
|
||||
|
||||
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
|
||||
(Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
|
||||
cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
|
||||
FixedVectorType *VecValTy = cast<FixedVectorType>(ValTy);
|
||||
FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
|
||||
if (!VecCondTy)
|
||||
VecCondTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(VecValTy));
|
||||
|
||||
// If we don't have mve.fp any fp operations will need to be scalarized.
|
||||
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
|
||||
// One scalaization insert, one scalarization extract and the cost of the
|
||||
// fcmps.
|
||||
return BaseT::getScalarizationOverhead(VecValTy, false, true) +
|
||||
BaseT::getScalarizationOverhead(VecCondTy, true, false) +
|
||||
VecValTy->getNumElements() *
|
||||
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
|
||||
CondTy->getScalarType(), VecPred, CostKind,
|
||||
I);
|
||||
}
|
||||
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
|
||||
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
|
||||
// There are two types - the input that specifies the type of the compare
|
||||
// and the output vXi1 type. Because we don't know how the output will be
|
||||
// split, we may need an expensive shuffle to get two in sync. This has the
|
||||
// effect of making larger than legal compares (v8i32 for example)
|
||||
// expensive.
|
||||
if (LT.second.getVectorNumElements() > 2) {
|
||||
if (LT.first > 1)
|
||||
return LT.first * BaseCost +
|
||||
BaseT::getScalarizationOverhead(VecCondTy, true, false);
|
||||
return BaseCost;
|
||||
}
|
||||
}
|
||||
|
||||
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
|
||||
// for "multiple beats" potentially needed by MVE instructions.
|
||||
int BaseCost = 1;
|
||||
|
|
|
@ -72,16 +72,16 @@ define i32 @sadd(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1106 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4260 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4244 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16680 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'sadd'
|
||||
|
@ -129,16 +129,16 @@ define i32 @sadd(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
@ -230,16 +230,16 @@ define i32 @uadd(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'uadd'
|
||||
|
@ -287,16 +287,16 @@ define i32 @uadd(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
@ -388,16 +388,16 @@ define i32 @ssub(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1106 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4260 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4244 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16680 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'ssub'
|
||||
|
@ -445,16 +445,16 @@ define i32 @ssub(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
@ -546,16 +546,16 @@ define i32 @usub(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'usub'
|
||||
|
@ -603,16 +603,16 @@ define i32 @usub(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
@ -704,16 +704,16 @@ define i32 @smul(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1208 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1464 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1512 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1252 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4712 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'smul'
|
||||
|
@ -761,16 +761,16 @@ define i32 @smul(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 557 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1174 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 275 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1164 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4374 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
@ -862,16 +862,16 @@ define i32 @umul(i32 %arg) {
|
|||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1456 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1504 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1248 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4704 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'umul'
|
||||
|
@ -919,16 +919,16 @@ define i32 @umul(i32 %arg) {
|
|||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 553 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1170 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4370 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
|
||||
; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
|
||||
|
|
|
@ -225,7 +225,7 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
|
|||
define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
|
||||
; THRU-LABEL: 'fshl'
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 832 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; LATE-LABEL: 'fshl'
|
||||
|
@ -235,12 +235,12 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
|
|||
;
|
||||
; SIZE-LABEL: 'fshl'
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 805 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE_LATE-LABEL: 'fshl'
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 564 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 826 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
%s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
|
||||
|
@ -292,7 +292,7 @@ define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
|
|||
|
||||
define void @reduce_fmax(<16 x float> %va) {
|
||||
; THRU-LABEL: 'reduce_fmax'
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; LATE-LABEL: 'reduce_fmax'
|
||||
|
@ -300,11 +300,11 @@ define void @reduce_fmax(<16 x float> %va) {
|
|||
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE-LABEL: 'reduce_fmax'
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 685 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE_LATE-LABEL: 'reduce_fmax'
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 628 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 694 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
%v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK
|
||||
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK
|
||||
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
|
||||
; RUN: opt -cost-model -analyze -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
|
||||
|
||||
define void @icmp() {
|
||||
; CHECK-LABEL: 'icmp'
|
||||
|
@ -8,15 +8,15 @@ define void @icmp() {
|
|||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef
|
||||
|
@ -46,19 +46,33 @@ define void @icmp() {
|
|||
}
|
||||
|
||||
define void @fcmp() {
|
||||
; CHECK-LABEL: 'fcmp'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
; CHECK-MVE-LABEL: 'fcmp'
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
|
||||
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; CHECK-MVEFP-LABEL: 'fcmp'
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
|
||||
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
%v2f16 = fcmp olt <2 x half> undef, undef
|
||||
%v4f16 = fcmp olt <4 x half> undef, undef
|
||||
|
|
|
@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
|
||||
|
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
|
||||
|
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
|
||||
|
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
|
||||
|
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
|
||||
|
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
|
||||
|
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -57,8 +57,8 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
|
||||
|
@ -93,8 +93,8 @@ define i32 @reduce_i16(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
|
||||
|
@ -133,8 +133,8 @@ define i32 @reduce_i8(i32 %arg) {
|
|||
; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
|
||||
; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -21,8 +21,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
|
|||
; CHECK: vmin.s16
|
||||
; CHECK: vmin.s16
|
||||
; COST: func_blend10
|
||||
; COST: cost of 2 {{.*}} icmp
|
||||
; COST: cost of 2 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 4 {{.*}} select
|
||||
%r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
|
||||
store %T0_10 %r, %T0_10* %storeaddr
|
||||
ret void
|
||||
|
@ -38,8 +38,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
|
|||
; CHECK: vmin.s32
|
||||
; CHECK: vmin.s32
|
||||
; COST: func_blend14
|
||||
; COST: cost of 2 {{.*}} icmp
|
||||
; COST: cost of 2 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 4 {{.*}} select
|
||||
%r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
|
||||
store %T0_14 %r, %T0_14* %storeaddr
|
||||
ret void
|
||||
|
@ -55,8 +55,8 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
|
|||
%v1 = load %T0_15, %T0_15* %loadaddr2
|
||||
%c = icmp slt %T0_15 %v0, %v1
|
||||
; COST: func_blend15
|
||||
; COST: cost of 4 {{.*}} icmp
|
||||
; COST: cost of 4 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 8 {{.*}} select
|
||||
%r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
|
||||
store %T0_15 %r, %T0_15* %storeaddr
|
||||
ret void
|
||||
|
@ -130,8 +130,8 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
|
|||
%v1 = load %T0_18, %T0_18* %loadaddr2
|
||||
%c = icmp slt %T0_18 %v0, %v1
|
||||
; COST: func_blend18
|
||||
; COST: cost of 2 {{.*}} icmp
|
||||
; COST: cost of 19 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 21 {{.*}} select
|
||||
%r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
|
||||
store %T0_18 %r, %T0_18* %storeaddr
|
||||
ret void
|
||||
|
@ -260,8 +260,8 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
|
|||
%v1 = load %T0_19, %T0_19* %loadaddr2
|
||||
%c = icmp slt %T0_19 %v0, %v1
|
||||
; COST: func_blend19
|
||||
; COST: cost of 4 {{.*}} icmp
|
||||
; COST: cost of 50 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 54 {{.*}} select
|
||||
%r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
|
||||
store %T0_19 %r, %T0_19* %storeaddr
|
||||
ret void
|
||||
|
@ -516,8 +516,8 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
|
|||
%v1 = load %T0_20, %T0_20* %loadaddr2
|
||||
%c = icmp slt %T0_20 %v0, %v1
|
||||
; COST: func_blend20
|
||||
; COST: cost of 8 {{.*}} icmp
|
||||
; COST: cost of 100 {{.*}} select
|
||||
; COST: cost of 0 {{.*}} icmp
|
||||
; COST: cost of 108 {{.*}} select
|
||||
%r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
|
||||
store %T0_20 %r, %T0_20* %storeaddr
|
||||
ret void
|
||||
|
|
|
@ -50,7 +50,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
|
|||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, i16* %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv = sext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp2, label %if.then, label %for.inc
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv6 = add i16 %1, %0
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
|
||||
|
@ -59,8 +59,8 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
|
|||
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %inc = add nuw nsw i32 %i.016, 1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq i32 %inc, %n
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
|
||||
; CHECK: LV: Vector loop of width 8 costs: 1.
|
||||
; CHECK: LV: Selecting VF: 8.
|
||||
; CHECK: LV: Vector loop of width 8 costs: 9.
|
||||
; CHECK: LV: Selecting VF: 4.
|
||||
define void @expensive_icmp(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
|
||||
entry:
|
||||
%cmp15 = icmp sgt i32 %n, 0
|
||||
|
@ -127,8 +127,8 @@ for.inc: ; preds = %for.body, %if.then
|
|||
; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv3 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %shr = ashr i32 %mul, 7
|
||||
; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 24 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv4 = trunc i32 %spec.select.i to i8
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
|
||||
; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1
|
||||
|
@ -148,7 +148,7 @@ for.inc: ; preds = %for.body, %if.then
|
|||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv3 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %mul = mul nsw i32 %conv3, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %shr = ashr i32 %mul, 7
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv4 = trunc i32 %spec.select.i to i8
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
|
||||
|
@ -156,7 +156,7 @@ for.inc: ; preds = %for.body, %if.then
|
|||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %dec = add i32 %blkCnt.012, -1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp.not = icmp eq i32 %dec, 0
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
|
||||
; CHECK: LV: Vector loop of width 4 costs: 4.
|
||||
; CHECK: LV: Vector loop of width 4 costs: 3.
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
|
||||
|
@ -169,7 +169,7 @@ for.inc: ; preds = %for.body, %if.then
|
|||
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv3 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %mul = mul nsw i32 %conv3, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %shr = ashr i32 %mul, 7
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv4 = trunc i32 %spec.select.i to i8
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
|
||||
|
@ -190,7 +190,7 @@ for.inc: ; preds = %for.body, %if.then
|
|||
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv3 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %mul = mul nsw i32 %conv3, %conv1
|
||||
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %shr = ashr i32 %mul, 7
|
||||
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127
|
||||
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
|
||||
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv4 = trunc i32 %spec.select.i to i8
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
|
||||
|
|
Loading…
Reference in New Issue