forked from OSchip/llvm-project
[CostModel] Add costs for llvm.experimental.vector.{extract,insert} intrinsics
Adds cost model support for the new llvm.experimental.vector.{extract,insert} intrinsics, using the existing getExtractSubvectorOverhead and getInsertSubvectorOverhead functions for shuffles. Previously this case would trigger an assertion failure. Differential Revision: https://reviews.llvm.org/D93043
This commit is contained in:
parent
6551c9ac36
commit
e0b9c5df26
|
@ -114,12 +114,14 @@ private:
|
|||
|
||||
/// Estimate a cost of subvector extraction as a sequence of extract and
|
||||
/// insert operations.
|
||||
unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
|
||||
unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
|
||||
FixedVectorType *SubVTy) {
|
||||
assert(VTy && SubVTy &&
|
||||
"Can only extract subvectors from vectors");
|
||||
int NumSubElts = SubVTy->getNumElements();
|
||||
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
|
||||
assert((!isa<FixedVectorType>(VTy) ||
|
||||
(Index + NumSubElts) <=
|
||||
(int)cast<FixedVectorType>(VTy)->getNumElements()) &&
|
||||
"SK_ExtractSubvector index out of range");
|
||||
|
||||
unsigned Cost = 0;
|
||||
|
@ -137,12 +139,14 @@ private:
|
|||
|
||||
/// Estimate a cost of subvector insertion as a sequence of extract and
|
||||
/// insert operations.
|
||||
unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
|
||||
unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
|
||||
FixedVectorType *SubVTy) {
|
||||
assert(VTy && SubVTy &&
|
||||
"Can only insert subvectors into vectors");
|
||||
int NumSubElts = SubVTy->getNumElements();
|
||||
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
|
||||
assert((!isa<FixedVectorType>(VTy) ||
|
||||
(Index + NumSubElts) <=
|
||||
(int)cast<FixedVectorType>(VTy)->getNumElements()) &&
|
||||
"SK_InsertSubvector index out of range");
|
||||
|
||||
unsigned Cost = 0;
|
||||
|
@ -723,10 +727,10 @@ public:
|
|||
case TTI::SK_PermuteTwoSrc:
|
||||
return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
|
||||
case TTI::SK_ExtractSubvector:
|
||||
return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
|
||||
return getExtractSubvectorOverhead(Tp, Index,
|
||||
cast<FixedVectorType>(SubTp));
|
||||
case TTI::SK_InsertSubvector:
|
||||
return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
|
||||
return getInsertSubvectorOverhead(Tp, Index,
|
||||
cast<FixedVectorType>(SubTp));
|
||||
}
|
||||
llvm_unreachable("Unknown TTI::ShuffleKind");
|
||||
|
@ -1255,6 +1259,26 @@ public:
|
|||
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
|
||||
VarMask, Alignment, CostKind, I);
|
||||
}
|
||||
case Intrinsic::experimental_vector_extract: {
|
||||
// FIXME: Handle case where a scalable vector is extracted from a scalable
|
||||
// vector
|
||||
if (isa<ScalableVectorType>(RetTy))
|
||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
||||
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
|
||||
return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
|
||||
cast<VectorType>(Args[0]->getType()),
|
||||
Index, cast<VectorType>(RetTy));
|
||||
}
|
||||
case Intrinsic::experimental_vector_insert: {
|
||||
// FIXME: Handle case where a scalable vector is inserted into a scalable
|
||||
// vector
|
||||
if (isa<ScalableVectorType>(Args[1]->getType()))
|
||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
||||
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
|
||||
return thisT()->getShuffleCost(
|
||||
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
|
||||
cast<VectorType>(Args[1]->getType()));
|
||||
}
|
||||
case Intrinsic::vector_reduce_add:
|
||||
case Intrinsic::vector_reduce_mul:
|
||||
case Intrinsic::vector_reduce_and:
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
|
||||
|
||||
define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) {
|
||||
; CHECK-LABEL: 'extract_cost'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ret
|
||||
|
||||
%ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
|
||||
ret <16 x i32> %ret
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) {
|
||||
; CHECK-LABEL: 'insert_cost'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret
|
||||
|
||||
%ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
|
||||
ret <vscale x 4 x i32> %ret
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) {
|
||||
; CHECK-LABEL: 'extract_cost_scalable'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret
|
||||
|
||||
%ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
|
||||
ret <vscale x 4 x i32> %ret
|
||||
}
|
||||
|
||||
define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) {
|
||||
; CHECK-LABEL: 'insert_cost_scalable'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 16 x i32> %ret
|
||||
|
||||
%ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
|
||||
ret <vscale x 16 x i32> %ret
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
|
||||
declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)
|
Loading…
Reference in New Issue