From e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Wed, 9 Dec 2020 11:03:54 +0000 Subject: [PATCH] [CostModel] Add costs for llvm.experimental.vector.{extract,insert} intrinsics Adds cost model support for the new llvm.experimental.vector.{extract,insert} intrinsics, using the existing getExtractSubvectorOverhead and getInsertSubvectorOverhead functions for shuffles. Previously this case would throw an assertion. Differential Revision: https://reviews.llvm.org/D93043 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 36 +++++++++++++--- ...etIntrinsicInstrCost-vec-insert-extract.ll | 42 +++++++++++++++++++ 2 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7dca7cd291c9..02f1b73226fc 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -114,12 +114,14 @@ private: /// Estimate a cost of subvector extraction as a sequence of extract and /// insert operations. - unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only extract subvectors from vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_ExtractSubvector index out of range"); unsigned Cost = 0; @@ -137,12 +139,14 @@ private: /// Estimate a cost of subvector insertion as a sequence of extract and /// insert operations. 
- unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only insert subvectors into vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_InsertSubvector index out of range"); unsigned Cost = 0; @@ -723,10 +727,10 @@ public: case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); case TTI::SK_ExtractSubvector: - return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getExtractSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); case TTI::SK_InsertSubvector: - return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getInsertSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); } llvm_unreachable("Unknown TTI::ShuffleKind"); @@ -1255,6 +1259,26 @@ public: return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } + case Intrinsic::experimental_vector_extract: { + // FIXME: Handle case where a scalable vector is extracted from a scalable + // vector + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); + return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, + cast<VectorType>(Args[0]->getType()), + Index, cast<VectorType>(RetTy)); + } + case Intrinsic::experimental_vector_insert: { + // FIXME: Handle case where a scalable vector is inserted into a scalable + // vector + if (isa<ScalableVectorType>(Args[1]->getType())) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); + return thisT()->getShuffleCost( + TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index, + cast<VectorType>(Args[1]->getType())); + } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: diff --git 
a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll new file mode 100644 index 000000000000..9523e17cb5de --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s + +define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) { +; CHECK-LABEL: 'extract_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ret + + %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) + ret <16 x i32> %ret +} + +define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) { +; CHECK-LABEL: 'extract_cost_scalable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost_scalable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) +; CHECK-NEXT: 
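Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 16 x i32> %ret + + %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) + ret <vscale x 16 x i32> %ret +} + +declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64) +declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)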