From ef1ca4d3e965e76d190cdf3c6d6d4f8db67e7a2b Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 12 Jan 2022 09:51:34 +0000 Subject: [PATCH] [AArch64] Fix incorrect use of MVT::getVectorNumElements in AArch64TTIImpl::getVectorInstrCost If we are inserting into or extracting from a scalable vector we do not know the number of elements at runtime, so we can only let the index wrap for fixed-length vectors. Tests added here: Analysis/CostModel/AArch64/sve-insert-extract.ll Differential Revision: https://reviews.llvm.org/D117099 --- .../AArch64/AArch64TargetTransformInfo.cpp | 9 ++- .../CostModel/AArch64/sve-insert-extract.ll | 57 +++++++++++++++++++ .../CostModel/AArch64/sve-intrinsics.ll | 4 +- 3 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 8fd875106b13..0e8c5b820b91 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1594,9 +1594,12 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (!LT.second.isVector()) return 0; - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; + // The type may be split. For fixed-width vectors we can normalize the + // index to the new type. + if (LT.second.isFixedLengthVector()) { + unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + } // The element at index zero is already inside the vector. 
if (Index == 0) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll new file mode 100644 index 000000000000..a69cb543a3e3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -cost-model -analyze -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + + +define void @ins_el0() #0 { +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 0 + %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 0 + %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 0 + %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 0 + %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 0 + ret void +} + +define void @ins_el1() #0 { +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 1 + %v0 = insertelement <vscale x 16 x i8> zeroinitializer, i8 0, i64 1 + %v1 = insertelement <vscale x 8 x i16> zeroinitializer, i16 0, i64 1 + %v2 = insertelement <vscale x 4 x i32> zeroinitializer, i32 0, i64 1 + %v3 = insertelement <vscale x 2 x i64> zeroinitializer, i64 0, i64 1 + ret void +} + + +define void @ext_el0() #0 { +; CHECK: Cost Model: Found an estimated 
cost of 0 for instruction: %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 0 + %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 0 + %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 0 + %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 0 + %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 0 + ret void +} + +define void @ext_el1() #0 { +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 1 + %v0 = extractelement <vscale x 16 x i8> zeroinitializer, i64 1 + %v1 = extractelement <vscale x 8 x i16> zeroinitializer, i64 1 + %v2 = extractelement <vscale x 4 x i32> zeroinitializer, i64 1 + %v3 = extractelement <vscale x 2 x i64> zeroinitializer, i64 1 + ret void +} + + +attributes #0 = { "target-features"="+sve" vscale_range(1, 16) } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 7c4a46c1343b..01c7d87097e1 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -3,8 +3,8 @@ define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) { ; CHECK-LABEL: 'vector_insert_extract' -; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 72 
for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void