diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index e3ecf14d1f2f..03c5eba36926 100644 --- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Target/TargetLowering.h" #include <utility> - using namespace llvm; namespace { @@ -405,7 +404,9 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { - return 1; + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); + + return LT.first; } unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index a6ed798b95b8..662110f2720d 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -221,9 +221,9 @@ define i32 @casts() { %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r97 = fptosi <2 x float> undef to <2 x i32> - ; CHECK: cost of 24 {{.*}} fptoui + ; CHECK: cost of 28 {{.*}} fptoui %r98 = fptoui <2 x float> undef to <2 x i64> - ; CHECK: cost of 24 {{.*}} fptosi + ; CHECK: cost of 28 {{.*}} fptosi %r99 = fptosi <2 x float> undef to <2 x i64> ; CHECK: cost of 8 {{.*}} fptoui @@ -242,9 +242,9 @@ define i32 @casts() { %r106 = fptoui <2 x double> undef to <2 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r107 = fptosi <2 x double> undef to <2 x i32> - ; CHECK: cost of 24 {{.*}} fptoui + ; CHECK: cost of 28 {{.*}} fptoui %r108 = fptoui <2 x double> undef to <2 x i64> - ; CHECK: cost of 24 {{.*}} fptosi + ; CHECK: cost of 28 {{.*}} fptosi %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK: cost of 16 {{.*}} fptoui @@ -263,9 +263,9 @@ define i32 @casts() { %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r117 = fptosi <4 x 
float> undef to <4 x i32> - ; CHECK: cost of 48 {{.*}} fptoui + ; CHECK: cost of 56 {{.*}} fptoui %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: cost of 48 {{.*}} fptosi + ; CHECK: cost of 56 {{.*}} fptosi %r119 = fptosi <4 x float> undef to <4 x i64> ; CHECK: cost of 16 {{.*}} fptoui @@ -284,9 +284,9 @@ define i32 @casts() { %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK: cost of 16 {{.*}} fptosi %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: cost of 48 {{.*}} fptoui + ; CHECK: cost of 56 {{.*}} fptoui %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: cost of 48 {{.*}} fptosi + ; CHECK: cost of 56 {{.*}} fptosi %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK: cost of 32 {{.*}} fptoui @@ -305,9 +305,9 @@ define i32 @casts() { %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: cost of 96 {{.*}} fptoui + ; CHECK: cost of 112 {{.*}} fptoui %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: cost of 96 {{.*}} fptosi + ; CHECK: cost of 112 {{.*}} fptosi %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK: cost of 32 {{.*}} fptoui @@ -326,9 +326,9 @@ define i32 @casts() { %r146 = fptoui <8 x double> undef to <8 x i32> ; CHECK: cost of 32 {{.*}} fptosi %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: cost of 96 {{.*}} fptoui + ; CHECK: cost of 112 {{.*}} fptoui %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: cost of 96 {{.*}} fptosi + ; CHECK: cost of 112 {{.*}} fptosi %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK: cost of 64 {{.*}} fptoui @@ -347,9 +347,9 @@ define i32 @casts() { %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: cost of 4 {{.*}} fptosi %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: cost of 192 {{.*}} fptoui + ; CHECK: cost of 224 {{.*}} fptoui %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: cost of 192 {{.*}} fptosi + ; CHECK: cost of 224 {{.*}} fptosi %r159 = 
fptosi <16 x float> undef to <16 x i64> ; CHECK: cost of 64 {{.*}} fptoui @@ -368,9 +368,9 @@ define i32 @casts() { %r166 = fptoui <16 x double> undef to <16 x i32> ; CHECK: cost of 64 {{.*}} fptosi %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: cost of 192 {{.*}} fptoui + ; CHECK: cost of 224 {{.*}} fptoui %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: cost of 192 {{.*}} fptosi + ; CHECK: cost of 224 {{.*}} fptosi %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: cost of 8 {{.*}} uitofp diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll new file mode 100644 index 000000000000..fc25fcbc563f --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32 +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64 + +; Test vector scalarization costs. 
+; RUN: llc < %s -march=x86 -mcpu=i386 +; RUN: llc < %s -march=x86 -mcpu=yonah + +%i4 = type <4 x i32> +%i8 = type <2 x i64> + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +declare %i4 @llvm.bswap.v4i32(%i4) +declare %i8 @llvm.bswap.v2i64(%i8) + +declare %i4 @llvm.ctpop.v4i32(%i4) +declare %i8 @llvm.ctpop.v2i64(%i8) + +; CHECK32-LABEL: test_scalarized_intrinsics +; CHECK64-LABEL: test_scalarized_intrinsics +define void @test_scalarized_intrinsics() { + %r1 = add %i8 undef, undef + +; CHECK32: cost of 12 {{.*}}bswap.v4i32 +; CHECK64: cost of 12 {{.*}}bswap.v4i32 + %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef) +; CHECK32: cost of 10 {{.*}}bswap.v2i64 +; CHECK64: cost of 6 {{.*}}bswap.v2i64 + %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef) + +; CHECK32: cost of 12 {{.*}}ctpop.v4i32 +; CHECK64: cost of 12 {{.*}}ctpop.v4i32 + %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef) +; CHECK32: cost of 10 {{.*}}ctpop.v2i64 +; CHECK64: cost of 6 {{.*}}ctpop.v2i64 + %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef) + +; CHECK32: ret +; CHECK64: ret + ret void +}