forked from OSchip/llvm-project
When analyzing vectors whose element type requires legalization,
the legalization cost must be included to get an accurate estimate of the total cost of the scalarized vector. The inaccurate cost triggered unprofitable SLP vectorization on 32-bit X86. Summary: Include legalization overhead when computing scalarization cost. Reviewers: hfinkel, nadav. CC: chandlerc, rnk, llvm-commits. Differential Revision: http://llvm-reviews.chandlerc.com/D2992 llvm-svn: 203509
This commit is contained in:
parent
92aa8c220a
commit
ce376c0fcb
|
@ -20,7 +20,6 @@
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
#include "llvm/Target/TargetLowering.h"
|
#include "llvm/Target/TargetLowering.h"
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -405,7 +404,9 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||||
|
|
||||||
unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
|
unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||||
unsigned Index) const {
|
unsigned Index) const {
|
||||||
return 1;
|
std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType());
|
||||||
|
|
||||||
|
return LT.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
|
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||||
|
|
|
@ -221,9 +221,9 @@ define i32 @casts() {
|
||||||
%r96 = fptoui <2 x float> undef to <2 x i32>
|
%r96 = fptoui <2 x float> undef to <2 x i32>
|
||||||
; CHECK: cost of 1 {{.*}} fptosi
|
; CHECK: cost of 1 {{.*}} fptosi
|
||||||
%r97 = fptosi <2 x float> undef to <2 x i32>
|
%r97 = fptosi <2 x float> undef to <2 x i32>
|
||||||
; CHECK: cost of 24 {{.*}} fptoui
|
; CHECK: cost of 28 {{.*}} fptoui
|
||||||
%r98 = fptoui <2 x float> undef to <2 x i64>
|
%r98 = fptoui <2 x float> undef to <2 x i64>
|
||||||
; CHECK: cost of 24 {{.*}} fptosi
|
; CHECK: cost of 28 {{.*}} fptosi
|
||||||
%r99 = fptosi <2 x float> undef to <2 x i64>
|
%r99 = fptosi <2 x float> undef to <2 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 8 {{.*}} fptoui
|
; CHECK: cost of 8 {{.*}} fptoui
|
||||||
|
@ -242,9 +242,9 @@ define i32 @casts() {
|
||||||
%r106 = fptoui <2 x double> undef to <2 x i32>
|
%r106 = fptoui <2 x double> undef to <2 x i32>
|
||||||
; CHECK: cost of 2 {{.*}} fptosi
|
; CHECK: cost of 2 {{.*}} fptosi
|
||||||
%r107 = fptosi <2 x double> undef to <2 x i32>
|
%r107 = fptosi <2 x double> undef to <2 x i32>
|
||||||
; CHECK: cost of 24 {{.*}} fptoui
|
; CHECK: cost of 28 {{.*}} fptoui
|
||||||
%r108 = fptoui <2 x double> undef to <2 x i64>
|
%r108 = fptoui <2 x double> undef to <2 x i64>
|
||||||
; CHECK: cost of 24 {{.*}} fptosi
|
; CHECK: cost of 28 {{.*}} fptosi
|
||||||
%r109 = fptosi <2 x double> undef to <2 x i64>
|
%r109 = fptosi <2 x double> undef to <2 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 16 {{.*}} fptoui
|
; CHECK: cost of 16 {{.*}} fptoui
|
||||||
|
@ -263,9 +263,9 @@ define i32 @casts() {
|
||||||
%r116 = fptoui <4 x float> undef to <4 x i32>
|
%r116 = fptoui <4 x float> undef to <4 x i32>
|
||||||
; CHECK: cost of 1 {{.*}} fptosi
|
; CHECK: cost of 1 {{.*}} fptosi
|
||||||
%r117 = fptosi <4 x float> undef to <4 x i32>
|
%r117 = fptosi <4 x float> undef to <4 x i32>
|
||||||
; CHECK: cost of 48 {{.*}} fptoui
|
; CHECK: cost of 56 {{.*}} fptoui
|
||||||
%r118 = fptoui <4 x float> undef to <4 x i64>
|
%r118 = fptoui <4 x float> undef to <4 x i64>
|
||||||
; CHECK: cost of 48 {{.*}} fptosi
|
; CHECK: cost of 56 {{.*}} fptosi
|
||||||
%r119 = fptosi <4 x float> undef to <4 x i64>
|
%r119 = fptosi <4 x float> undef to <4 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 16 {{.*}} fptoui
|
; CHECK: cost of 16 {{.*}} fptoui
|
||||||
|
@ -284,9 +284,9 @@ define i32 @casts() {
|
||||||
%r126 = fptoui <4 x double> undef to <4 x i32>
|
%r126 = fptoui <4 x double> undef to <4 x i32>
|
||||||
; CHECK: cost of 16 {{.*}} fptosi
|
; CHECK: cost of 16 {{.*}} fptosi
|
||||||
%r127 = fptosi <4 x double> undef to <4 x i32>
|
%r127 = fptosi <4 x double> undef to <4 x i32>
|
||||||
; CHECK: cost of 48 {{.*}} fptoui
|
; CHECK: cost of 56 {{.*}} fptoui
|
||||||
%r128 = fptoui <4 x double> undef to <4 x i64>
|
%r128 = fptoui <4 x double> undef to <4 x i64>
|
||||||
; CHECK: cost of 48 {{.*}} fptosi
|
; CHECK: cost of 56 {{.*}} fptosi
|
||||||
%r129 = fptosi <4 x double> undef to <4 x i64>
|
%r129 = fptosi <4 x double> undef to <4 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 32 {{.*}} fptoui
|
; CHECK: cost of 32 {{.*}} fptoui
|
||||||
|
@ -305,9 +305,9 @@ define i32 @casts() {
|
||||||
%r136 = fptoui <8 x float> undef to <8 x i32>
|
%r136 = fptoui <8 x float> undef to <8 x i32>
|
||||||
; CHECK: cost of 2 {{.*}} fptosi
|
; CHECK: cost of 2 {{.*}} fptosi
|
||||||
%r137 = fptosi <8 x float> undef to <8 x i32>
|
%r137 = fptosi <8 x float> undef to <8 x i32>
|
||||||
; CHECK: cost of 96 {{.*}} fptoui
|
; CHECK: cost of 112 {{.*}} fptoui
|
||||||
%r138 = fptoui <8 x float> undef to <8 x i64>
|
%r138 = fptoui <8 x float> undef to <8 x i64>
|
||||||
; CHECK: cost of 96 {{.*}} fptosi
|
; CHECK: cost of 112 {{.*}} fptosi
|
||||||
%r139 = fptosi <8 x float> undef to <8 x i64>
|
%r139 = fptosi <8 x float> undef to <8 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 32 {{.*}} fptoui
|
; CHECK: cost of 32 {{.*}} fptoui
|
||||||
|
@ -326,9 +326,9 @@ define i32 @casts() {
|
||||||
%r146 = fptoui <8 x double> undef to <8 x i32>
|
%r146 = fptoui <8 x double> undef to <8 x i32>
|
||||||
; CHECK: cost of 32 {{.*}} fptosi
|
; CHECK: cost of 32 {{.*}} fptosi
|
||||||
%r147 = fptosi <8 x double> undef to <8 x i32>
|
%r147 = fptosi <8 x double> undef to <8 x i32>
|
||||||
; CHECK: cost of 96 {{.*}} fptoui
|
; CHECK: cost of 112 {{.*}} fptoui
|
||||||
%r148 = fptoui <8 x double> undef to <8 x i64>
|
%r148 = fptoui <8 x double> undef to <8 x i64>
|
||||||
; CHECK: cost of 96 {{.*}} fptosi
|
; CHECK: cost of 112 {{.*}} fptosi
|
||||||
%r149 = fptosi <8 x double> undef to <8 x i64>
|
%r149 = fptosi <8 x double> undef to <8 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 64 {{.*}} fptoui
|
; CHECK: cost of 64 {{.*}} fptoui
|
||||||
|
@ -347,9 +347,9 @@ define i32 @casts() {
|
||||||
%r156 = fptoui <16 x float> undef to <16 x i32>
|
%r156 = fptoui <16 x float> undef to <16 x i32>
|
||||||
; CHECK: cost of 4 {{.*}} fptosi
|
; CHECK: cost of 4 {{.*}} fptosi
|
||||||
%r157 = fptosi <16 x float> undef to <16 x i32>
|
%r157 = fptosi <16 x float> undef to <16 x i32>
|
||||||
; CHECK: cost of 192 {{.*}} fptoui
|
; CHECK: cost of 224 {{.*}} fptoui
|
||||||
%r158 = fptoui <16 x float> undef to <16 x i64>
|
%r158 = fptoui <16 x float> undef to <16 x i64>
|
||||||
; CHECK: cost of 192 {{.*}} fptosi
|
; CHECK: cost of 224 {{.*}} fptosi
|
||||||
%r159 = fptosi <16 x float> undef to <16 x i64>
|
%r159 = fptosi <16 x float> undef to <16 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 64 {{.*}} fptoui
|
; CHECK: cost of 64 {{.*}} fptoui
|
||||||
|
@ -368,9 +368,9 @@ define i32 @casts() {
|
||||||
%r166 = fptoui <16 x double> undef to <16 x i32>
|
%r166 = fptoui <16 x double> undef to <16 x i32>
|
||||||
; CHECK: cost of 64 {{.*}} fptosi
|
; CHECK: cost of 64 {{.*}} fptosi
|
||||||
%r167 = fptosi <16 x double> undef to <16 x i32>
|
%r167 = fptosi <16 x double> undef to <16 x i32>
|
||||||
; CHECK: cost of 192 {{.*}} fptoui
|
; CHECK: cost of 224 {{.*}} fptoui
|
||||||
%r168 = fptoui <16 x double> undef to <16 x i64>
|
%r168 = fptoui <16 x double> undef to <16 x i64>
|
||||||
; CHECK: cost of 192 {{.*}} fptosi
|
; CHECK: cost of 224 {{.*}} fptosi
|
||||||
%r169 = fptosi <16 x double> undef to <16 x i64>
|
%r169 = fptosi <16 x double> undef to <16 x i64>
|
||||||
|
|
||||||
; CHECK: cost of 8 {{.*}} uitofp
|
; CHECK: cost of 8 {{.*}} uitofp
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
|
||||||
|
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
|
||||||
|
|
||||||
|
; Test vector scalarization costs.
|
||||||
|
; RUN: llc < %s -march=x86 -mcpu=i386
|
||||||
|
; RUN: llc < %s -march=x86 -mcpu=yonah
|
||||||
|
|
||||||
|
%i4 = type <4 x i32>
|
||||||
|
%i8 = type <2 x i64>
|
||||||
|
|
||||||
|
;;; TEST HANDLING OF VARIOUS VECTOR SIZES
|
||||||
|
|
||||||
|
declare %i4 @llvm.bswap.v4i32(%i4)
|
||||||
|
declare %i8 @llvm.bswap.v2i64(%i8)
|
||||||
|
|
||||||
|
declare %i4 @llvm.ctpop.v4i32(%i4)
|
||||||
|
declare %i8 @llvm.ctpop.v2i64(%i8)
|
||||||
|
|
||||||
|
; CHECK32-LABEL: test_scalarized_intrinsics
|
||||||
|
; CHECK64-LABEL: test_scalarized_intrinsics
|
||||||
|
define void @test_scalarized_intrinsics() {
|
||||||
|
%r1 = add %i8 undef, undef
|
||||||
|
|
||||||
|
; CHECK32: cost of 12 {{.*}}bswap.v4i32
|
||||||
|
; CHECK64: cost of 12 {{.*}}bswap.v4i32
|
||||||
|
%r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
|
||||||
|
; CHECK32: cost of 10 {{.*}}bswap.v2i64
|
||||||
|
; CHECK64: cost of 6 {{.*}}bswap.v2i64
|
||||||
|
%r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
|
||||||
|
|
||||||
|
; CHECK32: cost of 12 {{.*}}ctpop.v4i32
|
||||||
|
; CHECK64: cost of 12 {{.*}}ctpop.v4i32
|
||||||
|
%r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
|
||||||
|
; CHECK32: cost of 10 {{.*}}ctpop.v2i64
|
||||||
|
; CHECK64: cost of 6 {{.*}}ctpop.v2i64
|
||||||
|
%r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
|
||||||
|
|
||||||
|
; CHECK32: ret
|
||||||
|
; CHECK64: ret
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue