forked from OSchip/llvm-project
[CostModel][X86] Adjust pre-SSE41 fp scalar select costs to account for vector ops
Based on the script from D103695, we now mainly use BLENDV or an OR(AND,ANDN) sequence to select scalar float/double values.
This commit is contained in:
parent
7cc8377f2c
commit
d21bf51494
|
@ -2753,7 +2753,9 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
|
||||
static const CostTblEntry SSE41CostTbl[] = {
|
||||
{ ISD::SELECT, MVT::v2f64, 1 }, // blendvpd
|
||||
{ ISD::SELECT, MVT::f64, 1 }, // blendvpd
|
||||
{ ISD::SELECT, MVT::v4f32, 1 }, // blendvps
|
||||
{ ISD::SELECT, MVT::f32 , 1 }, // blendvps
|
||||
{ ISD::SELECT, MVT::v2i64, 1 }, // pblendvb
|
||||
{ ISD::SELECT, MVT::v4i32, 1 }, // pblendvb
|
||||
{ ISD::SELECT, MVT::v8i16, 1 }, // pblendvb
|
||||
|
@ -2769,6 +2771,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
{ ISD::SETCC, MVT::v16i8, 1 },
|
||||
|
||||
{ ISD::SELECT, MVT::v2f64, 2 }, // andpd + andnpd + orpd
|
||||
{ ISD::SELECT, MVT::f64, 2 }, // andpd + andnpd + orpd
|
||||
{ ISD::SELECT, MVT::v2i64, 2 }, // pand + pandn + por
|
||||
{ ISD::SELECT, MVT::v4i32, 2 }, // pand + pandn + por
|
||||
{ ISD::SELECT, MVT::v8i16, 2 }, // pand + pandn + por
|
||||
|
@ -2780,6 +2783,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
|||
{ ISD::SETCC, MVT::f32, 1 },
|
||||
|
||||
{ ISD::SELECT, MVT::v4f32, 2 }, // andps + andnps + orps
|
||||
{ ISD::SELECT, MVT::f32, 2 }, // andps + andnps + orps
|
||||
};
|
||||
|
||||
if (ST->useSLMArithCosts())
|
||||
|
|
|
@ -148,11 +148,11 @@ define i32 @test_select() {
|
|||
|
||||
define i32 @test_select_fp() {
|
||||
; SSE2-LABEL: 'test_select_fp'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = select i1 undef, double undef, double undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = select i1 undef, double undef, double undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = select <8 x i1> undef, <8 x double> undef, <8 x double> undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = select i1 undef, float undef, float undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = select i1 undef, float undef, float undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = select <8 x i1> undef, <8 x float> undef, <8 x float> undef
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = select <16 x i1> undef, <16 x float> undef, <16 x float> undef
|
||||
|
|
|
@ -1,87 +1,47 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s -check-prefix=SSE
|
||||
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mattr=+avx | FileCheck %s -check-prefix=AVX
|
||||
; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s
|
||||
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mattr=+avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
|
||||
define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
|
||||
; SSE-LABEL: @testfunc(
|
||||
; SSE-NEXT: entry:
|
||||
; SSE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; SSE: for.body:
|
||||
; SSE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; SSE-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
|
||||
; SSE-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
|
||||
; SSE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; SSE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; SSE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
|
||||
; SSE-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
|
||||
; SSE-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
|
||||
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
|
||||
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
|
||||
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
|
||||
; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP5]], i32 1
|
||||
; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
|
||||
; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP1]], i32 1
|
||||
; SSE-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP6]], [[TMP8]]
|
||||
; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP2]], [[TMP9]]
|
||||
; SSE-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; SSE-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; SSE-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; SSE-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
|
||||
; SSE-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP14]]
|
||||
; SSE-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
|
||||
; SSE-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
|
||||
; SSE-NEXT: [[ADD13]] = fadd float [[TMP16]], [[TMP17]]
|
||||
; SSE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i32 0
|
||||
; SSE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> [[TMP18]], float [[ADD13]], i32 1
|
||||
; SSE-NEXT: [[TMP20:%.*]] = fcmp olt <2 x float> [[TMP19]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; SSE-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP19]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; SSE-NEXT: [[TMP22:%.*]] = fcmp olt <2 x float> [[TMP21]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; SSE-NEXT: [[TMP23]] = select <2 x i1> [[TMP22]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP21]]
|
||||
; SSE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
|
||||
; SSE-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
||||
; SSE: for.end:
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @testfunc(
|
||||
; AVX-NEXT: entry:
|
||||
; AVX-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; AVX: for.body:
|
||||
; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; AVX-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
|
||||
; AVX-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
|
||||
; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; AVX-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
|
||||
; AVX-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
|
||||
; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
|
||||
; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
|
||||
; AVX-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]]
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
|
||||
; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]]
|
||||
; AVX-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP9:%.*]] = fcmp olt <2 x float> [[TMP8]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; AVX-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer
|
||||
; AVX-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP10]]
|
||||
; AVX-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
|
||||
; AVX-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
|
||||
; AVX-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]]
|
||||
; AVX-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
|
||||
; AVX-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1
|
||||
; AVX-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; AVX-NEXT: [[TMP18:%.*]] = fcmp olt <2 x float> [[TMP17]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; AVX-NEXT: [[TMP19]] = select <2 x i1> [[TMP18]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP17]]
|
||||
; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
|
||||
; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
||||
; AVX: for.end:
|
||||
; AVX-NEXT: ret void
|
||||
; CHECK-LABEL: @testfunc(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]]
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x float> [[TMP8]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], <float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fcmp olt <2 x float> [[TMP17]], <float -1.000000e+00, float -1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP19]] = select <2 x i1> [[TMP18]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP17]]
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
|
Loading…
Reference in New Issue