[X86][AVX1] Account for cost of extract/insert of 256-bit SDIV/UDIV by mul sequences

llvm-svn: 303017
This commit is contained in:
Simon Pilgrim 2017-05-14 18:52:15 +00:00
parent c1c78617e7
commit d3f0d03cc5
2 changed files with 31 additions and 31 deletions

View File

@ -247,29 +247,29 @@ int X86TTIImpl::getArithmeticInstrCost(
} }
static const CostTblEntry SSE2UniformConstCostTable[] = { static const CostTblEntry SSE2UniformConstCostTable[] = {
{ ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split. { ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split. { ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split. { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
{ ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split.
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
{ ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence { ISD::SDIV, MVT::v8i32, 38+2 }, // 2*pmuludq sequence + split.
{ ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
{ ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence { ISD::UDIV, MVT::v8i32, 30+2 }, // 2*pmuludq sequence + split.
{ ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
}; };
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasSSE2()) { ST->hasSSE2()) {
// pmuldq sequence. // pmuldq sequence.
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
return LT.first * 30; return LT.first * 32;
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 15; return LT.first * 15;

View File

@ -139,14 +139,14 @@ define i32 @sdiv_uniformconst() {
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
; AVX1: cost of 30 {{.*}} %V8i32 = sdiv ; AVX1: cost of 32 {{.*}} %V8i32 = sdiv
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
%V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
; AVX1: cost of 60 {{.*}} %V16i32 = sdiv ; AVX1: cost of 64 {{.*}} %V16i32 = sdiv
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
%V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@ -157,12 +157,12 @@ define i32 @sdiv_uniformconst() {
; AVX: cost of 6 {{.*}} %V8i16 = sdiv ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
%V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 12 {{.*}} %V16i16 = sdiv ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
; AVX1: cost of 12 {{.*}} %V16i16 = sdiv ; AVX1: cost of 14 {{.*}} %V16i16 = sdiv
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
%V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 24 {{.*}} %V32i16 = sdiv ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
; AVX1: cost of 24 {{.*}} %V32i16 = sdiv ; AVX1: cost of 28 {{.*}} %V32i16 = sdiv
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
@ -203,12 +203,12 @@ define i32 @udiv_uniformconst() {
; AVX: cost of 15 {{.*}} %V4i32 = udiv ; AVX: cost of 15 {{.*}} %V4i32 = udiv
%V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7> %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
; SSE: cost of 30 {{.*}} %V8i32 = udiv ; SSE: cost of 30 {{.*}} %V8i32 = udiv
; AVX1: cost of 30 {{.*}} %V8i32 = udiv ; AVX1: cost of 32 {{.*}} %V8i32 = udiv
; AVX2: cost of 15 {{.*}} %V8i32 = udiv ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
; AVX512: cost of 15 {{.*}} %V8i32 = udiv ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
%V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; SSE: cost of 60 {{.*}} %V16i32 = udiv ; SSE: cost of 60 {{.*}} %V16i32 = udiv
; AVX1: cost of 60 {{.*}} %V16i32 = udiv ; AVX1: cost of 64 {{.*}} %V16i32 = udiv
; AVX2: cost of 30 {{.*}} %V16i32 = udiv ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
; AVX512: cost of 15 {{.*}} %V16i32 = udiv ; AVX512: cost of 15 {{.*}} %V16i32 = udiv
%V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@ -219,12 +219,12 @@ define i32 @udiv_uniformconst() {
; AVX: cost of 6 {{.*}} %V8i16 = udiv ; AVX: cost of 6 {{.*}} %V8i16 = udiv
%V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 12 {{.*}} %V16i16 = udiv ; SSE: cost of 12 {{.*}} %V16i16 = udiv
; AVX1: cost of 12 {{.*}} %V16i16 = udiv ; AVX1: cost of 14 {{.*}} %V16i16 = udiv
; AVX2: cost of 6 {{.*}} %V16i16 = udiv ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
; AVX512: cost of 6 {{.*}} %V16i16 = udiv ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
%V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; SSE: cost of 24 {{.*}} %V32i16 = udiv ; SSE: cost of 24 {{.*}} %V32i16 = udiv
; AVX1: cost of 24 {{.*}} %V32i16 = udiv ; AVX1: cost of 28 {{.*}} %V32i16 = udiv
; AVX2: cost of 12 {{.*}} %V32i16 = udiv ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
@ -269,14 +269,14 @@ define i32 @sdiv_uniformconstpow2() {
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
; AVX1: cost of 30 {{.*}} %V8i32 = sdiv ; AVX1: cost of 32 {{.*}} %V8i32 = sdiv
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
%V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
; AVX1: cost of 60 {{.*}} %V16i32 = sdiv ; AVX1: cost of 64 {{.*}} %V16i32 = sdiv
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
%V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@ -287,12 +287,12 @@ define i32 @sdiv_uniformconstpow2() {
; AVX: cost of 6 {{.*}} %V8i16 = sdiv ; AVX: cost of 6 {{.*}} %V8i16 = sdiv
%V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 12 {{.*}} %V16i16 = sdiv ; SSE: cost of 12 {{.*}} %V16i16 = sdiv
; AVX1: cost of 12 {{.*}} %V16i16 = sdiv ; AVX1: cost of 14 {{.*}} %V16i16 = sdiv
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
%V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 24 {{.*}} %V32i16 = sdiv ; SSE: cost of 24 {{.*}} %V32i16 = sdiv
; AVX1: cost of 24 {{.*}} %V32i16 = sdiv ; AVX1: cost of 28 {{.*}} %V32i16 = sdiv
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
@ -333,12 +333,12 @@ define i32 @udiv_uniformconstpow2() {
; AVX: cost of 15 {{.*}} %V4i32 = udiv ; AVX: cost of 15 {{.*}} %V4i32 = udiv
%V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16> %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
; SSE: cost of 30 {{.*}} %V8i32 = udiv ; SSE: cost of 30 {{.*}} %V8i32 = udiv
; AVX1: cost of 30 {{.*}} %V8i32 = udiv ; AVX1: cost of 32 {{.*}} %V8i32 = udiv
; AVX2: cost of 15 {{.*}} %V8i32 = udiv ; AVX2: cost of 15 {{.*}} %V8i32 = udiv
; AVX512: cost of 15 {{.*}} %V8i32 = udiv ; AVX512: cost of 15 {{.*}} %V8i32 = udiv
%V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; SSE: cost of 60 {{.*}} %V16i32 = udiv ; SSE: cost of 60 {{.*}} %V16i32 = udiv
; AVX1: cost of 60 {{.*}} %V16i32 = udiv ; AVX1: cost of 64 {{.*}} %V16i32 = udiv
; AVX2: cost of 30 {{.*}} %V16i32 = udiv ; AVX2: cost of 30 {{.*}} %V16i32 = udiv
; AVX512: cost of 15 {{.*}} %V16i32 = udiv ; AVX512: cost of 15 {{.*}} %V16i32 = udiv
%V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@ -349,12 +349,12 @@ define i32 @udiv_uniformconstpow2() {
; AVX: cost of 6 {{.*}} %V8i16 = udiv ; AVX: cost of 6 {{.*}} %V8i16 = udiv
%V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 12 {{.*}} %V16i16 = udiv ; SSE: cost of 12 {{.*}} %V16i16 = udiv
; AVX1: cost of 12 {{.*}} %V16i16 = udiv ; AVX1: cost of 14 {{.*}} %V16i16 = udiv
; AVX2: cost of 6 {{.*}} %V16i16 = udiv ; AVX2: cost of 6 {{.*}} %V16i16 = udiv
; AVX512: cost of 6 {{.*}} %V16i16 = udiv ; AVX512: cost of 6 {{.*}} %V16i16 = udiv
%V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
; SSE: cost of 24 {{.*}} %V32i16 = udiv ; SSE: cost of 24 {{.*}} %V32i16 = udiv
; AVX1: cost of 24 {{.*}} %V32i16 = udiv ; AVX1: cost of 28 {{.*}} %V32i16 = udiv
; AVX2: cost of 12 {{.*}} %V32i16 = udiv ; AVX2: cost of 12 {{.*}} %V32i16 = udiv
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv