diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 32e0f986695d..c0a12a82a32d 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -248,19 +248,31 @@ int X86TTIImpl::getArithmeticInstrCost( // custom. // Constant splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. + { ISD::SHL, MVT::v32i8, 2 }, // psllw. { ISD::SHL, MVT::v8i16, 1 }, // psllw. + { ISD::SHL, MVT::v16i16, 2 }, // psllw. { ISD::SHL, MVT::v4i32, 1 }, // pslld + { ISD::SHL, MVT::v8i32, 2 }, // pslld { ISD::SHL, MVT::v2i64, 1 }, // psllq. + { ISD::SHL, MVT::v4i64, 2 }, // psllq. { ISD::SRL, MVT::v16i8, 1 }, // psrlw. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw. { ISD::SRL, MVT::v8i16, 1 }, // psrlw. + { ISD::SRL, MVT::v16i16, 2 }, // psrlw. { ISD::SRL, MVT::v4i32, 1 }, // psrld. + { ISD::SRL, MVT::v8i32, 2 }, // psrld. { ISD::SRL, MVT::v2i64, 1 }, // psrlq. + { ISD::SRL, MVT::v4i64, 2 }, // psrlq. { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v8i16, 1 }, // psraw. + { ISD::SRA, MVT::v16i16, 2 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. + { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence @@ -282,15 +294,22 @@ int X86TTIImpl::getArithmeticInstrCost( if (ISD == ISD::SHL && Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { EVT VT = LT.second; + // Vector shift left by non uniform constant can be lowered + // into vector multiply (pmullw/pmulld). if ((VT == MVT::v8i16 && ST->hasSSE2()) || (VT == MVT::v4i32 && ST->hasSSE41())) - // Vector shift left by non uniform constant can be lowered - // into vector multiply (pmullw/pmulld). return LT.first; + + // v16i16 and v8i32 shifts by non-uniform constants are lowered into a + // sequence of extract + two vector multiply + insert. + if ((VT == MVT::v8i32 || VT == MVT::v16i16) && + (ST->hasAVX() && !ST->hasAVX2())) + ISD = ISD::MUL; + + // A vector shift left by non uniform constant is converted + // into a vector multiply; the new multiply is eventually + // lowered into a sequence of shuffles and 2 x pmuludq. if (VT == MVT::v4i32 && ST->hasSSE2()) - // A vector shift left by non uniform constant is converted - // into a vector multiply; the new multiply is eventually - // lowered into a sequence of shuffles and 2 x pmuludq. ISD = ISD::MUL; } @@ -304,20 +323,31 @@ int X86TTIImpl::getArithmeticInstrCost( // used for vectorization and we don't want to make vectorized code worse // than scalar code. { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. + { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul. { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SHL, MVT::v4i64, 8 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend. { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. + { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular @@ -363,12 +393,6 @@ int X86TTIImpl::getArithmeticInstrCost( if (ST->hasAVX() && !ST->hasAVX2()) { EVT VT = LT.second; - // v16i16 and v8i32 shifts by non-uniform constants are lowered into a - // sequence of extract + two vector multiply + insert. - if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) && - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) - ISD = ISD::MUL; - int Idx = CostTableLookup(AVX1CostTable, ISD, VT); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index cb1aa60f034c..a0d07d7b6ec0 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -26,7 +26,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, %b @@ -49,7 +49,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -72,7 +72,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, %b @@ -94,7 +94,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 108 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, %b @@ -121,7 +121,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer @@ -146,7 +146,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -171,7 +171,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -195,7 +195,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 108 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -222,7 +222,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, @@ -245,7 +245,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -268,7 +268,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, @@ -290,7 +290,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 108 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, @@ -316,7 +316,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, @@ -384,7 +384,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 08b752b778e2..a686b4368f21 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -27,7 +27,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -51,7 +51,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -74,7 +74,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, %b @@ -96,7 +96,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, %b @@ -124,7 +124,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -150,7 +150,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -175,7 +175,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -199,7 +199,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -227,7 +227,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -251,7 +251,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 32 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -274,7 +274,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, @@ -296,7 +296,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 116828c9964c..85ca5a5a7f32 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -52,7 +52,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift ; SSE41: Found an estimated cost of 20 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -75,7 +75,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i16> %a, %b @@ -97,7 +97,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, %b @@ -151,7 +151,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift ; SSE41: Found an estimated cost of 20 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -176,7 +176,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 64 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -200,7 +200,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -298,7 +298,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, @@ -325,7 +325,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift