From acbf51ab60e12c2b02a8fef836ff6edf6f67d389 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 11 Oct 2015 17:29:26 +0000
Subject: [PATCH] [X86] Added LSHR cost model tests

There are several dodgy costings due to AVX1 legalizing 256-bit integer vectors that need fixing.
As discussed in D8690.

llvm-svn: 249983
---
 .../CostModel/X86/vshift-lshr-cost.ll | 400 ++++++++++++++++++
 1 file changed, 400 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll

diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
new file mode 100644
index 000000000000..08b752b778e2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -0,0 +1,400 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+
+; Verify the cost of vector logical shift right instructions.
+
+;
+; Variable Shifts
+; (each lane of %a is shifted right by the matching lane of %b)
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <2 x i64> %a, %b
+  ret <2 x i64> %shift
+}
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i64> %a, %b
+  ret <4 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i32> %a, %b
+  ret <4 x i32> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <8 x i32> %a, %b
+  ret <8 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <8 x i16> %a, %b
+  ret <8 x i16> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <16 x i16> %a, %b
+  ret <16 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <16 x i8> %a, %b
+  ret <16 x i8> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <32 x i8> %a, %b
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+; (lane 0 of %b is broadcast with shufflevector and used as the amount for every lane)
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shift = lshr <2 x i64> %a, %splat
+  ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shift = lshr <4 x i64> %a, %splat
+  ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shift = lshr <4 x i32> %a, %splat
+  ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+  %shift = lshr <8 x i32> %a, %splat
+  ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shift = lshr <8 x i16> %a, %splat
+  ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+  %shift = lshr <16 x i16> %a, %splat
+  ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %shift = lshr <16 x i8> %a, %splat
+  ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+  %shift = lshr <32 x i8> %a, %splat
+  ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+; NOTE(review): the per-lane constant operands were missing from the source and are reconstructed - confirm against upstream.
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+  ret <4 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+  ret <8 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  ret <8 x i16> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+  ret <16 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <16 x i8> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+; NOTE(review): the splat constant operands were missing from the source and are reconstructed - confirm against upstream.
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+  ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+  %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %shift
+}