From f23aa2a9c95345a126f1895b4af2dd835b7debef Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 27 Apr 2016 12:04:44 +0000 Subject: [PATCH] [InstCombine][SSE] Regenerated vector shift tests llvm-svn: 267699 --- .../InstCombine/x86-vector-shifts.ll | 861 ++++++++++-------- 1 file changed, 505 insertions(+), 356 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll index 59e445a40bef..39fb15b263ea 100644 --- a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -6,93 +7,105 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrai_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_psrai_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0) ret <8 x i16> %1 } define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrai_w_15 -; CHECK-NEXT: %1 = ashr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psrai_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) ret <8 x i16> %1 } define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrai_w_64 -; CHECK-NEXT: %1 = ashr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psrai_w_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) ret <8 x i16> %1 } define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrai_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_psrai_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0) ret <4 x i32> %1 } define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrai_d_15 -; CHECK-NEXT: %1 = ashr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psrai_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) ret <4 x i32> %1 } define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrai_d_64 -; CHECK-NEXT: %1 = ashr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psrai_d_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) ret <4 x i32> %1 } define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrai_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_psrai_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0) ret <16 x i16> %1 } define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrai_w_15 -; CHECK-NEXT: %1 = ashr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psrai_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) ret <16 x i16> %1 } define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrai_w_64 -; CHECK-NEXT: %1 = ashr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psrai_w_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) ret <16 x i16> %1 } define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrai_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_psrai_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0) ret <8 x i32> %1 } define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrai_d_15 -; CHECK-NEXT: %1 = ashr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psrai_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) ret <8 x i32> %1 } define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrai_d_64 -; CHECK-NEXT: %1 = ashr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psrai_d_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) ret <8 x i32> %1 } @@ -102,133 +115,151 @@ define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrli_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_psrli_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0) ret <8 x i16> %1 } define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrli_w_15 -; CHECK-NEXT: %1 = lshr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psrli_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) ret <8 x i16> %1 } define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrli_w_64 -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_psrli_w_64( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64) ret <8 x i16> %1 } define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrli_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_psrli_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0) ret <4 x i32> %1 } define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrli_d_15 -; CHECK-NEXT: %1 = lshr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psrli_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) ret <4 x i32> %1 } define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrli_d_64 -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_psrli_d_64( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64) ret <4 x i32> %1 } define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrli_q_0 -; CHECK-NEXT: ret <2 x i64> %v +; CHECK-LABEL: @sse2_psrli_q_0( +; CHECK-NEXT: ret <2 x i64> %v +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0) ret <2 x i64> %1 } define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrli_q_15 -; CHECK-NEXT: %1 = lshr <2 x i64> %v, -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_psrli_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) ret <2 x i64> %1 } define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrli_q_64 -; CHECK-NEXT: ret <2 x i64> zeroinitializer +; CHECK-LABEL: @sse2_psrli_q_64( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64) ret <2 x i64> %1 } define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrli_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_psrli_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0) ret <16 x i16> %1 } define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrli_w_15 -; CHECK-NEXT: %1 = lshr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psrli_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) ret <16 x i16> %1 } define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrli_w_64 -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_psrli_w_64( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64) ret <16 x i16> %1 } define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrli_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_psrli_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0) ret <8 x i32> %1 } define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrli_d_15 -; CHECK-NEXT: %1 = lshr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psrli_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) ret <8 x i32> %1 } define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrli_d_64 -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_psrli_d_64( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64) ret <8 x i32> %1 } define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrli_q_0 -; CHECK-NEXT: ret <4 x i64> %v +; CHECK-LABEL: @avx2_psrli_q_0( +; CHECK-NEXT: ret <4 x i64> %v +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0) ret <4 x i64> %1 } define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrli_q_15 -; CHECK-NEXT: %1 = lshr <4 x i64> %v, -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_psrli_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) ret <4 x i64> %1 } define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrli_q_64 -; CHECK-NEXT: ret <4 x i64> zeroinitializer +; CHECK-LABEL: @avx2_psrli_q_64( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64) ret <4 x i64> %1 } @@ -238,133 +269,151 @@ define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) { ; define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_pslli_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_pslli_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0) ret <8 x i16> %1 } define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_pslli_w_15 -; CHECK-NEXT: %1 = shl <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_pslli_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) ret <8 x i16> %1 } define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_pslli_w_64 -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_pslli_w_64( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64) ret <8 x i16> %1 } define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_pslli_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_pslli_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0) ret <4 x i32> %1 } define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_pslli_d_15 -; CHECK-NEXT: %1 = shl <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_pslli_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) ret <4 x i32> %1 } define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_pslli_d_64 -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_pslli_d_64( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64) ret <4 x i32> %1 } define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { -; CHECK-LABEL: @sse2_pslli_q_0 -; CHECK-NEXT: ret <2 x i64> %v +; CHECK-LABEL: @sse2_pslli_q_0( +; CHECK-NEXT: ret <2 x i64> %v +; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0) ret <2 x i64> %1 } define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { -; CHECK-LABEL: @sse2_pslli_q_15 -; CHECK-NEXT: %1 = shl <2 x i64> %v, -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_pslli_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) ret <2 x i64> %1 } define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) { -; CHECK-LABEL: @sse2_pslli_q_64 -; CHECK-NEXT: ret <2 x i64> zeroinitializer +; CHECK-LABEL: @sse2_pslli_q_64( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64) ret <2 x i64> %1 } define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_pslli_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_pslli_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0) ret <16 x i16> %1 } define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_pslli_w_15 -; CHECK-NEXT: %1 = shl <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_pslli_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) ret <16 x i16> %1 } define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_pslli_w_64 -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_pslli_w_64( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64) ret <16 x i16> %1 } define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_pslli_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_pslli_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0) ret <8 x i32> %1 } define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_pslli_d_15 -; CHECK-NEXT: %1 = shl <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_pslli_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) ret <8 x i32> %1 } define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_pslli_d_64 -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_pslli_d_64( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64) ret <8 x i32> %1 } define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { -; CHECK-LABEL: @avx2_pslli_q_0 -; CHECK-NEXT: ret <4 x i64> %v +; CHECK-LABEL: @avx2_pslli_q_0( +; CHECK-NEXT: ret <4 x i64> %v +; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0) ret <4 x i64> %1 } define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { -; CHECK-LABEL: @avx2_pslli_q_15 -; CHECK-NEXT: %1 = shl <4 x i64> %v, -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_pslli_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) ret <4 x i64> %1 } define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) { -; CHECK-LABEL: @avx2_pslli_q_64 -; CHECK-NEXT: ret <4 x i64> zeroinitializer +; CHECK-LABEL: @avx2_pslli_q_64( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64) ret <4 x i64> %1 } @@ -374,125 +423,141 @@ define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) { ; define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psra_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_psra_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psra_w_15 -; CHECK-NEXT: %1 = ashr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psra_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psra_w_15_splat -; CHECK-NEXT: %1 = ashr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psra_w_15_splat( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psra_w_64 -; CHECK-NEXT: %1 = ashr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psra_w_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psra_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_psra_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psra_d_15 -; CHECK-NEXT: %1 = ashr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psra_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psra_d_15_splat -; CHECK-NEXT: %1 = ashr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psra_d_15_splat( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psra_d_64 -; CHECK-NEXT: %1 = ashr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psra_d_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psra_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_psra_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psra_w_15 -; CHECK-NEXT: %1 = ashr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psra_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psra_w_15_splat -; CHECK-NEXT: %1 = ashr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psra_w_15_splat( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psra_w_64 -; CHECK-NEXT: %1 = ashr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psra_w_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psra_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_psra_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer) ret <8 x i32> %1 } define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psra_d_15 -; CHECK-NEXT: %1 = ashr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psra_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psra_d_15_splat -; CHECK-NEXT: %1 = ashr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psra_d_15_splat( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psra_d_64 -; CHECK-NEXT: %1 = ashr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psra_d_64( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } @@ -502,161 +567,183 @@ define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrl_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_psrl_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrl_w_15 -; CHECK-NEXT: %1 = lshr <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psrl_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrl_w_15_splat -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_psrl_w_15_splat( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psrl_w_64 -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_psrl_w_64( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrl_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_psrl_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrl_d_15 -; CHECK-NEXT: %1 = lshr <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psrl_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrl_d_15_splat -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_psrl_d_15_splat( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psrl_d_64 -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_psrl_d_64( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrl_q_0 -; CHECK-NEXT: ret <2 x i64> %v +; CHECK-LABEL: @sse2_psrl_q_0( +; CHECK-NEXT: ret <2 x i64> %v +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrl_q_15 -; CHECK-NEXT: %1 = lshr <2 x i64> %v, -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_psrl_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psrl_q_64 -; CHECK-NEXT: ret <2 x i64> zeroinitializer +; CHECK-LABEL: @sse2_psrl_q_64( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrl_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_psrl_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrl_w_15 -; CHECK-NEXT: %1 = lshr <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psrl_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrl_w_15_splat -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_psrl_w_15_splat( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psrl_w_64 -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_psrl_w_64( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrl_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_psrl_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrl_d_15 -; CHECK-NEXT: %1 = lshr <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psrl_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrl_d_15_splat -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_psrl_d_15_splat( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psrl_d_64 -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_psrl_d_64( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrl_q_0 -; CHECK-NEXT: ret <4 x i64> %v +; CHECK-LABEL: @avx2_psrl_q_0( +; CHECK-NEXT: ret <4 x i64> %v +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) ret <4 x i64> %1 } define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrl_q_15 -; CHECK-NEXT: %1 = lshr <4 x i64> %v, -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_psrl_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrl_q_64 -; CHECK-NEXT: ret <4 x i64> zeroinitializer +; CHECK-LABEL: @avx2_psrl_q_64( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } @@ -666,161 +753,183 @@ define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) { ; define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psll_w_0 -; CHECK-NEXT: ret <8 x i16> %v +; CHECK-LABEL: @sse2_psll_w_0( +; CHECK-NEXT: ret <8 x i16> %v +; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psll_w_15 -; CHECK-NEXT: %1 = shl <8 x i16> %v, -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psll_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psll_w_15_splat -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_psll_w_15_splat( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) { -; CHECK-LABEL: @sse2_psll_w_64 -; CHECK-NEXT: ret <8 x i16> zeroinitializer +; CHECK-LABEL: @sse2_psll_w_64( +; CHECK-NEXT: ret <8 x i16> zeroinitializer +; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 } define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psll_d_0 -; CHECK-NEXT: ret <4 x i32> %v +; CHECK-LABEL: @sse2_psll_d_0( +; CHECK-NEXT: ret <4 x i32> %v +; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psll_d_15 -; CHECK-NEXT: %1 = shl <4 x i32> %v, -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psll_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psll_d_15_splat -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_psll_d_15_splat( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) { -; CHECK-LABEL: @sse2_psll_d_64 -; CHECK-NEXT: ret <4 x i32> zeroinitializer +; CHECK-LABEL: @sse2_psll_d_64( +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 } define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psll_q_0 -; CHECK-NEXT: ret <2 x i64> %v +; CHECK-LABEL: @sse2_psll_q_0( +; CHECK-NEXT: ret <2 x i64> %v +; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer) ret <2 x i64> %1 } define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psll_q_15 -; CHECK-NEXT: %1 = shl <2 x i64> %v, -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_psll_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) { -; CHECK-LABEL: @sse2_psll_q_64 -; CHECK-NEXT: ret <2 x i64> zeroinitializer +; CHECK-LABEL: @sse2_psll_q_64( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 } define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psll_w_0 -; CHECK-NEXT: ret <16 x i16> %v +; CHECK-LABEL: @avx2_psll_w_0( +; CHECK-NEXT: ret <16 x i16> %v +; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psll_w_15 -; CHECK-NEXT: %1 = shl <16 x i16> %v, -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psll_w_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psll_w_15_splat -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_psll_w_15_splat( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) { -; CHECK-LABEL: @avx2_psll_w_64 -; CHECK-NEXT: ret <16 x i16> zeroinitializer +; CHECK-LABEL: @avx2_psll_w_64( +; CHECK-NEXT: ret <16 x i16> zeroinitializer +; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 } define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psll_d_0 -; CHECK-NEXT: ret <8 x i32> %v +; CHECK-LABEL: @avx2_psll_d_0( +; CHECK-NEXT: ret <8 x i32> %v +; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psll_d_15 -; CHECK-NEXT: %1 = shl <8 x i32> %v, -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psll_d_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psll_d_15_splat -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_psll_d_15_splat( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) { -; CHECK-LABEL: @avx2_psll_d_64 -; CHECK-NEXT: ret <8 x i32> zeroinitializer +; CHECK-LABEL: @avx2_psll_d_64( +; CHECK-NEXT: ret <8 x i32> zeroinitializer +; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 } define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psll_q_0 -; CHECK-NEXT: ret <4 x i64> %v +; CHECK-LABEL: @avx2_psll_q_0( +; CHECK-NEXT: ret <4 x i64> %v +; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer) ret <4 x i64> %1 } define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psll_q_15 -; CHECK-NEXT: %1 = shl <4 x i64> %v, -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_psll_q_15( +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psll_q_64 -; CHECK-NEXT: ret <4 x i64> zeroinitializer +; CHECK-LABEL: @avx2_psll_q_64( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 } @@ -830,19 +939,21 @@ define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { ; define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @sse2_psra_w_var -; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psra_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { -; CHECK-LABEL: @sse2_psra_w_var_bc -; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16> -; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) -; CHECK-NEXT: ret <8 x i16> %2 +; CHECK-LABEL: @sse2_psra_w_var_bc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <8 x i16> %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2) @@ -850,19 +961,21 @@ define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { } define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @sse2_psra_d_var -; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psra_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { -; CHECK-LABEL: @sse2_psra_d_var_bc -; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32> -; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) -; CHECK-NEXT: ret <4 x i32> %2 +; CHECK-LABEL: @sse2_psra_d_var_bc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = bitcast <8 x i16> %1 to <4 x i32> %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2) @@ -870,64 +983,71 @@ define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { } define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @avx2_psra_w_var -; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psra_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @avx2_psra_d_var -; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psra_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @sse2_psrl_w_var -; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psrl_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @sse2_psrl_d_var -; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psrl_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { -; CHECK-LABEL: @sse2_psrl_q_var -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_psrl_q_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @avx2_psrl_w_var -; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psrl_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { -; CHECK-LABEL: @avx2_psrl_w_var_bc -; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16> -; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) -; CHECK-NEXT: ret <16 x i16> %2 +; CHECK-LABEL: @avx2_psrl_w_var_bc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <16 x i16> [[TMP2]] +; %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2) @@ -935,19 +1055,21 @@ define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { } define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @avx2_psrl_d_var -; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psrl_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { -; CHECK-LABEL: @avx2_psrl_d_var_bc -; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32> -; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) -; CHECK-NEXT: ret <8 x i32> %2 +; CHECK-LABEL: @avx2_psrl_d_var_bc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2) @@ -955,63 +1077,70 @@ define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { } define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { -; CHECK-LABEL: @avx2_psrl_q_var -; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a) -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_psrl_q_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a) +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @sse2_psll_w_var -; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <8 x i16> %1 +; CHECK-LABEL: @sse2_psll_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @sse2_psll_d_var -; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <4 x i32> %1 +; CHECK-LABEL: @sse2_psll_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { -; CHECK-LABEL: @sse2_psll_q_var -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @sse2_psll_q_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { -; CHECK-LABEL: @avx2_psll_w_var -; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a) -; CHECK-NEXT: ret <16 x i16> %1 +; CHECK-LABEL: @avx2_psll_w_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a) +; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { -; CHECK-LABEL: @avx2_psll_d_var -; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a) -; CHECK-NEXT: ret <8 x i32> %1 +; CHECK-LABEL: @avx2_psll_d_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a) +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { -; CHECK-LABEL: @avx2_psll_q_var -; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a) -; CHECK-NEXT: ret <4 x i64> %1 +; CHECK-LABEL: @avx2_psll_q_var( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a) +; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 @@ -1022,8 +1151,9 @@ define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { ; define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) { -; CHECK-LABEL: @test_sse2_psra_w_0 -; CHECK-NEXT: ret <8 x i16> %A +; CHECK-LABEL: @test_sse2_psra_w_0( +; CHECK-NEXT: ret <8 x i16> %A +; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0) %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> ) %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0) @@ -1031,8 +1161,9 @@ define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) { } define <8 x i16> @test_sse2_psra_w_8() { -; CHECK-LABEL: @test_sse2_psra_w_8 -; CHECK-NEXT: ret <8 x i16> +; CHECK-LABEL: @test_sse2_psra_w_8( +; CHECK-NEXT: ret <8 x i16> +; %1 = bitcast <2 x i64> to <8 x i16> %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3) %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> ) @@ -1041,8 +1172,9 @@ define <8 x i16> @test_sse2_psra_w_8() { } define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) { -; CHECK-LABEL: @test_sse2_psra_d_0 -; CHECK-NEXT: ret <4 x i32> %A +; CHECK-LABEL: @test_sse2_psra_d_0( +; CHECK-NEXT: ret <4 x i32> %A +; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0) %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> ) %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0) @@ -1050,8 +1182,9 @@ define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) { } define <4 x i32> @sse2_psra_d_8() { -; CHECK-LABEL: @sse2_psra_d_8 -; CHECK-NEXT: ret <4 x i32> +; CHECK-LABEL: @sse2_psra_d_8( +; CHECK-NEXT: ret <4 x i32> +; %1 = bitcast <2 x i64> to <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3) %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> ) @@ -1060,8 +1193,9 @@ define <4 x i32> @sse2_psra_d_8() { } define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) { -; CHECK-LABEL: @test_avx2_psra_w_0 -; CHECK-NEXT: ret <16 x i16> %A +; CHECK-LABEL: @test_avx2_psra_w_0( +; CHECK-NEXT: ret <16 x i16> %A +; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0) %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> ) %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0) @@ -1069,8 +1203,9 @@ define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) { } define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) { -; CHECK-LABEL: @test_avx2_psra_w_8 -; CHECK-NEXT: ret <16 x i16> +; CHECK-LABEL: @test_avx2_psra_w_8( +; CHECK-NEXT: ret <16 x i16> +; %1 = bitcast <4 x i64> to <16 x i16> %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3) %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> ) @@ -1079,8 +1214,9 @@ define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) { } define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) { -; CHECK-LABEL: @test_avx2_psra_d_0 -; CHECK-NEXT: ret <8 x i32> %A +; CHECK-LABEL: @test_avx2_psra_d_0( +; CHECK-NEXT: ret <8 x i32> %A +; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0) %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> ) %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0) @@ -1088,8 +1224,9 @@ define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) { } define <8 x i32> @test_avx2_psra_d_8() { -; CHECK-LABEL: @test_avx2_psra_d_8 -; CHECK-NEXT: ret <8 x i32> +; CHECK-LABEL: @test_avx2_psra_d_8( +; CHECK-NEXT: ret <8 x i32> +; %1 = bitcast <4 x i64> to <8 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3) %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> ) @@ -1097,7 +1234,14 @@ define <8 x i32> @test_avx2_psra_d_8() { ret <8 x i32> %4 } +; +; Old Tests +; + define <2 x i64> @test_sse2_1() { +; CHECK-LABEL: @test_sse2_1( +; CHECK-NEXT: ret <2 x i64> +; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1116,11 +1260,12 @@ define <2 x i64> @test_sse2_1() { %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 -; CHECK: test_sse2_1 -; CHECK: ret <2 x i64> } define <4 x i64> @test_avx2_1() { +; CHECK-LABEL: @test_avx2_1( +; CHECK-NEXT: ret <4 x i64> +; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1139,11 +1284,12 @@ define <4 x i64> @test_avx2_1() { %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 -; CHECK: test_avx2_1 -; CHECK: ret <4 x i64> } define <2 x i64> @test_sse2_0() { +; CHECK-LABEL: @test_sse2_0( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1162,11 +1308,12 @@ define <2 x i64> @test_sse2_0() { %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 -; CHECK: test_sse2_0 -; CHECK: ret <2 x i64> zeroinitializer } define <4 x i64> @test_avx2_0() { +; CHECK-LABEL: @test_avx2_0( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1185,10 +1332,11 @@ define <4 x i64> @test_avx2_0() { %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 -; CHECK: test_avx2_0 -; CHECK: ret <4 x i64> zeroinitializer } define <2 x i64> @test_sse2_psrl_1() { +; CHECK-LABEL: @test_sse2_psrl_1( +; CHECK-NEXT: ret <2 x i64> +; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1207,11 +1355,12 @@ define <2 x i64> @test_sse2_psrl_1() { %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 -; CHECK: test_sse2_psrl_1 -; CHECK: ret <2 x i64> } define <4 x i64> @test_avx2_psrl_1() { +; CHECK-LABEL: @test_avx2_psrl_1( +; CHECK-NEXT: ret <4 x i64> +; %S = bitcast i32 1 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1230,11 +1379,12 @@ define <4 x i64> @test_avx2_psrl_1() { %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 -; CHECK: test_avx2_psrl_1 -; CHECK: ret <4 x i64> } define <2 x i64> @test_sse2_psrl_0() { +; CHECK-LABEL: @test_sse2_psrl_0( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1253,11 +1403,12 @@ define <2 x i64> @test_sse2_psrl_0() { %15 = bitcast <4 x i32> %14 to <2 x i64> %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) ret <2 x i64> %16 -; CHECK: test_sse2_psrl_0 -; CHECK: ret <2 x i64> zeroinitializer } define <4 x i64> @test_avx2_psrl_0() { +; CHECK-LABEL: @test_avx2_psrl_0( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; %S = bitcast i32 128 to i32 %1 = zext i32 %S to i64 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 @@ -1276,8 +1427,6 @@ define <4 x i64> @test_avx2_psrl_0() { %15 = bitcast <8 x i32> %14 to <4 x i64> %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) ret <4 x i64> %16 -; CHECK: test_avx2_psrl_0 -; CHECK: ret <4 x i64> zeroinitializer } declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1