From 010c445fcf94eafc8f89753171159d3c61e4fa44 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 26 Sep 2020 15:49:19 +0100
Subject: [PATCH] [X86] Cleanup check-prefixes for vector-mul.ll tests

Many of the x86/x64 SSE tests produce identical codegen, so use common
SSE check-prefixes to avoid duplication.
---
 llvm/test/CodeGen/X86/vector-mul.ll | 1363 ++++++++++++---------
 1 file changed, 593 insertions(+), 770 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index e41f24125f36..20e6e05440d8 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -1,24 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-XOP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX512DQ
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X86-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefixes=X64-AVX,X64-XOP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512DQ
 ;
 ; PowOf2 (uniform)
 ;
 
 define <2 x i64> @mul_v2i64_8(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_8:
-; X86: # %bb.0:
-; X86-NEXT: psllq $3, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_8:
-; X64: # %bb.0:
-; X64-NEXT: psllq $3, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_8:
+; SSE: # %bb.0:
+; SSE-NEXT: psllq $3, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-AVX-LABEL: mul_v2i64_8:
 ; X64-AVX: # %bb.0:
@@ -29,15 +24,10 @@ define <2 x i64> @mul_v2i64_8(<2 x i64> %a0) nounwind {
 }
 
 define <4 x i32> @mul_v4i32_8(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_8:
-; X86: # %bb.0:
-; X86-NEXT: pslld $3, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v4i32_8:
-; X64: # %bb.0:
-; X64-NEXT: pslld $3, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v4i32_8:
+; SSE: # %bb.0:
+; SSE-NEXT: pslld $3, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-AVX-LABEL: mul_v4i32_8:
 ; X64-AVX: # %bb.0:
@@ -48,15 +38,10 @@ define <4 x i32> @mul_v4i32_8(<4 x i32> %a0) nounwind {
 }
 
 define <8 x i16> @mul_v8i16_8(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_8:
-; X86: # %bb.0:
-; X86-NEXT: psllw $3, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v8i16_8:
-; X64: # %bb.0:
-; X64-NEXT: psllw $3, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v8i16_8:
+; SSE: # %bb.0:
+; SSE-NEXT: psllw $3, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-AVX-LABEL: mul_v8i16_8:
 ; X64-AVX: # %bb.0:
@@ -67,17 +52,17 @@ define <8 x i16> @mul_v8i16_8(<8 x i16> %a0) nounwind {
 }
 
 define <16 x i8> @mul_v16i8_32(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_32:
-; X86: # %bb.0:
-; X86-NEXT: psllw $5, %xmm0
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: psllw $5, %xmm0
+; X86-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_32:
-; X64: # %bb.0:
-; X64-NEXT: psllw $5, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: psllw $5, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_32:
 ; X64-XOP: # %bb.0:
@@ -104,21 +89,13 @@
 ;
 
 define <2 x i64> @mul_v2i64_32_8(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_32_8:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllq $3, %xmm1
-; X86-NEXT: psllq $5, %xmm0
-; X86-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_32_8:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllq $3, %xmm1
-; X64-NEXT: psllq $5, %xmm0
-; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_32_8:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $3, %xmm1
+; SSE-NEXT: psllq $5, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v2i64_32_8:
 ; X64-XOP: # %bb.0:
@@ -139,15 +116,15 @@
 }
 
 define <4 x i32> @mul_v4i32_1_2_4_8(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_1_2_4_8:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_1_2_4_8:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_1_2_4_8:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_1_2_4_8:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i32_1_2_4_8:
 ; X64-XOP: # %bb.0:
@@ -168,15 +145,15 @@
 }
 
 define <8 x i16> @mul_v8i16_1_2_4_8_16_32_64_128(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v8i16_1_2_4_8_16_32_64_128:
 ; X64-XOP: # %bb.0:
@@ -197,33 +174,19 @@
 }
 
 define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
-; X86: # %bb.0:
-; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8]
-; X86-NEXT: pmullw %xmm2, %xmm0
-; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X86-NEXT: pand %xmm3, %xmm0
-; X86-NEXT: pmullw %xmm2, %xmm1
-; X86-NEXT: pand %xmm3, %xmm1
-; X86-NEXT: packuswb %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
-; X64: # %bb.0:
-; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8]
-; X64-NEXT: pmullw %xmm2, %xmm0
-; X64-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X64-NEXT: pand %xmm3, %xmm0
-; X64-NEXT: pmullw %xmm2, %xmm1
-; X64-NEXT: pand %xmm3, %xmm1
-; X64-NEXT: packuswb %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
+; SSE: # %bb.0:
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,1,2,4,8]
+; SSE-NEXT: pmullw %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: pmullw %xmm2, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: packuswb %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8:
 ; X64-XOP: # %bb.0:
@@ -256,21 +219,13 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw
 ;
 
 define <2 x i64> @mul_v2i64_17(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllq $4, %xmm1
-; X86-NEXT: paddq %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllq $4, %xmm1
-; X64-NEXT: paddq %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_17:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $4, %xmm1
+; SSE-NEXT: paddq %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v2i64_17:
 ; X64-XOP: # %bb.0:
@@ -293,15 +248,15 @@
 }
 
 define <4 x i32> @mul_v4i32_17(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_17:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_17:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_17:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_17:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i32_17:
 ; X64-XOP: # %bb.0:
@@ -323,15 +278,15 @@
 }
 
 define <8 x i16> @mul_v8i16_17(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_17:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_17:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_17:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_17:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_17:
 ; X64-AVX: # %bb.0:
@@ -342,23 +297,23 @@
 }
 
 define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllw $4, %xmm1
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT: paddb %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_17:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psllw $4, %xmm1
+; X86-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT: paddb %xmm0, %xmm1
+; X86-SSE-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllw $4, %xmm1
-; X64-NEXT: pand {{.*}}(%rip), %xmm1
-; X64-NEXT: paddb %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_17:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE-NEXT: psllw $4, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: paddb %xmm0, %xmm1
+; X64-SSE-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_17:
 ; X64-XOP: # %bb.0:
@@ -384,29 +339,17 @@ define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind {
 }
 
 define <4 x i64> @mul_v4i64_17(<4 x i64> %a0) nounwind {
-; X86-LABEL: mul_v4i64_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: psllq $4, %xmm2
-; X86-NEXT: paddq %xmm0, %xmm2
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllq $4, %xmm3
-; X86-NEXT: paddq %xmm1, %xmm3
-; X86-NEXT: movdqa %xmm2, %xmm0
-; X86-NEXT: movdqa %xmm3, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v4i64_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: psllq $4, %xmm2
-; X64-NEXT: paddq %xmm0, %xmm2
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllq $4, %xmm3
-; X64-NEXT: paddq %xmm1, %xmm3
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: movdqa %xmm3, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v4i64_17:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psllq $4, %xmm2
+; SSE-NEXT: paddq %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psllq $4, %xmm3
+; SSE-NEXT: paddq %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v4i64_17:
 ; X64-XOP: # %bb.0:
@@ -433,19 +376,12 @@
 }
 
 define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
-; X86-LABEL: mul_v8i32_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17]
-; X86-NEXT: pmulld %xmm2, %xmm0
-; X86-NEXT: pmulld %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v8i32_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17]
-; X64-NEXT: pmulld %xmm2, %xmm0
-; X64-NEXT: pmulld %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v8i32_17:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; SSE-NEXT: pmulld %xmm2, %xmm0
+; SSE-NEXT: pmulld %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v8i32_17:
 ; X64-XOP: # %bb.0:
@@ -472,19 +408,12 @@
 }
 
 define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
-; X86-LABEL: mul_v16i16_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
-; X86-NEXT: pmullw %xmm2, %xmm0
-; X86-NEXT: pmullw %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v16i16_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
-; X64-NEXT: pmullw %xmm2, %xmm0
-; X64-NEXT: pmullw %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v16i16_17:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; SSE-NEXT: pmullw %xmm2, %xmm0
+; SSE-NEXT: pmullw %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v16i16_17:
 ; X64-XOP: # %bb.0:
@@ -510,35 +439,20 @@
 }
 
 define <32 x i8> @mul_v32i8_17(<32 x i8> %a0) nounwind {
-; X86-LABEL: mul_v32i8_17:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: psllw $4, %xmm2
-; X86-NEXT: movdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; X86-NEXT: pand %xmm4, %xmm2
-; X86-NEXT: paddb %xmm0, %xmm2
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllw $4, %xmm3
-; X86-NEXT: pand %xmm4, %xmm3
-; X86-NEXT: paddb %xmm1, %xmm3
-; X86-NEXT: movdqa %xmm2, %xmm0
-; X86-NEXT: movdqa %xmm3, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v32i8_17:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: psllw $4, %xmm2
-; X64-NEXT: movdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; X64-NEXT: pand %xmm4, %xmm2
-; X64-NEXT: paddb %xmm0, %xmm2
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllw $4, %xmm3
-; X64-NEXT: pand %xmm4, %xmm3
-; X64-NEXT: paddb %xmm1, %xmm3
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: movdqa %xmm3, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v32i8_17:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psllw $4, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: paddb %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psllw $4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: paddb %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v32i8_17:
 ; X64-XOP: # %bb.0:
@@ -573,23 +487,14 @@
 ;
 
 define <2 x i64> @mul_v2i64_neg1025(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_neg1025:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllq $10, %xmm1
-; X86-NEXT: paddq %xmm0, %xmm1
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubq %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_neg1025:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllq $10, %xmm1
-; X64-NEXT: paddq %xmm0, %xmm1
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubq %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_neg1025:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $10, %xmm1
+; SSE-NEXT: paddq %xmm0, %xmm1
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psubq %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v2i64_neg1025:
 ; X64-XOP: # %bb.0:
@@ -616,15 +521,15 @@
 }
 
 define <4 x i32> @mul_v4i32_neg33(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_neg33:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_neg33:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_neg33:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_neg33:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i32_neg33:
 ; X64-XOP: # %bb.0:
@@ -646,15 +551,15 @@
 }
 
 define <8 x i16> @mul_v8i16_neg9(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_neg9:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_neg9:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_neg9:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_neg9:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_neg9:
 ; X64-AVX: # %bb.0:
@@ -665,25 +570,25 @@
 }
 
 define <16 x i8> @mul_v16i8_neg5(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_neg5:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllw $2, %xmm1
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT: paddb %xmm0, %xmm1
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubb %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_neg5:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psllw $2, %xmm1
+; X86-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT: paddb %xmm0, %xmm1
+; X86-SSE-NEXT: pxor %xmm0, %xmm0
+; X86-SSE-NEXT: psubb %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_neg5:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllw $2, %xmm1
-; X64-NEXT: pand {{.*}}(%rip), %xmm1
-; X64-NEXT: paddb %xmm0, %xmm1
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubb %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_neg5:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE-NEXT: psllw $2, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: paddb %xmm0, %xmm1
+; X64-SSE-NEXT: pxor %xmm0, %xmm0
+; X64-SSE-NEXT: psubb %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_neg5:
 ; X64-XOP: # %bb.0:
@@ -715,35 +620,20 @@
 }
 
 define <4 x i64> @mul_v4i64_neg1025(<4 x i64> %a0) nounwind {
-; X86-LABEL: mul_v4i64_neg1025:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: psllq $10, %xmm3
-; X86-NEXT: paddq %xmm0, %xmm3
-; X86-NEXT: pxor %xmm2, %xmm2
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubq %xmm3, %xmm0
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllq $10, %xmm3
-; X86-NEXT: paddq %xmm1, %xmm3
-; X86-NEXT: psubq %xmm3, %xmm2
-; X86-NEXT: movdqa %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v4i64_neg1025:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psllq $10, %xmm3
-; X64-NEXT: paddq %xmm0, %xmm3
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubq %xmm3, %xmm0
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllq $10, %xmm3
-; X64-NEXT: paddq %xmm1, %xmm3
-; X64-NEXT: psubq %xmm3, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v4i64_neg1025:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psllq $10, %xmm3
+; SSE-NEXT: paddq %xmm0, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psubq %xmm3, %xmm0
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psllq $10, %xmm3
+; SSE-NEXT: paddq %xmm1, %xmm3
+; SSE-NEXT: psubq %xmm3, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v4i64_neg1025:
 ; X64-XOP: # %bb.0:
@@ -775,19 +665,12 @@
 }
 
 define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
-; X86-LABEL: mul_v8i32_neg33:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
-; X86-NEXT: pmulld %xmm2, %xmm0
-; X86-NEXT: pmulld %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v8i32_neg33:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
-; X64-NEXT: pmulld %xmm2, %xmm0
-; X64-NEXT: pmulld %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v8i32_neg33:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; SSE-NEXT: pmulld %xmm2, %xmm0
+; SSE-NEXT: pmulld %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v8i32_neg33:
 ; X64-XOP: # %bb.0:
@@ -817,19 +700,12 @@
 }
 
 define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
-; X86-LABEL: mul_v16i16_neg9:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
-; X86-NEXT: pmullw %xmm2, %xmm0
-; X86-NEXT: pmullw %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v16i16_neg9:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
-; X64-NEXT: pmullw %xmm2, %xmm0
-; X64-NEXT: pmullw %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v16i16_neg9:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; SSE-NEXT: pmullw %xmm2, %xmm0
+; SSE-NEXT: pmullw %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v16i16_neg9:
 ; X64-XOP: # %bb.0:
@@ -858,41 +734,23 @@
 }
 
 define <32 x i8> @mul_v32i8_neg5(<32 x i8> %a0) nounwind {
-; X86-LABEL: mul_v32i8_neg5:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: psllw $2, %xmm3
-; X86-NEXT: movdqa {{.*#+}} xmm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; X86-NEXT: pand %xmm4, %xmm3
-; X86-NEXT: paddb %xmm0, %xmm3
-; X86-NEXT: pxor %xmm2, %xmm2
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubb %xmm3, %xmm0
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllw $2, %xmm3
-; X86-NEXT: pand %xmm4, %xmm3
-; X86-NEXT: paddb %xmm1, %xmm3
-; X86-NEXT: psubb %xmm3, %xmm2
-; X86-NEXT: movdqa %xmm2, %xmm1
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v32i8_neg5:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psllw $2, %xmm3
-; X64-NEXT: movdqa {{.*#+}} xmm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; X64-NEXT: pand %xmm4, %xmm3
-; X64-NEXT: paddb %xmm0, %xmm3
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubb %xmm3, %xmm0
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllw $2, %xmm3
-; X64-NEXT: pand %xmm4, %xmm3
-; X64-NEXT: paddb %xmm1, %xmm3
-; X64-NEXT: psubb %xmm3, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm1
-; X64-NEXT: retq
+; SSE-LABEL: mul_v32i8_neg5:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psllw $2, %xmm3
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: paddb %xmm0, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: psubb %xmm3, %xmm0
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psllw $2, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: paddb %xmm1, %xmm3
+; SSE-NEXT: psubb %xmm3, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v32i8_neg5:
 ; X64-XOP: # %bb.0:
@@ -934,27 +792,27 @@
 ;
 
 define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_17_65:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [17,0,65,0]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: psrlq $32, %xmm0
-; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_17_65:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [17,0,65,0]
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm0
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: psllq $32, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_17_65:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [17,65]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_17_65:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [17,65]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: psrlq $32, %xmm0
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_17_65:
 ; X64-XOP: # %bb.0:
@@ -985,15 +843,15 @@
 }
 
 define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_5_17_33_65:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_5_17_33_65:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_5_17_33_65:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_5_17_33_65:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v4i32_5_17_33_65:
 ; X64-AVX: # %bb.0:
@@ -1004,15 +862,15 @@
 }
 
 define <8 x i16> @mul_v8i16_2_3_9_17_33_65_129_257(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_2_3_9_17_33_65_129_257:
 ; X64-AVX: # %bb.0:
@@ -1023,31 +881,31 @@
 }
 
 define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
-; X86: # %bb.0:
-; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; X86-NEXT: pand %xmm2, %xmm0
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm1
-; X86-NEXT: pand %xmm2, %xmm1
-; X86-NEXT: packuswb %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; X86-SSE-NEXT: pand %xmm2, %xmm0
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT: pand %xmm2, %xmm1
+; X86-SSE-NEXT: packuswb %xmm0, %xmm1
+; X86-SSE-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
-; X64: # %bb.0:
-; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; X64-NEXT: pand %xmm2, %xmm0
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm1
-; X64-NEXT: pand %xmm2, %xmm1
-; X64-NEXT: packuswb %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X64-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; X64-SSE-NEXT: pand %xmm2, %xmm0
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: pand %xmm2, %xmm1
+; X64-SSE-NEXT: packuswb %xmm0, %xmm1
+; X64-SSE-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3:
 ; X64-XOP: # %bb.0:
@@ -1085,21 +943,13 @@
 ;
 
 define <2 x i64> @mul_v2i64_7(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_7:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllq $3, %xmm1
-; X86-NEXT: psubq %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_7:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllq $3, %xmm1
-; X64-NEXT: psubq %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_7:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $3, %xmm1
+; SSE-NEXT: psubq %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v2i64_7:
 ; X64-XOP: # %bb.0:
@@ -1122,15 +972,15 @@
 }
 
 define <4 x i32> @mul_v4i32_7(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_7:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_7:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_7:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_7:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i32_7:
 ; X64-XOP: # %bb.0:
@@ -1152,15 +1002,15 @@
 }
 
 define <8 x i16> @mul_v8i16_7(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_7:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_7:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_7:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_7:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_7:
 ; X64-AVX: # %bb.0:
@@ -1171,23 +1021,23 @@
 }
 
 define <16 x i8> @mul_v16i8_31(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_31:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllw $5, %xmm1
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT: psubb %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_31:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psllw $5, %xmm1
+; X86-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT: psubb %xmm0, %xmm1
+; X86-SSE-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_31:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllw $5, %xmm1
-; X64-NEXT: pand {{.*}}(%rip), %xmm1
-; X64-NEXT: psubb %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_31:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE-NEXT: psllw $5, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: psubb %xmm0, %xmm1
+; X64-SSE-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_31:
 ; X64-XOP: # %bb.0:
@@ -1217,19 +1067,12 @@
 ;
 
 define <2 x i64> @mul_v2i64_neg7(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_neg7:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllq $3, %xmm1
-; X86-NEXT: psubq %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_neg7:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllq $3, %xmm1
-; X64-NEXT: psubq %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_neg7:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $3, %xmm1
+; SSE-NEXT: psubq %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v2i64_neg7:
 ; X64-XOP: # %bb.0:
@@ -1252,15 +1095,15 @@
 }
 
 define <4 x i32> @mul_v4i32_neg63(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_neg63:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_neg63:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_neg63:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_neg63:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i32_neg63:
 ; X64-XOP: # %bb.0:
@@ -1282,15 +1125,15 @@
 }
 
 define <8 x i16> @mul_v8i16_neg31(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_neg31:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_neg31:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_neg31:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_neg31:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_neg31:
 ; X64-AVX: # %bb.0:
@@ -1301,21 +1144,21 @@
 }
 
 define <16 x i8> @mul_v16i8_neg15(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_neg15:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psllw $4, %xmm1
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT: psubb %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v16i8_neg15:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psllw $4, %xmm1
+; X86-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT: psubb %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v16i8_neg15:
-; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psllw $4, %xmm1
-; X64-NEXT: pand {{.*}}(%rip), %xmm1
-; X64-NEXT: psubb %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v16i8_neg15:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE-NEXT: psllw $4, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: psubb %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v16i8_neg15:
 ; X64-XOP: # %bb.0:
@@ -1345,27 +1188,27 @@
 ;
 
 define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_15_63:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [15,0,63,0]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: psrlq $32, %xmm0
-; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_15_63:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [15,0,63,0]
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm0
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: psllq $32, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_15_63:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,63]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_15_63:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [15,63]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: psrlq $32, %xmm0
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_15_63:
 ; X64-XOP: # %bb.0:
@@ -1396,33 +1239,33 @@
 }
 
 define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_neg_15_63:
-; X86: # %bb.0:
-; X86-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-NEXT: pmuludq %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: psrlq $32, %xmm2
-; X86-NEXT: movdqa {{.*#+}} xmm3 = [4294967281,4294967295,4294967233,4294967295]
-; X86-NEXT: pmuludq %xmm3, %xmm2
-; X86-NEXT: paddq %xmm1, %xmm2
-; X86-NEXT: psllq $32, %xmm2
-; X86-NEXT: pmuludq %xmm3, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_neg_15_63:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm2
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967281,4294967295,4294967233,4294967295]
+; X86-SSE-NEXT: pmuludq %xmm3, %xmm2
+; X86-SSE-NEXT: paddq %xmm1, %xmm2
+; X86-SSE-NEXT: psllq $32, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm3, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_neg_15_63:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psrlq $32, %xmm3
-; X64-NEXT: pmuludq %xmm1, %xmm3
-; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
-; X64-NEXT: paddq %xmm3, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_neg_15_63:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551601,18446744073709551553]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: psrlq $32, %xmm3
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: paddq %xmm3, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_neg_15_63:
 ; X64-XOP: # %bb.0:
@@ -1457,33 +1300,33 @@
 }
 
 define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_neg_17_65:
-; X86: # %bb.0:
-; X86-NEXT: pcmpeqd %xmm1, %xmm1
-; X86-NEXT: pmuludq %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: psrlq $32, %xmm2
-; X86-NEXT: movdqa {{.*#+}} xmm3 = [4294967279,4294967295,4294967231,4294967295]
-; X86-NEXT: pmuludq %xmm3, %xmm2
-; X86-NEXT: paddq %xmm1, %xmm2
-; X86-NEXT: psllq $32, %xmm2
-; X86-NEXT: pmuludq %xmm3, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_neg_17_65:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm2
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967279,4294967295,4294967231,4294967295]
+; X86-SSE-NEXT: pmuludq %xmm3, %xmm2
+; X86-SSE-NEXT: paddq %xmm1, %xmm2
+; X86-SSE-NEXT: psllq $32, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm3, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_neg_17_65:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psrlq $32, %xmm3
-; X64-NEXT: pmuludq %xmm1, %xmm3
-; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
-; X64-NEXT: paddq %xmm3, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_neg_17_65:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551599,18446744073709551551]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: psrlq $32, %xmm3
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: paddq %xmm3, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_neg_17_65:
 ; X64-XOP: # %bb.0:
@@ -1518,17 +1361,11 @@
 }
 
 define <2 x i64> @mul_v2i64_0_1(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_0_1:
-; X86: # %bb.0:
-; X86-NEXT: xorps %xmm1, %xmm1
-; X86-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v2i64_0_1:
-; X64: # %bb.0:
-; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; X64-NEXT: retq
+; SSE-LABEL: mul_v2i64_0_1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-AVX-LABEL: mul_v2i64_0_1:
 ; X64-AVX: # %bb.0:
@@ -1540,34 +1377,34 @@
 }
 
 define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_neg_0_1:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psrlq $32, %xmm1
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
-; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: movdqa %xmm2, %xmm3
-; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm0, %xmm3
-; X86-NEXT: paddq %xmm1, %xmm3
-; X86-NEXT: psllq $32, %xmm3
-; X86-NEXT: pmuludq %xmm2, %xmm0
-; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_neg_0_1:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psrlq $32, %xmm1
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,0,4294967295,4294967295]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: movdqa %xmm2, %xmm3
+; X86-SSE-NEXT: psrlq $32, %xmm3
+; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
+; X86-SSE-NEXT: paddq %xmm1, %xmm3
+; X86-SSE-NEXT: psllq $32, %xmm3
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: paddq %xmm3, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_neg_0_1:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psrlq $32, %xmm3
-; X64-NEXT: pmuludq %xmm1, %xmm3
-; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
-; X64-NEXT: paddq %xmm3, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_neg_0_1:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: psrlq $32, %xmm3
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: paddq %xmm3, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_neg_0_1:
 ; X64-XOP: # %bb.0:
@@ -1602,34 +1439,34 @@
 }
 
 define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
-; X86-LABEL: mul_v2i64_15_neg_63:
-; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: psrlq $32, %xmm1
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
-; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: movdqa %xmm2, %xmm3
-; X86-NEXT: psrlq $32, %xmm3
-; X86-NEXT: pmuludq %xmm0, %xmm3
-; X86-NEXT: paddq %xmm1, %xmm3
-; X86-NEXT: psllq $32, %xmm3
-; X86-NEXT: pmuludq %xmm2, %xmm0
-; X86-NEXT: paddq %xmm3, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_15_neg_63:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE-NEXT: psrlq $32, %xmm1
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,0,4294967233,4294967295]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: movdqa %xmm2, %xmm3
+; X86-SSE-NEXT: psrlq $32, %xmm3
+; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
+; X86-SSE-NEXT: paddq %xmm1, %xmm3
+; X86-SSE-NEXT: psllq $32, %xmm3
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: paddq %xmm3, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_15_neg_63:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [15,18446744073709551553]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psrlq $32, %xmm3
-; X64-NEXT: pmuludq %xmm1, %xmm3
-; X64-NEXT: pmuludq {{.*}}(%rip), %xmm0
-; X64-NEXT: paddq %xmm3, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_15_neg_63:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [15,18446744073709551553]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: psrlq $32, %xmm3
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X64-SSE-NEXT: pmuludq {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: paddq %xmm3, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_15_neg_63:
 ; X64-XOP: # %bb.0:
@@ -1664,15 +1501,15 @@
 }
 
 define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind {
-; X86-LABEL: mul_v4i32_0_15_31_7:
-; X86: # %bb.0:
-; X86-NEXT: pmulld {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i32_0_15_31_7:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmulld {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i32_0_15_31_7:
-; X64: # %bb.0:
-; X64-NEXT: pmulld {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i32_0_15_31_7:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmulld {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v4i32_0_15_31_7:
 ; X64-AVX: # %bb.0:
@@ -1683,15 +1520,15 @@
 }
 
 define <8 x i16> @mul_v8i16_0_1_7_15_31_63_127_255(<8 x i16> %a0) nounwind {
-; X86-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
-; X86: # %bb.0:
-; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
-; X64: # %bb.0:
-; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v8i16_0_1_7_15_31_63_127_255:
 ; X64-AVX: # %bb.0:
@@ -1702,33 +1539,19 @@
 }
 
 define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> %a0) nounwind {
-; X86-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
-; X86: # %bb.0:
-; X86-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X86-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127]
-; X86-NEXT: pmullw %xmm2, %xmm0
-; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X86-NEXT: pand %xmm3, %xmm0
-; X86-NEXT: pmullw %xmm2, %xmm1
-; X86-NEXT: pand %xmm3, %xmm1
-; X86-NEXT: packuswb %xmm0, %xmm1
-; X86-NEXT: movdqa %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
-; X64: # %bb.0:
-; X64-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127]
-; X64-NEXT: pmullw %xmm2, %xmm0
-; X64-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X64-NEXT: pand %xmm3, %xmm0
-; X64-NEXT: pmullw %xmm2, %xmm1
-; X64-NEXT: pand %xmm3, %xmm1
-; X64-NEXT: packuswb %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: retq
+; SSE-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
+; SSE: # %bb.0:
+; SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,3,7,15,31,63,127]
+; SSE-NEXT: pmullw %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: pmullw %xmm2, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: packuswb %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
 ;
 ; X64-XOP-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127:
 ; X64-XOP: # %bb.0:
@@ -1763,27 +1586,27 @@
 }
 
 define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
-; X86-LABEL: mul_v2i64_68_132:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [68,0,132,0]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: psrlq $32, %xmm0
-; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_68_132:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [68,0,132,0]
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm0
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: psllq $32, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_68_132:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [68,132]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_68_132:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [68,132]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: psrlq $32, %xmm0
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_68_132:
 ; X64-XOP: # %bb.0:
@@ -1814,27 +1637,27 @@
 }
 
 define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind {
-; X86-LABEL: mul_v2i64_60_120:
-; X86: # %bb.0:
-; X86-NEXT: movdqa {{.*#+}} xmm1 = [60,0,124,0]
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pmuludq %xmm1, %xmm2
-; X86-NEXT: psrlq $32, %xmm0
-; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: psllq $32, %xmm0
-; X86-NEXT: paddq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_60_120:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [60,0,124,0]
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X86-SSE-NEXT: psrlq $32, %xmm0
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: psllq $32, %xmm0
+; X86-SSE-NEXT: paddq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_60_120:
-; X64: # %bb.0:
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [60,124]
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pmuludq %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: psllq $32, %xmm0
-; X64-NEXT: paddq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_60_120:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [60,124]
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X64-SSE-NEXT: psrlq $32, %xmm0
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE-NEXT: psllq $32, %xmm0
+; X64-SSE-NEXT: paddq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v2i64_60_120:
 ; X64-XOP: # %bb.0:
@@ -1870,21 +1693,21 @@
 ; multiply inputs is loop invariant.
 ; FIXME: We should be able to insert an AssertZExt for this.
 define <2 x i64> @mul_v2i64_zext_cross_bb(<2 x i32>* %in, <2 x i32>* %y) {
-; X86-LABEL: mul_v2i64_zext_cross_bb:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmuludq %xmm1, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v2i64_zext_cross_bb:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v2i64_zext_cross_bb:
-; X64: # %bb.0:
-; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmuludq %xmm1, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v2i64_zext_cross_bb:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mul_v2i64_zext_cross_bb:
 ; X64-AVX: # %bb.0:
@@ -1904,27 +1727,27 @@ foo:
 }
 
 define <4 x i64> @mul_v4i64_zext_cross_bb(<4 x i32>* %in, <4 x i32>* %y) {
-; X86-LABEL: mul_v4i64_zext_cross_bb:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X86-NEXT: pmuludq %xmm2, %xmm0
-; X86-NEXT: retl
+; X86-SSE-LABEL: mul_v4i64_zext_cross_bb:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
 ;
-; X64-LABEL: mul_v4i64_zext_cross_bb:
-; X64: # %bb.0:
-; X64-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmuludq %xmm2, %xmm1
-; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
-; X64-NEXT: pmuludq %xmm2, %xmm0
-; X64-NEXT: retq
+; X64-SSE-LABEL: mul_v4i64_zext_cross_bb:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X64-SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
+; X64-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X64-SSE-NEXT: retq
 ;
 ; X64-XOP-LABEL: mul_v4i64_zext_cross_bb:
 ; X64-XOP: # %bb.0: