[X86][SSE] Rename pmulh tests to show they're from sign/zero-extends

I intend to add additional coverage based on computeKnownBits/ComputeNumSignBits, as suggested by PR45897.
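For reference, the renamed tests exercise the standard multiply-high pattern: extend to i32, widening multiply, shift the product right by 16, truncate back to i16. The function bodies are elided from the hunks below, so the sketch here is a reconstruction from the label names and the visible `ret <N x i16> %e` lines; only the overall shape and the `%e` result name are taken from the diff, the other value names are illustrative.

; Unsigned variant: zero-extends feeding a widening multiply; the high
; 16 bits are selected, which lowers to pmulhuw.
define <4 x i16> @zext_mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
  %x = zext <4 x i16> %a to <4 x i32>
  %y = zext <4 x i16> %b to <4 x i32>
  %c = mul <4 x i32> %x, %y
  %d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %e = trunc <4 x i32> %d to <4 x i16>
  ret <4 x i16> %e
}

; Signed variant: the same pattern with sign-extends, lowering to pmulhw.
define <4 x i16> @sext_mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
  %x = sext <4 x i16> %a to <4 x i32>
  %y = sext <4 x i16> %b to <4 x i32>
  %c = mul <4 x i32> %x, %y
  %d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %e = trunc <4 x i32> %d to <4 x i16>
  ret <4 x i16> %e
}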
Simon Pilgrim 2021-03-25 11:52:28 +00:00
parent 99211352c1
commit c0515f0cea
1 changed file with 84 additions and 84 deletions

@@ -5,13 +5,13 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
-define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; SSE-LABEL: mulhuw_v4i16:
+define <4 x i16> @zext_mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v4i16:
+; AVX-LABEL: zext_mulhuw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -23,13 +23,13 @@ define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
ret <4 x i16> %e
}
-define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; SSE-LABEL: mulhw_v4i16:
+define <4 x i16> @sext_mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; SSE-LABEL: sext_mulhw_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v4i16:
+; AVX-LABEL: sext_mulhw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -41,13 +41,13 @@ define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
ret <4 x i16> %e
}
-define <8 x i16> @mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhuw_v8i16:
+define <8 x i16> @zext_mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16:
+; AVX-LABEL: zext_mulhuw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -59,13 +59,13 @@ define <8 x i16> @mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %e
}
-define <8 x i16> @mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhw_v8i16:
+define <8 x i16> @sext_mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: sext_mulhw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v8i16:
+; AVX-LABEL: sext_mulhw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -77,14 +77,14 @@ define <8 x i16> @mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %e
}
-define <16 x i16> @mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; SSE-LABEL: mulhuw_v16i16:
+define <16 x i16> @zext_mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm2, %xmm0
; SSE-NEXT: pmulhuw %xmm3, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v16i16:
+; AVX-LABEL: zext_mulhuw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
@@ -96,14 +96,14 @@ define <16 x i16> @mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
ret <16 x i16> %e
}
-define <16 x i16> @mulhw_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; SSE-LABEL: mulhw_v16i16:
+define <16 x i16> @sext_mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm2, %xmm0
; SSE-NEXT: pmulhw %xmm3, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v16i16:
+; AVX-LABEL: sext_mulhuw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
@@ -115,8 +115,8 @@ define <16 x i16> @mulhw_v16i16(<16 x i16> %a, <16 x i16> %b) {
ret <16 x i16> %e
}
-define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; SSE-LABEL: mulhuw_v32i16:
+define <32 x i16> @zext_mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm4, %xmm0
; SSE-NEXT: pmulhuw %xmm5, %xmm1
@@ -124,13 +124,13 @@ define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; SSE-NEXT: pmulhuw %xmm7, %xmm3
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v32i16:
+; AVX2-LABEL: zext_mulhuw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v32i16:
+; AVX512F-LABEL: zext_mulhuw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
@@ -139,7 +139,7 @@ define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v32i16:
+; AVX512BW-LABEL: zext_mulhuw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -151,8 +151,8 @@ define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
ret <32 x i16> %e
}
-define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; SSE-LABEL: mulhw_v32i16:
+define <32 x i16> @sext_mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm4, %xmm0
; SSE-NEXT: pmulhw %xmm5, %xmm1
@@ -160,13 +160,13 @@ define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; SSE-NEXT: pmulhw %xmm7, %xmm3
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhw_v32i16:
+; AVX2-LABEL: sext_mulhuw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhw_v32i16:
+; AVX512F-LABEL: sext_mulhuw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
@@ -175,7 +175,7 @@ define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhw_v32i16:
+; AVX512BW-LABEL: sext_mulhuw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -187,8 +187,8 @@ define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
ret <32 x i16> %e
}
-define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
-; SSE-LABEL: mulhuw_v64i16:
+define <64 x i16> @zext_mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm0
@@ -209,7 +209,7 @@ define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v64i16:
+; AVX2-LABEL: zext_mulhuw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm5, %ymm1, %ymm1
@@ -217,7 +217,7 @@ define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; AVX2-NEXT: vpmulhuw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v64i16:
+; AVX512F-LABEL: zext_mulhuw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
@@ -231,7 +231,7 @@ define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v64i16:
+; AVX512BW-LABEL: zext_mulhuw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhuw %zmm3, %zmm1, %zmm1
@@ -244,8 +244,8 @@ define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
ret <64 x i16> %e
}
-define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
-; SSE-LABEL: mulhw_v64i16:
+define <64 x i16> @sext_mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
@@ -266,7 +266,7 @@ define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhw_v64i16:
+; AVX2-LABEL: sext_mulhuw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm5, %ymm1, %ymm1
@@ -274,7 +274,7 @@ define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; AVX2-NEXT: vpmulhw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhw_v64i16:
+; AVX512F-LABEL: sext_mulhuw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
@@ -288,7 +288,7 @@ define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhw_v64i16:
+; AVX512BW-LABEL: sext_mulhuw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm1, %zmm1
@@ -301,13 +301,13 @@ define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
ret <64 x i16> %e
}
-define <8 x i16> @mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhuw_v8i16_i64:
+define <8 x i16> @zext_mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16_i64:
+; AVX-LABEL: zext_mulhuw_v8i16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -319,13 +319,13 @@ define <8 x i16> @mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %e
}
-define <8 x i16> @mulhw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhw_v8i16_i64:
+define <8 x i16> @sext_mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v8i16_i64:
+; AVX-LABEL: sext_mulhuw_v8i16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -337,21 +337,21 @@ define <8 x i16> @mulhw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %e
}
-define <4 x i32> @mulhuw_v4i16_lshr(<4 x i16> %a, <4 x i16> %b) {
-; SSE2-LABEL: mulhuw_v4i16_lshr:
+define <4 x i32> @zext_mulhuw_v4i16_lshr(<4 x i16> %a, <4 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v4i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: pmulhuw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v4i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v4i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhuw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: mulhuw_v4i16_lshr:
+; AVX-LABEL: zext_mulhuw_v4i16_lshr:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
@@ -417,8 +417,8 @@ define <4 x i32> @mulhsw_v4i16_ashr(<4 x i16> %a, <4 x i16> %b) {
ret <4 x i32> %d
}
-define <8 x i32> @mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhuw_v8i16_lshr:
+define <8 x i32> @zext_mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v8i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmulhuw %xmm1, %xmm2
@@ -429,7 +429,7 @@ define <8 x i32> @mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v8i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v8i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pmulhuw %xmm1, %xmm2
@@ -439,7 +439,7 @@ define <8 x i32> @mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16_lshr:
+; AVX-LABEL: zext_mulhuw_v8i16_lshr:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -517,8 +517,8 @@ define <8 x i32> @mulhsw_v8i16_ashr(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i32> %d
}
-define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
-; SSE2-LABEL: mulhuw_v16i16_lshr:
+define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v16i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -534,7 +534,7 @@ define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v16i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v16i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm1
@@ -548,7 +548,7 @@ define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
; SSE41-NEXT: movdqa %xmm4, %xmm3
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v16i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v16i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
@@ -556,7 +556,7 @@ define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhuw_v16i16_lshr:
+; AVX512-LABEL: zext_mulhuw_v16i16_lshr:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
@@ -669,8 +669,8 @@ define <16 x i32> @mulhsw_v16i16_ashr(<16 x i16> %a, <16 x i16> %b) {
ret <16 x i32> %d
}
-define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
-; SSE2-LABEL: mulhuw_v32i16_lshr:
+define <32 x i32> @zext_mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v32i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: pmulhuw %xmm7, %xmm3
@@ -700,7 +700,7 @@ define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
; SSE2-NEXT: movdqa %xmm8, (%rdi)
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v32i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v32i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: pmulhuw %xmm4, %xmm0
@@ -726,7 +726,7 @@ define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
; SSE41-NEXT: movdqa %xmm8, (%rdi)
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v32i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v32i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
@@ -739,7 +739,7 @@ define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
; AVX2-NEXT: vmovdqa %ymm4, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v32i16_lshr:
+; AVX512F-LABEL: zext_mulhuw_v32i16_lshr:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
@@ -750,7 +750,7 @@ define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v32i16_lshr:
+; AVX512BW-LABEL: zext_mulhuw_v32i16_lshr:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
@@ -960,8 +960,8 @@ define <32 x i32> @mulhsw_v32i16_ashr(<32 x i16> %a, <32 x i16> %b) {
ret <32 x i32> %d
}
-define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
-; SSE2-LABEL: mulhuw_v64i16_lshr:
+define <64 x i32> @zext_mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v64i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm7, %xmm8
; SSE2-NEXT: movq %rdi, %rax
@@ -1018,7 +1018,7 @@ define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v64i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v64i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movq %rdi, %rax
@@ -1074,7 +1074,7 @@ define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
; SSE41-NEXT: movdqa %xmm0, (%rdi)
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v64i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v64i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
@@ -1104,7 +1104,7 @@ define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v64i16_lshr:
+; AVX512F-LABEL: zext_mulhuw_v64i16_lshr:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
@@ -1122,7 +1122,7 @@ define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
; AVX512F-NEXT: vmovdqa64 %zmm5, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v64i16_lshr:
+; AVX512BW-LABEL: zext_mulhuw_v64i16_lshr:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
@@ -1504,8 +1504,8 @@ define <64 x i32> @mulhsw_v64i16_ashr(<64 x i16> %a, <64 x i16> %b) {
ret <64 x i32> %d
}
-define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhuw_v8i16_lshr_i64:
+define <8 x i64> @zext_mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v8i16_lshr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
@@ -1533,7 +1533,7 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v8i16_lshr_i64:
+; SSE41-LABEL: zext_mulhuw_v8i16_lshr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhuw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
@@ -1546,7 +1546,7 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v8i16_lshr_i64:
+; AVX2-LABEL: zext_mulhuw_v8i16_lshr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
@@ -1554,7 +1554,7 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhuw_v8i16_lshr_i64:
+; AVX512-LABEL: zext_mulhuw_v8i16_lshr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
@@ -1566,8 +1566,8 @@ define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i64> %d
}
-define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhsw_v8i16_lshr_i64:
+define <8 x i64> @sext_mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: sext_mulhsw_v8i16_lshr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm6
@@ -1635,7 +1635,7 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: psrlq $16, %xmm3
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhsw_v8i16_lshr_i64:
+; SSE41-LABEL: sext_mulhsw_v8i16_lshr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
@@ -1648,7 +1648,7 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhsw_v8i16_lshr_i64:
+; AVX2-LABEL: sext_mulhsw_v8i16_lshr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
@@ -1656,7 +1656,7 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhsw_v8i16_lshr_i64:
+; AVX512-LABEL: sext_mulhsw_v8i16_lshr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
@@ -1668,8 +1668,8 @@ define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i64> %d
}
-define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhsw_v8i16_ashr_i64:
+define <8 x i64> @sext_mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: sext_mulhsw_v8i16_ashr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm5
@@ -1757,7 +1757,7 @@ define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhsw_v8i16_ashr_i64:
+; SSE41-LABEL: sext_mulhsw_v8i16_ashr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhw %xmm1, %xmm0
; SSE41-NEXT: pmovsxwq %xmm0, %xmm4
@@ -1770,7 +1770,7 @@ define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhsw_v8i16_ashr_i64:
+; AVX2-LABEL: sext_mulhsw_v8i16_ashr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0
@@ -1778,7 +1778,7 @@ define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhsw_v8i16_ashr_i64:
+; AVX512-LABEL: sext_mulhsw_v8i16_ashr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0